135c7bb34SRandall Stewart /*- 2963fb2adSRandall Stewart * Copyright (c) 2016-2020 Netflix, Inc. 335c7bb34SRandall Stewart * 435c7bb34SRandall Stewart * Redistribution and use in source and binary forms, with or without 535c7bb34SRandall Stewart * modification, are permitted provided that the following conditions 635c7bb34SRandall Stewart * are met: 735c7bb34SRandall Stewart * 1. Redistributions of source code must retain the above copyright 835c7bb34SRandall Stewart * notice, this list of conditions and the following disclaimer. 935c7bb34SRandall Stewart * 2. Redistributions in binary form must reproduce the above copyright 1035c7bb34SRandall Stewart * notice, this list of conditions and the following disclaimer in the 1135c7bb34SRandall Stewart * documentation and/or other materials provided with the distribution. 1235c7bb34SRandall Stewart * 1335c7bb34SRandall Stewart * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 1435c7bb34SRandall Stewart * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 1535c7bb34SRandall Stewart * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 1635c7bb34SRandall Stewart * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 1735c7bb34SRandall Stewart * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 1835c7bb34SRandall Stewart * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 1935c7bb34SRandall Stewart * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 2035c7bb34SRandall Stewart * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 2135c7bb34SRandall Stewart * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 2235c7bb34SRandall Stewart * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 2335c7bb34SRandall Stewart * SUCH DAMAGE. 
2435c7bb34SRandall Stewart * 2535c7bb34SRandall Stewart * $FreeBSD$ 2635c7bb34SRandall Stewart */ 2735c7bb34SRandall Stewart 2835c7bb34SRandall Stewart #ifndef _NETINET_TCP_BBR_H_ 2935c7bb34SRandall Stewart #define _NETINET_TCP_BBR_H_ 3035c7bb34SRandall Stewart 3135c7bb34SRandall Stewart #define BBR_INITIAL_RTO 1000000 /* 1 second in micro-seconds */ 3235c7bb34SRandall Stewart /* Send map flags */ 3335c7bb34SRandall Stewart #define BBR_ACKED 0x0001 /* The remote endpoint acked this */ 3435c7bb34SRandall Stewart #define BBR_WAS_RENEGED 0x0002 /* The peer reneged the ack */ 3535c7bb34SRandall Stewart #define BBR_RXT_CLEARED 0x0004 /* ACK Cleared by the RXT timer */ 3635c7bb34SRandall Stewart #define BBR_OVERMAX 0x0008 /* We have more retran's then we can 3735c7bb34SRandall Stewart * fit */ 3835c7bb34SRandall Stewart #define BBR_SACK_PASSED 0x0010 /* A sack was done above this block */ 3935c7bb34SRandall Stewart #define BBR_WAS_SACKPASS 0x0020 /* We retransmitted due to SACK pass */ 4035c7bb34SRandall Stewart #define BBR_HAS_FIN 0x0040 /* segment is sent with fin */ 4135c7bb34SRandall Stewart #define BBR_TLP 0x0080 /* segment sent as tail-loss-probe */ 4235c7bb34SRandall Stewart #define BBR_HAS_SYN 0x0100 /* segment has the syn */ 4335c7bb34SRandall Stewart #define BBR_MARKED_LOST 0x0200 /* 4435c7bb34SRandall Stewart * This segments is lost and 4535c7bb34SRandall Stewart * totaled into bbr->rc_ctl.rc_lost 4635c7bb34SRandall Stewart */ 4735c7bb34SRandall Stewart #define BBR_RWND_COLLAPSED 0x0400 /* The peer collapsed the rwnd on the segment */ 4835c7bb34SRandall Stewart #define BBR_NUM_OF_RETRANS 7 4935c7bb34SRandall Stewart 5035c7bb34SRandall Stewart /* Defines for socket options to set pacing overheads */ 5135c7bb34SRandall Stewart #define BBR_INCL_ENET_OH 0x01 5235c7bb34SRandall Stewart #define BBR_INCL_IP_OH 0x02 5335c7bb34SRandall Stewart #define BBR_INCL_TCP_OH 0x03 5435c7bb34SRandall Stewart 5535c7bb34SRandall Stewart /* 5635c7bb34SRandall Stewart * With the 
addition of both measurement algorithms 5735c7bb34SRandall Stewart * I had to move over the size of a 5835c7bb34SRandall Stewart * cache line (unfortunately). For now there is 5935c7bb34SRandall Stewart * no way around this. We may be able to cut back 6035c7bb34SRandall Stewart * at some point I hope. 6135c7bb34SRandall Stewart */ 6235c7bb34SRandall Stewart struct bbr_sendmap { 6335c7bb34SRandall Stewart TAILQ_ENTRY(bbr_sendmap) r_next; /* seq number arrayed next */ 6435c7bb34SRandall Stewart TAILQ_ENTRY(bbr_sendmap) r_tnext; /* Time of tmit based next */ 6535c7bb34SRandall Stewart uint32_t r_start; /* Sequence number of the segment */ 6635c7bb34SRandall Stewart uint32_t r_end; /* End seq, this is 1 beyond actually */ 6735c7bb34SRandall Stewart 6835c7bb34SRandall Stewart uint32_t r_rtr_bytes; /* How many bytes have been retransmitted */ 6935c7bb34SRandall Stewart uint32_t r_delivered; /* Delivered amount at send */ 7035c7bb34SRandall Stewart 7135c7bb34SRandall Stewart uint32_t r_del_time; /* The time of the last delivery update */ 7235c7bb34SRandall Stewart uint8_t r_rtr_cnt:4, /* Retran count, index this -1 to get time 7335c7bb34SRandall Stewart * sent */ 7435c7bb34SRandall Stewart unused_bit:1, 7535c7bb34SRandall Stewart r_is_drain:1, /* In a draining cycle */ 7635c7bb34SRandall Stewart r_app_limited:1,/* We went app limited */ 7735c7bb34SRandall Stewart r_ts_valid:1; /* Timestamp field is valid (r_del_ack_ts) */ 7835c7bb34SRandall Stewart uint8_t r_dupack; /* Dup ack count */ 7935c7bb34SRandall Stewart uint8_t r_in_tmap:1, /* Flag to see if its in the r_tnext array */ 8035c7bb34SRandall Stewart r_is_smallmap:1,/* Was logged as a small-map send-map item */ 8135c7bb34SRandall Stewart r_is_gain:1, /* Was in gain cycle */ 8235c7bb34SRandall Stewart r_bbr_state:5; /* The BBR state at send */ 8335c7bb34SRandall Stewart uint8_t r_limit_type; /* is this entry counted against a limit? 
*/ 8435c7bb34SRandall Stewart 8535c7bb34SRandall Stewart uint16_t r_flags; /* Flags as defined above */ 8635c7bb34SRandall Stewart uint16_t r_spare16; 8735c7bb34SRandall Stewart uint32_t r_del_ack_ts; /* At send what timestamp of peer was (if r_ts_valid set) */ 8835c7bb34SRandall Stewart /****************Cache line*****************/ 8935c7bb34SRandall Stewart uint32_t r_tim_lastsent[BBR_NUM_OF_RETRANS]; 9035c7bb34SRandall Stewart /* 9135c7bb34SRandall Stewart * Question, should we instead just grab the sending b/w 9235c7bb34SRandall Stewart * from the filter with the gain and store it in a 9335c7bb34SRandall Stewart * uint64_t instead? 9435c7bb34SRandall Stewart */ 9535c7bb34SRandall Stewart uint32_t r_first_sent_time; /* Time of first pkt in flight sent */ 9635c7bb34SRandall Stewart uint32_t r_pacing_delay; /* pacing delay of this send */ 9735c7bb34SRandall Stewart uint32_t r_flight_at_send; /* flight at the time of the send */ 9835c7bb34SRandall Stewart #ifdef _KERNEL 9935c7bb34SRandall Stewart } __aligned(CACHE_LINE_SIZE); 10035c7bb34SRandall Stewart #else 10135c7bb34SRandall Stewart }; 10235c7bb34SRandall Stewart #endif 10335c7bb34SRandall Stewart #define BBR_LIMIT_TYPE_SPLIT 1 10435c7bb34SRandall Stewart 10535c7bb34SRandall Stewart TAILQ_HEAD(bbr_head, bbr_sendmap); 10635c7bb34SRandall Stewart 10735c7bb34SRandall Stewart #define BBR_SEGMENT_TIME_SIZE 1500 /* How many bytes in time_between */ 10835c7bb34SRandall Stewart 10935c7bb34SRandall Stewart #define BBR_MIN_SEG 1460 /* MSS size */ 11035c7bb34SRandall Stewart #define BBR_MAX_GAIN_VALUE 0xffff 11135c7bb34SRandall Stewart 11235c7bb34SRandall Stewart #define BBR_TIMER_FUDGE 1500 /* 1.5ms in micro seconds */ 11335c7bb34SRandall Stewart 11435c7bb34SRandall Stewart /* BW twiddle secret codes */ 11535c7bb34SRandall Stewart #define BBR_RED_BW_CONGSIG 0 /* We enter recovery and set using b/w */ 11635c7bb34SRandall Stewart #define BBR_RED_BW_RATECAL 1 /* We are calculating the loss rate */ 11735c7bb34SRandall 
#define BBR_RED_BW_USELRBW  	 2	/* We are dropping the lower b/w with
					 * cDR */
#define BBR_RED_BW_SETHIGHLOSS	 3	/* We have set our highloss value at
					 * exit from probe-rtt */
#define BBR_RED_BW_PE_CLREARLY	 4	/* We have decided to clear the
					 * reduction early */
#define BBR_RED_BW_PE_CLAFDEL	 5	/* We are clearing it on schedule
					 * delayed */
#define BBR_RED_BW_REC_ENDCLL	 6	/* Recover exits save high if needed
					 * and clear to start measuring */
#define BBR_RED_BW_PE_NOEARLY_OUT 7	/* Set pkt epoch judged that we do not
					 * get out of jail early */
/* For calculating a rate */
#define BBR_CALC_BW 	1
#define BBR_CALC_LOSS  	2

/* How an RTT sample was obtained */
#define BBR_RTT_BY_TIMESTAMP	0
#define BBR_RTT_BY_EXACTMATCH	1
#define BBR_RTT_BY_EARLIER_RET	2
#define BBR_RTT_BY_THIS_RETRAN	3
#define BBR_RTT_BY_SOME_RETRAN	4
#define BBR_RTT_BY_TSMATCHING	5

/* Markers to track where we enter persists from */
#define BBR_PERSISTS_FROM_1	1
#define BBR_PERSISTS_FROM_2	2
#define BBR_PERSISTS_FROM_3	3
#define BBR_PERSISTS_FROM_4	4
#define BBR_PERSISTS_FROM_5	5

/* magic cookies to ask for the RTT */
#define BBR_RTT_PROP    0
#define BBR_RTT_RACK    1
#define BBR_RTT_PKTRTT  2
#define BBR_SRTT	3

#define BBR_SACKED 0
#define BBR_CUM_ACKED 1

/* threshold in useconds where we consider we need a higher min cwnd */
#define BBR_HIGH_SPEED 1000
#define BBR_HIGHSPEED_NUM_MSS 12

#define MAX_REDUCE_RXT 3	/* What is the maximum times we are willing to
				 * reduce b/w in RTX's. Setting this has a
				 * multiplicative effect e.g. if we are
				 * reducing by 20% then setting it to 3 means
				 * you will have reduced the b/w estimate by >
				 * 60% before you stop. */
/*
 * We use the rate sample structure to
 * assist in single sack/ack rate and rtt
 * calculation. In the future we will expand
 * this in BBR to do forward rate sample
 * b/w estimation.
 */
#define BBR_RS_RTT_EMPTY 0x00000001	/* Nothing yet stored in RTT's */
#define BBR_RS_BW_EMPTY  0x00000002	/* Nothing yet stored in cDR */
#define BBR_RS_RTT_VALID 0x00000004	/* We have at least one valid RTT */
/* "VAILD" is a misspelling of VALID; the name is kept so existing users compile. */
#define BBR_RS_BW_VAILD  0x00000008	/* We have a valid cDR */
#define BBR_RS_EMPTY   (BBR_RS_RTT_EMPTY|BBR_RS_BW_EMPTY)
struct bbr_rtt_sample {
	uint32_t rs_flags;		/* BBR_RS_* flags defined above */
	uint32_t rs_rtt_lowest;
	uint32_t rs_rtt_lowest_sendtime;
	uint32_t rs_rtt_low_seq_start;

	uint32_t rs_rtt_highest;
	uint32_t rs_rtt_cnt;

	uint64_t rs_rtt_tot;
	uint32_t cur_rtt;
	uint32_t cur_rtt_bytecnt;

	uint32_t cur_rtt_rsmcnt;
	uint32_t rc_crtt_set:1,
		avail_bits:31;
	uint64_t rs_cDR;
};

/* RTT shrink reasons */
#define BBR_RTTS_INIT     0
#define BBR_RTTS_NEWRTT   1
#define BBR_RTTS_RTTPROBE 2
#define BBR_RTTS_WASIDLE  3
#define BBR_RTTS_PERSIST  4
#define BBR_RTTS_REACHTAR 5
#define BBR_RTTS_ENTERPROBE 6
#define BBR_RTTS_SHRINK_PG 7
#define BBR_RTTS_SHRINK_PG_FINAL 8
#define BBR_RTTS_NEW_TARGET 9
#define BBR_RTTS_LEAVE_DRAIN 10
#define BBR_RTTS_RESETS_VALUES 11

#define BBR_NUM_RATES 5
/* Rate flags */
#define BBR_RT_FLAG_FREE       0x00	/* Is on the free list */
#define BBR_RT_FLAG_INUSE      0x01	/* Has been allocated */
#define BBR_RT_FLAG_READY      0x02	/* Ready to initiate a measurement. */
#define BBR_RT_FLAG_CAPPED_PRE 0x04	/* Ready to cap if we send the next segment */
#define BBR_RT_FLAG_CAPPED     0x08	/* Measurement is capped */
#define BBR_RT_FLAG_PASTFA     0x10	/* Past the first ack. */
#define BBR_RT_FLAG_LIMITED    0x20	/* Saw application/cwnd or rwnd limited period */
#define BBR_RT_SEEN_A_ACK      0x40	/* A ack has been saved */
#define BBR_RT_PREV_RTT_SET    0x80	/* There was a RTT set in */
#define BBR_RT_PREV_SEND_TIME  0x100	/*
					 * There was a RTT send time set that can be used
					 * no snd_limits
					 */
#define BBR_RT_SET_GRADIENT    0x200
#define BBR_RT_TS_VALID        0x400


/*
 * One log record.  The "UU" and "xx" field markers are carried over
 * unchanged from the original header.
 */
struct bbr_log {
	union {
		struct bbr_sendmap *rsm;	/* For alloc/free */
		uint64_t sb_acc;	/* For out/ack or t-o */
	};
	struct tcpcb *tp;
	uint32_t t_flags;
	uint32_t th_seq;
	uint32_t th_ack;
	uint32_t snd_una;
	uint32_t snd_nxt;
	uint32_t snd_max;
	uint32_t snd_cwnd;
	uint32_t snd_wnd;
	uint32_t rc_lost;
	uint32_t target_cwnd;	/* UU */
	uint32_t inflight;	/* UU */
	uint32_t applimited;	/* UU */
	/* Things for BBR */
	uint32_t delivered;	/* UU */
	uint64_t cur_del_rate;	/* UU */
	uint64_t delRate;	/* UU */
	uint64_t rttProp;	/* UU */
	uint64_t lt_bw;		/* UU */
	uint32_t timeStamp;
	uint32_t time;
	uint32_t slot;		/* UU */
	uint32_t delayed_by;
	uint32_t exp_del;
	uint32_t pkts_out;
	uint32_t new_win;
	uint32_t hptsi_gain;	/* UU */
	uint32_t cwnd_gain;	/* UU */
	uint32_t epoch;		/* UU */
	uint32_t lt_epoch;	/* UU */
	/* Sack fun */
	uint32_t blk_start[4];	/* xx */
	uint32_t blk_end[4];
	uint32_t len;		/* Timeout T3=1, TLP=2, RACK=3 */
	uint8_t type;
	uint8_t n_sackblks;
	uint8_t applied;	/* UU */
	uint8_t inhpts;		/* UU */
	uint8_t ininput;	/* UU */
	uint8_t use_lt_bw;	/* UU */
};

/*
 * Header followed by a variable number of log records.  The trailing
 * array is a C99 flexible array member (previously a zero-length array);
 * sizeof(struct bbr_log_sysctl_out) still covers only the header.
 */
struct bbr_log_sysctl_out {
	uint32_t bbr_log_at;
	uint32_t bbr_log_max;
	struct bbr_log entries[];
};

/*
 * Magic numbers for logging timeout events if the
 * logging is enabled.
 */
#define BBR_TO_FRM_TMR  1
#define BBR_TO_FRM_TLP  2
#define BBR_TO_FRM_RACK 3
#define BBR_TO_FRM_KEEP 4
#define BBR_TO_FRM_PERSIST 5
#define BBR_TO_FRM_DELACK 6

#define BBR_SEES_STRETCH_ACK 1
#define BBR_SEES_COMPRESSED_ACKS 2


/*
 * As we get each SACK we wade through the
 * rc_map and mark off what is acked.
 * We also increment rc_sacked as well.
 *
 * We also pay attention to missing entries
 * based on the time and possibly mark them
 * for retransmit. If we do and we are not already
 * in recovery we enter recovery. In doing
 * so we clear prr_delivered/holes_rxt and prr_sent_dur_rec.
 * We also setup rc_next/rc_snd_nxt/rc_send_end so
 * we will know where to send from. When not in
 * recovery rc_next will be NULL and rc_snd_nxt should
 * equal snd_max.
 *
 * Whenever we retransmit from recovery we increment
 * rc_holes_rxt as we retran a block and mark it as retransmitted
 * with the time it was sent. During non-recovery sending we
 * add to our map and note the time down of any send expanding
 * the rc_map at the tail and moving rc_snd_nxt up with snd_max.
 *
 * In recovery during SACK/ACK processing if a chunk has
 * been retransmitted and it is now acked, we decrement rc_holes_rxt.
 * When we retransmit from the scoreboard we use
 * rc_next and rc_snd_nxt/rc_send_end to help us
 * find what needs to be retran.
 *
 * To calculate pipe we simply take (snd_max - snd_una) + rc_holes_rxt
 * This gets us the effect of RFC6675 pipe, counting twice for
 * bytes retransmitted.
 */

#define TT_BBR_FR_TMR	0x2001

#define BBR_SCALE 8
#define BBR_UNIT (1 << BBR_SCALE)

#define BBR_NUM_RTTS_FOR_DEL_LIMIT 8	/* How many pkt-rtts do we keep
					 * Delivery rate for */
#define BBR_NUM_RTTS_FOR_GOOG_DEL_LIMIT 10	/* How many pkt-rtts do we keep
						 * Delivery rate for google */

#define BBR_SECONDS_NO_RTT 10	/* 10 seconds with no RTT shrinkage */
#define BBR_PROBERTT_MAX 200	/* 200ms */
#define BBR_PROBERTT_NUM_MSS 4
#define BBR_STARTUP_EPOCHS 3
#define USECS_IN_MSEC 1000
/*
 * Time conversions.  USECS_IN_SECOND and MS_IN_USEC are not defined in
 * this header; they must be provided by the includer.  Arguments are
 * parenthesized so that expressions such as (a + b) convert as a whole.
 */
#define BBR_TIME_TO_SECONDS(a) ((a) / USECS_IN_SECOND)
#define BBR_TIME_TO_MILLI(a) ((a) / MS_IN_USEC)


/* BBR keeps time in usec's so we divide by 1000 and round up */
#define BBR_TS_TO_MS(t)  (((t) + 999) / MS_IN_USEC)

/*
 * Locking for the rack control block.
 * a) Locked by INP_WLOCK
 * b) Locked by the hpts-mutex
 *
 */
#define BBR_STATE_STARTUP   0x01
#define BBR_STATE_DRAIN     0x02
#define BBR_STATE_PROBE_BW  0x03
#define BBR_STATE_PROBE_RTT 0x04
#define BBR_STATE_IDLE_EXIT 0x05

/* Substate defines for STATE == PROBE_BW */
#define BBR_SUB_GAIN  0		/* State 0 where we are 5/4 BBR_UNIT */
#define BBR_SUB_DRAIN 1		/* State 1 where we are at 3/4 BBR_UNIT */
#define BBR_SUB_LEVEL1 2	/* State 1 first BBR_UNIT */
#define BBR_SUB_LEVEL2 3	/* State 2nd BBR_UNIT */
#define BBR_SUB_LEVEL3 4	/* State 3rd BBR_UNIT */
#define BBR_SUB_LEVEL4 5	/* State 4th BBR_UNIT */
#define BBR_SUB_LEVEL5 6	/* State 5th BBR_UNIT */
#define BBR_SUB_LEVEL6 7	/* State last BBR_UNIT */
#define BBR_SUBSTATE_COUNT 8

/* Single remaining reduce log */
#define BBR_REDUCE_AT_FR 5

#define BBR_BIG_LOG_SIZE 300000

/*
 * Stack-wide statistics.  Every member is a uint64_t; the trailing number
 * is the member's slot, which BBR_STAT_ADD() derives via offsetof().
 */
struct bbr_stats {
	uint64_t bbr_badfr;		/* 0 */
	uint64_t bbr_badfr_bytes;	/* 1 */
	uint64_t bbr_saw_oerr;		/* 2 */
	uint64_t bbr_saw_emsgsiz;	/* 3 */
	uint64_t bbr_reorder_seen;	/* 4 */
	uint64_t bbr_tlp_tot;		/* 5 */
	uint64_t bbr_tlp_newdata;	/* 6 */
	uint64_t bbr_offset_recovery;	/* 7 */
	uint64_t bbr_tlp_retran_fail;	/* 8 */
	uint64_t bbr_to_tot;		/* 9 */
	uint64_t bbr_to_arm_rack;	/* 10 */
	uint64_t bbr_enter_probertt;	/* 11 */
	uint64_t bbr_tlp_set;		/* 12 */
	uint64_t bbr_resends_set;	/* 13 */
	uint64_t bbr_force_output;	/* 14 */
	uint64_t bbr_to_arm_tlp;	/* 15 */
	uint64_t bbr_paced_segments;	/* 16 */
	uint64_t bbr_saw_enobuf;	/* 17 */
	uint64_t bbr_to_alloc_failed;	/* 18 */
	uint64_t bbr_to_alloc_emerg;	/* 19 */
	uint64_t bbr_sack_proc_all;	/* 20 */
	uint64_t bbr_sack_proc_short;	/* 21 */
	uint64_t bbr_sack_proc_restart;	/* 22 */
	uint64_t bbr_to_alloc;		/* 23 */
	uint64_t bbr_offset_drop;	/* 24 */
	uint64_t bbr_runt_sacks;	/* 25 */
	uint64_t bbr_sack_passed;	/* 26 */
	uint64_t bbr_rlock_left_ret0;	/* 27 */
	uint64_t bbr_rlock_left_ret1;	/* 28 */
	uint64_t bbr_dynamic_rwnd;	/* 29 */
	uint64_t bbr_static_rwnd;	/* 30 */
	uint64_t bbr_sack_blocks;	/* 31 */
	uint64_t bbr_sack_blocks_skip;	/* 32 */
	uint64_t bbr_sack_search_both;	/* 33 */
	uint64_t bbr_sack_search_fwd;	/* 34 */
	uint64_t bbr_sack_search_back;	/* 35 */
	uint64_t bbr_plain_acks;	/* 36 */
	uint64_t bbr_acks_with_sacks;	/* 37 */
	uint64_t bbr_progress_drops;	/* 38 */
	uint64_t bbr_early;		/* 39 */
	uint64_t bbr_reneges_seen;	/* 40 */
	uint64_t bbr_persist_reneg;	/* 41 */
	uint64_t bbr_dropped_af_data;	/* 42 */
	uint64_t bbr_failed_mbuf_aloc;	/* 43 */
	uint64_t bbr_cwnd_limited;	/* 44 */
	uint64_t bbr_rwnd_limited;	/* 45 */
	uint64_t bbr_app_limited;	/* 46 */
	uint64_t bbr_force_timer_start;	/* 47 */
	uint64_t bbr_hpts_min_time;	/* 48 */
	uint64_t bbr_meets_tso_thresh;	/* 49 */
	uint64_t bbr_miss_tso_rwnd;	/* 50 */
	uint64_t bbr_miss_tso_cwnd;	/* 51 */
	uint64_t bbr_miss_tso_app;	/* 52 */
	uint64_t bbr_miss_retran;	/* 53 */
	uint64_t bbr_miss_tlp;		/* 54 */
	uint64_t bbr_miss_unknown;	/* 55 */
	uint64_t bbr_hdwr_rl_add_ok;	/* 56 */
	uint64_t bbr_hdwr_rl_add_fail;	/* 57 */
	uint64_t bbr_hdwr_rl_mod_ok;	/* 58 */
	uint64_t bbr_hdwr_rl_mod_fail;	/* 59 */
	uint64_t bbr_collapsed_win;	/* 60 */
	uint64_t bbr_alloc_limited;	/* 61 */
	uint64_t bbr_alloc_limited_conns; /* 62 */
	uint64_t bbr_split_limited;	/* 63 */
};

/*
 * The structure bbr_opts_stats is a simple
 * way to see how many options are being
 * changed in the stack.
 */
struct bbr_opts_stats {
	uint64_t tcp_bbr_pace_per_sec;
	uint64_t tcp_bbr_pace_del_tar;
	uint64_t tcp_bbr_pace_seg_max;
	uint64_t tcp_bbr_pace_seg_min;
	uint64_t tcp_bbr_pace_cross;
	uint64_t tcp_bbr_drain_inc_extra;
	uint64_t tcp_bbr_unlimited;
	uint64_t tcp_bbr_iwintso;
	uint64_t tcp_bbr_rec_over_hpts;
	uint64_t tcp_bbr_recforce;
	uint64_t tcp_bbr_startup_pg;
	uint64_t tcp_bbr_drain_pg;
	uint64_t tcp_bbr_rwnd_is_app;
	uint64_t tcp_bbr_probe_rtt_int;
	uint64_t tcp_bbr_one_retran;
	uint64_t tcp_bbr_startup_loss_exit;
	uint64_t tcp_bbr_use_lowgain;
	uint64_t tcp_bbr_lowgain_thresh;
	uint64_t tcp_bbr_lowgain_half;
	uint64_t tcp_bbr_lowgain_fd;
	uint64_t tcp_bbr_usedel_rate;
	uint64_t tcp_bbr_min_rto;
	uint64_t tcp_bbr_max_rto;
	uint64_t tcp_rack_pace_max_seg;
	uint64_t tcp_rack_min_to;
	uint64_t tcp_rack_reord_thresh;
	uint64_t tcp_rack_reord_fade;
	uint64_t tcp_rack_tlp_thresh;
	uint64_t tcp_rack_pkt_delay;
	uint64_t tcp_bbr_startup_exit_epoch;
	uint64_t tcp_bbr_ack_comp_alg;
	uint64_t tcp_rack_cheat;
	uint64_t tcp_iwnd_tso;
	uint64_t tcp_utter_max_tso;
	uint64_t tcp_hdwr_pacing;
	uint64_t tcp_extra_state;
	uint64_t tcp_floor_min_tso;
	/* New */
	uint64_t tcp_bbr_algorithm;
	uint64_t tcp_bbr_tslimits;
	uint64_t tcp_bbr_probertt_len;
	uint64_t tcp_bbr_probertt_gain;
	uint64_t tcp_bbr_topaceout;
	uint64_t tcp_use_rackcheat;
	uint64_t tcp_delack;
	uint64_t tcp_maxpeak;
	uint64_t tcp_retran_wtso;
	uint64_t tcp_data_ac;
	uint64_t tcp_ts_raises;
	uint64_t tcp_pacing_oh_tmr;
	uint64_t tcp_pacing_oh;
	uint64_t tcp_policer_det;
};


#ifdef _KERNEL
/*
 * Counter arrays indexed by member position within the matching stats
 * structure: offsetof(member) / sizeof(uint64_t).
 */
#define BBR_STAT_SIZE (sizeof(struct bbr_stats)/sizeof(uint64_t))
extern counter_u64_t bbr_stat_arry[BBR_STAT_SIZE];
#define BBR_STAT_ADD(name, amm) counter_u64_add(bbr_stat_arry[(offsetof(struct bbr_stats, name)/sizeof(uint64_t))], (amm))
#define BBR_STAT_INC(name) BBR_STAT_ADD(name, 1)
/*
 * Sized from bbr_opts_stats, the structure BBR_OPTS_ADD() indexes with.
 * This was previously sized from struct bbr_stats.
 */
#define BBR_OPTS_SIZE (sizeof(struct bbr_opts_stats)/sizeof(uint64_t))
extern counter_u64_t bbr_opts_arry[BBR_OPTS_SIZE];
#define BBR_OPTS_ADD(name, amm) counter_u64_add(bbr_opts_arry[(offsetof(struct bbr_opts_stats, name)/sizeof(uint64_t))], (amm))
#define BBR_OPTS_INC(name) BBR_OPTS_ADD(name, 1)
#endif
51835c7bb34SRandall Stewart 51935c7bb34SRandall Stewart #define BBR_NUM_LOSS_RATES 3 52035c7bb34SRandall Stewart #define BBR_NUM_BW_RATES 3 52135c7bb34SRandall Stewart 52235c7bb34SRandall Stewart #define BBR_RECOVERY_LOWRTT 1 52335c7bb34SRandall Stewart #define BBR_RECOVERY_MEDRTT 2 52435c7bb34SRandall Stewart #define BBR_RECOVERY_HIGHRTT 3 52535c7bb34SRandall Stewart #define BBR_RECOVERY_EXTREMERTT 4 52635c7bb34SRandall Stewart 52735c7bb34SRandall Stewart 52835c7bb34SRandall Stewart struct bbr_control { 52935c7bb34SRandall Stewart /*******************************/ 53035c7bb34SRandall Stewart /* Cache line 2 from bbr start */ 53135c7bb34SRandall Stewart /*******************************/ 53235c7bb34SRandall Stewart struct bbr_head rc_map; /* List of all segments Lock(a) */ 53335c7bb34SRandall Stewart struct bbr_head rc_tmap; /* List in transmit order Lock(a) */ 53435c7bb34SRandall Stewart struct bbr_sendmap *rc_resend; /* something we have been asked to 53535c7bb34SRandall Stewart * resend */ 53635c7bb34SRandall Stewart uint32_t rc_last_delay_val; /* How much we expect to delay Lock(a) */ 53735c7bb34SRandall Stewart uint32_t rc_bbr_hptsi_gain:16, /* Current hptsi gain Lock(a) */ 53835c7bb34SRandall Stewart rc_hpts_flags:16; /* flags on whats on the pacer wheel */ 53935c7bb34SRandall Stewart 54035c7bb34SRandall Stewart uint32_t rc_delivered; /* BRR delivered amount Lock(a) */ 54135c7bb34SRandall Stewart uint32_t rc_hptsi_agg_delay; /* How much time are we behind */ 54235c7bb34SRandall Stewart 54335c7bb34SRandall Stewart uint32_t rc_flight_at_input; 54435c7bb34SRandall Stewart uint32_t rc_lost_bytes; /* Total bytes currently marked lost */ 54535c7bb34SRandall Stewart /*******************************/ 54635c7bb34SRandall Stewart /* Cache line 3 from bbr start */ 54735c7bb34SRandall Stewart /*******************************/ 54835c7bb34SRandall Stewart struct time_filter rc_delrate; 54935c7bb34SRandall Stewart /*******************************/ 55035c7bb34SRandall Stewart 
/* Cache line 4 from bbr start */ 55135c7bb34SRandall Stewart /*******************************/ 55235c7bb34SRandall Stewart struct bbr_head rc_free; /* List of Free map entries Lock(a) */ 55335c7bb34SRandall Stewart struct bbr_sendmap *rc_tlp_send; /* something we have been 55435c7bb34SRandall Stewart * asked to resend */ 55535c7bb34SRandall Stewart uint32_t rc_del_time; 55635c7bb34SRandall Stewart uint32_t rc_target_at_state; /* Target for a state */ 55735c7bb34SRandall Stewart 55835c7bb34SRandall Stewart uint16_t rc_free_cnt; /* Number of free entries on the rc_free list 55935c7bb34SRandall Stewart * Lock(a) */ 56035c7bb34SRandall Stewart uint16_t rc_startup_pg; 56135c7bb34SRandall Stewart 56235c7bb34SRandall Stewart uint32_t cur_rtt; /* Last RTT from ack */ 56335c7bb34SRandall Stewart 56435c7bb34SRandall Stewart 56535c7bb34SRandall Stewart uint32_t rc_went_idle_time; /* Used for persits to see if its 56635c7bb34SRandall Stewart * probe-rtt qualified */ 56735c7bb34SRandall Stewart uint32_t rc_pace_max_segs:17, /* How much in any single TSO we send Lock(a) */ 56835c7bb34SRandall Stewart rc_pace_min_segs:15; /* The minimum single segment size before we enter persists */ 56935c7bb34SRandall Stewart 57035c7bb34SRandall Stewart uint32_t rc_rtt_shrinks; /* Time of last rtt shrinkage Lock(a) */ 57135c7bb34SRandall Stewart uint32_t r_app_limited_until; 57235c7bb34SRandall Stewart uint32_t rc_timer_exp; /* If a timer ticks of expiry */ 57335c7bb34SRandall Stewart uint32_t rc_rcv_epoch_start; /* Start time of the Epoch Lock(a) */ 57435c7bb34SRandall Stewart 57535c7bb34SRandall Stewart /*******************************/ 57635c7bb34SRandall Stewart /* Cache line 5 from bbr start */ 57735c7bb34SRandall Stewart /*******************************/ 57835c7bb34SRandall Stewart 57935c7bb34SRandall Stewart uint32_t rc_lost_at_pktepoch; /* what the lost value was at the last 58035c7bb34SRandall Stewart * pkt-epoch */ 58135c7bb34SRandall Stewart uint32_t r_measurement_count; /* count of 
measurement applied lock(a) */ 58235c7bb34SRandall Stewart 58335c7bb34SRandall Stewart 58435c7bb34SRandall Stewart uint32_t rc_last_tlp_seq; /* Last tlp sequence Lock(a) */ 58535c7bb34SRandall Stewart uint16_t rc_reorder_shift; /* Socket option value Lock(a) */ 58635c7bb34SRandall Stewart uint16_t rc_pkt_delay; /* Socket option value Lock(a) */ 58735c7bb34SRandall Stewart 58835c7bb34SRandall Stewart struct bbr_sendmap *rc_sacklast; /* sack remembered place 58935c7bb34SRandall Stewart * Lock(a) */ 59035c7bb34SRandall Stewart struct bbr_sendmap *rc_next; /* remembered place where we next 59135c7bb34SRandall Stewart * retransmit at Lock(a) */ 59235c7bb34SRandall Stewart 59335c7bb34SRandall Stewart uint32_t rc_sacked; /* Tot sacked on scoreboard Lock(a) */ 59435c7bb34SRandall Stewart uint32_t rc_holes_rxt; /* Tot retraned from scoreboard Lock(a) */ 59535c7bb34SRandall Stewart 59635c7bb34SRandall Stewart uint32_t rc_reorder_ts; /* Last time we saw reordering Lock(a) */ 59735c7bb34SRandall Stewart uint32_t rc_init_rwnd; /* Initial rwnd when we transitioned */ 59835c7bb34SRandall Stewart /*- --- 5995aa0576bSEd Maste * used only initial and close 60035c7bb34SRandall Stewart */ 60135c7bb34SRandall Stewart uint32_t rc_high_rwnd; /* Highest rwnd seen */ 60235c7bb34SRandall Stewart uint32_t rc_lowest_rtt; /* Smallest RTT we have seen */ 60335c7bb34SRandall Stewart 60435c7bb34SRandall Stewart uint32_t rc_last_rtt; /* Last valid measured RTT that ack'd data */ 60535c7bb34SRandall Stewart uint32_t bbr_cross_over; 60635c7bb34SRandall Stewart 60735c7bb34SRandall Stewart /*******************************/ 60835c7bb34SRandall Stewart /* Cache line 6 from bbr start */ 60935c7bb34SRandall Stewart /*******************************/ 61035c7bb34SRandall Stewart struct sack_filter bbr_sf; 61135c7bb34SRandall Stewart 61235c7bb34SRandall Stewart /*******************************/ 61335c7bb34SRandall Stewart /* Cache line 7 from bbr start */ 61435c7bb34SRandall Stewart 
/*******************************/ 61535c7bb34SRandall Stewart struct time_filter_small rc_rttprop; 61635c7bb34SRandall Stewart uint32_t last_inbound_ts; /* Peers last timestamp */ 61735c7bb34SRandall Stewart 61835c7bb34SRandall Stewart uint32_t rc_inc_tcp_oh: 1, 61935c7bb34SRandall Stewart rc_inc_ip_oh: 1, 62035c7bb34SRandall Stewart rc_inc_enet_oh:1, 62135c7bb34SRandall Stewart rc_incr_tmrs:1, 62235c7bb34SRandall Stewart restrict_growth:28; 62335c7bb34SRandall Stewart uint32_t rc_lt_epoch_use; /* When we started lt-bw use Lock(a) */ 62435c7bb34SRandall Stewart 62535c7bb34SRandall Stewart uint32_t rc_recovery_start; /* Time we start recovery Lock(a) */ 62635c7bb34SRandall Stewart uint32_t rc_lt_del; /* Delivered at lt bw sampling start Lock(a) */ 62735c7bb34SRandall Stewart 62835c7bb34SRandall Stewart uint64_t rc_bbr_cur_del_rate; /* Current measured delivery rate 62935c7bb34SRandall Stewart * Lock(a) */ 63035c7bb34SRandall Stewart 63135c7bb34SRandall Stewart /*******************************/ 63235c7bb34SRandall Stewart /* Cache line 8 from bbr start */ 63335c7bb34SRandall Stewart /*******************************/ 63435c7bb34SRandall Stewart uint32_t rc_cwnd_on_ent; /* On entry to recovery the cwnd 63535c7bb34SRandall Stewart * Lock(a) */ 63635c7bb34SRandall Stewart uint32_t rc_agg_early; /* aggregate amount early */ 63735c7bb34SRandall Stewart 63835c7bb34SRandall Stewart uint32_t rc_rcvtime; /* When we last received data Lock(a) */ 63935c7bb34SRandall Stewart uint32_t rc_pkt_epoch_del; /* seq num that we need for RTT epoch */ 64035c7bb34SRandall Stewart 64135c7bb34SRandall Stewart uint32_t rc_pkt_epoch; /* Epoch based on packet RTTs */ 64235c7bb34SRandall Stewart uint32_t rc_pkt_epoch_time; /* Time we started the pkt epoch */ 64335c7bb34SRandall Stewart 64435c7bb34SRandall Stewart uint32_t rc_pkt_epoch_rtt; /* RTT using the packet epoch */ 64535c7bb34SRandall Stewart uint32_t rc_rtt_epoch; /* Current RTT epoch, it ticks every rttProp 64635c7bb34SRandall Stewart * 
Lock(a) */ 64735c7bb34SRandall Stewart uint32_t lowest_rtt; 64835c7bb34SRandall Stewart uint32_t bbr_smallest_srtt_this_state; 64935c7bb34SRandall Stewart 65035c7bb34SRandall Stewart uint32_t rc_lt_epoch; /* LT epoch start of bw_sampling */ 65135c7bb34SRandall Stewart uint32_t rc_lost_at_startup; 65235c7bb34SRandall Stewart 65335c7bb34SRandall Stewart uint32_t rc_bbr_state_atflight; 65435c7bb34SRandall Stewart uint32_t rc_bbr_last_startup_epoch; /* Last startup epoch where we 65535c7bb34SRandall Stewart * increased 20% */ 65635c7bb34SRandall Stewart uint32_t rc_bbr_enters_probertt; /* Timestamp we entered 65735c7bb34SRandall Stewart * probertt Lock(a) */ 65835c7bb34SRandall Stewart uint32_t rc_lt_time; /* Time of lt sampling start Lock(a) */ 65935c7bb34SRandall Stewart 66035c7bb34SRandall Stewart /*******************************/ 66135c7bb34SRandall Stewart /* Cache line 9 from bbr start */ 66235c7bb34SRandall Stewart /*******************************/ 66335c7bb34SRandall Stewart uint64_t rc_lt_bw; /* LT bw calculated Lock(a) */ 66435c7bb34SRandall Stewart uint64_t rc_bbr_lastbtlbw; /* For startup, what was last btlbw I 66535c7bb34SRandall Stewart * saw to check the 20% gain Lock(a) */ 66635c7bb34SRandall Stewart 66735c7bb34SRandall Stewart 66835c7bb34SRandall Stewart uint32_t rc_bbr_cwnd_gain; /* Current cwnd gain Lock(a) */ 66935c7bb34SRandall Stewart uint32_t rc_pkt_epoch_loss_rate; /* pkt-epoch loss rate */ 67035c7bb34SRandall Stewart 67135c7bb34SRandall Stewart uint32_t rc_saved_cwnd; /* Saved cwnd during Probe-rtt drain Lock(a) */ 67235c7bb34SRandall Stewart uint32_t substate_pe; 67335c7bb34SRandall Stewart 67435c7bb34SRandall Stewart uint32_t rc_lost; /* Number of bytes lost Lock(a) */ 67535c7bb34SRandall Stewart uint32_t rc_exta_time_gd; /* How much extra time we got in d/g */ 67635c7bb34SRandall Stewart 67735c7bb34SRandall Stewart uint32_t rc_lt_lost; /* Number of lt bytes lost at sampling start 67835c7bb34SRandall Stewart * Lock(a) */ 67935c7bb34SRandall 
Stewart uint32_t rc_bbr_state_time; 68035c7bb34SRandall Stewart 68135c7bb34SRandall Stewart uint32_t rc_min_to; /* Socket option value Lock(a) */ 68235c7bb34SRandall Stewart uint32_t rc_initial_hptsi_bw; /* Our initial startup bw Lock(a) */ 68335c7bb34SRandall Stewart 68435c7bb34SRandall Stewart uint32_t bbr_lost_at_state; /* Temp counter debug lost value as we 68535c7bb34SRandall Stewart * enter a state */ 68635c7bb34SRandall Stewart /*******************************/ 68735c7bb34SRandall Stewart /* Cache line 10 from bbr start */ 68835c7bb34SRandall Stewart /*******************************/ 68935c7bb34SRandall Stewart uint32_t rc_level_state_extra; 69035c7bb34SRandall Stewart uint32_t rc_red_cwnd_pe; 69135c7bb34SRandall Stewart const struct tcp_hwrate_limit_table *crte; 69235c7bb34SRandall Stewart uint64_t red_bw; 69335c7bb34SRandall Stewart 69435c7bb34SRandall Stewart uint32_t rc_probertt_int; 69535c7bb34SRandall Stewart uint32_t rc_probertt_srttchktim; /* Time we last did a srtt 69635c7bb34SRandall Stewart * check */ 69735c7bb34SRandall Stewart uint32_t gain_epoch; /* Epoch we should be out of gain */ 69835c7bb34SRandall Stewart uint32_t rc_min_rto_ms; 69935c7bb34SRandall Stewart 70035c7bb34SRandall Stewart uint32_t rc_reorder_fade; /* Socket option value Lock(a) */ 70135c7bb34SRandall Stewart uint32_t last_startup_measure; 70235c7bb34SRandall Stewart 70335c7bb34SRandall Stewart int32_t bbr_hptsi_per_second; 70435c7bb34SRandall Stewart int32_t bbr_hptsi_segments_delay_tar; 70535c7bb34SRandall Stewart 70635c7bb34SRandall Stewart int32_t bbr_hptsi_segments_max; 70735c7bb34SRandall Stewart uint32_t bbr_rttprobe_gain_val; 70835c7bb34SRandall Stewart /*******************************/ 70935c7bb34SRandall Stewart /* Cache line 11 from bbr start */ 71035c7bb34SRandall Stewart /*******************************/ 71135c7bb34SRandall Stewart uint32_t cur_rtt_send_time; /* Time we sent our rtt measured packet */ 71235c7bb34SRandall Stewart uint32_t bbr_peer_tsratio; /* Our 
calculated ts ratio to multply */ 71335c7bb34SRandall Stewart uint32_t bbr_ts_check_tstmp; /* When we filled it the TS that came on the ack */ 71435c7bb34SRandall Stewart uint32_t bbr_ts_check_our_cts; /* When we filled it the cts of the send */ 71535c7bb34SRandall Stewart uint32_t rc_tlp_rxt_last_time; 71635c7bb34SRandall Stewart uint32_t bbr_smallest_srtt_state2; 71735c7bb34SRandall Stewart uint32_t bbr_hdwr_cnt_noset_snt; /* count of hw pacing sends during delay */ 71835c7bb34SRandall Stewart uint32_t startup_last_srtt; 71935c7bb34SRandall Stewart uint32_t rc_ack_hdwr_delay; 72035c7bb34SRandall Stewart uint32_t highest_hdwr_delay; /* Largest delay we have seen from hardware */ 72135c7bb34SRandall Stewart uint32_t non_gain_extra; 72235c7bb34SRandall Stewart uint32_t recovery_lr; /* The sum of the loss rate from the pe's during recovery */ 72335c7bb34SRandall Stewart uint32_t last_in_probertt; 72435c7bb34SRandall Stewart uint32_t flightsize_at_drain; /* In draining what was the last marked flight size */ 72535c7bb34SRandall Stewart uint32_t rc_pe_of_prtt; /* PE we went into probe-rtt */ 72635c7bb34SRandall Stewart uint32_t ts_in; /* ts that went with the last rtt */ 72735c7bb34SRandall Stewart 72835c7bb34SRandall Stewart uint16_t rc_tlp_seg_send_cnt; /* Number of times we have TLP sent 72935c7bb34SRandall Stewart * rc_last_tlp_seq Lock(a) */ 73035c7bb34SRandall Stewart uint16_t rc_drain_pg; 73135c7bb34SRandall Stewart uint32_t rc_num_maps_alloced; /* num send map entries allocated */ 73235c7bb34SRandall Stewart uint32_t rc_num_split_allocs; /* num split map entries allocated */ 73335c7bb34SRandall Stewart uint16_t rc_num_small_maps_alloced; /* Number of sack blocks 73435c7bb34SRandall Stewart * allocated */ 73535c7bb34SRandall Stewart uint16_t bbr_hptsi_bytes_min; 73635c7bb34SRandall Stewart 73735c7bb34SRandall Stewart uint16_t bbr_hptsi_segments_floor; 73835c7bb34SRandall Stewart uint16_t bbr_utter_max; 73935c7bb34SRandall Stewart uint16_t bbr_google_discount; 
74035c7bb34SRandall Stewart 74135c7bb34SRandall Stewart }; 74235c7bb34SRandall Stewart 74335c7bb34SRandall Stewart 74435c7bb34SRandall Stewart struct socket; 74535c7bb34SRandall Stewart struct tcp_bbr { 74635c7bb34SRandall Stewart /* First cache line 0x00 */ 74735c7bb34SRandall Stewart int32_t(*r_substate) (struct mbuf *, struct tcphdr *, 74835c7bb34SRandall Stewart struct socket *, struct tcpcb *, struct tcpopt *, 749963fb2adSRandall Stewart int32_t, int32_t, uint32_t, int32_t, int32_t, uint8_t); /* Lock(a) */ 75035c7bb34SRandall Stewart struct tcpcb *rc_tp; /* The tcpcb Lock(a) */ 75135c7bb34SRandall Stewart struct inpcb *rc_inp; /* The inpcb Lock(a) */ 75235c7bb34SRandall Stewart struct timeval rc_tv; 75335c7bb34SRandall Stewart uint32_t rc_pacer_started; /* Time we started the pacer */ 75435c7bb34SRandall Stewart uint16_t no_pacing_until:8, /* No pacing until N packet epochs */ 75535c7bb34SRandall Stewart ts_can_raise:1,/* TS b/w calculations can raise the bw higher */ 75635c7bb34SRandall Stewart skip_gain:1, /* Skip the gain cycle (hardware pacing) */ 75735c7bb34SRandall Stewart gain_is_limited:1, /* With hardware pacing we are limiting gain */ 75835c7bb34SRandall Stewart output_error_seen:1, 75935c7bb34SRandall Stewart oerror_cnt:4, 76035c7bb34SRandall Stewart hw_pacing_set:1; /* long enough has passed for us to start pacing */ 76135c7bb34SRandall Stewart uint16_t xxx_r_ack_count; /* During recovery count of ack's received 76235c7bb34SRandall Stewart * that added data since output */ 76335c7bb34SRandall Stewart uint16_t bbr_segs_rcvd; /* In Segment count since we sent a ack */ 76435c7bb34SRandall Stewart 76535c7bb34SRandall Stewart uint8_t bbr_timer_src:4, /* Used for debugging Lock(a) */ 76635c7bb34SRandall Stewart bbr_use_rack_cheat:1, /* Use the rack cheat */ 76735c7bb34SRandall Stewart bbr_init_win_cheat:1, /* Send full IW for TSO */ 76835c7bb34SRandall Stewart bbr_attempt_hdwr_pace:1,/* Try to do hardware pacing */ 76935c7bb34SRandall Stewart 
bbr_hdrw_pacing:1; /* Hardware pacing is available */ 77035c7bb34SRandall Stewart uint8_t bbr_hdw_pace_ena:1, /* Does the connection allow hardware pacing to be attempted */ 77135c7bb34SRandall Stewart bbr_prev_in_rec:1, /* We were previously in recovery */ 77235c7bb34SRandall Stewart pkt_conservation:1, 77335c7bb34SRandall Stewart use_policer_detection:1, 77435c7bb34SRandall Stewart xxx_bbr_hdw_pace_idx:4; /* If hardware pacing is on, index to slot in pace tbl */ 77535c7bb34SRandall Stewart uint16_t r_wanted_output:1, 77635c7bb34SRandall Stewart rtt_valid:1, 77735c7bb34SRandall Stewart rc_timer_first:1, 77835c7bb34SRandall Stewart rc_output_starts_timer:1, 77935c7bb34SRandall Stewart rc_resends_use_tso:1, 78035c7bb34SRandall Stewart rc_all_timers_stopped:1, 78135c7bb34SRandall Stewart rc_loss_exit:1, 78235c7bb34SRandall Stewart rc_ack_was_delayed:1, 78335c7bb34SRandall Stewart rc_lt_is_sampling:1, 78435c7bb34SRandall Stewart rc_filled_pipe:1, 78535c7bb34SRandall Stewart rc_tlp_new_data:1, 78635c7bb34SRandall Stewart rc_hit_state_1:1, 78735c7bb34SRandall Stewart rc_ts_valid:1, 78835c7bb34SRandall Stewart rc_prtt_set_ts:1, 78935c7bb34SRandall Stewart rc_is_pkt_epoch_now:1, 79035c7bb34SRandall Stewart rc_has_collapsed:1; 79135c7bb34SRandall Stewart 79235c7bb34SRandall Stewart uint8_t r_state:4, /* Current bbr state Lock(a) */ 79335c7bb34SRandall Stewart r_agg_early_set:1, /* Did we get called early */ 79435c7bb34SRandall Stewart r_init_rtt:1, 79535c7bb34SRandall Stewart r_use_policer:1, /* For google mode only */ 79635c7bb34SRandall Stewart r_recovery_bw:1; 79735c7bb34SRandall Stewart uint8_t r_timer_override:1, /* pacer override Lock(a) 0/1 */ 79835c7bb34SRandall Stewart rc_in_persist:1, 79935c7bb34SRandall Stewart rc_lt_use_bw:1, 80035c7bb34SRandall Stewart rc_allow_data_af_clo:1, 80135c7bb34SRandall Stewart rc_tlp_rtx_out:1, /* A TLP is in flight */ 80235c7bb34SRandall Stewart rc_tlp_in_progress:1, /* a TLP timer is running needed? 
*/ 80335c7bb34SRandall Stewart rc_use_idle_restart:1; /* Do we restart fast after idle (persist or applim) */ 80435c7bb34SRandall Stewart uint8_t rc_bbr_state:3, /* What is the major BBR state */ 80535c7bb34SRandall Stewart rc_bbr_substate:3, /* For probeBW state */ 80635c7bb34SRandall Stewart r_is_v6:1, 80735c7bb34SRandall Stewart rc_past_init_win:1; 80835c7bb34SRandall Stewart uint8_t rc_last_options; 80935c7bb34SRandall Stewart uint8_t rc_tlp_threshold; /* Socket option value Lock(a) */ 81035c7bb34SRandall Stewart uint8_t rc_max_rto_sec; 81135c7bb34SRandall Stewart uint8_t rc_cwnd_limited:1, /* We are cwnd limited */ 81235c7bb34SRandall Stewart rc_tmr_stopped:7; /* What timers have been stopped */ 81335c7bb34SRandall Stewart uint8_t rc_use_google:1, 81435c7bb34SRandall Stewart rc_use_ts_limit:1, 81535c7bb34SRandall Stewart rc_ts_data_set:1, /* We have filled a set point to determine */ 81635c7bb34SRandall Stewart rc_ts_clock_set:1, /* We have determined the ts type */ 81735c7bb34SRandall Stewart rc_ts_cant_be_used:1, /* We determined we can't use ts values */ 81835c7bb34SRandall Stewart rc_ack_is_cumack:1, 81935c7bb34SRandall Stewart rc_no_pacing:1, 82035c7bb34SRandall Stewart alloc_limit_reported:1; 82135c7bb34SRandall Stewart uint8_t rc_init_win; 82235c7bb34SRandall Stewart /* Cache line 2 0x40 */ 82335c7bb34SRandall Stewart struct bbr_control r_ctl; 82435c7bb34SRandall Stewart #ifdef _KERNEL 82535c7bb34SRandall Stewart } __aligned(CACHE_LINE_SIZE); 82635c7bb34SRandall Stewart #else 82735c7bb34SRandall Stewart }; 82835c7bb34SRandall Stewart #endif 82935c7bb34SRandall Stewart 83035c7bb34SRandall Stewart #endif 831