xref: /freebsd/sys/netinet/tcp_stacks/tailq_hash.c (revision d0b2dbfa)
1 #include <sys/cdefs.h>
2 #include "opt_inet.h"
3 #include "opt_inet6.h"
4 #include "opt_ipsec.h"
5 #include "opt_ratelimit.h"
6 #include "opt_kern_tls.h"
7 #include <sys/param.h>
8 #include <sys/arb.h>
9 #include <sys/module.h>
10 #include <sys/kernel.h>
11 #ifdef TCP_HHOOK
12 #include <sys/hhook.h>
13 #endif
14 #include <sys/lock.h>
15 #include <sys/malloc.h>
16 #include <sys/lock.h>
17 #include <sys/mutex.h>
18 #include <sys/mbuf.h>
19 #include <sys/proc.h>		/* for proc0 declaration */
20 #include <sys/socket.h>
21 #include <sys/socketvar.h>
22 #include <sys/sysctl.h>
23 #include <sys/systm.h>
24 #ifdef STATS
25 #include <sys/qmath.h>
26 #include <sys/tree.h>
27 #include <sys/stats.h> /* Must come after qmath.h and tree.h */
28 #else
29 #include <sys/tree.h>
30 #endif
31 #include <sys/refcount.h>
32 #include <sys/queue.h>
33 #include <sys/tim_filter.h>
34 #include <sys/smp.h>
35 #include <sys/kthread.h>
36 #include <sys/kern_prefetch.h>
37 #include <sys/protosw.h>
38 #ifdef TCP_ACCOUNTING
39 #include <sys/sched.h>
40 #include <machine/cpu.h>
41 #endif
42 #include <vm/uma.h>
43 
44 #include <net/route.h>
45 #include <net/route/nhop.h>
46 #include <net/vnet.h>
47 
48 #define TCPSTATES		/* for logging */
49 
50 #include <netinet/in.h>
51 #include <netinet/in_kdtrace.h>
52 #include <netinet/in_pcb.h>
53 #include <netinet/ip.h>
54 #include <netinet/ip_icmp.h>	/* required for icmp_var.h */
55 #include <netinet/icmp_var.h>	/* for ICMP_BANDLIM */
56 #include <netinet/ip_var.h>
57 #include <netinet/ip6.h>
58 #include <netinet6/in6_pcb.h>
59 #include <netinet6/ip6_var.h>
60 #include <netinet/tcp.h>
61 #define	TCPOUTFLAGS
62 #include <netinet/tcp_fsm.h>
63 #include <netinet/tcp_seq.h>
64 #include <netinet/tcp_timer.h>
65 #include <netinet/tcp_var.h>
66 #include <netinet/tcp_log_buf.h>
67 #include <netinet/tcp_syncache.h>
68 #include <netinet/tcp_hpts.h>
69 #include <netinet/tcp_ratelimit.h>
70 #include <netinet/tcp_accounting.h>
71 #include <netinet/tcpip.h>
72 #include <netinet/cc/cc.h>
73 #include <netinet/cc/cc_newreno.h>
74 #include <netinet/tcp_fastopen.h>
75 #include <netinet/tcp_lro.h>
76 #ifdef NETFLIX_SHARED_CWND
77 #include <netinet/tcp_shared_cwnd.h>
78 #endif
79 #ifdef TCP_OFFLOAD
80 #include <netinet/tcp_offload.h>
81 #endif
82 #ifdef INET6
83 #include <netinet6/tcp6_var.h>
84 #endif
85 #include <netinet/tcp_ecn.h>
86 
87 #include <netipsec/ipsec_support.h>
88 
89 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
90 #include <netipsec/ipsec.h>
91 #include <netipsec/ipsec6.h>
92 #endif				/* IPSEC */
93 
94 #include <netinet/udp.h>
95 #include <netinet/udp_var.h>
96 #include <machine/in_cksum.h>
97 
98 #ifdef MAC
99 #include <security/mac/mac_framework.h>
100 #endif
101 #include "sack_filter.h"
102 #include "tcp_rack.h"
103 #include "tailq_hash.h"
104 
105 
106 struct rack_sendmap *
107 tqhash_min(struct tailq_hash *hs)
108 {
109 	struct rack_sendmap *rsm;
110 
111 	rsm = tqhash_find(hs, hs->min);
112 	return(rsm);
113 }
114 
115 struct rack_sendmap *
116 tqhash_max(struct tailq_hash *hs)
117 {
118 	struct rack_sendmap *rsm;
119 
120 	rsm = tqhash_find(hs, (hs->max - 1));
121 	return (rsm);
122 }
123 
124 int
125 tqhash_empty(struct tailq_hash *hs)
126 {
127 	if (hs->count == 0)
128 		return(1);
129 	return(0);
130 }
131 
132 struct rack_sendmap *
133 tqhash_find(struct tailq_hash *hs, uint32_t seq)
134 {
135 	struct rack_sendmap *e;
136 	int bindex, pbucket, fc = 1;
137 
138 	if ((SEQ_LT(seq, hs->min)) ||
139 	    (hs->count == 0) ||
140 	    (SEQ_GEQ(seq, hs->max))) {
141 		/* Not here */
142 		return (NULL);
143 	}
144 	bindex = seq / SEQ_BUCKET_SIZE;
145 	bindex %= MAX_HASH_ENTRIES;
146 	/* Lets look through the bucket it belongs to */
147 	if (TAILQ_EMPTY(&hs->ht[bindex])) {
148 		goto look_backwards;
149 	}
150 	TAILQ_FOREACH(e, &hs->ht[bindex], next) {
151 		if (fc == 1) {
152 			/*
153 			 * Special check for when a cum-ack
154 			 * as moved up over a seq and now its
155 			 * a bucket behind where it belongs. In
156 			 * the case of SACKs which create new rsm's
157 			 * this won't occur.
158 			 */
159 			if (SEQ_GT(e->r_start, seq)) {
160 				goto look_backwards;
161 			}
162 			fc = 0;
163 		}
164 		if (SEQ_GEQ(seq, e->r_start) &&
165 		    (SEQ_LT(seq, e->r_end))) {
166 			/* Its in this block */
167 			return (e);
168 		}
169 	}
170 	/* Did not find it */
171 	return (NULL);
172 look_backwards:
173 	if (bindex == 0)
174 		pbucket = MAX_HASH_ENTRIES - 1;
175 	else
176 		pbucket = bindex - 1;
177 	TAILQ_FOREACH_REVERSE(e, &hs->ht[pbucket], rack_head, next) {
178 		if (SEQ_GEQ(seq, e->r_start) &&
179 		    (SEQ_LT(seq, e->r_end))) {
180 			/* Its in this block */
181 			return (e);
182 		}
183 		if (SEQ_GEQ(e->r_end, seq))
184 			break;
185 	}
186 	return (NULL);
187 }
188 
189 struct rack_sendmap *
190 tqhash_next(struct tailq_hash *hs, struct rack_sendmap *rsm)
191 {
192 	struct rack_sendmap *e;
193 
194 	e = TAILQ_NEXT(rsm, next);
195 	if (e == NULL) {
196 		/* Move to next bucket */
197 		int nxt;
198 
199 		nxt = rsm->bindex + 1;
200 		if (nxt >= MAX_HASH_ENTRIES)
201 			nxt = 0;
202 		e = TAILQ_FIRST(&hs->ht[nxt]);
203 	}
204 	return(e);
205 }
206 
207 struct rack_sendmap *
208 tqhash_prev(struct tailq_hash *hs, struct rack_sendmap *rsm)
209 {
210 	struct rack_sendmap *e;
211 
212 	e = TAILQ_PREV(rsm, rack_head, next);
213 	if (e == NULL) {
214 		int prev;
215 
216 		if (rsm->bindex > 0)
217 			prev = rsm->bindex - 1;
218 		else
219 			prev = MAX_HASH_ENTRIES - 1;
220 		e = TAILQ_LAST(&hs->ht[prev], rack_head);
221 	}
222 	return (e);
223 }
224 
225 void
226 tqhash_remove(struct tailq_hash *hs, struct rack_sendmap *rsm, int type)
227 {
228 	TAILQ_REMOVE(&hs->ht[rsm->bindex], rsm, next);
229 	hs->count--;
230 	if (hs->count == 0) {
231 		hs->min = hs->max;
232 	} else if (type == REMOVE_TYPE_CUMACK) {
233 		hs->min = rsm->r_end;
234 	}
235 }
236 
237 int
238 tqhash_insert(struct tailq_hash *hs, struct rack_sendmap *rsm)
239 {
240 	struct rack_sendmap *e, *l;
241 	int inserted = 0;
242 	uint32_t ebucket;
243 
244 	if (hs->count > 0) {
245 		if ((rsm->r_end - hs->min) >  MAX_ALLOWED_SEQ_RANGE) {
246 			return (-1);
247 		}
248 		e = tqhash_find(hs, rsm->r_start);
249 		if (e) {
250 			return (-2);
251 		}
252 	}
253 	rsm->bindex = rsm->r_start / SEQ_BUCKET_SIZE;
254 	rsm->bindex %= MAX_HASH_ENTRIES;
255 	ebucket = rsm->r_end / SEQ_BUCKET_SIZE;
256 	ebucket %= MAX_HASH_ENTRIES;
257 	if (ebucket != rsm->bindex) {
258 		/* This RSM straddles the bucket boundary */
259 		rsm->r_flags |= RACK_STRADDLE;
260 	} else {
261 		rsm->r_flags &= ~RACK_STRADDLE;
262 	}
263 	if (hs->count == 0) {
264 		/* Special case */
265 		hs->min = rsm->r_start;
266 		hs->max = rsm->r_end;
267 		hs->count = 1;
268 	} else {
269 		hs->count++;
270 		if (SEQ_GT(rsm->r_end, hs->max))
271 			hs->max = rsm->r_end;
272 		if (SEQ_LT(rsm->r_start, hs->min))
273 			hs->min = rsm->r_start;
274 	}
275 	/* Check the common case of inserting at the end */
276 	l = TAILQ_LAST(&hs->ht[rsm->bindex], rack_head);
277 	if ((l == NULL) || (SEQ_GT(rsm->r_start, l->r_start))) {
278 		TAILQ_INSERT_TAIL(&hs->ht[rsm->bindex], rsm, next);
279 		return (0);
280 	}
281 	TAILQ_FOREACH(e, &hs->ht[rsm->bindex], next) {
282 		if (SEQ_LEQ(rsm->r_start, e->r_start)) {
283 			inserted = 1;
284 			TAILQ_INSERT_BEFORE(e, rsm, next);
285 			break;
286 		}
287 	}
288 	if (inserted == 0) {
289 		TAILQ_INSERT_TAIL(&hs->ht[rsm->bindex], rsm, next);
290 	}
291 	return (0);
292 }
293 
294 void
295 tqhash_init(struct tailq_hash *hs)
296 {
297 	int i;
298 
299 	for(i = 0; i < MAX_HASH_ENTRIES; i++) {
300 		TAILQ_INIT(&hs->ht[i]);
301 	}
302 	hs->min = hs->max = 0;
303 	hs->count = 0;
304 }
305 
306 int
307 tqhash_trim(struct tailq_hash *hs, uint32_t th_ack)
308 {
309 	struct rack_sendmap *rsm;
310 
311 	if (SEQ_LT(th_ack, hs->min)) {
312 		/* It can't be behind our current min */
313 		return (-1);
314 	}
315 	if (SEQ_GEQ(th_ack, hs->max)) {
316 		/*  It can't be beyond or at our current max */
317 		return (-2);
318 	}
319 	rsm = tqhash_min(hs);
320 	if (rsm == NULL) {
321 		/* nothing to trim */
322 		return (-3);
323 	}
324 	if (SEQ_GEQ(th_ack, rsm->r_end)) {
325 		/*
326 		 * You can't trim all bytes instead
327 		 * you need to remove it.
328 		 */
329 		return (-4);
330 	}
331 	if (SEQ_GT(th_ack, hs->min))
332 	    hs->min = th_ack;
333 	/*
334 	 * Should we trim it for the caller?
335 	 * they may have already which is ok...
336 	 */
337 	if (SEQ_GT(th_ack, rsm->r_start)) {
338 		rsm->r_start = th_ack;
339 	}
340 	return (0);
341 }
342 
343