xref: /freebsd/sys/netinet/tcp_stacks/tailq_hash.c (revision 2b833162)
1 #include <sys/cdefs.h>
2 __FBSDID("$FreeBSD$");
3 
4 #include "opt_inet.h"
5 #include "opt_inet6.h"
6 #include "opt_ipsec.h"
7 #include "opt_ratelimit.h"
8 #include "opt_kern_tls.h"
9 #include <sys/param.h>
10 #include <sys/arb.h>
11 #include <sys/module.h>
12 #include <sys/kernel.h>
13 #ifdef TCP_HHOOK
14 #include <sys/hhook.h>
15 #endif
16 #include <sys/lock.h>
17 #include <sys/malloc.h>
18 #include <sys/lock.h>
19 #include <sys/mutex.h>
20 #include <sys/mbuf.h>
21 #include <sys/proc.h>		/* for proc0 declaration */
22 #include <sys/socket.h>
23 #include <sys/socketvar.h>
24 #include <sys/sysctl.h>
25 #include <sys/systm.h>
26 #ifdef STATS
27 #include <sys/qmath.h>
28 #include <sys/tree.h>
29 #include <sys/stats.h> /* Must come after qmath.h and tree.h */
30 #else
31 #include <sys/tree.h>
32 #endif
33 #include <sys/refcount.h>
34 #include <sys/queue.h>
35 #include <sys/tim_filter.h>
36 #include <sys/smp.h>
37 #include <sys/kthread.h>
38 #include <sys/kern_prefetch.h>
39 #include <sys/protosw.h>
40 #ifdef TCP_ACCOUNTING
41 #include <sys/sched.h>
42 #include <machine/cpu.h>
43 #endif
44 #include <vm/uma.h>
45 
46 #include <net/route.h>
47 #include <net/route/nhop.h>
48 #include <net/vnet.h>
49 
50 #define TCPSTATES		/* for logging */
51 
52 #include <netinet/in.h>
53 #include <netinet/in_kdtrace.h>
54 #include <netinet/in_pcb.h>
55 #include <netinet/ip.h>
56 #include <netinet/ip_icmp.h>	/* required for icmp_var.h */
57 #include <netinet/icmp_var.h>	/* for ICMP_BANDLIM */
58 #include <netinet/ip_var.h>
59 #include <netinet/ip6.h>
60 #include <netinet6/in6_pcb.h>
61 #include <netinet6/ip6_var.h>
62 #include <netinet/tcp.h>
63 #define	TCPOUTFLAGS
64 #include <netinet/tcp_fsm.h>
65 #include <netinet/tcp_seq.h>
66 #include <netinet/tcp_timer.h>
67 #include <netinet/tcp_var.h>
68 #include <netinet/tcp_log_buf.h>
69 #include <netinet/tcp_syncache.h>
70 #include <netinet/tcp_hpts.h>
71 #include <netinet/tcp_ratelimit.h>
72 #include <netinet/tcp_accounting.h>
73 #include <netinet/tcpip.h>
74 #include <netinet/cc/cc.h>
75 #include <netinet/cc/cc_newreno.h>
76 #include <netinet/tcp_fastopen.h>
77 #include <netinet/tcp_lro.h>
78 #ifdef NETFLIX_SHARED_CWND
79 #include <netinet/tcp_shared_cwnd.h>
80 #endif
81 #ifdef TCP_OFFLOAD
82 #include <netinet/tcp_offload.h>
83 #endif
84 #ifdef INET6
85 #include <netinet6/tcp6_var.h>
86 #endif
87 #include <netinet/tcp_ecn.h>
88 
89 #include <netipsec/ipsec_support.h>
90 
91 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
92 #include <netipsec/ipsec.h>
93 #include <netipsec/ipsec6.h>
94 #endif				/* IPSEC */
95 
96 #include <netinet/udp.h>
97 #include <netinet/udp_var.h>
98 #include <machine/in_cksum.h>
99 
100 #ifdef MAC
101 #include <security/mac/mac_framework.h>
102 #endif
103 #include "sack_filter.h"
104 #include "tcp_rack.h"
105 #include "tailq_hash.h"
106 
107 
108 struct rack_sendmap *
109 tqhash_min(struct tailq_hash *hs)
110 {
111 	struct rack_sendmap *rsm;
112 
113 	rsm = tqhash_find(hs, hs->min);
114 	return(rsm);
115 }
116 
117 struct rack_sendmap *
118 tqhash_max(struct tailq_hash *hs)
119 {
120 	struct rack_sendmap *rsm;
121 
122 	rsm = tqhash_find(hs, (hs->max - 1));
123 	return (rsm);
124 }
125 
126 int
127 tqhash_empty(struct tailq_hash *hs)
128 {
129 	if (hs->count == 0)
130 		return(1);
131 	return(0);
132 }
133 
134 struct rack_sendmap *
135 tqhash_find(struct tailq_hash *hs, uint32_t seq)
136 {
137 	struct rack_sendmap *e;
138 	int bindex, pbucket, fc = 1;
139 
140 	if ((SEQ_LT(seq, hs->min)) ||
141 	    (hs->count == 0) ||
142 	    (SEQ_GEQ(seq, hs->max))) {
143 		/* Not here */
144 		return (NULL);
145 	}
146 	bindex = seq / SEQ_BUCKET_SIZE;
147 	bindex %= MAX_HASH_ENTRIES;
148 	/* Lets look through the bucket it belongs to */
149 	if (TAILQ_EMPTY(&hs->ht[bindex])) {
150 		goto look_backwards;
151 	}
152 	TAILQ_FOREACH(e, &hs->ht[bindex], next) {
153 		if (fc == 1) {
154 			/*
155 			 * Special check for when a cum-ack
156 			 * as moved up over a seq and now its
157 			 * a bucket behind where it belongs. In
158 			 * the case of SACKs which create new rsm's
159 			 * this won't occur.
160 			 */
161 			if (SEQ_GT(e->r_start, seq)) {
162 				goto look_backwards;
163 			}
164 			fc = 0;
165 		}
166 		if (SEQ_GEQ(seq, e->r_start) &&
167 		    (SEQ_LT(seq, e->r_end))) {
168 			/* Its in this block */
169 			return (e);
170 		}
171 	}
172 	/* Did not find it */
173 	return (NULL);
174 look_backwards:
175 	if (bindex == 0)
176 		pbucket = MAX_HASH_ENTRIES - 1;
177 	else
178 		pbucket = bindex - 1;
179 	TAILQ_FOREACH_REVERSE(e, &hs->ht[pbucket], rack_head, next) {
180 		if (SEQ_GEQ(seq, e->r_start) &&
181 		    (SEQ_LT(seq, e->r_end))) {
182 			/* Its in this block */
183 			return (e);
184 		}
185 		if (SEQ_GEQ(e->r_end, seq))
186 			break;
187 	}
188 	return (NULL);
189 }
190 
191 struct rack_sendmap *
192 tqhash_next(struct tailq_hash *hs, struct rack_sendmap *rsm)
193 {
194 	struct rack_sendmap *e;
195 
196 	e = TAILQ_NEXT(rsm, next);
197 	if (e == NULL) {
198 		/* Move to next bucket */
199 		int nxt;
200 
201 		nxt = rsm->bindex + 1;
202 		if (nxt >= MAX_HASH_ENTRIES)
203 			nxt = 0;
204 		e = TAILQ_FIRST(&hs->ht[nxt]);
205 	}
206 	return(e);
207 }
208 
209 struct rack_sendmap *
210 tqhash_prev(struct tailq_hash *hs, struct rack_sendmap *rsm)
211 {
212 	struct rack_sendmap *e;
213 
214 	e = TAILQ_PREV(rsm, rack_head, next);
215 	if (e == NULL) {
216 		int prev;
217 
218 		if (rsm->bindex > 0)
219 			prev = rsm->bindex - 1;
220 		else
221 			prev = MAX_HASH_ENTRIES - 1;
222 		e = TAILQ_LAST(&hs->ht[prev], rack_head);
223 	}
224 	return (e);
225 }
226 
227 void
228 tqhash_remove(struct tailq_hash *hs, struct rack_sendmap *rsm, int type)
229 {
230 	TAILQ_REMOVE(&hs->ht[rsm->bindex], rsm, next);
231 	hs->count--;
232 	if (hs->count == 0) {
233 		hs->min = hs->max;
234 	} else if (type == REMOVE_TYPE_CUMACK) {
235 		hs->min = rsm->r_end;
236 	}
237 }
238 
239 int
240 tqhash_insert(struct tailq_hash *hs, struct rack_sendmap *rsm)
241 {
242 	struct rack_sendmap *e, *l;
243 	int inserted = 0;
244 	uint32_t ebucket;
245 
246 	if (hs->count > 0) {
247 		if ((rsm->r_end - hs->min) >  MAX_ALLOWED_SEQ_RANGE) {
248 			return (-1);
249 		}
250 		e = tqhash_find(hs, rsm->r_start);
251 		if (e) {
252 			return (-2);
253 		}
254 	}
255 	rsm->bindex = rsm->r_start / SEQ_BUCKET_SIZE;
256 	rsm->bindex %= MAX_HASH_ENTRIES;
257 	ebucket = rsm->r_end / SEQ_BUCKET_SIZE;
258 	ebucket %= MAX_HASH_ENTRIES;
259 	if (ebucket != rsm->bindex) {
260 		/* This RSM straddles the bucket boundary */
261 		rsm->r_flags |= RACK_STRADDLE;
262 	} else {
263 		rsm->r_flags &= ~RACK_STRADDLE;
264 	}
265 	if (hs->count == 0) {
266 		/* Special case */
267 		hs->min = rsm->r_start;
268 		hs->max = rsm->r_end;
269 		hs->count = 1;
270 	} else {
271 		hs->count++;
272 		if (SEQ_GT(rsm->r_end, hs->max))
273 			hs->max = rsm->r_end;
274 		if (SEQ_LT(rsm->r_start, hs->min))
275 			hs->min = rsm->r_start;
276 	}
277 	/* Check the common case of inserting at the end */
278 	l = TAILQ_LAST(&hs->ht[rsm->bindex], rack_head);
279 	if ((l == NULL) || (SEQ_GT(rsm->r_start, l->r_start))) {
280 		TAILQ_INSERT_TAIL(&hs->ht[rsm->bindex], rsm, next);
281 		return (0);
282 	}
283 	TAILQ_FOREACH(e, &hs->ht[rsm->bindex], next) {
284 		if (SEQ_LEQ(rsm->r_start, e->r_start)) {
285 			inserted = 1;
286 			TAILQ_INSERT_BEFORE(e, rsm, next);
287 			break;
288 		}
289 	}
290 	if (inserted == 0) {
291 		TAILQ_INSERT_TAIL(&hs->ht[rsm->bindex], rsm, next);
292 	}
293 	return (0);
294 }
295 
296 void
297 tqhash_init(struct tailq_hash *hs)
298 {
299 	int i;
300 
301 	for(i = 0; i < MAX_HASH_ENTRIES; i++) {
302 		TAILQ_INIT(&hs->ht[i]);
303 	}
304 	hs->min = hs->max = 0;
305 	hs->count = 0;
306 }
307 
308 int
309 tqhash_trim(struct tailq_hash *hs, uint32_t th_ack)
310 {
311 	struct rack_sendmap *rsm;
312 
313 	if (SEQ_LT(th_ack, hs->min)) {
314 		/* It can't be behind our current min */
315 		return (-1);
316 	}
317 	if (SEQ_GEQ(th_ack, hs->max)) {
318 		/*  It can't be beyond or at our current max */
319 		return (-2);
320 	}
321 	rsm = tqhash_min(hs);
322 	if (rsm == NULL) {
323 		/* nothing to trim */
324 		return (-3);
325 	}
326 	if (SEQ_GEQ(th_ack, rsm->r_end)) {
327 		/*
328 		 * You can't trim all bytes instead
329 		 * you need to remove it.
330 		 */
331 		return (-4);
332 	}
333 	if (SEQ_GT(th_ack, hs->min))
334 	    hs->min = th_ack;
335 	/*
336 	 * Should we trim it for the caller?
337 	 * they may have already which is ok...
338 	 */
339 	if (SEQ_GT(th_ack, rsm->r_start)) {
340 		rsm->r_start = th_ack;
341 	}
342 	return (0);
343 }
344 
345