xref: /linux/net/ipv6/tcp_ipv6.c (revision f4dca95f)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	TCP over IPv6
4  *	Linux INET6 implementation
5  *
6  *	Authors:
7  *	Pedro Roque		<roque@di.fc.ul.pt>
8  *
9  *	Based on:
10  *	linux/net/ipv4/tcp.c
11  *	linux/net/ipv4/tcp_input.c
12  *	linux/net/ipv4/tcp_output.c
13  *
14  *	Fixes:
15  *	Hideaki YOSHIFUJI	:	sin6_scope_id support
16  *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
17  *	Alexey Kuznetsov		allows both IPv4 and IPv6 sockets to bind
18  *					a single port at the same time.
19  *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
20  */
21 
22 #include <linux/bottom_half.h>
23 #include <linux/module.h>
24 #include <linux/errno.h>
25 #include <linux/types.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/net.h>
29 #include <linux/jiffies.h>
30 #include <linux/in.h>
31 #include <linux/in6.h>
32 #include <linux/netdevice.h>
33 #include <linux/init.h>
34 #include <linux/jhash.h>
35 #include <linux/ipsec.h>
36 #include <linux/times.h>
37 #include <linux/slab.h>
38 #include <linux/uaccess.h>
39 #include <linux/ipv6.h>
40 #include <linux/icmpv6.h>
41 #include <linux/random.h>
42 #include <linux/indirect_call_wrapper.h>
43 
44 #include <net/tcp.h>
45 #include <net/ndisc.h>
46 #include <net/inet6_hashtables.h>
47 #include <net/inet6_connection_sock.h>
48 #include <net/ipv6.h>
49 #include <net/transp_v6.h>
50 #include <net/addrconf.h>
51 #include <net/ip6_route.h>
52 #include <net/ip6_checksum.h>
53 #include <net/inet_ecn.h>
54 #include <net/protocol.h>
55 #include <net/xfrm.h>
56 #include <net/snmp.h>
57 #include <net/dsfield.h>
58 #include <net/timewait_sock.h>
59 #include <net/inet_common.h>
60 #include <net/secure_seq.h>
61 #include <net/hotdata.h>
62 #include <net/busy_poll.h>
63 #include <net/rstreason.h>
64 
65 #include <linux/proc_fs.h>
66 #include <linux/seq_file.h>
67 
68 #include <crypto/hash.h>
69 #include <linux/scatterlist.h>
70 
71 #include <trace/events/tcp.h>
72 
73 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb,
74 			      enum sk_rst_reason reason);
75 static void	tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
76 				      struct request_sock *req);
77 
78 INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
79 
80 static const struct inet_connection_sock_af_ops ipv6_mapped;
81 const struct inet_connection_sock_af_ops ipv6_specific;
82 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
83 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
84 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
85 #endif
86 
87 /* Helper returning the inet6 address from a given tcp socket.
88  * It can be used in the TCP stack instead of inet6_sk(sk).
89  * This avoids a dereference and allows compiler optimizations.
90  * It is a specialized version of inet6_sk_generic().
91  */
92 #define tcp_inet6_sk(sk) (&container_of_const(tcp_sk(sk), \
93 					      struct tcp6_sock, tcp)->inet6)
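/*
 * Example (illustrative sketch): in this file
 *
 *	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
 *
 * resolves to a constant offset inside struct tcp6_sock, whereas the generic
 * inet6_sk(sk) would first have to load the pinet6 pointer from the socket.
 */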
94 
95 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
96 {
97 	struct dst_entry *dst = skb_dst(skb);
98 
99 	if (dst && dst_hold_safe(dst)) {
100 		rcu_assign_pointer(sk->sk_rx_dst, dst);
101 		sk->sk_rx_dst_ifindex = skb->skb_iif;
102 		sk->sk_rx_dst_cookie = rt6_get_cookie(dst_rt6_info(dst));
103 	}
104 }
105 
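/* The initial sequence number is derived from a keyed hash over the address
 * and port 4-tuple, and the timestamp offset from a keyed hash over the
 * address pair (see secure_tcpv6_seq() and secure_tcpv6_ts_off() in
 * net/core/secure_seq.c): stable per connection, unpredictable off-path.
 */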
106 static u32 tcp_v6_init_seq(const struct sk_buff *skb)
107 {
108 	return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
109 				ipv6_hdr(skb)->saddr.s6_addr32,
110 				tcp_hdr(skb)->dest,
111 				tcp_hdr(skb)->source);
112 }
113 
114 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
115 {
116 	return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
117 				   ipv6_hdr(skb)->saddr.s6_addr32);
118 }
119 
120 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
121 			      int addr_len)
122 {
123 	/* This check is replicated from tcp_v6_connect() and is intended to
124 	 * prevent the BPF program called below from accessing bytes that are
125 	 * out of the bounds specified by the user in addr_len.
126 	 */
127 	if (addr_len < SIN6_LEN_RFC2133)
128 		return -EINVAL;
129 
130 	sock_owned_by_me(sk);
131 
132 	return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, &addr_len);
133 }
134 
135 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
136 			  int addr_len)
137 {
138 	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
139 	struct inet_connection_sock *icsk = inet_csk(sk);
140 	struct in6_addr *saddr = NULL, *final_p, final;
141 	struct inet_timewait_death_row *tcp_death_row;
142 	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
143 	struct inet_sock *inet = inet_sk(sk);
144 	struct tcp_sock *tp = tcp_sk(sk);
145 	struct net *net = sock_net(sk);
146 	struct ipv6_txoptions *opt;
147 	struct dst_entry *dst;
148 	struct flowi6 fl6;
149 	int addr_type;
150 	int err;
151 
152 	if (addr_len < SIN6_LEN_RFC2133)
153 		return -EINVAL;
154 
155 	if (usin->sin6_family != AF_INET6)
156 		return -EAFNOSUPPORT;
157 
158 	memset(&fl6, 0, sizeof(fl6));
159 
160 	if (inet6_test_bit(SNDFLOW, sk)) {
161 		fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
162 		IP6_ECN_flow_init(fl6.flowlabel);
163 		if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
164 			struct ip6_flowlabel *flowlabel;
165 			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
166 			if (IS_ERR(flowlabel))
167 				return -EINVAL;
168 			fl6_sock_release(flowlabel);
169 		}
170 	}
171 
172 	/*
173 	 *	connect() to INADDR_ANY means loopback (BSD'ism).
174 	 */
175 
176 	if (ipv6_addr_any(&usin->sin6_addr)) {
177 		if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
178 			ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
179 					       &usin->sin6_addr);
180 		else
181 			usin->sin6_addr = in6addr_loopback;
182 	}
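	/* e.g. a connect() to "::" on a plain IPv6 socket behaves like a
	 * connect() to "::1"; on a socket already bound to a v4-mapped
	 * address it is rewritten to "::ffff:127.0.0.1" instead.
	 */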
183 
184 	addr_type = ipv6_addr_type(&usin->sin6_addr);
185 
186 	if (addr_type & IPV6_ADDR_MULTICAST)
187 		return -ENETUNREACH;
188 
189 	if (addr_type&IPV6_ADDR_LINKLOCAL) {
190 		if (addr_len >= sizeof(struct sockaddr_in6) &&
191 		    usin->sin6_scope_id) {
192 			/* If interface is set while binding, indices
193 			 * must coincide.
194 			 */
195 			if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
196 				return -EINVAL;
197 
198 			sk->sk_bound_dev_if = usin->sin6_scope_id;
199 		}
200 
201 		/* Connect to link-local address requires an interface */
202 		if (!sk->sk_bound_dev_if)
203 			return -EINVAL;
204 	}
205 
206 	if (tp->rx_opt.ts_recent_stamp &&
207 	    !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
208 		tp->rx_opt.ts_recent = 0;
209 		tp->rx_opt.ts_recent_stamp = 0;
210 		WRITE_ONCE(tp->write_seq, 0);
211 	}
212 
213 	sk->sk_v6_daddr = usin->sin6_addr;
214 	np->flow_label = fl6.flowlabel;
215 
216 	/*
217 	 *	TCP over IPv4
218 	 */
219 
220 	if (addr_type & IPV6_ADDR_MAPPED) {
221 		u32 exthdrlen = icsk->icsk_ext_hdr_len;
222 		struct sockaddr_in sin;
223 
224 		if (ipv6_only_sock(sk))
225 			return -ENETUNREACH;
226 
227 		sin.sin_family = AF_INET;
228 		sin.sin_port = usin->sin6_port;
229 		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
230 
231 		/* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
232 		WRITE_ONCE(icsk->icsk_af_ops, &ipv6_mapped);
233 		if (sk_is_mptcp(sk))
234 			mptcpv6_handle_mapped(sk, true);
235 		sk->sk_backlog_rcv = tcp_v4_do_rcv;
236 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
237 		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
238 #endif
239 
240 		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
241 
242 		if (err) {
243 			icsk->icsk_ext_hdr_len = exthdrlen;
244 			/* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
245 			WRITE_ONCE(icsk->icsk_af_ops, &ipv6_specific);
246 			if (sk_is_mptcp(sk))
247 				mptcpv6_handle_mapped(sk, false);
248 			sk->sk_backlog_rcv = tcp_v6_do_rcv;
249 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
250 			tp->af_specific = &tcp_sock_ipv6_specific;
251 #endif
252 			goto failure;
253 		}
254 		np->saddr = sk->sk_v6_rcv_saddr;
255 
256 		return err;
257 	}
258 
259 	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
260 		saddr = &sk->sk_v6_rcv_saddr;
261 
262 	fl6.flowi6_proto = IPPROTO_TCP;
263 	fl6.daddr = sk->sk_v6_daddr;
264 	fl6.saddr = saddr ? *saddr : np->saddr;
265 	fl6.flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label);
266 	fl6.flowi6_oif = sk->sk_bound_dev_if;
267 	fl6.flowi6_mark = sk->sk_mark;
268 	fl6.fl6_dport = usin->sin6_port;
269 	fl6.fl6_sport = inet->inet_sport;
270 	fl6.flowi6_uid = sk->sk_uid;
271 
272 	opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
273 	final_p = fl6_update_dst(&fl6, opt, &final);
274 
275 	security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
276 
277 	dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p);
278 	if (IS_ERR(dst)) {
279 		err = PTR_ERR(dst);
280 		goto failure;
281 	}
282 
283 	tp->tcp_usec_ts = dst_tcp_usec_ts(dst);
284 	tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
285 
286 	if (!saddr) {
287 		saddr = &fl6.saddr;
288 
289 		err = inet_bhash2_update_saddr(sk, saddr, AF_INET6);
290 		if (err)
291 			goto failure;
292 	}
293 
294 	/* set the source address */
295 	np->saddr = *saddr;
296 	inet->inet_rcv_saddr = LOOPBACK4_IPV6;
297 
298 	sk->sk_gso_type = SKB_GSO_TCPV6;
299 	ip6_dst_store(sk, dst, NULL, NULL);
300 
301 	icsk->icsk_ext_hdr_len = 0;
302 	if (opt)
303 		icsk->icsk_ext_hdr_len = opt->opt_flen +
304 					 opt->opt_nflen;
305 
306 	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
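	/* i.e. 1280 - 20 - 40 = 1220 bytes when clamped to the minimal IPv6 MTU */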
307 
308 	inet->inet_dport = usin->sin6_port;
309 
310 	tcp_set_state(sk, TCP_SYN_SENT);
311 	err = inet6_hash_connect(tcp_death_row, sk);
312 	if (err)
313 		goto late_failure;
314 
315 	sk_set_txhash(sk);
316 
317 	if (likely(!tp->repair)) {
318 		if (!tp->write_seq)
319 			WRITE_ONCE(tp->write_seq,
320 				   secure_tcpv6_seq(np->saddr.s6_addr32,
321 						    sk->sk_v6_daddr.s6_addr32,
322 						    inet->inet_sport,
323 						    inet->inet_dport));
324 		tp->tsoffset = secure_tcpv6_ts_off(net, np->saddr.s6_addr32,
325 						   sk->sk_v6_daddr.s6_addr32);
326 	}
327 
328 	if (tcp_fastopen_defer_connect(sk, &err))
329 		return err;
330 	if (err)
331 		goto late_failure;
332 
333 	err = tcp_connect(sk);
334 	if (err)
335 		goto late_failure;
336 
337 	return 0;
338 
339 late_failure:
340 	tcp_set_state(sk, TCP_CLOSE);
341 	inet_bhash2_reset_saddr(sk);
342 failure:
343 	inet->inet_dport = 0;
344 	sk->sk_route_caps = 0;
345 	return err;
346 }
347 
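/* Shrink the cached MSS after an ICMPv6 Packet Too Big.  tcp_v6_err() calls
 * this directly when the socket is not owned by user space; otherwise the
 * work is deferred via TCP_MTU_REDUCED_DEFERRED and picked up later from
 * tcp_release_cb().
 */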
348 static void tcp_v6_mtu_reduced(struct sock *sk)
349 {
350 	struct dst_entry *dst;
351 	u32 mtu;
352 
353 	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
354 		return;
355 
356 	mtu = READ_ONCE(tcp_sk(sk)->mtu_info);
357 
358 	/* Drop requests trying to increase our current mss.
359 	 * Check done in __ip6_rt_update_pmtu() is too late.
360 	 */
361 	if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
362 		return;
363 
364 	dst = inet6_csk_update_pmtu(sk, mtu);
365 	if (!dst)
366 		return;
367 
368 	if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
369 		tcp_sync_mss(sk, dst_mtu(dst));
370 		tcp_simple_retransmit(sk);
371 	}
372 }
373 
374 static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
375 		u8 type, u8 code, int offset, __be32 info)
376 {
377 	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
378 	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
379 	struct net *net = dev_net(skb->dev);
380 	struct request_sock *fastopen;
381 	struct ipv6_pinfo *np;
382 	struct tcp_sock *tp;
383 	__u32 seq, snd_una;
384 	struct sock *sk;
385 	bool fatal;
386 	int err;
387 
388 	sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
389 					&hdr->daddr, th->dest,
390 					&hdr->saddr, ntohs(th->source),
391 					skb->dev->ifindex, inet6_sdif(skb));
392 
393 	if (!sk) {
394 		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
395 				  ICMP6_MIB_INERRORS);
396 		return -ENOENT;
397 	}
398 
399 	if (sk->sk_state == TCP_TIME_WAIT) {
400 		/* To increase the counter of ignored icmps for TCP-AO */
401 		tcp_ao_ignore_icmp(sk, AF_INET6, type, code);
402 		inet_twsk_put(inet_twsk(sk));
403 		return 0;
404 	}
405 	seq = ntohl(th->seq);
406 	fatal = icmpv6_err_convert(type, code, &err);
407 	if (sk->sk_state == TCP_NEW_SYN_RECV) {
408 		tcp_req_err(sk, seq, fatal);
409 		return 0;
410 	}
411 
412 	if (tcp_ao_ignore_icmp(sk, AF_INET6, type, code)) {
413 		sock_put(sk);
414 		return 0;
415 	}
416 
417 	bh_lock_sock(sk);
418 	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
419 		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
420 
421 	if (sk->sk_state == TCP_CLOSE)
422 		goto out;
423 
424 	if (static_branch_unlikely(&ip6_min_hopcount)) {
425 		/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
426 		if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
427 			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
428 			goto out;
429 		}
430 	}
431 
432 	tp = tcp_sk(sk);
433 	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
434 	fastopen = rcu_dereference(tp->fastopen_rsk);
435 	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
436 	if (sk->sk_state != TCP_LISTEN &&
437 	    !between(seq, snd_una, tp->snd_nxt)) {
438 		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
439 		goto out;
440 	}
441 
442 	np = tcp_inet6_sk(sk);
443 
444 	if (type == NDISC_REDIRECT) {
445 		if (!sock_owned_by_user(sk)) {
446 			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
447 
448 			if (dst)
449 				dst->ops->redirect(dst, sk, skb);
450 		}
451 		goto out;
452 	}
453 
454 	if (type == ICMPV6_PKT_TOOBIG) {
455 		u32 mtu = ntohl(info);
456 
457 		/* We are not interested in TCP_LISTEN and open_requests
458 		 * (SYN-ACKs sent out by Linux are always <576 bytes so
459 		 * they should go through unfragmented).
460 		 */
461 		if (sk->sk_state == TCP_LISTEN)
462 			goto out;
463 
464 		if (!ip6_sk_accept_pmtu(sk))
465 			goto out;
466 
467 		if (mtu < IPV6_MIN_MTU)
468 			goto out;
469 
470 		WRITE_ONCE(tp->mtu_info, mtu);
471 
472 		if (!sock_owned_by_user(sk))
473 			tcp_v6_mtu_reduced(sk);
474 		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
475 					   &sk->sk_tsq_flags))
476 			sock_hold(sk);
477 		goto out;
478 	}
479 
480 
481 	/* Might be for a request_sock */
482 	switch (sk->sk_state) {
483 	case TCP_SYN_SENT:
484 	case TCP_SYN_RECV:
485 		/* Only in fast or simultaneous open. If a fast open socket is
486 		 * already accepted it is treated as a connected one below.
487 		 */
488 		if (fastopen && !fastopen->sk)
489 			break;
490 
491 		ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);
492 
493 		if (!sock_owned_by_user(sk)) {
494 			WRITE_ONCE(sk->sk_err, err);
495 			sk_error_report(sk);		/* Wake people up to see the error (see connect in sock.c) */
496 
497 			tcp_done(sk);
498 		} else {
499 			WRITE_ONCE(sk->sk_err_soft, err);
500 		}
501 		goto out;
502 	case TCP_LISTEN:
503 		break;
504 	default:
505 		/* check if this ICMP message allows revert of backoff.
506 		 * (see RFC 6069)
507 		 */
508 		if (!fastopen && type == ICMPV6_DEST_UNREACH &&
509 		    code == ICMPV6_NOROUTE)
510 			tcp_ld_RTO_revert(sk, seq);
511 	}
512 
513 	if (!sock_owned_by_user(sk) && inet6_test_bit(RECVERR6, sk)) {
514 		WRITE_ONCE(sk->sk_err, err);
515 		sk_error_report(sk);
516 	} else {
517 		WRITE_ONCE(sk->sk_err_soft, err);
518 	}
519 out:
520 	bh_unlock_sock(sk);
521 	sock_put(sk);
522 	return 0;
523 }
524 
525 
526 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
527 			      struct flowi *fl,
528 			      struct request_sock *req,
529 			      struct tcp_fastopen_cookie *foc,
530 			      enum tcp_synack_type synack_type,
531 			      struct sk_buff *syn_skb)
532 {
533 	struct inet_request_sock *ireq = inet_rsk(req);
534 	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
535 	struct ipv6_txoptions *opt;
536 	struct flowi6 *fl6 = &fl->u.ip6;
537 	struct sk_buff *skb;
538 	int err = -ENOMEM;
539 	u8 tclass;
540 
541 	/* First, grab a route. */
542 	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
543 					       IPPROTO_TCP)) == NULL)
544 		goto done;
545 
546 	skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);
547 
548 	if (skb) {
549 		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
550 				    &ireq->ir_v6_rmt_addr);
551 
552 		fl6->daddr = ireq->ir_v6_rmt_addr;
553 		if (inet6_test_bit(REPFLOW, sk) && ireq->pktopts)
554 			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
555 
556 		tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
557 				(tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
558 				(np->tclass & INET_ECN_MASK) :
559 				np->tclass;
560 
561 		if (!INET_ECN_is_capable(tclass) &&
562 		    tcp_bpf_ca_needs_ecn((struct sock *)req))
563 			tclass |= INET_ECN_ECT_0;
564 
565 		rcu_read_lock();
566 		opt = ireq->ipv6_opt;
567 		if (!opt)
568 			opt = rcu_dereference(np->opt);
569 		err = ip6_xmit(sk, skb, fl6, skb->mark ? : READ_ONCE(sk->sk_mark),
570 			       opt, tclass, READ_ONCE(sk->sk_priority));
571 		rcu_read_unlock();
572 		err = net_xmit_eval(err);
573 	}
574 
575 done:
576 	return err;
577 }
578 
579 
580 static void tcp_v6_reqsk_destructor(struct request_sock *req)
581 {
582 	kfree(inet_rsk(req)->ipv6_opt);
583 	consume_skb(inet_rsk(req)->pktopts);
584 }
585 
586 #ifdef CONFIG_TCP_MD5SIG
587 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
588 						   const struct in6_addr *addr,
589 						   int l3index)
590 {
591 	return tcp_md5_do_lookup(sk, l3index,
592 				 (union tcp_md5_addr *)addr, AF_INET6);
593 }
594 
595 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
596 						const struct sock *addr_sk)
597 {
598 	int l3index;
599 
600 	l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
601 						 addr_sk->sk_bound_dev_if);
602 	return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
603 				    l3index);
604 }
605 
606 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
607 				 sockptr_t optval, int optlen)
608 {
609 	struct tcp_md5sig cmd;
610 	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
611 	union tcp_ao_addr *addr;
612 	int l3index = 0;
613 	u8 prefixlen;
614 	bool l3flag;
615 	u8 flags;
616 
617 	if (optlen < sizeof(cmd))
618 		return -EINVAL;
619 
620 	if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
621 		return -EFAULT;
622 
623 	if (sin6->sin6_family != AF_INET6)
624 		return -EINVAL;
625 
626 	flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;
627 	l3flag = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;
628 
629 	if (optname == TCP_MD5SIG_EXT &&
630 	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
631 		prefixlen = cmd.tcpm_prefixlen;
632 		if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
633 					prefixlen > 32))
634 			return -EINVAL;
635 	} else {
636 		prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
637 	}
638 
639 	if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex &&
640 	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
641 		struct net_device *dev;
642 
643 		rcu_read_lock();
644 		dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
645 		if (dev && netif_is_l3_master(dev))
646 			l3index = dev->ifindex;
647 		rcu_read_unlock();
648 
649 		/* ok to reference set/not set outside of rcu;
650 		 * right now device MUST be an L3 master
651 		 */
652 		if (!dev || !l3index)
653 			return -EINVAL;
654 	}
655 
656 	if (!cmd.tcpm_keylen) {
657 		if (ipv6_addr_v4mapped(&sin6->sin6_addr))
658 			return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
659 					      AF_INET, prefixlen,
660 					      l3index, flags);
661 		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
662 				      AF_INET6, prefixlen, l3index, flags);
663 	}
664 
665 	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
666 		return -EINVAL;
667 
668 	if (ipv6_addr_v4mapped(&sin6->sin6_addr)) {
669 		addr = (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3];
670 
671 		/* Don't allow keys for peers that have a matching TCP-AO key.
672 		 * See the comment in tcp_ao_add_cmd()
673 		 */
674 		if (tcp_ao_required(sk, addr, AF_INET,
675 				    l3flag ? l3index : -1, false))
676 			return -EKEYREJECTED;
677 		return tcp_md5_do_add(sk, addr,
678 				      AF_INET, prefixlen, l3index, flags,
679 				      cmd.tcpm_key, cmd.tcpm_keylen);
680 	}
681 
682 	addr = (union tcp_md5_addr *)&sin6->sin6_addr;
683 
684 	/* Don't allow keys for peers that have a matching TCP-AO key.
685 	 * See the comment in tcp_ao_add_cmd()
686 	 */
687 	if (tcp_ao_required(sk, addr, AF_INET6, l3flag ? l3index : -1, false))
688 		return -EKEYREJECTED;
689 
690 	return tcp_md5_do_add(sk, addr, AF_INET6, prefixlen, l3index, flags,
691 			      cmd.tcpm_key, cmd.tcpm_keylen);
692 }
693 
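/* The MD5 input assembled below is the tcp6_pseudohdr scratch area (IPv6
 * source and destination addresses, upper-layer length, next header =
 * IPPROTO_TCP, per RFC 2460) immediately followed by a copy of the TCP
 * header with its checksum field cleared; both pieces are hashed through a
 * single scatterlist entry.
 */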
694 static int tcp_v6_md5_hash_headers(struct tcp_sigpool *hp,
695 				   const struct in6_addr *daddr,
696 				   const struct in6_addr *saddr,
697 				   const struct tcphdr *th, int nbytes)
698 {
699 	struct tcp6_pseudohdr *bp;
700 	struct scatterlist sg;
701 	struct tcphdr *_th;
702 
703 	bp = hp->scratch;
704 	/* 1. TCP pseudo-header (RFC2460) */
705 	bp->saddr = *saddr;
706 	bp->daddr = *daddr;
707 	bp->protocol = cpu_to_be32(IPPROTO_TCP);
708 	bp->len = cpu_to_be32(nbytes);
709 
710 	_th = (struct tcphdr *)(bp + 1);
711 	memcpy(_th, th, sizeof(*th));
712 	_th->check = 0;
713 
714 	sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
715 	ahash_request_set_crypt(hp->req, &sg, NULL,
716 				sizeof(*bp) + sizeof(*th));
717 	return crypto_ahash_update(hp->req);
718 }
719 
720 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
721 			       const struct in6_addr *daddr, struct in6_addr *saddr,
722 			       const struct tcphdr *th)
723 {
724 	struct tcp_sigpool hp;
725 
726 	if (tcp_sigpool_start(tcp_md5_sigpool_id, &hp))
727 		goto clear_hash_nostart;
728 
729 	if (crypto_ahash_init(hp.req))
730 		goto clear_hash;
731 	if (tcp_v6_md5_hash_headers(&hp, daddr, saddr, th, th->doff << 2))
732 		goto clear_hash;
733 	if (tcp_md5_hash_key(&hp, key))
734 		goto clear_hash;
735 	ahash_request_set_crypt(hp.req, NULL, md5_hash, 0);
736 	if (crypto_ahash_final(hp.req))
737 		goto clear_hash;
738 
739 	tcp_sigpool_end(&hp);
740 	return 0;
741 
742 clear_hash:
743 	tcp_sigpool_end(&hp);
744 clear_hash_nostart:
745 	memset(md5_hash, 0, 16);
746 	return 1;
747 }
748 
749 static int tcp_v6_md5_hash_skb(char *md5_hash,
750 			       const struct tcp_md5sig_key *key,
751 			       const struct sock *sk,
752 			       const struct sk_buff *skb)
753 {
754 	const struct tcphdr *th = tcp_hdr(skb);
755 	const struct in6_addr *saddr, *daddr;
756 	struct tcp_sigpool hp;
757 
758 	if (sk) { /* valid for establish/request sockets */
759 		saddr = &sk->sk_v6_rcv_saddr;
760 		daddr = &sk->sk_v6_daddr;
761 	} else {
762 		const struct ipv6hdr *ip6h = ipv6_hdr(skb);
763 		saddr = &ip6h->saddr;
764 		daddr = &ip6h->daddr;
765 	}
766 
767 	if (tcp_sigpool_start(tcp_md5_sigpool_id, &hp))
768 		goto clear_hash_nostart;
769 
770 	if (crypto_ahash_init(hp.req))
771 		goto clear_hash;
772 
773 	if (tcp_v6_md5_hash_headers(&hp, daddr, saddr, th, skb->len))
774 		goto clear_hash;
775 	if (tcp_sigpool_hash_skb_data(&hp, skb, th->doff << 2))
776 		goto clear_hash;
777 	if (tcp_md5_hash_key(&hp, key))
778 		goto clear_hash;
779 	ahash_request_set_crypt(hp.req, NULL, md5_hash, 0);
780 	if (crypto_ahash_final(hp.req))
781 		goto clear_hash;
782 
783 	tcp_sigpool_end(&hp);
784 	return 0;
785 
786 clear_hash:
787 	tcp_sigpool_end(&hp);
788 clear_hash_nostart:
789 	memset(md5_hash, 0, 16);
790 	return 1;
791 }
792 #endif
793 
794 static void tcp_v6_init_req(struct request_sock *req,
795 			    const struct sock *sk_listener,
796 			    struct sk_buff *skb,
797 			    u32 tw_isn)
798 {
799 	bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
800 	struct inet_request_sock *ireq = inet_rsk(req);
801 	const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
802 
803 	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
804 	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
805 
806 	/* So that link locals have meaning */
807 	if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
808 	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
809 		ireq->ir_iif = tcp_v6_iif(skb);
810 
811 	if (!tw_isn &&
812 	    (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
813 	     np->rxopt.bits.rxinfo ||
814 	     np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
815 	     np->rxopt.bits.rxohlim || inet6_test_bit(REPFLOW, sk_listener))) {
816 		refcount_inc(&skb->users);
817 		ireq->pktopts = skb;
818 	}
819 }
820 
821 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
822 					  struct sk_buff *skb,
823 					  struct flowi *fl,
824 					  struct request_sock *req,
825 					  u32 tw_isn)
826 {
827 	tcp_v6_init_req(req, sk, skb, tw_isn);
828 
829 	if (security_inet_conn_request(sk, skb, req))
830 		return NULL;
831 
832 	return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
833 }
834 
835 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
836 	.family		=	AF_INET6,
837 	.obj_size	=	sizeof(struct tcp6_request_sock),
838 	.rtx_syn_ack	=	tcp_rtx_synack,
839 	.send_ack	=	tcp_v6_reqsk_send_ack,
840 	.destructor	=	tcp_v6_reqsk_destructor,
841 	.send_reset	=	tcp_v6_send_reset,
842 	.syn_ack_timeout =	tcp_syn_ack_timeout,
843 };
844 
845 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
846 	.mss_clamp	=	IPV6_MIN_MTU - sizeof(struct tcphdr) -
847 				sizeof(struct ipv6hdr),
848 #ifdef CONFIG_TCP_MD5SIG
849 	.req_md5_lookup	=	tcp_v6_md5_lookup,
850 	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
851 #endif
852 #ifdef CONFIG_TCP_AO
853 	.ao_lookup	=	tcp_v6_ao_lookup_rsk,
854 	.ao_calc_key	=	tcp_v6_ao_calc_key_rsk,
855 	.ao_synack_hash =	tcp_v6_ao_synack_hash,
856 #endif
857 #ifdef CONFIG_SYN_COOKIES
858 	.cookie_init_seq =	cookie_v6_init_sequence,
859 #endif
860 	.route_req	=	tcp_v6_route_req,
861 	.init_seq	=	tcp_v6_init_seq,
862 	.init_ts_off	=	tcp_v6_init_ts_off,
863 	.send_synack	=	tcp_v6_send_synack,
864 };
865 
866 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
867 				 u32 ack, u32 win, u32 tsval, u32 tsecr,
868 				 int oif, int rst, u8 tclass, __be32 label,
869 				 u32 priority, u32 txhash, struct tcp_key *key)
870 {
871 	const struct tcphdr *th = tcp_hdr(skb);
872 	struct tcphdr *t1;
873 	struct sk_buff *buff;
874 	struct flowi6 fl6;
875 	struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
876 	struct sock *ctl_sk = net->ipv6.tcp_sk;
877 	unsigned int tot_len = sizeof(struct tcphdr);
878 	__be32 mrst = 0, *topt;
879 	struct dst_entry *dst;
880 	__u32 mark = 0;
881 
882 	if (tsecr)
883 		tot_len += TCPOLEN_TSTAMP_ALIGNED;
884 	if (tcp_key_is_md5(key))
885 		tot_len += TCPOLEN_MD5SIG_ALIGNED;
886 	if (tcp_key_is_ao(key))
887 		tot_len += tcp_ao_len_aligned(key->ao_key);
888 
889 #ifdef CONFIG_MPTCP
890 	if (rst && !tcp_key_is_md5(key)) {
891 		mrst = mptcp_reset_option(skb);
892 
893 		if (mrst)
894 			tot_len += sizeof(__be32);
895 	}
896 #endif
897 
898 	buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
899 	if (!buff)
900 		return;
901 
902 	skb_reserve(buff, MAX_TCP_HEADER);
903 
904 	t1 = skb_push(buff, tot_len);
905 	skb_reset_transport_header(buff);
906 
907 	/* Swap the send and the receive. */
908 	memset(t1, 0, sizeof(*t1));
909 	t1->dest = th->source;
910 	t1->source = th->dest;
911 	t1->doff = tot_len / 4;
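	/* doff counts 32-bit words; tot_len above only grows in 4-byte-aligned steps */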
912 	t1->seq = htonl(seq);
913 	t1->ack_seq = htonl(ack);
914 	t1->ack = !rst || !th->ack;
915 	t1->rst = rst;
916 	t1->window = htons(win);
917 
918 	topt = (__be32 *)(t1 + 1);
919 
920 	if (tsecr) {
921 		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
922 				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
923 		*topt++ = htonl(tsval);
924 		*topt++ = htonl(tsecr);
925 	}
926 
927 	if (mrst)
928 		*topt++ = mrst;
929 
930 #ifdef CONFIG_TCP_MD5SIG
931 	if (tcp_key_is_md5(key)) {
932 		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
933 				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
934 		tcp_v6_md5_hash_hdr((__u8 *)topt, key->md5_key,
935 				    &ipv6_hdr(skb)->saddr,
936 				    &ipv6_hdr(skb)->daddr, t1);
937 	}
938 #endif
939 #ifdef CONFIG_TCP_AO
940 	if (tcp_key_is_ao(key)) {
941 		*topt++ = htonl((TCPOPT_AO << 24) |
942 				(tcp_ao_len(key->ao_key) << 16) |
943 				(key->ao_key->sndid << 8) |
944 				(key->rcv_next));
945 
946 		tcp_ao_hash_hdr(AF_INET6, (char *)topt, key->ao_key,
947 				key->traffic_key,
948 				(union tcp_ao_addr *)&ipv6_hdr(skb)->saddr,
949 				(union tcp_ao_addr *)&ipv6_hdr(skb)->daddr,
950 				t1, key->sne);
951 	}
952 #endif
953 
954 	memset(&fl6, 0, sizeof(fl6));
955 	fl6.daddr = ipv6_hdr(skb)->saddr;
956 	fl6.saddr = ipv6_hdr(skb)->daddr;
957 	fl6.flowlabel = label;
958 
959 	buff->ip_summed = CHECKSUM_PARTIAL;
960 
961 	__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
962 
963 	fl6.flowi6_proto = IPPROTO_TCP;
964 	if (rt6_need_strict(&fl6.daddr) && !oif)
965 		fl6.flowi6_oif = tcp_v6_iif(skb);
966 	else {
967 		if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
968 			oif = skb->skb_iif;
969 
970 		fl6.flowi6_oif = oif;
971 	}
972 
973 	if (sk) {
974 		if (sk->sk_state == TCP_TIME_WAIT)
975 			mark = inet_twsk(sk)->tw_mark;
976 		else
977 			mark = READ_ONCE(sk->sk_mark);
978 		skb_set_delivery_time(buff, tcp_transmit_time(sk), true);
979 	}
980 	if (txhash) {
981 		/* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */
982 		skb_set_hash(buff, txhash, PKT_HASH_TYPE_L4);
983 	}
984 	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
985 	fl6.fl6_dport = t1->dest;
986 	fl6.fl6_sport = t1->source;
987 	fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
988 	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
989 
990 	/* Always pass a socket to ip6_dst_lookup_flow(), even for a RST;
991 	 * the underlying function uses it to retrieve the network
992 	 * namespace.
993 	 */
994 	if (sk && sk->sk_state != TCP_TIME_WAIT)
995 		dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /*sk's xfrm_policy can be referred*/
996 	else
997 		dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL);
998 	if (!IS_ERR(dst)) {
999 		skb_dst_set(buff, dst);
1000 		ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
1001 			 tclass & ~INET_ECN_MASK, priority);
1002 		TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
1003 		if (rst)
1004 			TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
1005 		return;
1006 	}
1007 
1008 	kfree_skb(buff);
1009 }
1010 
1011 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb,
1012 			      enum sk_rst_reason reason)
1013 {
1014 	const struct tcphdr *th = tcp_hdr(skb);
1015 	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
1016 	const __u8 *md5_hash_location = NULL;
1017 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
1018 	bool allocated_traffic_key = false;
1019 #endif
1020 	const struct tcp_ao_hdr *aoh;
1021 	struct tcp_key key = {};
1022 	u32 seq = 0, ack_seq = 0;
1023 	__be32 label = 0;
1024 	u32 priority = 0;
1025 	struct net *net;
1026 	u32 txhash = 0;
1027 	int oif = 0;
1028 #ifdef CONFIG_TCP_MD5SIG
1029 	unsigned char newhash[16];
1030 	int genhash;
1031 	struct sock *sk1 = NULL;
1032 #endif
1033 
1034 	if (th->rst)
1035 		return;
1036 
1037 	/* If sk not NULL, it means we did a successful lookup and incoming
1038 	 * route had to be correct. prequeue might have dropped our dst.
1039 	 */
1040 	if (!sk && !ipv6_unicast_destination(skb))
1041 		return;
1042 
1043 	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
1044 	/* Invalid TCP option size or twice included auth */
1045 	if (tcp_parse_auth_options(th, &md5_hash_location, &aoh))
1046 		return;
1047 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
1048 	rcu_read_lock();
1049 #endif
1050 #ifdef CONFIG_TCP_MD5SIG
1051 	if (sk && sk_fullsock(sk)) {
1052 		int l3index;
1053 
1054 		/* sdif set, means packet ingressed via a device
1055 		 * in an L3 domain and inet_iif is set to it.
1056 		 */
1057 		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1058 		key.md5_key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
1059 		if (key.md5_key)
1060 			key.type = TCP_KEY_MD5;
1061 	} else if (md5_hash_location) {
1062 		int dif = tcp_v6_iif_l3_slave(skb);
1063 		int sdif = tcp_v6_sdif(skb);
1064 		int l3index;
1065 
1066 		/*
1067 		 * The active side is lost. Try to find the listening socket
1068 		 * through the source port, and then find the md5 key through
1069 		 * the listening socket. We do not lose security here:
1070 		 * the incoming packet is checked against the md5 hash of the
1071 		 * found key, and no RST is generated if the hash doesn't match.
1072 		 */
1073 		sk1 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
1074 					    NULL, 0, &ipv6h->saddr, th->source,
1075 					    &ipv6h->daddr, ntohs(th->source),
1076 					    dif, sdif);
1077 		if (!sk1)
1078 			goto out;
1079 
1080 		/* sdif set, means packet ingressed via a device
1081 		 * in an L3 domain and dif is set to it.
1082 		 */
1083 		l3index = tcp_v6_sdif(skb) ? dif : 0;
1084 
1085 		key.md5_key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
1086 		if (!key.md5_key)
1087 			goto out;
1088 		key.type = TCP_KEY_MD5;
1089 
1090 		genhash = tcp_v6_md5_hash_skb(newhash, key.md5_key, NULL, skb);
1091 		if (genhash || memcmp(md5_hash_location, newhash, 16) != 0)
1092 			goto out;
1093 	}
1094 #endif
1095 
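	/* RFC 793 reset generation: if the offending segment carried an ACK,
	 * the reset takes its sequence number from that ACK value; otherwise
	 * seq stays 0 and ack_seq covers everything in the segment (its seq
	 * plus SYN/FIN flags plus payload length).
	 */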
1096 	if (th->ack)
1097 		seq = ntohl(th->ack_seq);
1098 	else
1099 		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
1100 			  (th->doff << 2);
1101 
1102 #ifdef CONFIG_TCP_AO
1103 	if (aoh) {
1104 		int l3index;
1105 
1106 		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1107 		if (tcp_ao_prepare_reset(sk, skb, aoh, l3index, seq,
1108 					 &key.ao_key, &key.traffic_key,
1109 					 &allocated_traffic_key,
1110 					 &key.rcv_next, &key.sne))
1111 			goto out;
1112 		key.type = TCP_KEY_AO;
1113 	}
1114 #endif
1115 
1116 	if (sk) {
1117 		oif = sk->sk_bound_dev_if;
1118 		if (sk_fullsock(sk)) {
1119 			if (inet6_test_bit(REPFLOW, sk))
1120 				label = ip6_flowlabel(ipv6h);
1121 			priority = READ_ONCE(sk->sk_priority);
1122 			txhash = sk->sk_txhash;
1123 		}
1124 		if (sk->sk_state == TCP_TIME_WAIT) {
1125 			label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
1126 			priority = inet_twsk(sk)->tw_priority;
1127 			txhash = inet_twsk(sk)->tw_txhash;
1128 		}
1129 	} else {
1130 		if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
1131 			label = ip6_flowlabel(ipv6h);
1132 	}
1133 
1134 	trace_tcp_send_reset(sk, skb, reason);
1135 
1136 	tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, 1,
1137 			     ipv6_get_dsfield(ipv6h), label, priority, txhash,
1138 			     &key);
1139 
1140 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
1141 out:
1142 	if (allocated_traffic_key)
1143 		kfree(key.traffic_key);
1144 	rcu_read_unlock();
1145 #endif
1146 }
1147 
1148 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
1149 			    u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
1150 			    struct tcp_key *key, u8 tclass,
1151 			    __be32 label, u32 priority, u32 txhash)
1152 {
1153 	tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, 0,
1154 			     tclass, label, priority, txhash, key);
1155 }
1156 
1157 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1158 {
1159 	struct inet_timewait_sock *tw = inet_twsk(sk);
1160 	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1161 	struct tcp_key key = {};
1162 #ifdef CONFIG_TCP_AO
1163 	struct tcp_ao_info *ao_info;
1164 
1165 	if (static_branch_unlikely(&tcp_ao_needed.key)) {
1166 
1167 		/* FIXME: the segment to-be-acked is not verified yet */
1168 		ao_info = rcu_dereference(tcptw->ao_info);
1169 		if (ao_info) {
1170 			const struct tcp_ao_hdr *aoh;
1171 
1172 			/* Invalid TCP option size or twice included auth */
1173 			if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh))
1174 				goto out;
1175 			if (aoh)
1176 				key.ao_key = tcp_ao_established_key(ao_info,
1177 						aoh->rnext_keyid, -1);
1178 		}
1179 	}
1180 	if (key.ao_key) {
1181 		struct tcp_ao_key *rnext_key;
1182 
1183 		key.traffic_key = snd_other_key(key.ao_key);
1184 		/* rcv_next switches to our rcv_next */
1185 		rnext_key = READ_ONCE(ao_info->rnext_key);
1186 		key.rcv_next = rnext_key->rcvid;
1187 		key.sne = READ_ONCE(ao_info->snd_sne);
1188 		key.type = TCP_KEY_AO;
1189 #else
1190 	if (0) {
1191 #endif
1192 #ifdef CONFIG_TCP_MD5SIG
1193 	} else if (static_branch_unlikely(&tcp_md5_needed.key)) {
1194 		key.md5_key = tcp_twsk_md5_key(tcptw);
1195 		if (key.md5_key)
1196 			key.type = TCP_KEY_MD5;
1197 #endif
1198 	}
1199 
1200 	tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1201 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1202 			tcp_tw_tsval(tcptw),
1203 			tcptw->tw_ts_recent, tw->tw_bound_dev_if, &key,
1204 			tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority,
1205 			tw->tw_txhash);
1206 
1207 #ifdef CONFIG_TCP_AO
1208 out:
1209 #endif
1210 	inet_twsk_put(tw);
1211 }
1212 
1213 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
1214 				  struct request_sock *req)
1215 {
1216 	struct tcp_key key = {};
1217 
1218 #ifdef CONFIG_TCP_AO
1219 	if (static_branch_unlikely(&tcp_ao_needed.key) &&
1220 	    tcp_rsk_used_ao(req)) {
1221 		const struct in6_addr *addr = &ipv6_hdr(skb)->saddr;
1222 		const struct tcp_ao_hdr *aoh;
1223 		int l3index;
1224 
1225 		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1226 		/* Invalid TCP option size or twice included auth */
1227 		if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh))
1228 			return;
1229 		if (!aoh)
1230 			return;
1231 		key.ao_key = tcp_ao_do_lookup(sk, l3index,
1232 					      (union tcp_ao_addr *)addr,
1233 					      AF_INET6, aoh->rnext_keyid, -1);
1234 		if (unlikely(!key.ao_key)) {
1235 			/* Send ACK with any matching MKT for the peer */
1236 			key.ao_key = tcp_ao_do_lookup(sk, l3index,
1237 						      (union tcp_ao_addr *)addr,
1238 						      AF_INET6, -1, -1);
1239 			/* Matching key disappeared (user removed the key?)
1240 			 * let the handshake time out.
1241 			 */
1242 			if (!key.ao_key) {
1243 				net_info_ratelimited("TCP-AO key for (%pI6, %d)->(%pI6, %d) suddenly disappeared, won't ACK new connection\n",
1244 						     addr,
1245 						     ntohs(tcp_hdr(skb)->source),
1246 						     &ipv6_hdr(skb)->daddr,
1247 						     ntohs(tcp_hdr(skb)->dest));
1248 				return;
1249 			}
1250 		}
1251 		key.traffic_key = kmalloc(tcp_ao_digest_size(key.ao_key), GFP_ATOMIC);
1252 		if (!key.traffic_key)
1253 			return;
1254 
1255 		key.type = TCP_KEY_AO;
1256 		key.rcv_next = aoh->keyid;
1257 		tcp_v6_ao_calc_key_rsk(key.ao_key, key.traffic_key, req);
1258 #else
1259 	if (0) {
1260 #endif
1261 #ifdef CONFIG_TCP_MD5SIG
1262 	} else if (static_branch_unlikely(&tcp_md5_needed.key)) {
1263 		int l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1264 
1265 		key.md5_key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr,
1266 						   l3index);
1267 		if (key.md5_key)
1268 			key.type = TCP_KEY_MD5;
1269 #endif
1270 	}
1271 
1272 	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
1273 	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
1274 	 */
1275 	tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
1276 			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
1277 			tcp_rsk(req)->rcv_nxt,
1278 			tcp_synack_window(req) >> inet_rsk(req)->rcv_wscale,
1279 			tcp_rsk_tsval(tcp_rsk(req)),
1280 			READ_ONCE(req->ts_recent), sk->sk_bound_dev_if,
1281 			&key, ipv6_get_dsfield(ipv6_hdr(skb)), 0,
1282 			READ_ONCE(sk->sk_priority),
1283 			READ_ONCE(tcp_rsk(req)->txhash));
1284 	if (tcp_key_is_ao(&key))
1285 		kfree(key.traffic_key);
1286 }
1287 
1288 
1289 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
1290 {
1291 #ifdef CONFIG_SYN_COOKIES
1292 	const struct tcphdr *th = tcp_hdr(skb);
1293 
1294 	if (!th->syn)
1295 		sk = cookie_v6_check(sk, skb);
1296 #endif
1297 	return sk;
1298 }
1299 
1300 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
1301 			 struct tcphdr *th, u32 *cookie)
1302 {
1303 	u16 mss = 0;
1304 #ifdef CONFIG_SYN_COOKIES
1305 	mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
1306 				    &tcp_request_sock_ipv6_ops, sk, th);
1307 	if (mss) {
1308 		*cookie = __cookie_v6_init_sequence(iph, th, &mss);
1309 		tcp_synq_overflow(sk);
1310 	}
1311 #endif
1312 	return mss;
1313 }
1314 
1315 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1316 {
1317 	if (skb->protocol == htons(ETH_P_IP))
1318 		return tcp_v4_conn_request(sk, skb);
1319 
1320 	if (!ipv6_unicast_destination(skb))
1321 		goto drop;
1322 
1323 	if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
1324 		__IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
1325 		return 0;
1326 	}
1327 
1328 	return tcp_conn_request(&tcp6_request_sock_ops,
1329 				&tcp_request_sock_ipv6_ops, sk, skb);
1330 
1331 drop:
1332 	tcp_listendrop(sk);
1333 	return 0; /* don't send reset */
1334 }
1335 
1336 static void tcp_v6_restore_cb(struct sk_buff *skb)
1337 {
1338 	/* We need to move header back to the beginning if xfrm6_policy_check()
1339 	 * and tcp_v6_fill_cb() are going to be called again.
1340 	 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
1341 	 */
1342 	memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1343 		sizeof(struct inet6_skb_parm));
1344 }
1345 
1346 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1347 					 struct request_sock *req,
1348 					 struct dst_entry *dst,
1349 					 struct request_sock *req_unhash,
1350 					 bool *own_req)
1351 {
1352 	struct inet_request_sock *ireq;
1353 	struct ipv6_pinfo *newnp;
1354 	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1355 	struct ipv6_txoptions *opt;
1356 	struct inet_sock *newinet;
1357 	bool found_dup_sk = false;
1358 	struct tcp_sock *newtp;
1359 	struct sock *newsk;
1360 #ifdef CONFIG_TCP_MD5SIG
1361 	struct tcp_md5sig_key *key;
1362 	int l3index;
1363 #endif
1364 	struct flowi6 fl6;
1365 
1366 	if (skb->protocol == htons(ETH_P_IP)) {
1367 		/*
1368 		 *	v6 mapped
1369 		 */
1370 
1371 		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
1372 					     req_unhash, own_req);
1373 
1374 		if (!newsk)
1375 			return NULL;
1376 
1377 		inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1378 
1379 		newnp = tcp_inet6_sk(newsk);
1380 		newtp = tcp_sk(newsk);
1381 
1382 		memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1383 
1384 		newnp->saddr = newsk->sk_v6_rcv_saddr;
1385 
1386 		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1387 		if (sk_is_mptcp(newsk))
1388 			mptcpv6_handle_mapped(newsk, true);
1389 		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1390 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
1391 		newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1392 #endif
1393 
1394 		newnp->ipv6_mc_list = NULL;
1395 		newnp->ipv6_ac_list = NULL;
1396 		newnp->ipv6_fl_list = NULL;
1397 		newnp->pktoptions  = NULL;
1398 		newnp->opt	   = NULL;
1399 		newnp->mcast_oif   = inet_iif(skb);
1400 		newnp->mcast_hops  = ip_hdr(skb)->ttl;
1401 		newnp->rcv_flowinfo = 0;
1402 		if (inet6_test_bit(REPFLOW, sk))
1403 			newnp->flow_label = 0;
1404 
1405 		/*
1406 		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1407 		 * here, tcp_create_openreq_child now does this for us, see the comment in
1408 		 * that function for the gory details. -acme
1409 		 */
1410 
1411 		/* It is a tricky place. Until this moment IPv4 tcp
1412 		   worked with IPv6 icsk.icsk_af_ops.
1413 		   Sync it now.
1414 		 */
1415 		tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1416 
1417 		return newsk;
1418 	}
1419 
1420 	ireq = inet_rsk(req);
1421 
1422 	if (sk_acceptq_is_full(sk))
1423 		goto out_overflow;
1424 
1425 	if (!dst) {
1426 		dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1427 		if (!dst)
1428 			goto out;
1429 	}
1430 
1431 	newsk = tcp_create_openreq_child(sk, req, skb);
1432 	if (!newsk)
1433 		goto out_nonewsk;
1434 
1435 	/*
1436 	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1437 	 * count here, tcp_create_openreq_child now does this for us, see the
1438 	 * comment in that function for the gory details. -acme
1439 	 */
1440 
1441 	newsk->sk_gso_type = SKB_GSO_TCPV6;
1442 	ip6_dst_store(newsk, dst, NULL, NULL);
1443 	inet6_sk_rx_dst_set(newsk, skb);
1444 
1445 	inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1446 
1447 	newtp = tcp_sk(newsk);
1448 	newinet = inet_sk(newsk);
1449 	newnp = tcp_inet6_sk(newsk);
1450 
1451 	memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1452 
1453 	newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1454 	newnp->saddr = ireq->ir_v6_loc_addr;
1455 	newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1456 	newsk->sk_bound_dev_if = ireq->ir_iif;
1457 
1458 	/* Now IPv6 options...
1459 
1460 	   First: no IPv4 options.
1461 	 */
1462 	newinet->inet_opt = NULL;
1463 	newnp->ipv6_mc_list = NULL;
1464 	newnp->ipv6_ac_list = NULL;
1465 	newnp->ipv6_fl_list = NULL;
1466 
1467 	/* Clone RX bits */
1468 	newnp->rxopt.all = np->rxopt.all;
1469 
1470 	newnp->pktoptions = NULL;
1471 	newnp->opt	  = NULL;
1472 	newnp->mcast_oif  = tcp_v6_iif(skb);
1473 	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1474 	newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1475 	if (inet6_test_bit(REPFLOW, sk))
1476 		newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1477 
1478 	/* Set ToS of the new socket based upon the value of incoming SYN.
1479 	 * ECT bits are set later in tcp_init_transfer().
1480 	 */
1481 	if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
1482 		newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
1483 
1484 	/* Clone native IPv6 options from listening socket (if any)
1485 
1486 	   Yes, keeping a reference count would be much more clever,
1487 	   but we do one more thing here: reattach the optmem
1488 	   to newsk.
1489 	 */
1490 	opt = ireq->ipv6_opt;
1491 	if (!opt)
1492 		opt = rcu_dereference(np->opt);
1493 	if (opt) {
1494 		opt = ipv6_dup_options(newsk, opt);
1495 		RCU_INIT_POINTER(newnp->opt, opt);
1496 	}
1497 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
1498 	if (opt)
1499 		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
1500 						    opt->opt_flen;
1501 
1502 	tcp_ca_openreq_child(newsk, dst);
1503 
1504 	tcp_sync_mss(newsk, dst_mtu(dst));
1505 	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1506 
1507 	tcp_initialize_rcv_mss(newsk);
1508 
1509 	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1510 	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1511 
1512 #ifdef CONFIG_TCP_MD5SIG
1513 	l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
1514 
1515 	if (!tcp_rsk_used_ao(req)) {
1516 		/* Copy over the MD5 key from the original socket */
1517 		key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
1518 		if (key) {
1519 			const union tcp_md5_addr *addr;
1520 
1521 			addr = (union tcp_md5_addr *)&newsk->sk_v6_daddr;
1522 			if (tcp_md5_key_copy(newsk, addr, AF_INET6, 128, l3index, key)) {
1523 				inet_csk_prepare_forced_close(newsk);
1524 				tcp_done(newsk);
1525 				goto out;
1526 			}
1527 		}
1528 	}
1529 #endif
1530 #ifdef CONFIG_TCP_AO
1531 	/* Copy over tcp_ao_info if any */
1532 	if (tcp_ao_copy_all_matching(sk, newsk, req, skb, AF_INET6))
1533 		goto out; /* OOM */
1534 #endif
1535 
1536 	if (__inet_inherit_port(sk, newsk) < 0) {
1537 		inet_csk_prepare_forced_close(newsk);
1538 		tcp_done(newsk);
1539 		goto out;
1540 	}
1541 	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
1542 				       &found_dup_sk);
1543 	if (*own_req) {
1544 		tcp_move_syn(newtp, req);
1545 
1546 		/* Clone pktoptions received with SYN, if we own the req */
1547 		if (ireq->pktopts) {
1548 			newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk);
1549 			consume_skb(ireq->pktopts);
1550 			ireq->pktopts = NULL;
1551 			if (newnp->pktoptions)
1552 				tcp_v6_restore_cb(newnp->pktoptions);
1553 		}
1554 	} else {
1555 		if (!req_unhash && found_dup_sk) {
1556 			/* This code path should be executed only in the
1557 			 * syncookie case
1558 			 */
1559 			bh_unlock_sock(newsk);
1560 			sock_put(newsk);
1561 			newsk = NULL;
1562 		}
1563 	}
1564 
1565 	return newsk;
1566 
1567 out_overflow:
1568 	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1569 out_nonewsk:
1570 	dst_release(dst);
1571 out:
1572 	tcp_listendrop(sk);
1573 	return NULL;
1574 }
1575 
1576 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
1577 							   u32));
1578 /* The socket must have its spinlock held when we get
1579  * here, unless it is a TCP_LISTEN socket.
1580  *
1581  * We have a potential double-lock case here, so even when
1582  * doing backlog processing we use the BH locking scheme.
1583  * This is because we cannot sleep with the original spinlock
1584  * held.
1585  */
1586 INDIRECT_CALLABLE_SCOPE
1587 int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1588 {
1589 	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1590 	struct sk_buff *opt_skb = NULL;
1591 	enum skb_drop_reason reason;
1592 	struct tcp_sock *tp;
1593 
1594 	/* Imagine: socket is IPv6. IPv4 packet arrives,
1595 	   goes to the IPv4 receive handler and is backlogged.
1596 	   From backlog it always goes here. Kerboom...
1597 	   Fortunately, tcp_rcv_established and rcv_established
1598 	   handle them correctly, but it is not the case with
1599 	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
1600 	 */
1601 
1602 	if (skb->protocol == htons(ETH_P_IP))
1603 		return tcp_v4_do_rcv(sk, skb);
1604 
1605 	/*
1606 	 *	socket locking is here for SMP purposes as backlog rcv
1607 	 *	is currently called with bh processing disabled.
1608 	 */
1609 
1610 	/* Do Stevens' IPV6_PKTOPTIONS.
1611 
1612 	   Yes, guys, it is the only place in our code, where we
1613 	   may make it not affecting IPv4.
1614 	   The rest of code is protocol independent,
1615 	   and I do not like idea to uglify IPv4.
1616 
1617 	   Actually, the whole idea behind IPV6_PKTOPTIONS
1618 	   does not look very well thought out. For now we latch
1619 	   options, received in the last packet, enqueued
1620 	   by tcp. Feel free to propose better solution.
1621 					       --ANK (980728)
1622 	 */
1623 	if (np->rxopt.all)
1624 		opt_skb = skb_clone_and_charge_r(skb, sk);
1625 
1626 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1627 		struct dst_entry *dst;
1628 
1629 		dst = rcu_dereference_protected(sk->sk_rx_dst,
1630 						lockdep_sock_is_held(sk));
1631 
1632 		sock_rps_save_rxhash(sk, skb);
1633 		sk_mark_napi_id(sk, skb);
1634 		if (dst) {
1635 			if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
1636 			    INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
1637 					    dst, sk->sk_rx_dst_cookie) == NULL) {
1638 				RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
1639 				dst_release(dst);
1640 			}
1641 		}
1642 
1643 		tcp_rcv_established(sk, skb);
1644 		if (opt_skb)
1645 			goto ipv6_pktoptions;
1646 		return 0;
1647 	}
1648 
1649 	if (tcp_checksum_complete(skb))
1650 		goto csum_err;
1651 
1652 	if (sk->sk_state == TCP_LISTEN) {
1653 		struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1654 
1655 		if (nsk != sk) {
1656 			if (nsk) {
1657 				reason = tcp_child_process(sk, nsk, skb);
1658 				if (reason)
1659 					goto reset;
1660 			}
1661 			if (opt_skb)
1662 				__kfree_skb(opt_skb);
1663 			return 0;
1664 		}
1665 	} else
1666 		sock_rps_save_rxhash(sk, skb);
1667 
1668 	reason = tcp_rcv_state_process(sk, skb);
1669 	if (reason)
1670 		goto reset;
1671 	if (opt_skb)
1672 		goto ipv6_pktoptions;
1673 	return 0;
1674 
1675 reset:
1676 	tcp_v6_send_reset(sk, skb, sk_rst_convert_drop_reason(reason));
1677 discard:
1678 	if (opt_skb)
1679 		__kfree_skb(opt_skb);
1680 	kfree_skb_reason(skb, reason);
1681 	return 0;
1682 csum_err:
1683 	reason = SKB_DROP_REASON_TCP_CSUM;
1684 	trace_tcp_bad_csum(skb);
1685 	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1686 	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1687 	goto discard;
1688 
1689 
1690 ipv6_pktoptions:
1691 	/* Do you ask, what is it?
1692 
1693 	   1. skb was enqueued by tcp.
1694 	   2. skb is added to tail of read queue, rather than out of order.
1695 	   3. socket is not in passive state.
1696 	   4. Finally, it really contains options, which user wants to receive.
1697 	 */
1698 	tp = tcp_sk(sk);
1699 	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1700 	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1701 		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1702 			WRITE_ONCE(np->mcast_oif, tcp_v6_iif(opt_skb));
1703 		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1704 			WRITE_ONCE(np->mcast_hops,
1705 				   ipv6_hdr(opt_skb)->hop_limit);
1706 		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1707 			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1708 		if (inet6_test_bit(REPFLOW, sk))
1709 			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1710 		if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1711 			tcp_v6_restore_cb(opt_skb);
1712 			opt_skb = xchg(&np->pktoptions, opt_skb);
1713 		} else {
1714 			__kfree_skb(opt_skb);
1715 			opt_skb = xchg(&np->pktoptions, NULL);
1716 		}
1717 	}
1718 
1719 	consume_skb(opt_skb);
1720 	return 0;
1721 }
1722 
1723 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1724 			   const struct tcphdr *th)
1725 {
1726 	/* This is tricky: we move IP6CB at its correct location into
1727 	 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1728 	 * _decode_session6() uses IP6CB().
1729 	 * barrier() makes sure compiler won't play aliasing games.
1730 	 */
1731 	memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1732 		sizeof(struct inet6_skb_parm));
1733 	barrier();
1734 
1735 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1736 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1737 				    skb->len - th->doff*4);
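	/* e.g. a bare SYN with no payload ends up with end_seq == seq + 1 */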
1738 	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1739 	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1740 	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1741 	TCP_SKB_CB(skb)->sacked = 0;
1742 	TCP_SKB_CB(skb)->has_rxtstamp =
1743 			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1744 }
1745 
1746 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1747 {
1748 	enum skb_drop_reason drop_reason;
1749 	int sdif = inet6_sdif(skb);
1750 	int dif = inet6_iif(skb);
1751 	const struct tcphdr *th;
1752 	const struct ipv6hdr *hdr;
1753 	bool refcounted;
1754 	struct sock *sk;
1755 	int ret;
1756 	u32 isn;
1757 	struct net *net = dev_net(skb->dev);
1758 
1759 	drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
1760 	if (skb->pkt_type != PACKET_HOST)
1761 		goto discard_it;
1762 
1763 	/*
1764 	 *	Count it even if it's bad.
1765 	 */
1766 	__TCP_INC_STATS(net, TCP_MIB_INSEGS);
1767 
1768 	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1769 		goto discard_it;
1770 
1771 	th = (const struct tcphdr *)skb->data;
1772 
1773 	if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) {
1774 		drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
1775 		goto bad_packet;
1776 	}
1777 	if (!pskb_may_pull(skb, th->doff*4))
1778 		goto discard_it;
1779 
1780 	if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1781 		goto csum_error;
1782 
1783 	th = (const struct tcphdr *)skb->data;
1784 	hdr = ipv6_hdr(skb);
1785 
1786 lookup:
1787 	sk = __inet6_lookup_skb(net->ipv4.tcp_death_row.hashinfo, skb, __tcp_hdrlen(th),
1788 				th->source, th->dest, inet6_iif(skb), sdif,
1789 				&refcounted);
1790 	if (!sk)
1791 		goto no_tcp_socket;
1792 
1793 	if (sk->sk_state == TCP_TIME_WAIT)
1794 		goto do_time_wait;
1795 
1796 	if (sk->sk_state == TCP_NEW_SYN_RECV) {
1797 		struct request_sock *req = inet_reqsk(sk);
1798 		bool req_stolen = false;
1799 		struct sock *nsk;
1800 
1801 		sk = req->rsk_listener;
1802 		if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1803 			drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1804 		else
1805 			drop_reason = tcp_inbound_hash(sk, req, skb,
1806 						       &hdr->saddr, &hdr->daddr,
1807 						       AF_INET6, dif, sdif);
1808 		if (drop_reason) {
1809 			sk_drops_add(sk, skb);
1810 			reqsk_put(req);
1811 			goto discard_it;
1812 		}
1813 		if (tcp_checksum_complete(skb)) {
1814 			reqsk_put(req);
1815 			goto csum_error;
1816 		}
1817 		if (unlikely(sk->sk_state != TCP_LISTEN)) {
1818 			nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
1819 			if (!nsk) {
1820 				inet_csk_reqsk_queue_drop_and_put(sk, req);
1821 				goto lookup;
1822 			}
1823 			sk = nsk;
1824 			/* reuseport_migrate_sock() has already taken one sk_refcnt
1825 			 * reference before returning.
1826 			 */
1827 		} else {
1828 			sock_hold(sk);
1829 		}
1830 		refcounted = true;
1831 		nsk = NULL;
1832 		if (!tcp_filter(sk, skb)) {
1833 			th = (const struct tcphdr *)skb->data;
1834 			hdr = ipv6_hdr(skb);
1835 			tcp_v6_fill_cb(skb, hdr, th);
1836 			nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1837 		} else {
1838 			drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
1839 		}
1840 		if (!nsk) {
1841 			reqsk_put(req);
1842 			if (req_stolen) {
1843 				/* Another CPU got exclusive access to req
1844 				 * and created a full-blown socket.
1845 				 * Try to feed this packet to that socket
1846 				 * instead of discarding it.
1847 				 */
1848 				tcp_v6_restore_cb(skb);
1849 				sock_put(sk);
1850 				goto lookup;
1851 			}
1852 			goto discard_and_relse;
1853 		}
1854 		nf_reset_ct(skb);
1855 		if (nsk == sk) {
1856 			reqsk_put(req);
1857 			tcp_v6_restore_cb(skb);
1858 		} else {
1859 			drop_reason = tcp_child_process(sk, nsk, skb);
1860 			if (drop_reason) {
1861 				enum sk_rst_reason rst_reason;
1862 
1863 				rst_reason = sk_rst_convert_drop_reason(drop_reason);
1864 				tcp_v6_send_reset(nsk, skb, rst_reason);
1865 				goto discard_and_relse;
1866 			}
1867 			sock_put(sk);
1868 			return 0;
1869 		}
1870 	}
1871 
1872 process:
1873 	if (static_branch_unlikely(&ip6_min_hopcount)) {
1874 		/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
1875 		if (unlikely(hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount))) {
1876 			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1877 			drop_reason = SKB_DROP_REASON_TCP_MINTTL;
1878 			goto discard_and_relse;
1879 		}
1880 	}
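	/* The check above implements the IPV6_MINHOPCOUNT socket option
	 * (GTSM-style protection, cf. RFC 5082): segments whose hop limit is
	 * below the configured minimum are counted and dropped. Illustrative
	 * userspace use (the value 255 is just the usual GTSM choice):
	 *
	 *	int min = 255;
	 *	setsockopt(fd, IPPROTO_IPV6, IPV6_MINHOPCOUNT, &min, sizeof(min));
	 */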
1881 
1882 	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
1883 		drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1884 		goto discard_and_relse;
1885 	}
1886 
1887 	drop_reason = tcp_inbound_hash(sk, NULL, skb, &hdr->saddr, &hdr->daddr,
1888 				       AF_INET6, dif, sdif);
1889 	if (drop_reason)
1890 		goto discard_and_relse;
1891 
1892 	nf_reset_ct(skb);
1893 
1894 	if (tcp_filter(sk, skb)) {
1895 		drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
1896 		goto discard_and_relse;
1897 	}
1898 	th = (const struct tcphdr *)skb->data;
1899 	hdr = ipv6_hdr(skb);
1900 	tcp_v6_fill_cb(skb, hdr, th);
1901 
1902 	skb->dev = NULL;
1903 
1904 	if (sk->sk_state == TCP_LISTEN) {
1905 		ret = tcp_v6_do_rcv(sk, skb);
1906 		goto put_and_return;
1907 	}
1908 
1909 	sk_incoming_cpu_update(sk);
1910 
1911 	bh_lock_sock_nested(sk);
1912 	tcp_segs_in(tcp_sk(sk), skb);
1913 	ret = 0;
1914 	if (!sock_owned_by_user(sk)) {
1915 		ret = tcp_v6_do_rcv(sk, skb);
1916 	} else {
1917 		if (tcp_add_backlog(sk, skb, &drop_reason))
1918 			goto discard_and_relse;
1919 	}
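	/* If the socket is owned by a user context, the segment was queued on
	 * the socket backlog above; it is fed back through tcp_v6_do_rcv()
	 * (the .backlog_rcv handler of tcpv6_prot below) when the owner
	 * releases the socket lock.
	 */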
1920 	bh_unlock_sock(sk);
1921 put_and_return:
1922 	if (refcounted)
1923 		sock_put(sk);
1924 	return ret ? -1 : 0;
1925 
1926 no_tcp_socket:
1927 	drop_reason = SKB_DROP_REASON_NO_SOCKET;
1928 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1929 		goto discard_it;
1930 
1931 	tcp_v6_fill_cb(skb, hdr, th);
1932 
1933 	if (tcp_checksum_complete(skb)) {
1934 csum_error:
1935 		drop_reason = SKB_DROP_REASON_TCP_CSUM;
1936 		trace_tcp_bad_csum(skb);
1937 		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1938 bad_packet:
1939 		__TCP_INC_STATS(net, TCP_MIB_INERRS);
1940 	} else {
1941 		tcp_v6_send_reset(NULL, skb, sk_rst_convert_drop_reason(drop_reason));
1942 	}
1943 
1944 discard_it:
1945 	SKB_DR_OR(drop_reason, NOT_SPECIFIED);
1946 	kfree_skb_reason(skb, drop_reason);
1947 	return 0;
1948 
1949 discard_and_relse:
1950 	sk_drops_add(sk, skb);
1951 	if (refcounted)
1952 		sock_put(sk);
1953 	goto discard_it;
1954 
1955 do_time_wait:
1956 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1957 		drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1958 		inet_twsk_put(inet_twsk(sk));
1959 		goto discard_it;
1960 	}
1961 
1962 	tcp_v6_fill_cb(skb, hdr, th);
1963 
1964 	if (tcp_checksum_complete(skb)) {
1965 		inet_twsk_put(inet_twsk(sk));
1966 		goto csum_error;
1967 	}
1968 
1969 	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn)) {
1970 	case TCP_TW_SYN:
1971 	{
1972 		struct sock *sk2;
1973 
1974 		sk2 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
1975 					    skb, __tcp_hdrlen(th),
1976 					    &ipv6_hdr(skb)->saddr, th->source,
1977 					    &ipv6_hdr(skb)->daddr,
1978 					    ntohs(th->dest),
1979 					    tcp_v6_iif_l3_slave(skb),
1980 					    sdif);
1981 		if (sk2) {
1982 			struct inet_timewait_sock *tw = inet_twsk(sk);
1983 			inet_twsk_deschedule_put(tw);
1984 			sk = sk2;
1985 			tcp_v6_restore_cb(skb);
1986 			refcounted = false;
1987 			__this_cpu_write(tcp_tw_isn, isn);
1988 			goto process;
1989 		}
1990 	}
1991 		/* to ACK */
1992 		fallthrough;
1993 	case TCP_TW_ACK:
1994 		tcp_v6_timewait_ack(sk, skb);
1995 		break;
1996 	case TCP_TW_RST:
1997 		tcp_v6_send_reset(sk, skb, SK_RST_REASON_TCP_TIMEWAIT_SOCKET);
1998 		inet_twsk_deschedule_put(inet_twsk(sk));
1999 		goto discard_it;
2000 	case TCP_TW_SUCCESS:
2001 		;
2002 	}
2003 	goto discard_it;
2004 }
2005 
2006 void tcp_v6_early_demux(struct sk_buff *skb)
2007 {
2008 	struct net *net = dev_net(skb->dev);
2009 	const struct ipv6hdr *hdr;
2010 	const struct tcphdr *th;
2011 	struct sock *sk;
2012 
2013 	if (skb->pkt_type != PACKET_HOST)
2014 		return;
2015 
2016 	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
2017 		return;
2018 
2019 	hdr = ipv6_hdr(skb);
2020 	th = tcp_hdr(skb);
2021 
2022 	if (th->doff < sizeof(struct tcphdr) / 4)
2023 		return;
2024 
2025 	/* Note: we use inet6_iif() here, not tcp_v6_iif() */
2026 	sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
2027 					&hdr->saddr, th->source,
2028 					&hdr->daddr, ntohs(th->dest),
2029 					inet6_iif(skb), inet6_sdif(skb));
2030 	if (sk) {
2031 		skb->sk = sk;
2032 		skb->destructor = sock_edemux;
2033 		if (sk_fullsock(sk)) {
2034 			struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
2035 
2036 			if (dst)
2037 				dst = dst_check(dst, sk->sk_rx_dst_cookie);
2038 			if (dst &&
2039 			    sk->sk_rx_dst_ifindex == skb->skb_iif)
2040 				skb_dst_set_noref(skb, dst);
2041 		}
2042 	}
2043 }
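
/* Early demux runs from the IPv6 input path before routing: if an
 * established socket is found, the packet borrows the socket's cached
 * rx dst (validated against sk_rx_dst_cookie and the incoming ifindex),
 * saving a full route lookup per packet. It is typically gated by the
 * net.ipv4.ip_early_demux and net.ipv4.tcp_early_demux sysctls.
 */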
2044 
2045 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
2046 	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
2047 	.twsk_destructor = tcp_twsk_destructor,
2048 };
2049 
2050 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
2051 {
2052 	__tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr);
2053 }
2054 
2055 const struct inet_connection_sock_af_ops ipv6_specific = {
2056 	.queue_xmit	   = inet6_csk_xmit,
2057 	.send_check	   = tcp_v6_send_check,
2058 	.rebuild_header	   = inet6_sk_rebuild_header,
2059 	.sk_rx_dst_set	   = inet6_sk_rx_dst_set,
2060 	.conn_request	   = tcp_v6_conn_request,
2061 	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
2062 	.net_header_len	   = sizeof(struct ipv6hdr),
2063 	.setsockopt	   = ipv6_setsockopt,
2064 	.getsockopt	   = ipv6_getsockopt,
2065 	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
2066 	.sockaddr_len	   = sizeof(struct sockaddr_in6),
2067 	.mtu_reduced	   = tcp_v6_mtu_reduced,
2068 };
2069 
2070 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
2071 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
2072 #ifdef CONFIG_TCP_MD5SIG
2073 	.md5_lookup	=	tcp_v6_md5_lookup,
2074 	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
2075 	.md5_parse	=	tcp_v6_parse_md5_keys,
2076 #endif
2077 #ifdef CONFIG_TCP_AO
2078 	.ao_lookup	=	tcp_v6_ao_lookup,
2079 	.calc_ao_hash	=	tcp_v6_ao_hash_skb,
2080 	.ao_parse	=	tcp_v6_parse_ao,
2081 	.ao_calc_key_sk	=	tcp_v6_ao_calc_key_sk,
2082 #endif
2083 };
2084 #endif
2085 
2086 /*
2087  *	TCP over IPv4 via INET6 API
2088  */
2089 static const struct inet_connection_sock_af_ops ipv6_mapped = {
2090 	.queue_xmit	   = ip_queue_xmit,
2091 	.send_check	   = tcp_v4_send_check,
2092 	.rebuild_header	   = inet_sk_rebuild_header,
2093 	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
2094 	.conn_request	   = tcp_v6_conn_request,
2095 	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
2096 	.net_header_len	   = sizeof(struct iphdr),
2097 	.setsockopt	   = ipv6_setsockopt,
2098 	.getsockopt	   = ipv6_getsockopt,
2099 	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
2100 	.sockaddr_len	   = sizeof(struct sockaddr_in6),
2101 	.mtu_reduced	   = tcp_v4_mtu_reduced,
2102 };
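
/* ipv6_mapped is never the default; tcp_v6_connect() switches an AF_INET6
 * socket over to it when the peer address is a v4-mapped one such as
 * ::ffff:192.0.2.1, so the connection then uses the IPv4 transmit path
 * (ip_queue_xmit, struct iphdr) while keeping the IPv6 sockaddr format.
 */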
2103 
2104 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
2105 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
2106 #ifdef CONFIG_TCP_MD5SIG
2107 	.md5_lookup	=	tcp_v4_md5_lookup,
2108 	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
2109 	.md5_parse	=	tcp_v6_parse_md5_keys,
2110 #endif
2111 #ifdef CONFIG_TCP_AO
2112 	.ao_lookup	=	tcp_v6_ao_lookup,
2113 	.calc_ao_hash	=	tcp_v4_ao_hash_skb,
2114 	.ao_parse	=	tcp_v6_parse_ao,
2115 	.ao_calc_key_sk	=	tcp_v4_ao_calc_key_sk,
2116 #endif
2117 };
2118 #endif
2119 
2120 /* NOTE: A lot of fields are already zeroed explicitly by the call to
2121  *       sk_alloc(), so they need not be initialized here.
2122  */
2123 static int tcp_v6_init_sock(struct sock *sk)
2124 {
2125 	struct inet_connection_sock *icsk = inet_csk(sk);
2126 
2127 	tcp_init_sock(sk);
2128 
2129 	icsk->icsk_af_ops = &ipv6_specific;
2130 
2131 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
2132 	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
2133 #endif
2134 
2135 	return 0;
2136 }
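
/* The af_specific pointer set here selects the IPv6 MD5/AO helpers above;
 * for a v4-mapped destination, tcp_v6_connect() later swaps in
 * tcp_sock_ipv6_mapped_specific together with ipv6_mapped.
 */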
2137 
2138 #ifdef CONFIG_PROC_FS
2139 /* Proc filesystem TCPv6 sock list dumping. */
2140 static void get_openreq6(struct seq_file *seq,
2141 			 const struct request_sock *req, int i)
2142 {
2143 	long ttd = req->rsk_timer.expires - jiffies;
2144 	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
2145 	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
2146 
2147 	if (ttd < 0)
2148 		ttd = 0;
2149 
2150 	seq_printf(seq,
2151 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2152 		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
2153 		   i,
2154 		   src->s6_addr32[0], src->s6_addr32[1],
2155 		   src->s6_addr32[2], src->s6_addr32[3],
2156 		   inet_rsk(req)->ir_num,
2157 		   dest->s6_addr32[0], dest->s6_addr32[1],
2158 		   dest->s6_addr32[2], dest->s6_addr32[3],
2159 		   ntohs(inet_rsk(req)->ir_rmt_port),
2160 		   TCP_SYN_RECV,
2161 		   0, 0, /* could print option size, but that is af dependent. */
2162 		   1,   /* timers active (only the expire timer) */
2163 		   jiffies_to_clock_t(ttd),
2164 		   req->num_timeout,
2165 		   from_kuid_munged(seq_user_ns(seq),
2166 				    sock_i_uid(req->rsk_listener)),
2167 		   0,  /* non standard timer */
2168 		   0, /* open_requests have no inode */
2169 		   0, req);
2170 }
2171 
2172 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
2173 {
2174 	const struct in6_addr *dest, *src;
2175 	__u16 destp, srcp;
2176 	int timer_active;
2177 	unsigned long timer_expires;
2178 	const struct inet_sock *inet = inet_sk(sp);
2179 	const struct tcp_sock *tp = tcp_sk(sp);
2180 	const struct inet_connection_sock *icsk = inet_csk(sp);
2181 	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
2182 	int rx_queue;
2183 	int state;
2184 
2185 	dest  = &sp->sk_v6_daddr;
2186 	src   = &sp->sk_v6_rcv_saddr;
2187 	destp = ntohs(inet->inet_dport);
2188 	srcp  = ntohs(inet->inet_sport);
2189 
2190 	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2191 	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
2192 	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2193 		timer_active	= 1;
2194 		timer_expires	= icsk->icsk_timeout;
2195 	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2196 		timer_active	= 4;
2197 		timer_expires	= icsk->icsk_timeout;
2198 	} else if (timer_pending(&sp->sk_timer)) {
2199 		timer_active	= 2;
2200 		timer_expires	= sp->sk_timer.expires;
2201 	} else {
2202 		timer_active	= 0;
2203 		timer_expires = jiffies;
2204 	}
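	/* timer_active uses the historical /proc/net/tcp encoding:
	 * 0 = no timer, 1 = retransmit (also loss probe / RACK reorder),
	 * 2 = keepalive timer (sk_timer), 3 = TIME_WAIT (see
	 * get_timewait6_sock()), 4 = zero window probe.
	 */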
2205 
2206 	state = inet_sk_state_load(sp);
2207 	if (state == TCP_LISTEN)
2208 		rx_queue = READ_ONCE(sp->sk_ack_backlog);
2209 	else
2210 		/* Because we don't lock the socket,
2211 		 * we might find a transient negative value.
2212 		 */
2213 		rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
2214 				      READ_ONCE(tp->copied_seq), 0);
2215 
2216 	seq_printf(seq,
2217 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2218 		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
2219 		   i,
2220 		   src->s6_addr32[0], src->s6_addr32[1],
2221 		   src->s6_addr32[2], src->s6_addr32[3], srcp,
2222 		   dest->s6_addr32[0], dest->s6_addr32[1],
2223 		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
2224 		   state,
2225 		   READ_ONCE(tp->write_seq) - tp->snd_una,
2226 		   rx_queue,
2227 		   timer_active,
2228 		   jiffies_delta_to_clock_t(timer_expires - jiffies),
2229 		   icsk->icsk_retransmits,
2230 		   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
2231 		   icsk->icsk_probes_out,
2232 		   sock_i_ino(sp),
2233 		   refcount_read(&sp->sk_refcnt), sp,
2234 		   jiffies_to_clock_t(icsk->icsk_rto),
2235 		   jiffies_to_clock_t(icsk->icsk_ack.ato),
2236 		   (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
2237 		   tcp_snd_cwnd(tp),
2238 		   state == TCP_LISTEN ?
2239 			fastopenq->max_qlen :
2240 			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
2241 		   );
2242 }
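
/* The addresses above are printed as four %08X words taken straight from
 * s6_addr32[], i.e. in host byte order within each 32-bit word. On a
 * little-endian machine, [::1]:8080 therefore shows up as
 * "00000000000000000000000001000000:1F90". A minimal userspace sketch for
 * turning such a field back into text (same-endianness host assumed;
 * `field`, `buf` and error handling are left out for brevity):
 *
 *	struct in6_addr a;
 *	unsigned int w[4], port, i;
 *
 *	sscanf(field, "%8x%8x%8x%8x:%x", &w[0], &w[1], &w[2], &w[3], &port);
 *	for (i = 0; i < 4; i++)
 *		a.s6_addr32[i] = w[i];	// restores the on-wire byte order
 *	inet_ntop(AF_INET6, &a, buf, sizeof(buf));
 */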
2243 
2244 static void get_timewait6_sock(struct seq_file *seq,
2245 			       struct inet_timewait_sock *tw, int i)
2246 {
2247 	long delta = tw->tw_timer.expires - jiffies;
2248 	const struct in6_addr *dest, *src;
2249 	__u16 destp, srcp;
2250 
2251 	dest = &tw->tw_v6_daddr;
2252 	src  = &tw->tw_v6_rcv_saddr;
2253 	destp = ntohs(tw->tw_dport);
2254 	srcp  = ntohs(tw->tw_sport);
2255 
2256 	seq_printf(seq,
2257 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2258 		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
2259 		   i,
2260 		   src->s6_addr32[0], src->s6_addr32[1],
2261 		   src->s6_addr32[2], src->s6_addr32[3], srcp,
2262 		   dest->s6_addr32[0], dest->s6_addr32[1],
2263 		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
2264 		   tw->tw_substate, 0, 0,
2265 		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2266 		   refcount_read(&tw->tw_refcnt), tw);
2267 }
2268 
2269 static int tcp6_seq_show(struct seq_file *seq, void *v)
2270 {
2271 	struct tcp_iter_state *st;
2272 	struct sock *sk = v;
2273 
2274 	if (v == SEQ_START_TOKEN) {
2275 		seq_puts(seq,
2276 			 "  sl  "
2277 			 "local_address                         "
2278 			 "remote_address                        "
2279 			 "st tx_queue rx_queue tr tm->when retrnsmt"
2280 			 "   uid  timeout inode\n");
2281 		goto out;
2282 	}
2283 	st = seq->private;
2284 
2285 	if (sk->sk_state == TCP_TIME_WAIT)
2286 		get_timewait6_sock(seq, v, st->num);
2287 	else if (sk->sk_state == TCP_NEW_SYN_RECV)
2288 		get_openreq6(seq, v, st->num);
2289 	else
2290 		get_tcp6_sock(seq, v, st->num);
2291 out:
2292 	return 0;
2293 }
2294 
2295 static const struct seq_operations tcp6_seq_ops = {
2296 	.show		= tcp6_seq_show,
2297 	.start		= tcp_seq_start,
2298 	.next		= tcp_seq_next,
2299 	.stop		= tcp_seq_stop,
2300 };
2301 
2302 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2303 	.family		= AF_INET6,
2304 };
2305 
2306 int __net_init tcp6_proc_init(struct net *net)
2307 {
2308 	if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2309 			sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
2310 		return -ENOMEM;
2311 	return 0;
2312 }
2313 
2314 void tcp6_proc_exit(struct net *net)
2315 {
2316 	remove_proc_entry("tcp6", net->proc_net);
2317 }
2318 #endif
2319 
2320 struct proto tcpv6_prot = {
2321 	.name			= "TCPv6",
2322 	.owner			= THIS_MODULE,
2323 	.close			= tcp_close,
2324 	.pre_connect		= tcp_v6_pre_connect,
2325 	.connect		= tcp_v6_connect,
2326 	.disconnect		= tcp_disconnect,
2327 	.accept			= inet_csk_accept,
2328 	.ioctl			= tcp_ioctl,
2329 	.init			= tcp_v6_init_sock,
2330 	.destroy		= tcp_v4_destroy_sock,
2331 	.shutdown		= tcp_shutdown,
2332 	.setsockopt		= tcp_setsockopt,
2333 	.getsockopt		= tcp_getsockopt,
2334 	.bpf_bypass_getsockopt	= tcp_bpf_bypass_getsockopt,
2335 	.keepalive		= tcp_set_keepalive,
2336 	.recvmsg		= tcp_recvmsg,
2337 	.sendmsg		= tcp_sendmsg,
2338 	.splice_eof		= tcp_splice_eof,
2339 	.backlog_rcv		= tcp_v6_do_rcv,
2340 	.release_cb		= tcp_release_cb,
2341 	.hash			= inet6_hash,
2342 	.unhash			= inet_unhash,
2343 	.get_port		= inet_csk_get_port,
2344 	.put_port		= inet_put_port,
2345 #ifdef CONFIG_BPF_SYSCALL
2346 	.psock_update_sk_prot	= tcp_bpf_update_proto,
2347 #endif
2348 	.enter_memory_pressure	= tcp_enter_memory_pressure,
2349 	.leave_memory_pressure	= tcp_leave_memory_pressure,
2350 	.stream_memory_free	= tcp_stream_memory_free,
2351 	.sockets_allocated	= &tcp_sockets_allocated,
2352 
2353 	.memory_allocated	= &tcp_memory_allocated,
2354 	.per_cpu_fw_alloc	= &tcp_memory_per_cpu_fw_alloc,
2355 
2356 	.memory_pressure	= &tcp_memory_pressure,
2357 	.orphan_count		= &tcp_orphan_count,
2358 	.sysctl_mem		= sysctl_tcp_mem,
2359 	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
2360 	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
2361 	.max_header		= MAX_TCP_HEADER,
2362 	.obj_size		= sizeof(struct tcp6_sock),
2363 	.ipv6_pinfo_offset	= offsetof(struct tcp6_sock, inet6),
2364 	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
2365 	.twsk_prot		= &tcp6_timewait_sock_ops,
2366 	.rsk_prot		= &tcp6_request_sock_ops,
2367 	.h.hashinfo		= NULL,
2368 	.no_autobind		= true,
2369 	.diag_destroy		= tcp_abort,
2370 };
2371 EXPORT_SYMBOL_GPL(tcpv6_prot);
2372 
2373 
2374 static struct inet_protosw tcpv6_protosw = {
2375 	.type		=	SOCK_STREAM,
2376 	.protocol	=	IPPROTO_TCP,
2377 	.prot		=	&tcpv6_prot,
2378 	.ops		=	&inet6_stream_ops,
2379 	.flags		=	INET_PROTOSW_PERMANENT |
2380 				INET_PROTOSW_ICSK,
2381 };
2382 
2383 static int __net_init tcpv6_net_init(struct net *net)
2384 {
2385 	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2386 				    SOCK_RAW, IPPROTO_TCP, net);
2387 }
2388 
2389 static void __net_exit tcpv6_net_exit(struct net *net)
2390 {
2391 	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2392 }
2393 
2394 static struct pernet_operations tcpv6_net_ops = {
2395 	.init	    = tcpv6_net_init,
2396 	.exit	    = tcpv6_net_exit,
2397 };
2398 
2399 int __init tcpv6_init(void)
2400 {
2401 	int ret;
2402 
2403 	net_hotdata.tcpv6_protocol = (struct inet6_protocol) {
2404 		.handler     = tcp_v6_rcv,
2405 		.err_handler = tcp_v6_err,
2406 		.flags	     = INET6_PROTO_NOPOLICY | INET6_PROTO_FINAL,
2407 	};
2408 	ret = inet6_add_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP);
2409 	if (ret)
2410 		goto out;
2411 
2412 	/* register inet6 protocol */
2413 	ret = inet6_register_protosw(&tcpv6_protosw);
2414 	if (ret)
2415 		goto out_tcpv6_protocol;
2416 
2417 	ret = register_pernet_subsys(&tcpv6_net_ops);
2418 	if (ret)
2419 		goto out_tcpv6_protosw;
2420 
2421 	ret = mptcpv6_init();
2422 	if (ret)
2423 		goto out_tcpv6_pernet_subsys;
2424 
2425 out:
2426 	return ret;
2427 
2428 out_tcpv6_pernet_subsys:
2429 	unregister_pernet_subsys(&tcpv6_net_ops);
2430 out_tcpv6_protosw:
2431 	inet6_unregister_protosw(&tcpv6_protosw);
2432 out_tcpv6_protocol:
2433 	inet6_del_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP);
2434 	goto out;
2435 }
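
/* The error unwinding above mirrors the registration order in reverse:
 * whatever was registered before the failing step is torn down again
 * (pernet subsys, then protosw, then the inet6 protocol handler), and the
 * common exit path returns the original error code.
 */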
2436 
2437 void tcpv6_exit(void)
2438 {
2439 	unregister_pernet_subsys(&tcpv6_net_ops);
2440 	inet6_unregister_protosw(&tcpv6_protosw);
2441 	inet6_del_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP);
2442 }
2443