1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * IPv6 output functions
4 * Linux INET6 implementation
5 *
6 * Authors:
7 * Pedro Roque <roque@di.fc.ul.pt>
8 *
9 * Based on linux/net/ipv4/ip_output.c
10 *
11 * Changes:
12 * A.N.Kuznetsov : airthmetics in fragmentation.
13 * extension headers are implemented.
14 * route changes now work.
15 * ip6_forward does not confuse sniffers.
16 * etc.
17 *
18 * H. von Brand : Added missing #include <linux/string.h>
19 * Imran Patel : frag id should be in NBO
20 * Kazunori MIYAZAWA @USAGI
21 * : add ip6_append_data and related functions
22 * for datagram xmit
23 */
24
25 #include <linux/errno.h>
26 #include <linux/kernel.h>
27 #include <linux/string.h>
28 #include <linux/socket.h>
29 #include <linux/net.h>
30 #include <linux/netdevice.h>
31 #include <linux/if_arp.h>
32 #include <linux/in6.h>
33 #include <linux/tcp.h>
34 #include <linux/route.h>
35 #include <linux/module.h>
36 #include <linux/slab.h>
37
38 #include <linux/bpf-cgroup.h>
39 #include <linux/netfilter.h>
40 #include <linux/netfilter_ipv6.h>
41
42 #include <net/sock.h>
43 #include <net/snmp.h>
44
45 #include <net/ipv6.h>
46 #include <net/ndisc.h>
47 #include <net/protocol.h>
48 #include <net/ip6_route.h>
49 #include <net/addrconf.h>
50 #include <net/rawv6.h>
51 #include <net/icmp.h>
52 #include <net/xfrm.h>
53 #include <net/checksum.h>
54 #include <linux/mroute6.h>
55 #include <net/l3mdev.h>
56 #include <net/lwtunnel.h>
57 #include <net/ip_tunnels.h>
58
/* Final IPv6 transmit step: resolve the neighbour for the route's nexthop
 * and hand the skb to neigh_output().  Multicast packets may additionally
 * be looped back to local listeners first.
 *
 * Returns the neigh_output() result, 0 when the packet was consumed
 * locally (looped-back multicast / discards), or -EINVAL when no
 * neighbour entry could be created.
 */
static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	const struct in6_addr *nexthop;
	struct neighbour *neigh;
	int ret;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		/* Loop a copy back up the stack when the sender wants to
		 * receive its own multicast (sk_mc_loop()) and either an
		 * mroute socket originated it or a local listener has
		 * joined the destination group on this device.
		 */
		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
		    ((mroute6_is_socket(net, skb) &&
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					net, sk, newskb, NULL, newskb->dev,
					dev_loopback_xmit);

			/* hop_limit 0: loop back above, but never put it
			 * on the wire
			 */
			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(net, idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);

		/* node-local scope multicast must not leave the host */
		if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
		    IPV6_ADDR_SCOPE_NODELOCAL &&
		    !(dev->flags & IFF_LOOPBACK)) {
			kfree_skb(skb);
			return 0;
		}
	}

	/* a lightweight tunnel may take over transmission entirely */
	if (lwtunnel_xmit_redirect(dst->lwtstate)) {
		int res = lwtunnel_xmit(skb);

		if (res < 0 || res == LWTUNNEL_XMIT_DONE)
			return res;
	}

	rcu_read_lock_bh();
	nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
	if (unlikely(!neigh))
		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
	if (!IS_ERR(neigh)) {
		sock_confirm_neigh(skb, neigh);
		ret = neigh_output(neigh, skb, false);
		rcu_read_unlock_bh();
		return ret;
	}
	rcu_read_unlock_bh();

	IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}
127
/* A GSO packet whose segments would exceed the egress MTU: segment it in
 * software and push each resulting segment through ip6_fragment().  The
 * original @skb is always consumed.  Returns 0 on success or the first
 * fragmentation error encountered (remaining segments are still tried).
 */
static int
ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk,
				    struct sk_buff *skb, unsigned int mtu)
{
	struct sk_buff *segs, *nskb;
	netdev_features_t features;
	int ret = 0;

	/* Please see corresponding comment in ip_finish_output_gso
	 * describing the cases where GSO segment length exceeds the
	 * egress MTU.
	 */
	features = netif_skb_features(skb);
	/* mask out all GSO features to force full software segmentation */
	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
	if (IS_ERR_OR_NULL(segs)) {
		kfree_skb(skb);
		return -ENOMEM;
	}

	consume_skb(skb);

	skb_list_walk_safe(segs, segs, nskb) {
		int err;

		skb_mark_not_on_list(segs);
		err = ip6_fragment(net, sk, segs, ip6_finish_output2);
		/* remember only the first error */
		if (err && ret == 0)
			ret = err;
	}

	return ret;
}
160
__ip6_finish_output(struct net * net,struct sock * sk,struct sk_buff * skb)161 static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
162 {
163 unsigned int mtu;
164
165 #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
166 /* Policy lookup after SNAT yielded a new policy */
167 if (skb_dst(skb)->xfrm) {
168 IPCB(skb)->flags |= IPSKB_REROUTED;
169 return dst_output(net, sk, skb);
170 }
171 #endif
172
173 mtu = ip6_skb_dst_mtu(skb);
174 if (skb_is_gso(skb) && !skb_gso_validate_network_len(skb, mtu))
175 return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu);
176
177 if ((skb->len > mtu && !skb_is_gso(skb)) ||
178 dst_allfrag(skb_dst(skb)) ||
179 (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
180 return ip6_fragment(net, sk, skb, ip6_finish_output2);
181 else
182 return ip6_finish_output2(net, sk, skb);
183 }
184
/* NF_INET_POST_ROUTING okfn: run the cgroup BPF egress program and, when
 * it permits the packet, continue with __ip6_finish_output().
 */
static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	int ret;

	ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
	switch (ret) {
	case NET_XMIT_SUCCESS:
		return __ip6_finish_output(net, sk, skb);
	case NET_XMIT_CN:
		/* congestion notification: still transmit, but report
		 * NET_XMIT_CN unless the output path itself failed
		 */
		return __ip6_finish_output(net, sk, skb) ? : ret;
	default:
		/* BPF program rejected the packet */
		kfree_skb(skb);
		return ret;
	}
}
200
/* dst_output() entry point for IPv6: drop when IPv6 is administratively
 * disabled on the egress device, otherwise traverse the
 * NF_INET_POST_ROUTING hook into ip6_finish_output().  The hook is
 * skipped for packets already flagged IP6SKB_REROUTED by the XFRM
 * policy check in __ip6_finish_output().
 */
int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev;
	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (unlikely(idev->cnf.disable_ipv6)) {
		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
		return 0;
	}

	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
			    net, sk, skb, indev, dev,
			    ip6_finish_output,
			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
EXPORT_SYMBOL(ip6_output);
221
ip6_autoflowlabel(struct net * net,const struct ipv6_pinfo * np)222 bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
223 {
224 if (!np->autoflowlabel_set)
225 return ip6_default_np_autolabel(net);
226 else
227 return np->autoflowlabel;
228 }
229
/*
 * xmit an sk_buff (used by TCP, SCTP and DCCP)
 * Note : socket lock is not held for SYNACK packets, but might be modified
 * by calls to skb_set_owner_w() and ipv6_local_error(),
 * which are using proper atomic operations or spinlocks.
 *
 * Builds the IPv6 header (plus any extension headers from @opt) in front
 * of the transport payload already in @skb, then sends it through the
 * NF_INET_LOCAL_OUT hook to dst_output().  A dst must already be attached
 * to @skb.  Returns 0 on success/handoff, -ENOBUFS when headroom
 * reallocation fails, or -EMSGSIZE when the packet exceeds the path MTU
 * and cannot be sent.
 */
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
	     __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority)
{
	struct net *net = sock_net(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl6->daddr;
	struct dst_entry *dst = skb_dst(skb);
	unsigned int head_room;
	struct ipv6hdr *hdr;
	u8 proto = fl6->flowi6_proto;
	int seg_len = skb->len;
	int hlimit = -1;
	u32 mtu;

	/* room needed in front of the payload: IPv6 header, link-layer
	 * header and any extension headers from @opt
	 */
	head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
	if (opt)
		head_room += opt->opt_nflen + opt->opt_flen;

	if (unlikely(skb_headroom(skb) < head_room)) {
		struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
		if (!skb2) {
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
			kfree_skb(skb);
			return -ENOBUFS;
		}
		/* keep socket memory accounting attached to the new skb */
		if (skb->sk)
			skb_set_owner_w(skb2, skb->sk);
		consume_skb(skb);
		skb = skb2;
	}

	if (opt) {
		seg_len += opt->opt_nflen + opt->opt_flen;

		/* fragmentable extension headers first (closest to payload) */
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);

		/* non-fragmentable headers; may rewrite first_hop (routing hdr) */
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
					     &fl6->saddr);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 * Fill in the IPv6 header
	 */
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
				ip6_autoflowlabel(net, np), fl6));

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	hdr->saddr = fl6->saddr;
	hdr->daddr = *first_hop;

	skb->protocol = htons(ETH_P_IPV6);
	skb->priority = priority;
	skb->mark = mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_OUT, skb->len);

		/* if egress device is enslaved to an L3 master device pass the
		 * skb to its handler for processing
		 */
		skb = l3mdev_ip6_out((struct sock *)sk, skb);
		if (unlikely(!skb))
			return 0;

		/* hooks should never assume socket lock is held.
		 * we promote our socket to non const
		 */
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
			       net, (struct sock *)sk, skb, NULL, dst->dev,
			       dst_output);
	}

	/* too big and may not be fragmented: report EMSGSIZE locally */
	skb->dev = dst->dev;
	/* ipv6_local_error() does not require socket lock,
	 * we promote our socket to non const
	 */
	ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);

	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}
EXPORT_SYMBOL(ip6_xmit);
336
/* Deliver a Router Alert packet to every raw socket registered in
 * ip6_ra_chain whose selector matches @sel (subject to device binding
 * and namespace-isolation constraints).  The last matching socket gets
 * the original skb; every earlier match gets a clone.
 *
 * Returns 1 when at least one socket consumed the packet, 0 otherwise
 * (the caller keeps ownership of @skb in that case).
 */
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			struct ipv6_pinfo *np = inet6_sk(sk);

			/* honour IPV6_ROUTER_ALERT_ISOLATE: skip sockets
			 * living in a different network namespace
			 */
			if (np && np->rtalert_isolate &&
			    !net_eq(sock_net(sk), dev_net(skb->dev))) {
				continue;
			}
			/* deliver a clone to the previous match, keep the
			 * original for the final one
			 */
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}
371
/* Decide how to handle a packet destined to an address we proxy NDP for.
 *
 * Returns:
 *   1  - unicast neighbour-discovery ICMPv6 message: pass to local input
 *   0  - forward normally
 *  -1  - destination is link-local: link failure signalled, caller drops
 */
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	__be16 frag_off;
	int offset;

	/* locate the transport header past any extension headers */
	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		/* need at least the icmp6_type byte in linear data */
		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For reaction involving unicast neighbor discovery
			 * message destined to the proxied address, pass it to
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}
423
/* NF_INET_FORWARD okfn: account the forwarded packet and hand it to
 * dst_output().  Packets already forwarded in hardware (switchdev
 * offload_l3_fwd_mark) are only accounted and then consumed.
 */
static inline int ip6_forward_finish(struct net *net, struct sock *sk,
				     struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	__IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);

#ifdef CONFIG_NET_SWITCHDEV
	if (skb->offload_l3_fwd_mark) {
		consume_skb(skb);
		return 0;
	}
#endif

	/* clear any ingress timestamp before transmission */
	skb->tstamp = 0;
	return dst_output(net, sk, skb);
}
442
ip6_pkt_too_big(const struct sk_buff * skb,unsigned int mtu)443 static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
444 {
445 if (skb->len <= mtu)
446 return false;
447
448 /* ipv6 conntrack defrag sets max_frag_size + ignore_df */
449 if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
450 return true;
451
452 if (skb->ignore_df)
453 return false;
454
455 if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
456 return false;
457
458 return true;
459 }
460
/* Forward a received IPv6 packet: validate it (forwarding enabled, not
 * host-bound, hop limit, xfrm policy), honour Router Alert and NDP-proxy
 * special cases, emit redirects where permitted, enforce the path MTU and
 * finally pass the packet through the NF_INET_FORWARD hook to
 * ip6_forward_finish().
 *
 * Returns 0 when the packet was handed off or consumed, negative errno
 * when it was dropped.
 */
int ip6_forward(struct sk_buff *skb)
{
	struct inet6_dev *idev = __in6_dev_get_safely(skb->dev);
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	u32 mtu;

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	/* socket-owned skbs are locally destined, never forwarded */
	if (unlikely(skb->sk))
		goto drop;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 * We DO NOT make any processing on
	 * RA packets, pushing them to user level AS IS
	 * without ane WARRANTY that application will be able
	 * to interpret them. The reason is that we
	 * cannot make anything clever here.
	 *
	 * We are not end-node, so that if packet contains
	 * AH/ESP, we cannot make anything.
	 * Defragmentation also would be mistake, RA packets
	 * cannot be fragmented, because there is no warranty
	 * that different fragments will go along one path. --ANK
	 */
	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
			return 0;
	}

	/*
	 * check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	/* xfrm6_route_forward() may have switched the attached route */
	dst = skb_dst(skb);

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
	 */
	if (IP6CB(skb)->iif == dst->dev->ifindex &&
	    opt->srcrt == 0 && !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct inet_peer *peer;
		struct rt6_info *rt;

		/*
		 * incoming and outgoing devices are the same
		 * send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if (rt->rt6i_flags & RTF_GATEWAY)
			target = &rt->rt6i_gateway;
		else
			target = &hdr->daddr;

		peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (inet_peer_xrlim_allow(peer, 1*HZ))
			ndisc_send_redirect(skb, target);
		if (peer)
			inet_putpeer(peer);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}

	mtu = ip6_dst_mtu_forward(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if (ip6_pkt_too_big(skb, mtu)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	/* skb_cow() may have reallocated the header */
	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
		       net, NULL, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	__IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}
617
ip6_copy_metadata(struct sk_buff * to,struct sk_buff * from)618 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
619 {
620 to->pkt_type = from->pkt_type;
621 to->priority = from->priority;
622 to->protocol = from->protocol;
623 skb_dst_drop(to);
624 skb_dst_set(to, dst_clone(skb_dst(from)));
625 to->dev = from->dev;
626 to->mark = from->mark;
627
628 skb_copy_hash(to, from);
629
630 #ifdef CONFIG_NET_SCHED
631 to->tc_index = from->tc_index;
632 #endif
633 nf_copy(to, from);
634 skb_ext_copy(to, from);
635 skb_copy_secmark(to, from);
636 }
637
/* Prepare an skb carrying a frag_list for fast-path fragmentation: save a
 * copy of the network headers in iter->tmp_hdr, insert a fragment header
 * into the first skb, detach the frag_list into the iterator and fix up
 * the first fragment's length fields.
 *
 * Returns 0 or -ENOMEM.  The caller must kfree(iter->tmp_hdr) when done.
 */
int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr,
		      u8 nexthdr, __be32 frag_id,
		      struct ip6_fraglist_iter *iter)
{
	unsigned int first_len;
	struct frag_hdr *fh;

	/* BUILD HEADER */
	*prevhdr = NEXTHDR_FRAGMENT;
	iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
	if (!iter->tmp_hdr)
		return -ENOMEM;

	/* take over the frag_list; remaining fragments come from here */
	iter->frag = skb_shinfo(skb)->frag_list;
	skb_frag_list_init(skb);

	iter->offset = 0;
	iter->hlen = hlen;
	iter->frag_id = frag_id;
	iter->nexthdr = nexthdr;

	/* open a frag_hdr-sized gap after the network headers, then copy
	 * the saved headers back in front of it
	 */
	__skb_pull(skb, hlen);
	fh = __skb_push(skb, sizeof(struct frag_hdr));
	__skb_push(skb, hlen);
	skb_reset_network_header(skb);
	memcpy(skb_network_header(skb), iter->tmp_hdr, hlen);

	/* first fragment: offset 0, more fragments follow */
	fh->nexthdr = nexthdr;
	fh->reserved = 0;
	fh->frag_off = htons(IP6_MF);
	fh->identification = frag_id;

	first_len = skb_pagelen(skb);
	skb->data_len = first_len - skb_headlen(skb);
	skb->len = first_len;
	ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr));

	return 0;
}
EXPORT_SYMBOL(ip6_fraglist_init);
678
/* Turn the next frag_list member into a standalone fragment: push the
 * saved network headers plus a fragment header in front of it, advance
 * the running fragment offset by @skb's payload size, and copy packet
 * metadata onto it.  @skb is the previously emitted fragment.
 */
void ip6_fraglist_prepare(struct sk_buff *skb,
			  struct ip6_fraglist_iter *iter)
{
	struct sk_buff *frag = iter->frag;
	unsigned int hlen = iter->hlen;
	struct frag_hdr *fh;

	frag->ip_summed = CHECKSUM_NONE;
	skb_reset_transport_header(frag);
	fh = __skb_push(frag, sizeof(struct frag_hdr));
	__skb_push(frag, hlen);
	skb_reset_network_header(frag);
	memcpy(skb_network_header(frag), iter->tmp_hdr, hlen);
	/* advance by the payload carried in the previous fragment */
	iter->offset += skb->len - hlen - sizeof(struct frag_hdr);
	fh->nexthdr = iter->nexthdr;
	fh->reserved = 0;
	fh->frag_off = htons(iter->offset);
	/* all but the last fragment carry the More Fragments flag */
	if (frag->next)
		fh->frag_off |= htons(IP6_MF);
	fh->identification = iter->frag_id;
	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
	ip6_copy_metadata(frag, skb);
}
EXPORT_SYMBOL(ip6_fraglist_prepare);
703
/* Seed an ip6_frag_state for slow-path fragmentation of @skb.  Pure
 * bookkeeping: records the header geometry, the per-fragment payload
 * budget and the headroom/tailroom needed for allocations; no skb data
 * is touched here.
 */
void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu,
		   unsigned short needed_tailroom, int hdr_room, u8 *prevhdr,
		   u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state)
{
	/* header chain replicated in front of every fragment */
	state->hlen = hlen;
	state->prevhdr = prevhdr;
	state->nexthdr = nexthdr;
	state->frag_id = frag_id;

	/* per-fragment payload budget */
	state->mtu = mtu;

	/* payload accounting: bytes still to send, and the read cursor */
	state->left = skb->len - hlen;
	state->ptr = hlen;

	/* allocation slack for link-layer header and device tailroom */
	state->hroom = hdr_room;
	state->troom = needed_tailroom;

	state->offset = 0;
}
EXPORT_SYMBOL(ip6_frag_init);
724
/* Build and return the next fragment of @skb according to @state
 * (slow path: every fragment is a freshly allocated skb and the payload
 * bytes are copied into it).  Returns ERR_PTR(-ENOMEM) when allocation
 * fails; otherwise the caller owns the returned fragment.
 */
struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state)
{
	u8 *prevhdr = state->prevhdr, *fragnexthdr_offset;
	struct sk_buff *frag;
	struct frag_hdr *fh;
	unsigned int len;

	len = state->left;
	/* IF: it doesn't fit, use 'mtu' - the data space left */
	if (len > state->mtu)
		len = state->mtu;
	/* IF: we are not sending up to and including the packet end
	   then align the next start on an eight byte boundary */
	if (len < state->left)
		len &= ~7;

	/* Allocate buffer */
	frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) +
			 state->hroom + state->troom, GFP_ATOMIC);
	if (!frag)
		return ERR_PTR(-ENOMEM);

	/*
	 * Set up data on packet
	 */

	ip6_copy_metadata(frag, skb);
	skb_reserve(frag, state->hroom);
	skb_put(frag, len + state->hlen + sizeof(struct frag_hdr));
	skb_reset_network_header(frag);
	fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen);
	frag->transport_header = (frag->network_header + state->hlen +
				  sizeof(struct frag_hdr));

	/*
	 * Charge the memory for the fragment to any owner
	 * it might possess
	 */
	if (skb->sk)
		skb_set_owner_w(frag, skb->sk);

	/*
	 * Copy the packet header into the new buffer.
	 */
	skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen);

	/* patch the copied header chain: the header preceding the
	 * fragment header must now point at NEXTHDR_FRAGMENT
	 */
	fragnexthdr_offset = skb_network_header(frag);
	fragnexthdr_offset += prevhdr - skb_network_header(skb);
	*fragnexthdr_offset = NEXTHDR_FRAGMENT;

	/*
	 * Build fragment header.
	 */
	fh->nexthdr = state->nexthdr;
	fh->reserved = 0;
	fh->identification = state->frag_id;

	/*
	 * Copy a block of the IP datagram.
	 */
	BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag),
			     len));
	state->left -= len;

	/* set IP6_MF on every fragment except the last */
	fh->frag_off = htons(state->offset);
	if (state->left > 0)
		fh->frag_off |= htons(IP6_MF);
	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));

	state->ptr += len;
	state->offset += len;

	return frag;
}
EXPORT_SYMBOL(ip6_frag_next);
800
/* Fragment @skb to the path MTU and transmit every fragment via @output.
 *
 * Two strategies are used:
 *  - fast path: when the skb carries a suitably shaped frag_list, the
 *    existing buffers are converted in place into a chain of fragments
 *    (ip6_fraglist_init/ip6_fraglist_prepare);
 *  - slow path: otherwise each fragment is a newly allocated skb and the
 *    payload is copied out (ip6_frag_init/ip6_frag_next).
 *
 * The original @skb is always consumed.  Returns 0 on success or a
 * negative errno; an ICMPV6_PKT_TOOBIG is sent on the fail_toobig paths.
 */
int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
		 int (*output)(struct net *, struct sock *, struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
				inet6_sk(skb->sk) : NULL;
	struct ip6_frag_state state;
	unsigned int mtu, hlen, nexthdr_offset;
	ktime_t tstamp = skb->tstamp;
	int hroom, err = 0;
	__be32 frag_id;
	u8 *prevhdr, nexthdr = 0;

	/* on success err is the length of the unfragmentable header part */
	err = ip6_find_1stfragopt(skb, &prevhdr);
	if (err < 0)
		goto fail;
	hlen = err;
	nexthdr = *prevhdr;
	nexthdr_offset = prevhdr - skb_network_header(skb);

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb it not generated by a local socket.
	 */
	if (unlikely(!skb->ignore_df && skb->len > mtu))
		goto fail_toobig;

	if (IP6CB(skb)->frag_max_size) {
		if (IP6CB(skb)->frag_max_size > mtu)
			goto fail_toobig;

		/* don't send fragments larger than what we received */
		mtu = IP6CB(skb)->frag_max_size;
		if (mtu < IPV6_MIN_MTU)
			mtu = IPV6_MIN_MTU;
	}

	/* honour a smaller per-socket IPV6_MTU */
	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	if (mtu < hlen + sizeof(struct frag_hdr) + 8)
		goto fail_toobig;
	/* mtu now holds the per-fragment payload budget */
	mtu -= hlen + sizeof(struct frag_hdr);

	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
				    &ipv6_hdr(skb)->saddr);

	/* fragments must carry a fully resolved checksum */
	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    (err = skb_checksum_help(skb)))
		goto fail;

	/* skb_checksum_help() may have reallocated: recompute prevhdr */
	prevhdr = skb_network_header(skb) + nexthdr_offset;
	hroom = LL_RESERVED_SPACE(rt->dst.dev);
	if (skb_has_frag_list(skb)) {
		unsigned int first_len = skb_pagelen(skb);
		struct ip6_fraglist_iter iter;
		struct sk_buff *frag2;

		/* fast path only when the head fits one fragment and has
		 * room for the fragment header
		 */
		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb) ||
		    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
					&iter);
		if (err < 0)
			goto fail;

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down. */
			if (iter.frag)
				ip6_fraglist_prepare(skb, &iter);

			skb->tstamp = tstamp;
			err = output(net, sk, skb);
			if (!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !iter.frag)
				break;

			skb = ip6_fraglist_next(&iter);
		}

		kfree(iter.tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
				      IPSTATS_MIB_FRAGOKS);
			return 0;
		}

		/* free the fragments not yet transmitted */
		kfree_skb_list(iter.frag);

		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
			      IPSTATS_MIB_FRAGFAILS);
		return err;

slow_path_clean:
		/* undo the ownership transfer done above */
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

slow_path:
	/*
	 * Fragment the datagram.
	 */

	ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom,
		      LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id,
		      &state);

	/*
	 * Keep copying data until we run out.
	 */

	while (state.left > 0) {
		frag = ip6_frag_next(skb, &state);
		if (IS_ERR(frag)) {
			err = PTR_ERR(frag);
			goto fail;
		}

		/*
		 * Put this fragment into the sending queue.
		 */
		frag->tstamp = tstamp;
		err = output(net, sk, frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	consume_skb(skb);
	return err;

fail_toobig:
	if (skb->sk && dst_allfrag(skb_dst(skb)))
		sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
	err = -EMSGSIZE;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}
983
ip6_rt_check(const struct rt6key * rt_key,const struct in6_addr * fl_addr,const struct in6_addr * addr_cache)984 static inline int ip6_rt_check(const struct rt6key *rt_key,
985 const struct in6_addr *fl_addr,
986 const struct in6_addr *addr_cache)
987 {
988 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
989 (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
990 }
991
/* Validate a socket's cached dst against the flow @fl6.  Returns the dst
 * when it is still usable, or NULL after releasing it when it is not
 * (wrong family, stale destination/source, or oif mismatch).
 */
static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  const struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt;

	if (!dst)
		goto out;

	/* e.g. a v4-mapped dst cached on a dual-stack socket */
	if (dst->ops->family != AF_INET6) {
		dst_release(dst);
		return NULL;
	}

	rt = (struct rt6_info *)dst;
	/* Yes, checking route validity in not connected
	 * case is not very simple. Take into account,
	 * that we do not support routing by source, TOS,
	 * and MSG_DONTROUTE --ANK (980726)
	 *
	 * 1. ip6_rt_check(): If route was host route,
	 *    check that cached destination is current.
	 *    If it is network route, we still may
	 *    check its validity using saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save whole address now,
	 *    (because main consumer of this service
	 *    is tcp, which has not this problem),
	 *    so that the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
	    (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
	      (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}
1038
/* Core of the dst lookup helpers: fill *dst for @fl6, choosing a source
 * address when the flow has none, and (under CONFIG_IPV6_OPTIMISTIC_DAD)
 * falling back to the default router's dst when the nexthop neighbour is
 * not valid while our chosen source address is optimistic.  On failure
 * *dst is released, set to NULL, and a negative errno is returned.
 */
static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
			       struct dst_entry **dst, struct flowi6 *fl6)
{
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	struct neighbour *n;
	struct rt6_info *rt;
#endif
	int err;
	int flags = 0;

	/* The correct way to handle this would be to do
	 * ip6_route_get_saddr, and then ip6_route_output; however,
	 * the route-specific preferred source forces the
	 * ip6_route_output call _before_ ip6_route_get_saddr.
	 *
	 * In source specific routing (no src=any default route),
	 * ip6_route_output will fail given src=any saddr, though, so
	 * that's why we try it again later.
	 */
	if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
		struct fib6_info *from;
		struct rt6_info *rt;
		bool had_dst = *dst != NULL;

		if (!had_dst)
			*dst = ip6_route_output(net, sk, fl6);
		rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;

		rcu_read_lock();
		from = rt ? rcu_dereference(rt->from) : NULL;
		err = ip6_route_get_saddr(net, from, &fl6->daddr,
					  sk ? inet6_sk(sk)->srcprefs : 0,
					  &fl6->saddr);
		rcu_read_unlock();

		if (err)
			goto out_err_release;

		/* If we had an erroneous initial result, pretend it
		 * never existed and let the SA-enabled version take
		 * over.
		 */
		if (!had_dst && (*dst)->error) {
			dst_release(*dst);
			*dst = NULL;
		}

		if (fl6->flowi6_oif)
			flags |= RT6_LOOKUP_F_IFACE;
	}

	/* retry the lookup now that a source address is known */
	if (!*dst)
		*dst = ip6_route_output_flags(net, sk, fl6, flags);

	err = (*dst)->error;
	if (err)
		goto out_err_release;

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	rt = (struct rt6_info *) *dst;
	rcu_read_lock_bh();
	n = __ipv6_neigh_lookup_noref(rt->dst.dev,
				      rt6_nexthop(rt, &fl6->daddr));
	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
	rcu_read_unlock_bh();

	if (err) {
		struct inet6_ifaddr *ifp;
		struct flowi6 fl_gw6;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw6);
			err = (*dst)->error;
			if (err)
				goto out_err_release;
		}
	}
#endif
	/* v4-mapped source with a non-v4-mapped destination is invalid */
	if (ipv6_addr_v4mapped(&fl6->saddr) &&
	    !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
		err = -EAFNOSUPPORT;
		goto out_err_release;
	}

	return 0;

out_err_release:
	dst_release(*dst);
	*dst = NULL;

	if (err == -ENETUNREACH)
		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	return err;
}
1156
1157 /**
1158 * ip6_dst_lookup - perform route lookup on flow
1159 * @net: Network namespace to perform lookup in
1160 * @sk: socket which provides route info
1161 * @dst: pointer to dst_entry * for result
1162 * @fl6: flow to lookup
1163 *
1164 * This function performs a route lookup on the given flow.
1165 *
1166 * It returns zero on success, or a standard errno code on error.
1167 */
ip6_dst_lookup(struct net * net,struct sock * sk,struct dst_entry ** dst,struct flowi6 * fl6)1168 int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
1169 struct flowi6 *fl6)
1170 {
1171 *dst = NULL;
1172 return ip6_dst_lookup_tail(net, sk, dst, fl6);
1173 }
1174 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1175
1176 /**
1177 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1178 * @net: Network namespace to perform lookup in
1179 * @sk: socket which provides route info
1180 * @fl6: flow to lookup
1181 * @final_dst: final destination address for ipsec lookup
1182 *
1183 * This function performs a route lookup on the given flow.
1184 *
1185 * It returns a valid dst pointer on success, or a pointer encoded
1186 * error code.
1187 */
ip6_dst_lookup_flow(struct net * net,const struct sock * sk,struct flowi6 * fl6,const struct in6_addr * final_dst)1188 struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6,
1189 const struct in6_addr *final_dst)
1190 {
1191 struct dst_entry *dst = NULL;
1192 int err;
1193
1194 err = ip6_dst_lookup_tail(net, sk, &dst, fl6);
1195 if (err)
1196 return ERR_PTR(err);
1197 if (final_dst)
1198 fl6->daddr = *final_dst;
1199
1200 return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0);
1201 }
1202 EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1203
1204 /**
1205 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
1206 * @sk: socket which provides the dst cache and route info
1207 * @fl6: flow to lookup
1208 * @final_dst: final destination address for ipsec lookup
1209 * @connected: whether @sk is connected or not
1210 *
1211 * This function performs a route lookup on the given flow with the
1212 * possibility of using the cached route in the socket if it is valid.
1213 * It will take the socket dst lock when operating on the dst cache.
1214 * As a result, this function can only be used in process context.
1215 *
1216 * In addition, for a connected socket, cache the dst in the socket
1217 * if the current cache is not valid.
1218 *
1219 * It returns a valid dst pointer on success, or a pointer encoded
1220 * error code.
1221 */
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
					 const struct in6_addr *final_dst,
					 bool connected)
{
	struct dst_entry *dst;

	/* Fast path: reuse the route cached on the socket if it is
	 * still valid for this flow.
	 */
	dst = ip6_sk_dst_check(sk, sk_dst_check(sk, inet6_sk(sk)->dst_cookie),
			       fl6);
	if (dst)
		return dst;

	/* Cache miss: full lookup; connected sockets refill the cache. */
	dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_dst);
	if (connected && !IS_ERR(dst))
		ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);

	return dst;
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
1239
1240 /**
1241 * ip6_dst_lookup_tunnel - perform route lookup on tunnel
1242 * @skb: Packet for which lookup is done
1243 * @dev: Tunnel device
1244 * @net: Network namespace of tunnel device
1245 * @sock: Socket which provides route info
1246 * @saddr: Memory to store the src ip address
1247 * @info: Tunnel information
1248 * @protocol: IP protocol
1249 * @use_cache: Flag to enable cache usage
1250 * This function performs a route lookup on a tunnel
1251 *
1252 * It returns a valid dst pointer and stores src address to be used in
1253 * tunnel in param saddr on success, else a pointer encoded error code.
1254 */
1255
struct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb,
					struct net_device *dev,
					struct net *net,
					struct socket *sock,
					struct in6_addr *saddr,
					const struct ip_tunnel_info *info,
					u8 protocol,
					bool use_cache)
{
	struct dst_entry *dst = NULL;
#ifdef CONFIG_DST_CACHE
	struct dst_cache *dst_cache;
#endif
	struct flowi6 fl6;
	__u8 prio;

#ifdef CONFIG_DST_CACHE
	/* Cast away const: dst_cache_get/set mutate the per-CPU cache
	 * embedded in the (otherwise read-only) tunnel info.
	 */
	dst_cache = (struct dst_cache *)&info->dst_cache;
	if (use_cache) {
		dst = dst_cache_get_ip6(dst_cache, saddr);
		if (dst)
			return dst;
	}
#endif
	/* Build the flow from tunnel key + packet mark. */
	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_mark = skb->mark;
	fl6.flowi6_proto = protocol;
	fl6.daddr = info->key.u.ipv6.dst;
	fl6.saddr = info->key.u.ipv6.src;
	prio = info->key.tos;
	fl6.flowlabel = ip6_make_flowinfo(RT_TOS(prio),
					  info->key.label);

	dst = ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6,
					      NULL);
	if (IS_ERR(dst)) {
		netdev_dbg(dev, "no route to %pI6\n", &fl6.daddr);
		return ERR_PTR(-ENETUNREACH);
	}
	if (dst->dev == dev) { /* is this necessary? */
		netdev_dbg(dev, "circular route to %pI6\n", &fl6.daddr);
		dst_release(dst);
		return ERR_PTR(-ELOOP);
	}
#ifdef CONFIG_DST_CACHE
	if (use_cache)
		dst_cache_set_ip6(dst_cache, dst, &fl6.saddr);
#endif
	/* Report the source address chosen by the route lookup. */
	*saddr = fl6.saddr;
	return dst;
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_tunnel);
1308
static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
					       gfp_t gfp)
{
	/* NULL propagates; hdrlen counts 8-octet units past the first 8. */
	if (!src)
		return NULL;

	return kmemdup(src, (src->hdrlen + 1) * 8, gfp);
}
1314
static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
						gfp_t gfp)
{
	/* NULL propagates; hdrlen counts 8-octet units past the first 8. */
	if (!src)
		return NULL;

	return kmemdup(src, (src->hdrlen + 1) * 8, gfp);
}
1320
ip6_append_data_mtu(unsigned int * mtu,int * maxfraglen,unsigned int fragheaderlen,struct sk_buff * skb,struct rt6_info * rt,unsigned int orig_mtu)1321 static void ip6_append_data_mtu(unsigned int *mtu,
1322 int *maxfraglen,
1323 unsigned int fragheaderlen,
1324 struct sk_buff *skb,
1325 struct rt6_info *rt,
1326 unsigned int orig_mtu)
1327 {
1328 if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
1329 if (!skb) {
1330 /* first fragment, reserve header_len */
1331 *mtu = orig_mtu - rt->dst.header_len;
1332
1333 } else {
1334 /*
1335 * this fragment is not first, the headers
1336 * space is regarded as data space.
1337 */
1338 *mtu = orig_mtu;
1339 }
1340 *maxfraglen = ((*mtu - fragheaderlen) & ~7)
1341 + fragheaderlen - sizeof(struct frag_hdr);
1342 }
1343 }
1344
/* Initialize the cork state for a fresh append-data session: duplicate
 * the caller's tx options, pin the route, and compute the fragmentation
 * MTU.  Returns 0 or a negative errno.
 *
 * NOTE(review): on a mid-way dup failure the partially filled
 * v6_cork->opt is left in place; the caller is expected to clean up via
 * ip6_cork_release() — confirm all callers do.
 */
static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
			  struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
			  struct rt6_info *rt, struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	unsigned int mtu;
	struct ipv6_txoptions *opt = ipc6->opt;

	/*
	 * setup for corking
	 */
	if (opt) {
		if (WARN_ON(v6_cork->opt))
			return -EINVAL;

		v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
		if (unlikely(!v6_cork->opt))
			return -ENOBUFS;

		v6_cork->opt->tot_len = sizeof(*opt);
		v6_cork->opt->opt_flen = opt->opt_flen;
		v6_cork->opt->opt_nflen = opt->opt_nflen;

		/* Deep-copy each extension header; ip6_opt_dup returns
		 * NULL both for "no header" and for allocation failure,
		 * so each copy is checked against the source pointer.
		 */
		v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
						    sk->sk_allocation);
		if (opt->dst0opt && !v6_cork->opt->dst0opt)
			return -ENOBUFS;

		v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
						    sk->sk_allocation);
		if (opt->dst1opt && !v6_cork->opt->dst1opt)
			return -ENOBUFS;

		v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
						   sk->sk_allocation);
		if (opt->hopopt && !v6_cork->opt->hopopt)
			return -ENOBUFS;

		v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
						    sk->sk_allocation);
		if (opt->srcrt && !v6_cork->opt->srcrt)
			return -ENOBUFS;

		/* need source address above miyazawa*/
	}
	/* Pin the route for the lifetime of the cork. */
	dst_hold(&rt->dst);
	cork->base.dst = &rt->dst;
	cork->fl.u.ip6 = *fl6;
	v6_cork->hop_limit = ipc6->hlimit;
	v6_cork->tclass = ipc6->tclass;
	/* Pick the fragmentation MTU: device MTU when probing PMTU,
	 * otherwise the path MTU (of the xfrm path dst when tunneled).
	 */
	if (rt->dst.flags & DST_XFRM_TUNNEL)
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
	else
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
	/* A smaller user-requested frag_size overrides the path MTU. */
	if (np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	if (mtu < IPV6_MIN_MTU)
		return -EINVAL;
	cork->base.fragsize = mtu;
	cork->base.gso_size = ipc6->gso_size;
	cork->base.tx_flags = 0;
	cork->base.mark = ipc6->sockc.mark;
	sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);

	if (dst_allfrag(xfrm_dst_path(&rt->dst)))
		cork->base.flags |= IPCORK_ALLFRAG;
	cork->base.length = 0;

	cork->base.transmit_time = ipc6->sockc.transmit_time;

	return 0;
}
1421
/* Append @length bytes (fetched via @getfrag) to the pending queue,
 * splitting the data into MTU-sized fragments as needed.  Shared by
 * ip6_append_data() (socket write queue) and ip6_make_skb() (private
 * queue).  Returns 0 or a negative errno; on error the queued data is
 * left for the caller to flush.
 */
static int __ip6_append_data(struct sock *sk,
			     struct flowi6 *fl6,
			     struct sk_buff_head *queue,
			     struct inet_cork *cork,
			     struct inet6_cork *v6_cork,
			     struct page_frag *pfrag,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     unsigned int flags, struct ipcm6_cookie *ipc6)
{
	struct sk_buff *skb, *skb_prev = NULL;
	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
	struct ubuf_info *uarg = NULL;
	int exthdrlen = 0;
	int dst_exthdrlen = 0;
	int hh_len;
	int copy;
	int err;
	int offset = 0;
	u32 tskey = 0;
	struct rt6_info *rt = (struct rt6_info *)cork->dst;
	struct ipv6_txoptions *opt = v6_cork->opt;
	int csummode = CHECKSUM_NONE;
	unsigned int maxnonfragsize, headersize;
	unsigned int wmem_alloc_delta = 0;
	bool paged, extra_uref = false;

	skb = skb_peek_tail(queue);
	if (!skb) {
		/* First call for this cork: account extension-header and
		 * IPsec header space that only the first skb carries.
		 */
		exthdrlen = opt ? opt->opt_flen : 0;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	}

	/* With GSO the "MTU" used for segmentation is the maximum
	 * packet size; the device splits later.
	 */
	paged = !!cork->gso_size;
	mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
	orig_mtu = mtu;

	if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
	    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
		tskey = sk->sk_tskey++;

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	/* Per-fragment header overhead: IPv6 header + non-fragmentable
	 * extension headers.  maxfraglen is the largest 8-byte-aligned
	 * fragment payload end.
	 */
	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);

	headersize = sizeof(struct ipv6hdr) +
		     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
		     (dst_allfrag(&rt->dst) ?
		      sizeof(struct frag_hdr) : 0) +
		     rt->rt6i_nfheader_len;

	/* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
	 * the first fragment
	 */
	if (headersize + transhdrlen > mtu)
		goto emsgsize;

	if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
	    (sk->sk_protocol == IPPROTO_UDP ||
	     sk->sk_protocol == IPPROTO_RAW)) {
		/* IPV6_DONTFRAG: report the path MTU to the app instead
		 * of fragmenting.
		 */
		ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
				  sizeof(struct ipv6hdr));
		goto emsgsize;
	}

	if (ip6_sk_ignore_df(sk))
		maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
	else
		maxnonfragsize = mtu;

	if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
		pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
		ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
		return -EMSGSIZE;
	}

	/* CHECKSUM_PARTIAL only with no extension headers and when
	 * we are not going to fragment
	 */
	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
	    headersize == sizeof(struct ipv6hdr) &&
	    length <= mtu - headersize &&
	    (!(flags & MSG_MORE) || cork->gso_size) &&
	    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
		csummode = CHECKSUM_PARTIAL;

	if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) {
		uarg = msg_zerocopy_realloc(sk, length, skb_zcopy(skb));
		if (!uarg)
			return -ENOBUFS;
		extra_uref = !skb_zcopy(skb);	/* only ref on new uarg */
		if (rt->dst.dev->features & NETIF_F_SG &&
		    csummode == CHECKSUM_PARTIAL) {
			paged = true;
		} else {
			/* Device can't take zerocopy frags: fall back to
			 * copying but keep the uarg for completion
			 * notification.
			 */
			uarg->zerocopy = 0;
			skb_zcopy_set(skb, uarg, &extra_uref);
		}
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octects, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	cork->length += length;
	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
			unsigned int pagedlen;
alloc_new_skb:
			/* There's no room in the current skb */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (!skb || !skb_prev)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt,
						    orig_mtu);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;
			pagedlen = 0;

			/* Size the linear part: full MTU for non-SG
			 * devices with more data coming, the whole
			 * fragment otherwise, or just the headers when
			 * the payload goes into page frags (paged/GSO).
			 */
			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else if (!paged)
				alloclen = fraglen;
			else {
				alloclen = min_t(int, fraglen, MAX_HEADER);
				pagedlen = fraglen - alloclen;
			}

			alloclen += dst_exthdrlen;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			alloclen += rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			copy = datalen - transhdrlen - fraggap - pagedlen;
			if (copy < 0) {
				err = -EINVAL;
				goto error;
			}
			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				/* Later fragments: allocate directly but
				 * respect the (doubled) send-buffer limit.
				 */
				skb = NULL;
				if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
				    2 * sk->sk_sndbuf)
					skb = alloc_skb(alloclen + hh_len,
							sk->sk_allocation);
				if (unlikely(!skb))
					err = -ENOBUFS;
			}
			if (!skb)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->protocol = htons(ETH_P_IPV6);
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen - pagedlen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				/* Move the overhang from the previous skb
				 * into this one, fixing up its checksum.
				 */
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			if (copy > 0 &&
			    getfrag(from, data + transhdrlen, offset,
				    copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= copy + transhdrlen;
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			/* Only the initial fragment is time stamped */
			skb_shinfo(skb)->tx_flags = cork->tx_flags;
			cork->tx_flags = 0;
			skb_shinfo(skb)->tskey = tskey;
			tskey = 0;
			skb_zcopy_set(skb, uarg, &extra_uref);

			if ((flags & MSG_CONFIRM) && !skb_prev)
				skb_set_dst_pending_confirm(skb, 1);

			/*
			 * Put the packet on the pending queue
			 */
			if (!skb->destructor) {
				skb->destructor = sock_wfree;
				skb->sk = sk;
				wmem_alloc_delta += skb->truesize;
			}
			__skb_queue_tail(queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG) &&
		    skb_tailroom(skb) >= copy) {
			/* Non-SG device with linear tailroom: copy
			 * straight into the skb head.
			 */
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else if (!uarg || !uarg->zerocopy) {
			/* Copy into (possibly coalesced) page frags. */
			int i = skb_shinfo(skb)->nr_frags;

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			wmem_alloc_delta += copy;
		} else {
			/* True zerocopy: link the user pages directly. */
			err = skb_zerocopy_iter_dgram(skb, from, copy);
			if (err < 0)
				goto error;
		}
		offset += copy;
		length -= copy;
	}

	if (wmem_alloc_delta)
		refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	return 0;

error_efault:
	err = -EFAULT;
error:
	net_zcopy_put_abort(uarg, extra_uref);
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	return err;
}
1764
/* Public append-data entry point: queue @length bytes on the socket's
 * write queue, setting up cork state on the first call of a corking
 * sequence.  Returns 0 or a negative errno.
 */
int ip6_append_data(struct sock *sk,
		    int getfrag(void *from, char *to, int offset, int len,
				int odd, struct sk_buff *skb),
		    void *from, int length, int transhdrlen,
		    struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
		    struct rt6_info *rt, unsigned int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	int exthdrlen;
	int err;

	if (flags&MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		err = ip6_setup_cork(sk, &inet->cork, &np->cork,
				     ipc6, rt, fl6);
		if (err)
			return err;

		/* First chunk also carries the fragmentable extension
		 * headers; account them in both length and transhdrlen.
		 */
		exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		/* Already corked: reuse the flow saved at cork time and
		 * don't re-add transport header space.
		 */
		fl6 = &inet->cork.fl.u.ip6;
		transhdrlen = 0;
	}

	return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
				 &np->cork, sk_page_frag(sk), getfrag,
				 from, length, transhdrlen, flags, ipc6);
}
EXPORT_SYMBOL_GPL(ip6_append_data);
1801
ip6_cork_release(struct inet_cork_full * cork,struct inet6_cork * v6_cork)1802 static void ip6_cork_release(struct inet_cork_full *cork,
1803 struct inet6_cork *v6_cork)
1804 {
1805 if (v6_cork->opt) {
1806 kfree(v6_cork->opt->dst0opt);
1807 kfree(v6_cork->opt->dst1opt);
1808 kfree(v6_cork->opt->hopopt);
1809 kfree(v6_cork->opt->srcrt);
1810 kfree(v6_cork->opt);
1811 v6_cork->opt = NULL;
1812 }
1813
1814 if (cork->base.dst) {
1815 dst_release(cork->base.dst);
1816 cork->base.dst = NULL;
1817 cork->base.flags &= ~IPCORK_ALLFRAG;
1818 }
1819 memset(&cork->fl, 0, sizeof(cork->fl));
1820 }
1821
/* Collapse the pending fragment queue into one skb (extra fragments
 * become the frag_list), push extension headers and the IPv6 header,
 * and release the cork.  Returns the finished skb or NULL if the queue
 * was empty.
 */
struct sk_buff *__ip6_make_skb(struct sock *sk,
			       struct sk_buff_head *queue,
			       struct inet_cork_full *cork,
			       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = v6_cork->opt;
	struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
	struct flowi6 *fl6 = &cork->fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;

	skb = __skb_dequeue(queue);
	if (!skb)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	/* Chain the remaining fragments onto the head skb's frag_list,
	 * transferring their byte/truesize accounting to the head.
	 */
	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	skb->ignore_df = ip6_sk_ignore_df(sk);

	/* Save daddr before option push: a routing header may rewrite
	 * the flow's destination via ipv6_push_nfrag_opts().
	 */
	*final_dst = fl6->daddr;
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	ip6_flow_hdr(hdr, v6_cork->tclass,
		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
					ip6_autoflowlabel(net, np), fl6));
	hdr->hop_limit = v6_cork->hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = sk->sk_priority;
	skb->mark = cork->base.mark;

	skb->tstamp = cork->base.transmit_time;

	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
	}

	ip6_cork_release(cork, v6_cork);
out:
	return skb;
}
1897
ip6_send_skb(struct sk_buff * skb)1898 int ip6_send_skb(struct sk_buff *skb)
1899 {
1900 struct net *net = sock_net(skb->sk);
1901 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1902 int err;
1903
1904 err = ip6_local_out(net, skb->sk, skb);
1905 if (err) {
1906 if (err > 0)
1907 err = net_xmit_errno(err);
1908 if (err)
1909 IP6_INC_STATS(net, rt->rt6i_idev,
1910 IPSTATS_MIB_OUTDISCARDS);
1911 }
1912
1913 return err;
1914 }
1915
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb = ip6_finish_skb(sk);

	/* Nothing queued means nothing to transmit. */
	return skb ? ip6_send_skb(skb) : 0;
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1927
__ip6_flush_pending_frames(struct sock * sk,struct sk_buff_head * queue,struct inet_cork_full * cork,struct inet6_cork * v6_cork)1928 static void __ip6_flush_pending_frames(struct sock *sk,
1929 struct sk_buff_head *queue,
1930 struct inet_cork_full *cork,
1931 struct inet6_cork *v6_cork)
1932 {
1933 struct sk_buff *skb;
1934
1935 while ((skb = __skb_dequeue_tail(queue)) != NULL) {
1936 if (skb_dst(skb))
1937 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1938 IPSTATS_MIB_OUTDISCARDS);
1939 kfree_skb(skb);
1940 }
1941
1942 ip6_cork_release(cork, v6_cork);
1943 }
1944
ip6_flush_pending_frames(struct sock * sk)1945 void ip6_flush_pending_frames(struct sock *sk)
1946 {
1947 __ip6_flush_pending_frames(sk, &sk->sk_write_queue,
1948 &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
1949 }
1950 EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
1951
ip6_make_skb(struct sock * sk,int getfrag (void * from,char * to,int offset,int len,int odd,struct sk_buff * skb),void * from,int length,int transhdrlen,struct ipcm6_cookie * ipc6,struct flowi6 * fl6,struct rt6_info * rt,unsigned int flags,struct inet_cork_full * cork)1952 struct sk_buff *ip6_make_skb(struct sock *sk,
1953 int getfrag(void *from, char *to, int offset,
1954 int len, int odd, struct sk_buff *skb),
1955 void *from, int length, int transhdrlen,
1956 struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1957 struct rt6_info *rt, unsigned int flags,
1958 struct inet_cork_full *cork)
1959 {
1960 struct inet6_cork v6_cork;
1961 struct sk_buff_head queue;
1962 int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1963 int err;
1964
1965 if (flags & MSG_PROBE)
1966 return NULL;
1967
1968 __skb_queue_head_init(&queue);
1969
1970 cork->base.flags = 0;
1971 cork->base.addr = 0;
1972 cork->base.opt = NULL;
1973 cork->base.dst = NULL;
1974 v6_cork.opt = NULL;
1975 err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6);
1976 if (err) {
1977 ip6_cork_release(cork, &v6_cork);
1978 return ERR_PTR(err);
1979 }
1980 if (ipc6->dontfrag < 0)
1981 ipc6->dontfrag = inet6_sk(sk)->dontfrag;
1982
1983 err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
1984 ¤t->task_frag, getfrag, from,
1985 length + exthdrlen, transhdrlen + exthdrlen,
1986 flags, ipc6);
1987 if (err) {
1988 __ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
1989 return ERR_PTR(err);
1990 }
1991
1992 return __ip6_make_skb(sk, &queue, cork, &v6_cork);
1993 }
1994