ip6_output: fragment outgoing reassembled skb properly
[firefly-linux-kernel-4.4.55.git] / net / ipv6 / ip6_output.c
1 /*
2  *      IPv6 output functions
3  *      Linux INET6 implementation
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      Based on linux/net/ipv4/ip_output.c
9  *
10  *      This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  *
15  *      Changes:
 *      A.N.Kuznetsov   :       arithmetics in fragmentation.
17  *                              extension headers are implemented.
18  *                              route changes now work.
19  *                              ip6_forward does not confuse sniffers.
20  *                              etc.
21  *
22  *      H. von Brand    :       Added missing #include <linux/string.h>
23  *      Imran Patel     :       frag id should be in NBO
24  *      Kazunori MIYAZAWA @USAGI
25  *                      :       add ip6_append_data and related functions
26  *                              for datagram xmit
27  */
28
29 #include <linux/errno.h>
30 #include <linux/kernel.h>
31 #include <linux/string.h>
32 #include <linux/socket.h>
33 #include <linux/net.h>
34 #include <linux/netdevice.h>
35 #include <linux/if_arp.h>
36 #include <linux/in6.h>
37 #include <linux/tcp.h>
38 #include <linux/route.h>
39 #include <linux/module.h>
40 #include <linux/slab.h>
41
42 #include <linux/netfilter.h>
43 #include <linux/netfilter_ipv6.h>
44
45 #include <net/sock.h>
46 #include <net/snmp.h>
47
48 #include <net/ipv6.h>
49 #include <net/ndisc.h>
50 #include <net/protocol.h>
51 #include <net/ip6_route.h>
52 #include <net/addrconf.h>
53 #include <net/rawv6.h>
54 #include <net/icmp.h>
55 #include <net/xfrm.h>
56 #include <net/checksum.h>
57 #include <linux/mroute6.h>
58
59 int __ip6_local_out(struct sk_buff *skb)
60 {
61         int len;
62
63         len = skb->len - sizeof(struct ipv6hdr);
64         if (len > IPV6_MAXPLEN)
65                 len = 0;
66         ipv6_hdr(skb)->payload_len = htons(len);
67
68         return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
69                        skb_dst(skb)->dev, dst_output);
70 }
71
/* Transmit a locally generated packet: run the LOCAL_OUT hook and, when
 * the hook lets the packet through, hand it to the route's output
 * function.
 */
int ip6_local_out(struct sk_buff *skb)
{
	int ret = __ip6_local_out(skb);

	/* A verdict of 1 means "continue normally". */
	if (likely(ret == 1))
		ret = dst_output(skb);

	return ret;
}
EXPORT_SYMBOL_GPL(ip6_local_out);
83
/* Last hop of the output path: resolve the neighbour for the route's
 * nexthop and hand the packet to the device layer.
 *
 * Multicast packets may additionally be looped back to local listeners,
 * and node-local scoped multicast is never transmitted on a
 * non-loopback device.
 *
 * Returns the neighbour output result, 0 when the packet was consumed
 * without wire transmission, or -EINVAL when no neighbour entry could
 * be created.
 */
static int ip6_finish_output2(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	struct neighbour *neigh;
	struct in6_addr *nexthop;
	int ret;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		/* Loop a copy back to local sockets when multicast loopback
		 * is enabled on the sending socket and either a multicast
		 * router is listening or the host is a member of the group.
		 */
		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
		    ((mroute6_socket(dev_net(dev), skb) &&
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					newskb, NULL, newskb->dev,
					dev_loopback_xmit);

			/* hop_limit 0: sender wanted only the looped-back
			 * copy; never put such a packet on the wire. */
			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(dev_net(dev), idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
				skb->len);

		/* Node-local scoped multicast must never leave the node. */
		if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
		    IPV6_ADDR_SCOPE_NODELOCAL &&
		    !(dev->flags & IFF_LOOPBACK)) {
			kfree_skb(skb);
			return 0;
		}
	}

	/* Neighbour lookup is lockless under RCU-bh; create an entry on
	 * demand if none exists yet.
	 */
	rcu_read_lock_bh();
	nexthop = rt6_nexthop((struct rt6_info *)dst);
	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
	if (unlikely(!neigh))
		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
	if (!IS_ERR(neigh)) {
		ret = dst_neigh_output(dst, neigh, skb);
		rcu_read_unlock_bh();
		return ret;
	}
	rcu_read_unlock_bh();

	IP6_INC_STATS_BH(dev_net(dst->dev),
			 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}
149
150 static int ip6_finish_output(struct sk_buff *skb)
151 {
152         if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
153             dst_allfrag(skb_dst(skb)) ||
154             (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
155                 return ip6_fragment(skb, ip6_finish_output2);
156         else
157                 return ip6_finish_output2(skb);
158 }
159
160 int ip6_output(struct sk_buff *skb)
161 {
162         struct net_device *dev = skb_dst(skb)->dev;
163         struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
164         if (unlikely(idev->cnf.disable_ipv6)) {
165                 IP6_INC_STATS(dev_net(dev), idev,
166                               IPSTATS_MIB_OUTDISCARDS);
167                 kfree_skb(skb);
168                 return 0;
169         }
170
171         return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
172                             ip6_finish_output,
173                             !(IP6CB(skb)->flags & IP6SKB_REROUTED));
174 }
175
/*
 *	xmit an sk_buff (used by TCP, SCTP and DCCP)
 *
 *	Pushes any extension headers from @opt (reallocating headroom when
 *	required), fills in the fixed IPv6 header from @fl6 and @tclass,
 *	then hands the packet to the LOCAL_OUT netfilter hook.
 *
 *	Consumes @skb on error.  Returns the hook/dst_output result on
 *	success, -ENOBUFS when headroom reallocation fails, or -EMSGSIZE
 *	when the packet exceeds the path MTU and may not be sent.
 */

int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
	     struct ipv6_txoptions *opt, int tclass)
{
	struct net *net = sock_net(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl6->daddr;
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr;
	u8  proto = fl6->flowi6_proto;
	int seg_len = skb->len;
	int hlimit = -1;
	u32 mtu;

	if (opt) {
		unsigned int head_room;

		/* First: exthdrs may take lots of space (~8K for now)
		   MAX_HEADER is not enough.
		 */
		head_room = opt->opt_nflen + opt->opt_flen;
		seg_len += head_room;
		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

		if (skb_headroom(skb) < head_room) {
			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
			if (skb2 == NULL) {
				IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return -ENOBUFS;
			}
			consume_skb(skb);
			skb = skb2;
			/* Charge the reallocated skb to the sending socket. */
			skb_set_owner_w(skb, sk);
		}
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);
		if (opt->opt_nflen)
			/* May rewrite first_hop (e.g. via a routing header). */
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	ip6_flow_hdr(hdr, tclass, fl6->flowlabel);

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	hdr->saddr = fl6->saddr;
	hdr->daddr = *first_hop;

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	mtu = dst_mtu(dst);
	/* GSO packets and sockets allowing local fragmentation (local_df)
	 * may exceed the MTU here; everything else is refused below.
	 */
	if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_OUT, skb->len);
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
			       dst->dev, dst_output);
	}

	skb->dev = dst->dev;
	/* Report the path MTU back to the socket before discarding. */
	ipv6_local_error(sk, EMSGSIZE, fl6, mtu);
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}

EXPORT_SYMBOL(ip6_xmit);
261
/* Deliver a Router Alert packet to all raw sockets registered on
 * ip6_ra_chain for the given RA selector value @sel.
 *
 * Every matching socket except the last receives a clone; the last
 * match consumes the original skb.
 *
 * Returns 1 when the skb was delivered (and thus consumed), 0 when no
 * socket matched and the caller keeps ownership of the skb.
 */
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		/* Match the selector and, if the socket is bound to a
		 * device, the ingress device too. */
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			if (last) {
				/* Hand a private copy to the previous match;
				 * a failed clone silently skips that socket. */
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}
290
/* Decide how to handle a packet whose destination address we answer
 * proxy NDP for.
 *
 * Return values:
 *   1  - unicast neighbour discovery message: hand to local input
 *  -1  - link-local destination: cannot be proxied; the sender is
 *        notified via dst_link_failure() and the packet is dropped
 *   0  - continue with normal forwarding
 */
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	__be16 frag_off;
	int offset;

	/* Locate the transport header behind any extension headers. */
	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		/* Need at least the ICMPv6 type octet in linear data. */
		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For reaction involving unicast neighbor discovery
			 * message destined to the proxied address, pass it to
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}
342
/* Final step of the FORWARD netfilter hook: hand the packet to the
 * route's output function.
 */
static inline int ip6_forward_finish(struct sk_buff *skb)
{
	int ret = dst_output(skb);

	return ret;
}
347
/* Forward a packet on behalf of another host.
 *
 * Performs the checks required of an IPv6 router — forwarding enabled,
 * LRO/xfrm/pkt_type sanity, router alert delivery, hop limit, proxy
 * NDP, redirect generation, source address validity and MTU — then
 * decrements the hop limit and passes the packet through the FORWARD
 * netfilter hook to ip6_forward_finish().
 *
 * Consumes the skb.  Returns the netfilter verdict on success, 0 when
 * the packet was consumed locally, or a negative errno on discard.
 */
int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	u32 mtu;

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	/* Large-receive-offload aggregated skbs must not be forwarded. */
	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	/* Only forward packets that were addressed to us at layer 2. */
	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	skb_forward_csum(skb);

	/*
	 *	We DO NOT make any processing on
	 *	RA packets, pushing them to user level AS IS
	 *	without any WARRANTY that application will be able
	 *	to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not end-node, so that if packet contains
	 *	AH/ESP, we cannot make anything.
	 *	Defragmentation also would be mistake, RA packets
	 *	cannot be fragmented, because there is no warranty
	 *	that different fragments will go along one path. --ANK
	 */
	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		/* Proxied ND traffic is consumed by local input instead. */
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			IP6_INC_STATS(net, ip6_dst_idev(dst),
				      IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	/* xfrm may have rerouted the skb; reload the dst pointer. */
	dst = skb_dst(skb);

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
	 */
	if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct inet_peer *peer;
		struct rt6_info *rt;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if (rt->rt6i_flags & RTF_GATEWAY)
			target = &rt->rt6i_gateway;
		else
			target = &hdr->daddr;

		peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (inet_peer_xrlim_allow(peer, 1*HZ))
			ndisc_send_redirect(skb, target);
		if (peer)
			inet_putpeer(peer);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}

	mtu = dst_mtu(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	/* A router never fragments on behalf of the sender: too-big
	 * packets get an ICMPv6 Packet Too Big back instead.
	 */
	if ((!skb->local_df && skb->len > mtu && !skb_is_gso(skb)) ||
	    (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}
505
506 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
507 {
508         to->pkt_type = from->pkt_type;
509         to->priority = from->priority;
510         to->protocol = from->protocol;
511         skb_dst_drop(to);
512         skb_dst_set(to, dst_clone(skb_dst(from)));
513         to->dev = from->dev;
514         to->mark = from->mark;
515
516 #ifdef CONFIG_NET_SCHED
517         to->tc_index = from->tc_index;
518 #endif
519         nf_copy(to, from);
520 #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
521         to->nf_trace = from->nf_trace;
522 #endif
523         skb_copy_secmark(to, from);
524 }
525
526 int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
527 {
528         struct sk_buff *frag;
529         struct rt6_info *rt = (struct rt6_info*)skb_dst(skb);
530         struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
531         struct ipv6hdr *tmp_hdr;
532         struct frag_hdr *fh;
533         unsigned int mtu, hlen, left, len;
534         int hroom, troom;
535         __be32 frag_id = 0;
536         int ptr, offset = 0, err=0;
537         u8 *prevhdr, nexthdr = 0;
538         struct net *net = dev_net(skb_dst(skb)->dev);
539
540         hlen = ip6_find_1stfragopt(skb, &prevhdr);
541         nexthdr = *prevhdr;
542
543         mtu = ip6_skb_dst_mtu(skb);
544
545         /* We must not fragment if the socket is set to force MTU discovery
546          * or if the skb it not generated by a local socket.
547          */
548         if (unlikely(!skb->local_df && skb->len > mtu) ||
549                      (IP6CB(skb)->frag_max_size &&
550                       IP6CB(skb)->frag_max_size > mtu)) {
551                 if (skb->sk && dst_allfrag(skb_dst(skb)))
552                         sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
553
554                 skb->dev = skb_dst(skb)->dev;
555                 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
556                 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
557                               IPSTATS_MIB_FRAGFAILS);
558                 kfree_skb(skb);
559                 return -EMSGSIZE;
560         }
561
562         if (np && np->frag_size < mtu) {
563                 if (np->frag_size)
564                         mtu = np->frag_size;
565         }
566         mtu -= hlen + sizeof(struct frag_hdr);
567
568         if (skb_has_frag_list(skb)) {
569                 int first_len = skb_pagelen(skb);
570                 struct sk_buff *frag2;
571
572                 if (first_len - hlen > mtu ||
573                     ((first_len - hlen) & 7) ||
574                     skb_cloned(skb))
575                         goto slow_path;
576
577                 skb_walk_frags(skb, frag) {
578                         /* Correct geometry. */
579                         if (frag->len > mtu ||
580                             ((frag->len & 7) && frag->next) ||
581                             skb_headroom(frag) < hlen)
582                                 goto slow_path_clean;
583
584                         /* Partially cloned skb? */
585                         if (skb_shared(frag))
586                                 goto slow_path_clean;
587
588                         BUG_ON(frag->sk);
589                         if (skb->sk) {
590                                 frag->sk = skb->sk;
591                                 frag->destructor = sock_wfree;
592                         }
593                         skb->truesize -= frag->truesize;
594                 }
595
596                 err = 0;
597                 offset = 0;
598                 frag = skb_shinfo(skb)->frag_list;
599                 skb_frag_list_init(skb);
600                 /* BUILD HEADER */
601
602                 *prevhdr = NEXTHDR_FRAGMENT;
603                 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
604                 if (!tmp_hdr) {
605                         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
606                                       IPSTATS_MIB_FRAGFAILS);
607                         return -ENOMEM;
608                 }
609
610                 __skb_pull(skb, hlen);
611                 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
612                 __skb_push(skb, hlen);
613                 skb_reset_network_header(skb);
614                 memcpy(skb_network_header(skb), tmp_hdr, hlen);
615
616                 ipv6_select_ident(fh, rt);
617                 fh->nexthdr = nexthdr;
618                 fh->reserved = 0;
619                 fh->frag_off = htons(IP6_MF);
620                 frag_id = fh->identification;
621
622                 first_len = skb_pagelen(skb);
623                 skb->data_len = first_len - skb_headlen(skb);
624                 skb->len = first_len;
625                 ipv6_hdr(skb)->payload_len = htons(first_len -
626                                                    sizeof(struct ipv6hdr));
627
628                 dst_hold(&rt->dst);
629
630                 for (;;) {
631                         /* Prepare header of the next frame,
632                          * before previous one went down. */
633                         if (frag) {
634                                 frag->ip_summed = CHECKSUM_NONE;
635                                 skb_reset_transport_header(frag);
636                                 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
637                                 __skb_push(frag, hlen);
638                                 skb_reset_network_header(frag);
639                                 memcpy(skb_network_header(frag), tmp_hdr,
640                                        hlen);
641                                 offset += skb->len - hlen - sizeof(struct frag_hdr);
642                                 fh->nexthdr = nexthdr;
643                                 fh->reserved = 0;
644                                 fh->frag_off = htons(offset);
645                                 if (frag->next != NULL)
646                                         fh->frag_off |= htons(IP6_MF);
647                                 fh->identification = frag_id;
648                                 ipv6_hdr(frag)->payload_len =
649                                                 htons(frag->len -
650                                                       sizeof(struct ipv6hdr));
651                                 ip6_copy_metadata(frag, skb);
652                         }
653
654                         err = output(skb);
655                         if(!err)
656                                 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
657                                               IPSTATS_MIB_FRAGCREATES);
658
659                         if (err || !frag)
660                                 break;
661
662                         skb = frag;
663                         frag = skb->next;
664                         skb->next = NULL;
665                 }
666
667                 kfree(tmp_hdr);
668
669                 if (err == 0) {
670                         IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
671                                       IPSTATS_MIB_FRAGOKS);
672                         ip6_rt_put(rt);
673                         return 0;
674                 }
675
676                 while (frag) {
677                         skb = frag->next;
678                         kfree_skb(frag);
679                         frag = skb;
680                 }
681
682                 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
683                               IPSTATS_MIB_FRAGFAILS);
684                 ip6_rt_put(rt);
685                 return err;
686
687 slow_path_clean:
688                 skb_walk_frags(skb, frag2) {
689                         if (frag2 == frag)
690                                 break;
691                         frag2->sk = NULL;
692                         frag2->destructor = NULL;
693                         skb->truesize += frag2->truesize;
694                 }
695         }
696
697 slow_path:
698         if ((skb->ip_summed == CHECKSUM_PARTIAL) &&
699             skb_checksum_help(skb))
700                 goto fail;
701
702         left = skb->len - hlen;         /* Space per frame */
703         ptr = hlen;                     /* Where to start from */
704
705         /*
706          *      Fragment the datagram.
707          */
708
709         *prevhdr = NEXTHDR_FRAGMENT;
710         hroom = LL_RESERVED_SPACE(rt->dst.dev);
711         troom = rt->dst.dev->needed_tailroom;
712
713         /*
714          *      Keep copying data until we run out.
715          */
716         while(left > 0) {
717                 len = left;
718                 /* IF: it doesn't fit, use 'mtu' - the data space left */
719                 if (len > mtu)
720                         len = mtu;
721                 /* IF: we are not sending up to and including the packet end
722                    then align the next start on an eight byte boundary */
723                 if (len < left) {
724                         len &= ~7;
725                 }
726                 /*
727                  *      Allocate buffer.
728                  */
729
730                 if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
731                                       hroom + troom, GFP_ATOMIC)) == NULL) {
732                         NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
733                         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
734                                       IPSTATS_MIB_FRAGFAILS);
735                         err = -ENOMEM;
736                         goto fail;
737                 }
738
739                 /*
740                  *      Set up data on packet
741                  */
742
743                 ip6_copy_metadata(frag, skb);
744                 skb_reserve(frag, hroom);
745                 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
746                 skb_reset_network_header(frag);
747                 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
748                 frag->transport_header = (frag->network_header + hlen +
749                                           sizeof(struct frag_hdr));
750
751                 /*
752                  *      Charge the memory for the fragment to any owner
753                  *      it might possess
754                  */
755                 if (skb->sk)
756                         skb_set_owner_w(frag, skb->sk);
757
758                 /*
759                  *      Copy the packet header into the new buffer.
760                  */
761                 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
762
763                 /*
764                  *      Build fragment header.
765                  */
766                 fh->nexthdr = nexthdr;
767                 fh->reserved = 0;
768                 if (!frag_id) {
769                         ipv6_select_ident(fh, rt);
770                         frag_id = fh->identification;
771                 } else
772                         fh->identification = frag_id;
773
774                 /*
775                  *      Copy a block of the IP datagram.
776                  */
777                 if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
778                         BUG();
779                 left -= len;
780
781                 fh->frag_off = htons(offset);
782                 if (left > 0)
783                         fh->frag_off |= htons(IP6_MF);
784                 ipv6_hdr(frag)->payload_len = htons(frag->len -
785                                                     sizeof(struct ipv6hdr));
786
787                 ptr += len;
788                 offset += len;
789
790                 /*
791                  *      Put this fragment into the sending queue.
792                  */
793                 err = output(frag);
794                 if (err)
795                         goto fail;
796
797                 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
798                               IPSTATS_MIB_FRAGCREATES);
799         }
800         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
801                       IPSTATS_MIB_FRAGOKS);
802         consume_skb(skb);
803         return err;
804
805 fail:
806         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
807                       IPSTATS_MIB_FRAGFAILS);
808         kfree_skb(skb);
809         return err;
810 }
811
812 static inline int ip6_rt_check(const struct rt6key *rt_key,
813                                const struct in6_addr *fl_addr,
814                                const struct in6_addr *addr_cache)
815 {
816         return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
817                 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
818 }
819
820 static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
821                                           struct dst_entry *dst,
822                                           const struct flowi6 *fl6)
823 {
824         struct ipv6_pinfo *np = inet6_sk(sk);
825         struct rt6_info *rt;
826
827         if (!dst)
828                 goto out;
829
830         if (dst->ops->family != AF_INET6) {
831                 dst_release(dst);
832                 return NULL;
833         }
834
835         rt = (struct rt6_info *)dst;
836         /* Yes, checking route validity in not connected
837          * case is not very simple. Take into account,
838          * that we do not support routing by source, TOS,
839          * and MSG_DONTROUTE            --ANK (980726)
840          *
841          * 1. ip6_rt_check(): If route was host route,
842          *    check that cached destination is current.
843          *    If it is network route, we still may
844          *    check its validity using saved pointer
845          *    to the last used address: daddr_cache.
846          *    We do not want to save whole address now,
847          *    (because main consumer of this service
848          *    is tcp, which has not this problem),
849          *    so that the last trick works only on connected
850          *    sockets.
851          * 2. oif also should be the same.
852          */
853         if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
854 #ifdef CONFIG_IPV6_SUBTREES
855             ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
856 #endif
857             (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
858                 dst_release(dst);
859                 dst = NULL;
860         }
861
862 out:
863         return dst;
864 }
865
/* ip6_dst_lookup_tail - finish a route lookup for @fl6.
 *
 * If *@dst is NULL, a routing lookup is performed; otherwise the
 * caller-supplied dst is used as-is.  An unspecified source address in
 * the flow is filled in from the chosen route.  On success, returns 0
 * with *@dst holding a referenced route; on failure, the dst reference
 * is dropped, *@dst is set to NULL, and a negative errno is returned.
 */
static int ip6_dst_lookup_tail(struct sock *sk,
			       struct dst_entry **dst, struct flowi6 *fl6)
{
	struct net *net = sock_net(sk);
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	struct neighbour *n;
	struct rt6_info *rt;
#endif
	int err;

	if (*dst == NULL)
		*dst = ip6_route_output(net, sk, fl6);

	/* The lookup reports failure via the dst's error field rather
	 * than by returning NULL, hence the unconditional dereference.
	 */
	if ((err = (*dst)->error))
		goto out_err_release;

	/* Flow left the source address unspecified (::); pick one that
	 * matches the selected route.
	 */
	if (ipv6_addr_any(&fl6->saddr)) {
		struct rt6_info *rt = (struct rt6_info *) *dst;
		err = ip6_route_get_saddr(net, rt, &fl6->daddr,
					  sk ? inet6_sk(sk)->srcprefs : 0,
					  &fl6->saddr);
		if (err)
			goto out_err_release;
	}

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	rt = (struct rt6_info *) *dst;
	rcu_read_lock_bh();
	n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt));
	/* Non-NULL neighbour in a not-yet-valid NUD state blocks use of
	 * this route for an optimistic source address.
	 */
	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
	rcu_read_unlock_bh();

	if (err) {
		struct inet6_ifaddr *ifp;
		struct flowi6 fl_gw6;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
			/* Zeroed daddr makes the lookup resolve to the
			 * default route (the nexthop router).
			 */
			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw6);
			if ((err = (*dst)->error))
				goto out_err_release;
		}
	}
#endif

	return 0;

out_err_release:
	if (err == -ENETUNREACH)
		IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	dst_release(*dst);
	*dst = NULL;
	return err;
}
942
943 /**
944  *      ip6_dst_lookup - perform route lookup on flow
945  *      @sk: socket which provides route info
946  *      @dst: pointer to dst_entry * for result
947  *      @fl6: flow to lookup
948  *
949  *      This function performs a route lookup on the given flow.
950  *
951  *      It returns zero on success, or a standard errno code on error.
952  */
953 int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6)
954 {
955         *dst = NULL;
956         return ip6_dst_lookup_tail(sk, dst, fl6);
957 }
958 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
959
960 /**
961  *      ip6_dst_lookup_flow - perform route lookup on flow with ipsec
962  *      @sk: socket which provides route info
963  *      @fl6: flow to lookup
964  *      @final_dst: final destination address for ipsec lookup
965  *      @can_sleep: we are in a sleepable context
966  *
967  *      This function performs a route lookup on the given flow.
968  *
969  *      It returns a valid dst pointer on success, or a pointer encoded
970  *      error code.
971  */
972 struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
973                                       const struct in6_addr *final_dst,
974                                       bool can_sleep)
975 {
976         struct dst_entry *dst = NULL;
977         int err;
978
979         err = ip6_dst_lookup_tail(sk, &dst, fl6);
980         if (err)
981                 return ERR_PTR(err);
982         if (final_dst)
983                 fl6->daddr = *final_dst;
984         if (can_sleep)
985                 fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;
986
987         return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
988 }
989 EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
990
991 /**
992  *      ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
993  *      @sk: socket which provides the dst cache and route info
994  *      @fl6: flow to lookup
995  *      @final_dst: final destination address for ipsec lookup
996  *      @can_sleep: we are in a sleepable context
997  *
998  *      This function performs a route lookup on the given flow with the
999  *      possibility of using the cached route in the socket if it is valid.
1000  *      It will take the socket dst lock when operating on the dst cache.
1001  *      As a result, this function can only be used in process context.
1002  *
1003  *      It returns a valid dst pointer on success, or a pointer encoded
1004  *      error code.
1005  */
1006 struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1007                                          const struct in6_addr *final_dst,
1008                                          bool can_sleep)
1009 {
1010         struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1011         int err;
1012
1013         dst = ip6_sk_dst_check(sk, dst, fl6);
1014
1015         err = ip6_dst_lookup_tail(sk, &dst, fl6);
1016         if (err)
1017                 return ERR_PTR(err);
1018         if (final_dst)
1019                 fl6->daddr = *final_dst;
1020         if (can_sleep)
1021                 fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;
1022
1023         return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
1024 }
1025 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
1026
1027 static inline int ip6_ufo_append_data(struct sock *sk,
1028                         int getfrag(void *from, char *to, int offset, int len,
1029                         int odd, struct sk_buff *skb),
1030                         void *from, int length, int hh_len, int fragheaderlen,
1031                         int transhdrlen, int mtu,unsigned int flags,
1032                         struct rt6_info *rt)
1033
1034 {
1035         struct sk_buff *skb;
1036         int err;
1037
1038         /* There is support for UDP large send offload by network
1039          * device, so create one single skb packet containing complete
1040          * udp datagram
1041          */
1042         if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
1043                 struct frag_hdr fhdr;
1044
1045                 skb = sock_alloc_send_skb(sk,
1046                         hh_len + fragheaderlen + transhdrlen + 20,
1047                         (flags & MSG_DONTWAIT), &err);
1048                 if (skb == NULL)
1049                         return err;
1050
1051                 /* reserve space for Hardware header */
1052                 skb_reserve(skb, hh_len);
1053
1054                 /* create space for UDP/IP header */
1055                 skb_put(skb,fragheaderlen + transhdrlen);
1056
1057                 /* initialize network header pointer */
1058                 skb_reset_network_header(skb);
1059
1060                 /* initialize protocol header pointer */
1061                 skb->transport_header = skb->network_header + fragheaderlen;
1062
1063                 skb->ip_summed = CHECKSUM_PARTIAL;
1064                 skb->csum = 0;
1065
1066                 /* Specify the length of each IPv6 datagram fragment.
1067                  * It has to be a multiple of 8.
1068                  */
1069                 skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
1070                                              sizeof(struct frag_hdr)) & ~7;
1071                 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
1072                 ipv6_select_ident(&fhdr, rt);
1073                 skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
1074                 __skb_queue_tail(&sk->sk_write_queue, skb);
1075         }
1076
1077         return skb_append_datato_frags(sk, skb, getfrag, from,
1078                                        (length - transhdrlen));
1079 }
1080
1081 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1082                                                gfp_t gfp)
1083 {
1084         return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1085 }
1086
1087 static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1088                                                 gfp_t gfp)
1089 {
1090         return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1091 }
1092
1093 static void ip6_append_data_mtu(unsigned int *mtu,
1094                                 int *maxfraglen,
1095                                 unsigned int fragheaderlen,
1096                                 struct sk_buff *skb,
1097                                 struct rt6_info *rt,
1098                                 bool pmtuprobe)
1099 {
1100         if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
1101                 if (skb == NULL) {
1102                         /* first fragment, reserve header_len */
1103                         *mtu = *mtu - rt->dst.header_len;
1104
1105                 } else {
1106                         /*
1107                          * this fragment is not first, the headers
1108                          * space is regarded as data space.
1109                          */
1110                         *mtu = min(*mtu, pmtuprobe ?
1111                                    rt->dst.dev->mtu :
1112                                    dst_mtu(rt->dst.path));
1113                 }
1114                 *maxfraglen = ((*mtu - fragheaderlen) & ~7)
1115                               + fragheaderlen - sizeof(struct frag_hdr);
1116         }
1117 }
1118
/* ip6_append_data - append user data to the socket's pending-output
 * queue (sk_write_queue), splitting it into MTU-sized chunks that
 * ip6_push_pending_frames() later assembles and transmits.
 *
 * The first call on an empty queue "corks" the socket: it duplicates
 * @opt, takes a reference on @rt, and records flow/mtu state in the
 * cork.  Subsequent calls ignore the passed rt/fl6/opt and reuse the
 * corked state.  @getfrag copies user data into kernel buffers.
 * Returns 0 on success or a negative errno; on error the bytes that
 * could not be queued are removed from the cork accounting.
 */
int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
	int offset, int len, int odd, struct sk_buff *skb),
	void *from, int length, int transhdrlen,
	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
	struct rt6_info *rt, unsigned int flags, int dontfrag)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct inet_cork *cork;
	struct sk_buff *skb, *skb_prev = NULL;
	unsigned int maxfraglen, fragheaderlen, mtu;
	int exthdrlen;
	int dst_exthdrlen;
	int hh_len;
	int copy;
	int err;
	int offset = 0;
	__u8 tx_flags = 0;

	if (flags&MSG_PROBE)
		return 0;
	cork = &inet->cork.base;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		if (opt) {
			if (WARN_ON(np->cork.opt))
				return -EINVAL;

			np->cork.opt = kzalloc(opt->tot_len, sk->sk_allocation);
			if (unlikely(np->cork.opt == NULL))
				return -ENOBUFS;

			np->cork.opt->tot_len = opt->tot_len;
			np->cork.opt->opt_flen = opt->opt_flen;
			np->cork.opt->opt_nflen = opt->opt_nflen;

			/* Deep-copy each option block so the cork owns
			 * its own lifetime, independent of the caller's.
			 */
			np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
							    sk->sk_allocation);
			if (opt->dst0opt && !np->cork.opt->dst0opt)
				return -ENOBUFS;

			np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
							    sk->sk_allocation);
			if (opt->dst1opt && !np->cork.opt->dst1opt)
				return -ENOBUFS;

			np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
							   sk->sk_allocation);
			if (opt->hopopt && !np->cork.opt->hopopt)
				return -ENOBUFS;

			np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
							    sk->sk_allocation);
			if (opt->srcrt && !np->cork.opt->srcrt)
				return -ENOBUFS;

			/* need source address above miyazawa */
		}
		dst_hold(&rt->dst);
		cork->dst = &rt->dst;
		inet->cork.fl.u.ip6 = *fl6;
		np->cork.hop_limit = hlimit;
		np->cork.tclass = tclass;
		/* Pick the working MTU: device MTU while probing PMTU,
		 * otherwise the (path) dst MTU; XFRM tunnels use the
		 * outer dst directly.
		 */
		if (rt->dst.flags & DST_XFRM_TUNNEL)
			mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
			      rt->dst.dev->mtu : dst_mtu(&rt->dst);
		else
			mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
			      rt->dst.dev->mtu : dst_mtu(rt->dst.path);
		/* A smaller per-socket frag_size overrides the MTU. */
		if (np->frag_size < mtu) {
			if (np->frag_size)
				mtu = np->frag_size;
		}
		cork->fragsize = mtu;
		if (dst_allfrag(rt->dst.path))
			cork->flags |= IPCORK_ALLFRAG;
		cork->length = 0;
		exthdrlen = (opt ? opt->opt_flen : 0);
		length += exthdrlen;
		transhdrlen += exthdrlen;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	} else {
		/* Queue already corked: reuse the recorded state and
		 * ignore the caller-passed rt/fl6/opt.
		 */
		rt = (struct rt6_info *)cork->dst;
		fl6 = &inet->cork.fl.u.ip6;
		opt = np->cork.opt;
		transhdrlen = 0;
		exthdrlen = 0;
		dst_exthdrlen = 0;
		mtu = cork->fragsize;
	}

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	/* Per-fragment header overhead: IPv6 header, any netfilter
	 * headers, and the non-fragmentable options.
	 */
	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	/* Largest 8-aligned payload boundary within the mtu. */
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);

	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
		if (cork->length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
			ipv6_local_error(sk, EMSGSIZE, fl6, mtu-exthdrlen);
			return -EMSGSIZE;
		}
	}

	/* For UDP, check if TX timestamp is enabled */
	if (sk->sk_type == SOCK_DGRAM)
		sock_tx_timestamp(sk, &tx_flags);

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octects, in total).
	 *
	 * Note that we may need to "move" the data from the tail of
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	/* IPV6_DONTFRAG: report the MTU to the application instead of
	 * fragmenting (UDP and raw sockets only).
	 */
	if ((length > mtu) && dontfrag && (sk->sk_protocol == IPPROTO_UDP ||
					   sk->sk_protocol == IPPROTO_RAW)) {
		ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen);
		return -EMSGSIZE;
	}

	skb = skb_peek_tail(&sk->sk_write_queue);
	cork->length += length;
	/* UFO fast path: hand the whole datagram to the device for
	 * segmentation when it advertises NETIF_F_UFO.
	 */
	if (((length > mtu) ||
	     (skb && skb_has_frags(skb))) &&
	    (sk->sk_protocol == IPPROTO_UDP) &&
	    (rt->dst.dev->features & NETIF_F_UFO)) {
		err = ip6_ufo_append_data(sk, getfrag, from, length,
					  hh_len, fragheaderlen,
					  transhdrlen, mtu, flags, rt);
		if (err)
			goto error;
		return 0;
	}

	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
alloc_new_skb:
			/* There's no room in the current skb */
			/* fraggap: bytes past the fragment boundary in
			 * the previous skb that must move to this one.
			 */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (skb == NULL || skb_prev == NULL)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt,
						    np->pmtudisc ==
						    IPV6_PMTUDISC_PROBE);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			alloclen += dst_exthdrlen;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			alloclen += rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			if (transhdrlen) {
				/* First skb of the datagram: may block
				 * waiting for send buffer space.
				 */
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(skb == NULL))
					err = -ENOBUFS;
				else {
					/* Only the initial fragment
					 * is time stamped.
					 */
					tx_flags = 0;
				}
			}
			if (skb == NULL)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->ip_summed = CHECKSUM_NONE;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			if (sk->sk_type == SOCK_DGRAM)
				skb_shinfo(skb)->tx_flags = tx_flags;

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				/* Move the overhanging tail of the
				 * previous skb into this one, keeping
				 * both checksums consistent.
				 */
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			copy = datalen - transhdrlen - fraggap;

			if (copy < 0) {
				err = -EINVAL;
				kfree_skb(skb);
				goto error;
			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG)) {
			/* No scatter-gather: copy into the skb's linear
			 * area, rolling back on a getfrag fault.
			 */
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			/* Scatter-gather: copy into the per-socket page
			 * frag and attach (or coalesce) it as a paged
			 * fragment of the skb.
			 */
			int i = skb_shinfo(skb)->nr_frags;
			struct page_frag *pfrag = sk_page_frag(sk);

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}

	return 0;

error_efault:
	err = -EFAULT;
error:
	/* Undo the optimistic accounting for the bytes never queued. */
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}
EXPORT_SYMBOL_GPL(ip6_append_data);
1464
1465 static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1466 {
1467         if (np->cork.opt) {
1468                 kfree(np->cork.opt->dst0opt);
1469                 kfree(np->cork.opt->dst1opt);
1470                 kfree(np->cork.opt->hopopt);
1471                 kfree(np->cork.opt->srcrt);
1472                 kfree(np->cork.opt);
1473                 np->cork.opt = NULL;
1474         }
1475
1476         if (inet->cork.base.dst) {
1477                 dst_release(inet->cork.base.dst);
1478                 inet->cork.base.dst = NULL;
1479                 inet->cork.base.flags &= ~IPCORK_ALLFRAG;
1480         }
1481         memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1482 }
1483
/* ip6_push_pending_frames - assemble the skbs queued by
 * ip6_append_data() into one datagram, prepend extension headers and
 * the IPv6 header, and hand it to ip6_local_out().  Always releases
 * the cork state, even on error.  Returns 0 on success or a negative
 * errno.
 */
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = np->cork.opt;
	struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst;
	struct flowi6 *fl6 = &inet->cork.fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;
	int err = 0;

	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	/* Chain every remaining queued skb onto the head skb's
	 * frag_list, transferring length/truesize accounting and
	 * detaching them from the socket.
	 */
	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	if (np->pmtudisc < IPV6_PMTUDISC_DO)
		skb->local_df = 1;

	*final_dst = fl6->daddr;
	__skb_pull(skb, skb_network_header_len(skb));
	/* Push extension headers in front of the payload; a routing
	 * header (via the nfrag options) may rewrite final_dst.
	 */
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	ip6_flow_hdr(hdr, np->cork.tclass, fl6->flowlabel);
	hdr->hop_limit = np->cork.hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
	}

	err = ip6_local_out(skb);
	if (err) {
		/* Positive values are congestion-notification codes;
		 * translate them to an errno (or 0).
		 */
		if (err > 0)
			err = net_xmit_errno(err);
		if (err)
			goto error;
	}

out:
	ip6_cork_release(inet, np);
	return err;
error:
	IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	goto out;
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1566
1567 void ip6_flush_pending_frames(struct sock *sk)
1568 {
1569         struct sk_buff *skb;
1570
1571         while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1572                 if (skb_dst(skb))
1573                         IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1574                                       IPSTATS_MIB_OUTDISCARDS);
1575                 kfree_skb(skb);
1576         }
1577
1578         ip6_cork_release(inet_sk(sk), inet6_sk(sk));
1579 }
1580 EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);