Merge branch 'x86-kaslr-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git...
[firefly-linux-kernel-4.4.55.git] / net / ipv6 / ip6_output.c
1 /*
2  *      IPv6 output functions
3  *      Linux INET6 implementation
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      Based on linux/net/ipv4/ip_output.c
9  *
10  *      This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  *
15  *      Changes:
16  *      A.N.Kuznetsov   :       airthmetics in fragmentation.
17  *                              extension headers are implemented.
18  *                              route changes now work.
19  *                              ip6_forward does not confuse sniffers.
20  *                              etc.
21  *
22  *      H. von Brand    :       Added missing #include <linux/string.h>
23  *      Imran Patel     :       frag id should be in NBO
24  *      Kazunori MIYAZAWA @USAGI
25  *                      :       add ip6_append_data and related functions
26  *                              for datagram xmit
27  */
28
29 #include <linux/errno.h>
30 #include <linux/kernel.h>
31 #include <linux/string.h>
32 #include <linux/socket.h>
33 #include <linux/net.h>
34 #include <linux/netdevice.h>
35 #include <linux/if_arp.h>
36 #include <linux/in6.h>
37 #include <linux/tcp.h>
38 #include <linux/route.h>
39 #include <linux/module.h>
40 #include <linux/slab.h>
41
42 #include <linux/netfilter.h>
43 #include <linux/netfilter_ipv6.h>
44
45 #include <net/sock.h>
46 #include <net/snmp.h>
47
48 #include <net/ipv6.h>
49 #include <net/ndisc.h>
50 #include <net/protocol.h>
51 #include <net/ip6_route.h>
52 #include <net/addrconf.h>
53 #include <net/rawv6.h>
54 #include <net/icmp.h>
55 #include <net/xfrm.h>
56 #include <net/checksum.h>
57 #include <linux/mroute6.h>
58
59 int __ip6_local_out(struct sk_buff *skb)
60 {
61         int len;
62
63         len = skb->len - sizeof(struct ipv6hdr);
64         if (len > IPV6_MAXPLEN)
65                 len = 0;
66         ipv6_hdr(skb)->payload_len = htons(len);
67
68         return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
69                        skb_dst(skb)->dev, dst_output);
70 }
71
/*
 *	Send a locally generated packet: run __ip6_local_out() and, when it
 *	returns 1, pass the packet on to dst_output().  Any other return
 *	value from __ip6_local_out() is handed back to the caller unchanged.
 */
int ip6_local_out(struct sk_buff *skb)
{
	int rc = __ip6_local_out(skb);

	if (unlikely(rc != 1))
		return rc;

	return dst_output(skb);
}
EXPORT_SYMBOL_GPL(ip6_local_out);
83
84 static int ip6_finish_output2(struct sk_buff *skb)
85 {
86         struct dst_entry *dst = skb_dst(skb);
87         struct net_device *dev = dst->dev;
88         struct neighbour *neigh;
89         struct in6_addr *nexthop;
90         int ret;
91
92         skb->protocol = htons(ETH_P_IPV6);
93         skb->dev = dev;
94
95         if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
96                 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
97
98                 if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
99                     ((mroute6_socket(dev_net(dev), skb) &&
100                      !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
101                      ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
102                                          &ipv6_hdr(skb)->saddr))) {
103                         struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
104
105                         /* Do not check for IFF_ALLMULTI; multicast routing
106                            is not supported in any case.
107                          */
108                         if (newskb)
109                                 NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
110                                         newskb, NULL, newskb->dev,
111                                         dev_loopback_xmit);
112
113                         if (ipv6_hdr(skb)->hop_limit == 0) {
114                                 IP6_INC_STATS(dev_net(dev), idev,
115                                               IPSTATS_MIB_OUTDISCARDS);
116                                 kfree_skb(skb);
117                                 return 0;
118                         }
119                 }
120
121                 IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
122                                 skb->len);
123
124                 if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
125                     IPV6_ADDR_SCOPE_NODELOCAL &&
126                     !(dev->flags & IFF_LOOPBACK)) {
127                         kfree_skb(skb);
128                         return 0;
129                 }
130         }
131
132         rcu_read_lock_bh();
133         nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
134         neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
135         if (unlikely(!neigh))
136                 neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
137         if (!IS_ERR(neigh)) {
138                 ret = dst_neigh_output(dst, neigh, skb);
139                 rcu_read_unlock_bh();
140                 return ret;
141         }
142         rcu_read_unlock_bh();
143
144         IP6_INC_STATS_BH(dev_net(dst->dev),
145                          ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
146         kfree_skb(skb);
147         return -EINVAL;
148 }
149
150 static int ip6_finish_output(struct sk_buff *skb)
151 {
152         if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
153             dst_allfrag(skb_dst(skb)))
154                 return ip6_fragment(skb, ip6_finish_output2);
155         else
156                 return ip6_finish_output2(skb);
157 }
158
159 int ip6_output(struct sk_buff *skb)
160 {
161         struct net_device *dev = skb_dst(skb)->dev;
162         struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
163         if (unlikely(idev->cnf.disable_ipv6)) {
164                 IP6_INC_STATS(dev_net(dev), idev,
165                               IPSTATS_MIB_OUTDISCARDS);
166                 kfree_skb(skb);
167                 return 0;
168         }
169
170         return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
171                             ip6_finish_output,
172                             !(IP6CB(skb)->flags & IP6SKB_REROUTED));
173 }
174
175 /*
176  *      xmit an sk_buff (used by TCP, SCTP and DCCP)
177  */
178
179 int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
180              struct ipv6_txoptions *opt, int tclass)
181 {
182         struct net *net = sock_net(sk);
183         struct ipv6_pinfo *np = inet6_sk(sk);
184         struct in6_addr *first_hop = &fl6->daddr;
185         struct dst_entry *dst = skb_dst(skb);
186         struct ipv6hdr *hdr;
187         u8  proto = fl6->flowi6_proto;
188         int seg_len = skb->len;
189         int hlimit = -1;
190         u32 mtu;
191
192         if (opt) {
193                 unsigned int head_room;
194
195                 /* First: exthdrs may take lots of space (~8K for now)
196                    MAX_HEADER is not enough.
197                  */
198                 head_room = opt->opt_nflen + opt->opt_flen;
199                 seg_len += head_room;
200                 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
201
202                 if (skb_headroom(skb) < head_room) {
203                         struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
204                         if (skb2 == NULL) {
205                                 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
206                                               IPSTATS_MIB_OUTDISCARDS);
207                                 kfree_skb(skb);
208                                 return -ENOBUFS;
209                         }
210                         consume_skb(skb);
211                         skb = skb2;
212                         skb_set_owner_w(skb, sk);
213                 }
214                 if (opt->opt_flen)
215                         ipv6_push_frag_opts(skb, opt, &proto);
216                 if (opt->opt_nflen)
217                         ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
218         }
219
220         skb_push(skb, sizeof(struct ipv6hdr));
221         skb_reset_network_header(skb);
222         hdr = ipv6_hdr(skb);
223
224         /*
225          *      Fill in the IPv6 header
226          */
227         if (np)
228                 hlimit = np->hop_limit;
229         if (hlimit < 0)
230                 hlimit = ip6_dst_hoplimit(dst);
231
232         ip6_flow_hdr(hdr, tclass, fl6->flowlabel);
233
234         hdr->payload_len = htons(seg_len);
235         hdr->nexthdr = proto;
236         hdr->hop_limit = hlimit;
237
238         hdr->saddr = fl6->saddr;
239         hdr->daddr = *first_hop;
240
241         skb->protocol = htons(ETH_P_IPV6);
242         skb->priority = sk->sk_priority;
243         skb->mark = sk->sk_mark;
244
245         mtu = dst_mtu(dst);
246         if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
247                 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
248                               IPSTATS_MIB_OUT, skb->len);
249                 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
250                                dst->dev, dst_output);
251         }
252
253         skb->dev = dst->dev;
254         ipv6_local_error(sk, EMSGSIZE, fl6, mtu);
255         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
256         kfree_skb(skb);
257         return -EMSGSIZE;
258 }
259
260 EXPORT_SYMBOL(ip6_xmit);
261
262 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
263 {
264         struct ip6_ra_chain *ra;
265         struct sock *last = NULL;
266
267         read_lock(&ip6_ra_lock);
268         for (ra = ip6_ra_chain; ra; ra = ra->next) {
269                 struct sock *sk = ra->sk;
270                 if (sk && ra->sel == sel &&
271                     (!sk->sk_bound_dev_if ||
272                      sk->sk_bound_dev_if == skb->dev->ifindex)) {
273                         if (last) {
274                                 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
275                                 if (skb2)
276                                         rawv6_rcv(last, skb2);
277                         }
278                         last = sk;
279                 }
280         }
281
282         if (last) {
283                 rawv6_rcv(last, skb);
284                 read_unlock(&ip6_ra_lock);
285                 return 1;
286         }
287         read_unlock(&ip6_ra_lock);
288         return 0;
289 }
290
291 static int ip6_forward_proxy_check(struct sk_buff *skb)
292 {
293         struct ipv6hdr *hdr = ipv6_hdr(skb);
294         u8 nexthdr = hdr->nexthdr;
295         __be16 frag_off;
296         int offset;
297
298         if (ipv6_ext_hdr(nexthdr)) {
299                 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
300                 if (offset < 0)
301                         return 0;
302         } else
303                 offset = sizeof(struct ipv6hdr);
304
305         if (nexthdr == IPPROTO_ICMPV6) {
306                 struct icmp6hdr *icmp6;
307
308                 if (!pskb_may_pull(skb, (skb_network_header(skb) +
309                                          offset + 1 - skb->data)))
310                         return 0;
311
312                 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
313
314                 switch (icmp6->icmp6_type) {
315                 case NDISC_ROUTER_SOLICITATION:
316                 case NDISC_ROUTER_ADVERTISEMENT:
317                 case NDISC_NEIGHBOUR_SOLICITATION:
318                 case NDISC_NEIGHBOUR_ADVERTISEMENT:
319                 case NDISC_REDIRECT:
320                         /* For reaction involving unicast neighbor discovery
321                          * message destined to the proxied address, pass it to
322                          * input function.
323                          */
324                         return 1;
325                 default:
326                         break;
327                 }
328         }
329
330         /*
331          * The proxying router can't forward traffic sent to a link-local
332          * address, so signal the sender and discard the packet. This
333          * behavior is clarified by the MIPv6 specification.
334          */
335         if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
336                 dst_link_failure(skb);
337                 return -1;
338         }
339
340         return 0;
341 }
342
/*
 *	Continuation passed to the NF_INET_FORWARD hook by ip6_forward():
 *	hands the packet to dst_output() and returns its result.
 */
static inline int ip6_forward_finish(struct sk_buff *skb)
{
	return dst_output(skb);
}
347
348 int ip6_forward(struct sk_buff *skb)
349 {
350         struct dst_entry *dst = skb_dst(skb);
351         struct ipv6hdr *hdr = ipv6_hdr(skb);
352         struct inet6_skb_parm *opt = IP6CB(skb);
353         struct net *net = dev_net(dst->dev);
354         u32 mtu;
355
356         if (net->ipv6.devconf_all->forwarding == 0)
357                 goto error;
358
359         if (skb_warn_if_lro(skb))
360                 goto drop;
361
362         if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
363                 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
364                 goto drop;
365         }
366
367         if (skb->pkt_type != PACKET_HOST)
368                 goto drop;
369
370         skb_forward_csum(skb);
371
372         /*
373          *      We DO NOT make any processing on
374          *      RA packets, pushing them to user level AS IS
375          *      without ane WARRANTY that application will be able
376          *      to interpret them. The reason is that we
377          *      cannot make anything clever here.
378          *
379          *      We are not end-node, so that if packet contains
380          *      AH/ESP, we cannot make anything.
381          *      Defragmentation also would be mistake, RA packets
382          *      cannot be fragmented, because there is no warranty
383          *      that different fragments will go along one path. --ANK
384          */
385         if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
386                 if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
387                         return 0;
388         }
389
390         /*
391          *      check and decrement ttl
392          */
393         if (hdr->hop_limit <= 1) {
394                 /* Force OUTPUT device used as source address */
395                 skb->dev = dst->dev;
396                 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
397                 IP6_INC_STATS_BH(net,
398                                  ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
399
400                 kfree_skb(skb);
401                 return -ETIMEDOUT;
402         }
403
404         /* XXX: idev->cnf.proxy_ndp? */
405         if (net->ipv6.devconf_all->proxy_ndp &&
406             pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
407                 int proxied = ip6_forward_proxy_check(skb);
408                 if (proxied > 0)
409                         return ip6_input(skb);
410                 else if (proxied < 0) {
411                         IP6_INC_STATS(net, ip6_dst_idev(dst),
412                                       IPSTATS_MIB_INDISCARDS);
413                         goto drop;
414                 }
415         }
416
417         if (!xfrm6_route_forward(skb)) {
418                 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
419                 goto drop;
420         }
421         dst = skb_dst(skb);
422
423         /* IPv6 specs say nothing about it, but it is clear that we cannot
424            send redirects to source routed frames.
425            We don't send redirects to frames decapsulated from IPsec.
426          */
427         if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
428                 struct in6_addr *target = NULL;
429                 struct inet_peer *peer;
430                 struct rt6_info *rt;
431
432                 /*
433                  *      incoming and outgoing devices are the same
434                  *      send a redirect.
435                  */
436
437                 rt = (struct rt6_info *) dst;
438                 if (rt->rt6i_flags & RTF_GATEWAY)
439                         target = &rt->rt6i_gateway;
440                 else
441                         target = &hdr->daddr;
442
443                 peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
444
445                 /* Limit redirects both by destination (here)
446                    and by source (inside ndisc_send_redirect)
447                  */
448                 if (inet_peer_xrlim_allow(peer, 1*HZ))
449                         ndisc_send_redirect(skb, target);
450                 if (peer)
451                         inet_putpeer(peer);
452         } else {
453                 int addrtype = ipv6_addr_type(&hdr->saddr);
454
455                 /* This check is security critical. */
456                 if (addrtype == IPV6_ADDR_ANY ||
457                     addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
458                         goto error;
459                 if (addrtype & IPV6_ADDR_LINKLOCAL) {
460                         icmpv6_send(skb, ICMPV6_DEST_UNREACH,
461                                     ICMPV6_NOT_NEIGHBOUR, 0);
462                         goto error;
463                 }
464         }
465
466         mtu = dst_mtu(dst);
467         if (mtu < IPV6_MIN_MTU)
468                 mtu = IPV6_MIN_MTU;
469
470         if ((!skb->local_df && skb->len > mtu && !skb_is_gso(skb)) ||
471             (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)) {
472                 /* Again, force OUTPUT device used as source address */
473                 skb->dev = dst->dev;
474                 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
475                 IP6_INC_STATS_BH(net,
476                                  ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
477                 IP6_INC_STATS_BH(net,
478                                  ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
479                 kfree_skb(skb);
480                 return -EMSGSIZE;
481         }
482
483         if (skb_cow(skb, dst->dev->hard_header_len)) {
484                 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
485                 goto drop;
486         }
487
488         hdr = ipv6_hdr(skb);
489
490         /* Mangling hops number delayed to point after skb COW */
491
492         hdr->hop_limit--;
493
494         IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
495         IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
496         return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
497                        ip6_forward_finish);
498
499 error:
500         IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
501 drop:
502         kfree_skb(skb);
503         return -EINVAL;
504 }
505
506 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
507 {
508         to->pkt_type = from->pkt_type;
509         to->priority = from->priority;
510         to->protocol = from->protocol;
511         skb_dst_drop(to);
512         skb_dst_set(to, dst_clone(skb_dst(from)));
513         to->dev = from->dev;
514         to->mark = from->mark;
515
516 #ifdef CONFIG_NET_SCHED
517         to->tc_index = from->tc_index;
518 #endif
519         nf_copy(to, from);
520 #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
521         to->nf_trace = from->nf_trace;
522 #endif
523         skb_copy_secmark(to, from);
524 }
525
526 int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
527 {
528         struct sk_buff *frag;
529         struct rt6_info *rt = (struct rt6_info*)skb_dst(skb);
530         struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
531         struct ipv6hdr *tmp_hdr;
532         struct frag_hdr *fh;
533         unsigned int mtu, hlen, left, len;
534         int hroom, troom;
535         __be32 frag_id = 0;
536         int ptr, offset = 0, err=0;
537         u8 *prevhdr, nexthdr = 0;
538         struct net *net = dev_net(skb_dst(skb)->dev);
539
540         hlen = ip6_find_1stfragopt(skb, &prevhdr);
541         nexthdr = *prevhdr;
542
543         mtu = ip6_skb_dst_mtu(skb);
544
545         /* We must not fragment if the socket is set to force MTU discovery
546          * or if the skb it not generated by a local socket.
547          */
548         if (unlikely(!skb->local_df && skb->len > mtu) ||
549                      (IP6CB(skb)->frag_max_size &&
550                       IP6CB(skb)->frag_max_size > mtu)) {
551                 if (skb->sk && dst_allfrag(skb_dst(skb)))
552                         sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
553
554                 skb->dev = skb_dst(skb)->dev;
555                 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
556                 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
557                               IPSTATS_MIB_FRAGFAILS);
558                 kfree_skb(skb);
559                 return -EMSGSIZE;
560         }
561
562         if (np && np->frag_size < mtu) {
563                 if (np->frag_size)
564                         mtu = np->frag_size;
565         }
566         mtu -= hlen + sizeof(struct frag_hdr);
567
568         if (skb_has_frag_list(skb)) {
569                 int first_len = skb_pagelen(skb);
570                 struct sk_buff *frag2;
571
572                 if (first_len - hlen > mtu ||
573                     ((first_len - hlen) & 7) ||
574                     skb_cloned(skb))
575                         goto slow_path;
576
577                 skb_walk_frags(skb, frag) {
578                         /* Correct geometry. */
579                         if (frag->len > mtu ||
580                             ((frag->len & 7) && frag->next) ||
581                             skb_headroom(frag) < hlen)
582                                 goto slow_path_clean;
583
584                         /* Partially cloned skb? */
585                         if (skb_shared(frag))
586                                 goto slow_path_clean;
587
588                         BUG_ON(frag->sk);
589                         if (skb->sk) {
590                                 frag->sk = skb->sk;
591                                 frag->destructor = sock_wfree;
592                         }
593                         skb->truesize -= frag->truesize;
594                 }
595
596                 err = 0;
597                 offset = 0;
598                 frag = skb_shinfo(skb)->frag_list;
599                 skb_frag_list_init(skb);
600                 /* BUILD HEADER */
601
602                 *prevhdr = NEXTHDR_FRAGMENT;
603                 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
604                 if (!tmp_hdr) {
605                         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
606                                       IPSTATS_MIB_FRAGFAILS);
607                         return -ENOMEM;
608                 }
609
610                 __skb_pull(skb, hlen);
611                 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
612                 __skb_push(skb, hlen);
613                 skb_reset_network_header(skb);
614                 memcpy(skb_network_header(skb), tmp_hdr, hlen);
615
616                 ipv6_select_ident(fh, rt);
617                 fh->nexthdr = nexthdr;
618                 fh->reserved = 0;
619                 fh->frag_off = htons(IP6_MF);
620                 frag_id = fh->identification;
621
622                 first_len = skb_pagelen(skb);
623                 skb->data_len = first_len - skb_headlen(skb);
624                 skb->len = first_len;
625                 ipv6_hdr(skb)->payload_len = htons(first_len -
626                                                    sizeof(struct ipv6hdr));
627
628                 dst_hold(&rt->dst);
629
630                 for (;;) {
631                         /* Prepare header of the next frame,
632                          * before previous one went down. */
633                         if (frag) {
634                                 frag->ip_summed = CHECKSUM_NONE;
635                                 skb_reset_transport_header(frag);
636                                 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
637                                 __skb_push(frag, hlen);
638                                 skb_reset_network_header(frag);
639                                 memcpy(skb_network_header(frag), tmp_hdr,
640                                        hlen);
641                                 offset += skb->len - hlen - sizeof(struct frag_hdr);
642                                 fh->nexthdr = nexthdr;
643                                 fh->reserved = 0;
644                                 fh->frag_off = htons(offset);
645                                 if (frag->next != NULL)
646                                         fh->frag_off |= htons(IP6_MF);
647                                 fh->identification = frag_id;
648                                 ipv6_hdr(frag)->payload_len =
649                                                 htons(frag->len -
650                                                       sizeof(struct ipv6hdr));
651                                 ip6_copy_metadata(frag, skb);
652                         }
653
654                         err = output(skb);
655                         if(!err)
656                                 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
657                                               IPSTATS_MIB_FRAGCREATES);
658
659                         if (err || !frag)
660                                 break;
661
662                         skb = frag;
663                         frag = skb->next;
664                         skb->next = NULL;
665                 }
666
667                 kfree(tmp_hdr);
668
669                 if (err == 0) {
670                         IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
671                                       IPSTATS_MIB_FRAGOKS);
672                         ip6_rt_put(rt);
673                         return 0;
674                 }
675
676                 while (frag) {
677                         skb = frag->next;
678                         kfree_skb(frag);
679                         frag = skb;
680                 }
681
682                 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
683                               IPSTATS_MIB_FRAGFAILS);
684                 ip6_rt_put(rt);
685                 return err;
686
687 slow_path_clean:
688                 skb_walk_frags(skb, frag2) {
689                         if (frag2 == frag)
690                                 break;
691                         frag2->sk = NULL;
692                         frag2->destructor = NULL;
693                         skb->truesize += frag2->truesize;
694                 }
695         }
696
697 slow_path:
698         if ((skb->ip_summed == CHECKSUM_PARTIAL) &&
699             skb_checksum_help(skb))
700                 goto fail;
701
702         left = skb->len - hlen;         /* Space per frame */
703         ptr = hlen;                     /* Where to start from */
704
705         /*
706          *      Fragment the datagram.
707          */
708
709         *prevhdr = NEXTHDR_FRAGMENT;
710         hroom = LL_RESERVED_SPACE(rt->dst.dev);
711         troom = rt->dst.dev->needed_tailroom;
712
713         /*
714          *      Keep copying data until we run out.
715          */
716         while(left > 0) {
717                 len = left;
718                 /* IF: it doesn't fit, use 'mtu' - the data space left */
719                 if (len > mtu)
720                         len = mtu;
721                 /* IF: we are not sending up to and including the packet end
722                    then align the next start on an eight byte boundary */
723                 if (len < left) {
724                         len &= ~7;
725                 }
726                 /*
727                  *      Allocate buffer.
728                  */
729
730                 if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
731                                       hroom + troom, GFP_ATOMIC)) == NULL) {
732                         NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
733                         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
734                                       IPSTATS_MIB_FRAGFAILS);
735                         err = -ENOMEM;
736                         goto fail;
737                 }
738
739                 /*
740                  *      Set up data on packet
741                  */
742
743                 ip6_copy_metadata(frag, skb);
744                 skb_reserve(frag, hroom);
745                 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
746                 skb_reset_network_header(frag);
747                 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
748                 frag->transport_header = (frag->network_header + hlen +
749                                           sizeof(struct frag_hdr));
750
751                 /*
752                  *      Charge the memory for the fragment to any owner
753                  *      it might possess
754                  */
755                 if (skb->sk)
756                         skb_set_owner_w(frag, skb->sk);
757
758                 /*
759                  *      Copy the packet header into the new buffer.
760                  */
761                 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
762
763                 /*
764                  *      Build fragment header.
765                  */
766                 fh->nexthdr = nexthdr;
767                 fh->reserved = 0;
768                 if (!frag_id) {
769                         ipv6_select_ident(fh, rt);
770                         frag_id = fh->identification;
771                 } else
772                         fh->identification = frag_id;
773
774                 /*
775                  *      Copy a block of the IP datagram.
776                  */
777                 if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
778                         BUG();
779                 left -= len;
780
781                 fh->frag_off = htons(offset);
782                 if (left > 0)
783                         fh->frag_off |= htons(IP6_MF);
784                 ipv6_hdr(frag)->payload_len = htons(frag->len -
785                                                     sizeof(struct ipv6hdr));
786
787                 ptr += len;
788                 offset += len;
789
790                 /*
791                  *      Put this fragment into the sending queue.
792                  */
793                 err = output(frag);
794                 if (err)
795                         goto fail;
796
797                 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
798                               IPSTATS_MIB_FRAGCREATES);
799         }
800         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
801                       IPSTATS_MIB_FRAGOKS);
802         consume_skb(skb);
803         return err;
804
805 fail:
806         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
807                       IPSTATS_MIB_FRAGFAILS);
808         kfree_skb(skb);
809         return err;
810 }
811
812 static inline int ip6_rt_check(const struct rt6key *rt_key,
813                                const struct in6_addr *fl_addr,
814                                const struct in6_addr *addr_cache)
815 {
816         return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
817                 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
818 }
819
820 static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
821                                           struct dst_entry *dst,
822                                           const struct flowi6 *fl6)
823 {
824         struct ipv6_pinfo *np = inet6_sk(sk);
825         struct rt6_info *rt;
826
827         if (!dst)
828                 goto out;
829
830         if (dst->ops->family != AF_INET6) {
831                 dst_release(dst);
832                 return NULL;
833         }
834
835         rt = (struct rt6_info *)dst;
836         /* Yes, checking route validity in not connected
837          * case is not very simple. Take into account,
838          * that we do not support routing by source, TOS,
839          * and MSG_DONTROUTE            --ANK (980726)
840          *
841          * 1. ip6_rt_check(): If route was host route,
842          *    check that cached destination is current.
843          *    If it is network route, we still may
844          *    check its validity using saved pointer
845          *    to the last used address: daddr_cache.
846          *    We do not want to save whole address now,
847          *    (because main consumer of this service
848          *    is tcp, which has not this problem),
849          *    so that the last trick works only on connected
850          *    sockets.
851          * 2. oif also should be the same.
852          */
853         if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
854 #ifdef CONFIG_IPV6_SUBTREES
855             ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
856 #endif
857             (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
858                 dst_release(dst);
859                 dst = NULL;
860         }
861
862 out:
863         return dst;
864 }
865
866 static int ip6_dst_lookup_tail(struct sock *sk,
867                                struct dst_entry **dst, struct flowi6 *fl6)
868 {
869         struct net *net = sock_net(sk);
870 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
871         struct neighbour *n;
872         struct rt6_info *rt;
873 #endif
874         int err;
875
876         if (*dst == NULL)
877                 *dst = ip6_route_output(net, sk, fl6);
878
879         if ((err = (*dst)->error))
880                 goto out_err_release;
881
882         if (ipv6_addr_any(&fl6->saddr)) {
883                 struct rt6_info *rt = (struct rt6_info *) *dst;
884                 err = ip6_route_get_saddr(net, rt, &fl6->daddr,
885                                           sk ? inet6_sk(sk)->srcprefs : 0,
886                                           &fl6->saddr);
887                 if (err)
888                         goto out_err_release;
889         }
890
891 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
892         /*
893          * Here if the dst entry we've looked up
894          * has a neighbour entry that is in the INCOMPLETE
895          * state and the src address from the flow is
896          * marked as OPTIMISTIC, we release the found
897          * dst entry and replace it instead with the
898          * dst entry of the nexthop router
899          */
900         rt = (struct rt6_info *) *dst;
901         rcu_read_lock_bh();
902         n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt, &fl6->daddr));
903         err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
904         rcu_read_unlock_bh();
905
906         if (err) {
907                 struct inet6_ifaddr *ifp;
908                 struct flowi6 fl_gw6;
909                 int redirect;
910
911                 ifp = ipv6_get_ifaddr(net, &fl6->saddr,
912                                       (*dst)->dev, 1);
913
914                 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
915                 if (ifp)
916                         in6_ifa_put(ifp);
917
918                 if (redirect) {
919                         /*
920                          * We need to get the dst entry for the
921                          * default router instead
922                          */
923                         dst_release(*dst);
924                         memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
925                         memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
926                         *dst = ip6_route_output(net, sk, &fl_gw6);
927                         if ((err = (*dst)->error))
928                                 goto out_err_release;
929                 }
930         }
931 #endif
932
933         return 0;
934
935 out_err_release:
936         if (err == -ENETUNREACH)
937                 IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
938         dst_release(*dst);
939         *dst = NULL;
940         return err;
941 }
942
943 /**
944  *      ip6_dst_lookup - perform route lookup on flow
945  *      @sk: socket which provides route info
946  *      @dst: pointer to dst_entry * for result
947  *      @fl6: flow to lookup
948  *
949  *      This function performs a route lookup on the given flow.
950  *
951  *      It returns zero on success, or a standard errno code on error.
952  */
953 int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6)
954 {
955         *dst = NULL;
956         return ip6_dst_lookup_tail(sk, dst, fl6);
957 }
958 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
959
960 /**
961  *      ip6_dst_lookup_flow - perform route lookup on flow with ipsec
962  *      @sk: socket which provides route info
963  *      @fl6: flow to lookup
964  *      @final_dst: final destination address for ipsec lookup
965  *      @can_sleep: we are in a sleepable context
966  *
967  *      This function performs a route lookup on the given flow.
968  *
969  *      It returns a valid dst pointer on success, or a pointer encoded
970  *      error code.
971  */
972 struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
973                                       const struct in6_addr *final_dst,
974                                       bool can_sleep)
975 {
976         struct dst_entry *dst = NULL;
977         int err;
978
979         err = ip6_dst_lookup_tail(sk, &dst, fl6);
980         if (err)
981                 return ERR_PTR(err);
982         if (final_dst)
983                 fl6->daddr = *final_dst;
984         if (can_sleep)
985                 fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;
986
987         return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
988 }
989 EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
990
991 /**
992  *      ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
993  *      @sk: socket which provides the dst cache and route info
994  *      @fl6: flow to lookup
995  *      @final_dst: final destination address for ipsec lookup
996  *      @can_sleep: we are in a sleepable context
997  *
998  *      This function performs a route lookup on the given flow with the
999  *      possibility of using the cached route in the socket if it is valid.
1000  *      It will take the socket dst lock when operating on the dst cache.
1001  *      As a result, this function can only be used in process context.
1002  *
1003  *      It returns a valid dst pointer on success, or a pointer encoded
1004  *      error code.
1005  */
1006 struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1007                                          const struct in6_addr *final_dst,
1008                                          bool can_sleep)
1009 {
1010         struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1011         int err;
1012
1013         dst = ip6_sk_dst_check(sk, dst, fl6);
1014
1015         err = ip6_dst_lookup_tail(sk, &dst, fl6);
1016         if (err)
1017                 return ERR_PTR(err);
1018         if (final_dst)
1019                 fl6->daddr = *final_dst;
1020         if (can_sleep)
1021                 fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;
1022
1023         return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
1024 }
1025 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
1026
1027 static inline int ip6_ufo_append_data(struct sock *sk,
1028                         int getfrag(void *from, char *to, int offset, int len,
1029                         int odd, struct sk_buff *skb),
1030                         void *from, int length, int hh_len, int fragheaderlen,
1031                         int transhdrlen, int mtu,unsigned int flags,
1032                         struct rt6_info *rt)
1033
1034 {
1035         struct sk_buff *skb;
1036         int err;
1037
1038         /* There is support for UDP large send offload by network
1039          * device, so create one single skb packet containing complete
1040          * udp datagram
1041          */
1042         if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
1043                 skb = sock_alloc_send_skb(sk,
1044                         hh_len + fragheaderlen + transhdrlen + 20,
1045                         (flags & MSG_DONTWAIT), &err);
1046                 if (skb == NULL)
1047                         return err;
1048
1049                 /* reserve space for Hardware header */
1050                 skb_reserve(skb, hh_len);
1051
1052                 /* create space for UDP/IP header */
1053                 skb_put(skb,fragheaderlen + transhdrlen);
1054
1055                 /* initialize network header pointer */
1056                 skb_reset_network_header(skb);
1057
1058                 /* initialize protocol header pointer */
1059                 skb->transport_header = skb->network_header + fragheaderlen;
1060
1061                 skb->protocol = htons(ETH_P_IPV6);
1062                 skb->ip_summed = CHECKSUM_PARTIAL;
1063                 skb->csum = 0;
1064         }
1065
1066         err = skb_append_datato_frags(sk,skb, getfrag, from,
1067                                       (length - transhdrlen));
1068         if (!err) {
1069                 struct frag_hdr fhdr;
1070
1071                 /* Specify the length of each IPv6 datagram fragment.
1072                  * It has to be a multiple of 8.
1073                  */
1074                 skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
1075                                              sizeof(struct frag_hdr)) & ~7;
1076                 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
1077                 ipv6_select_ident(&fhdr, rt);
1078                 skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
1079                 __skb_queue_tail(&sk->sk_write_queue, skb);
1080
1081                 return 0;
1082         }
1083         /* There is not enough support do UPD LSO,
1084          * so follow normal path
1085          */
1086         kfree_skb(skb);
1087
1088         return err;
1089 }
1090
1091 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1092                                                gfp_t gfp)
1093 {
1094         return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1095 }
1096
1097 static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1098                                                 gfp_t gfp)
1099 {
1100         return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1101 }
1102
1103 static void ip6_append_data_mtu(unsigned int *mtu,
1104                                 int *maxfraglen,
1105                                 unsigned int fragheaderlen,
1106                                 struct sk_buff *skb,
1107                                 struct rt6_info *rt,
1108                                 bool pmtuprobe)
1109 {
1110         if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
1111                 if (skb == NULL) {
1112                         /* first fragment, reserve header_len */
1113                         *mtu = *mtu - rt->dst.header_len;
1114
1115                 } else {
1116                         /*
1117                          * this fragment is not first, the headers
1118                          * space is regarded as data space.
1119                          */
1120                         *mtu = min(*mtu, pmtuprobe ?
1121                                    rt->dst.dev->mtu :
1122                                    dst_mtu(rt->dst.path));
1123                 }
1124                 *maxfraglen = ((*mtu - fragheaderlen) & ~7)
1125                               + fragheaderlen - sizeof(struct frag_hdr);
1126         }
1127 }
1128
/*
 * ip6_append_data - add payload to the socket's pending write queue.
 *
 * @sk:          socket whose sk_write_queue collects the pending skbs
 * @getfrag:     callback used to copy payload; it is called as
 *               getfrag(from, dest, offset, len, odd, skb) and a negative
 *               return value is reported to the caller as -EFAULT
 * @from:        opaque cursor passed through to @getfrag
 * @length:      number of bytes to append
 * @transhdrlen: transport header length; forced to 0 when the queue is
 *               not empty
 * @hlimit:      hop limit, saved in the cork on the first call
 * @tclass:      traffic class, saved in the cork on the first call
 * @opt:         tx options, duplicated into np->cork.opt on the first call
 * @fl6:         flow, copied into the cork on the first call
 * @rt:          route, referenced by the cork on the first call
 * @flags:       message flags (MSG_PROBE, MSG_MORE and MSG_DONTWAIT are
 *               examined here)
 * @dontfrag:    when set, a UDP or RAW message longer than the MTU is
 *               rejected with -EMSGSIZE instead of being split
 *
 * When the write queue is empty the cork state is initialised from the
 * arguments; otherwise @rt, @fl6 and @opt are replaced by the values saved
 * in the cork.  Data is then appended to the skb at the tail of the queue
 * and new skbs are allocated whenever the tail has no room left.
 *
 * Returns 0 on success or a negative errno.  On the paths that reach the
 * "error" label the bytes that were not queued are subtracted from
 * cork->length again and IPSTATS_MIB_OUTDISCARDS is incremented.
 */
1129 int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1130         int offset, int len, int odd, struct sk_buff *skb),
1131         void *from, int length, int transhdrlen,
1132         int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
1133         struct rt6_info *rt, unsigned int flags, int dontfrag)
1134 {
1135         struct inet_sock *inet = inet_sk(sk);
1136         struct ipv6_pinfo *np = inet6_sk(sk);
1137         struct inet_cork *cork;
1138         struct sk_buff *skb, *skb_prev = NULL;
1139         unsigned int maxfraglen, fragheaderlen, mtu;
1140         int exthdrlen;
1141         int dst_exthdrlen;
1142         int hh_len;
1143         int copy;
1144         int err;
1145         int offset = 0;
1146         __u8 tx_flags = 0;
1147
             /* MSG_PROBE: nothing is queued, success is reported at once. */
1148         if (flags&MSG_PROBE)
1149                 return 0;
1150         cork = &inet->cork.base;
1151         if (skb_queue_empty(&sk->sk_write_queue)) {
1152                 /*
1153                  * setup for corking
1154                  */
1155                 if (opt) {
1156                         if (WARN_ON(np->cork.opt))
1157                                 return -EINVAL;
1158
                             /*
                              * NOTE(review): the -ENOBUFS returns below leave
                              * np->cork.opt (and any sub-options already
                              * duplicated) allocated.  Presumably the caller
                              * releases them through ip6_flush_pending_frames(),
                              * which ends in ip6_cork_release() - confirm
                              * against the callers.
                              */
1159                         np->cork.opt = kzalloc(opt->tot_len, sk->sk_allocation);
1160                         if (unlikely(np->cork.opt == NULL))
1161                                 return -ENOBUFS;
1162
1163                         np->cork.opt->tot_len = opt->tot_len;
1164                         np->cork.opt->opt_flen = opt->opt_flen;
1165                         np->cork.opt->opt_nflen = opt->opt_nflen;
1166
1167                         np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1168                                                             sk->sk_allocation);
1169                         if (opt->dst0opt && !np->cork.opt->dst0opt)
1170                                 return -ENOBUFS;
1171
1172                         np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1173                                                             sk->sk_allocation);
1174                         if (opt->dst1opt && !np->cork.opt->dst1opt)
1175                                 return -ENOBUFS;
1176
1177                         np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
1178                                                            sk->sk_allocation);
1179                         if (opt->hopopt && !np->cork.opt->hopopt)
1180                                 return -ENOBUFS;
1181
1182                         np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1183                                                             sk->sk_allocation);
1184                         if (opt->srcrt && !np->cork.opt->srcrt)
1185                                 return -ENOBUFS;
1186
1187                         /* need source address above miyazawa*/
1188                 }
                     /* The cork keeps its own reference on the route. */
1189                 dst_hold(&rt->dst);
1190                 cork->dst = &rt->dst;
1191                 inet->cork.fl.u.ip6 = *fl6;
1192                 np->cork.hop_limit = hlimit;
1193                 np->cork.tclass = tclass;
                     /*
                      * With IPV6_PMTUDISC_PROBE the device MTU is used; otherwise
                      * the MTU of the route itself (XFRM tunnel) or of its path.
                      */
1194                 if (rt->dst.flags & DST_XFRM_TUNNEL)
1195                         mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
1196                               rt->dst.dev->mtu : dst_mtu(&rt->dst);
1197                 else
1198                         mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
1199                               rt->dst.dev->mtu : dst_mtu(rt->dst.path);
                     /* A non-zero np->frag_size below the MTU replaces it. */
1200                 if (np->frag_size < mtu) {
1201                         if (np->frag_size)
1202                                 mtu = np->frag_size;
1203                 }
1204                 cork->fragsize = mtu;
1205                 if (dst_allfrag(rt->dst.path))
1206                         cork->flags |= IPCORK_ALLFRAG;
1207                 cork->length = 0;
                     /*
                      * opt_flen bytes of extension headers are counted both as
                      * payload and as part of the transport header.
                      */
1208                 exthdrlen = (opt ? opt->opt_flen : 0);
1209                 length += exthdrlen;
1210                 transhdrlen += exthdrlen;
1211                 dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
1212         } else {
                     /* Queue not empty: use the route, flow and options saved in the cork. */
1213                 rt = (struct rt6_info *)cork->dst;
1214                 fl6 = &inet->cork.fl.u.ip6;
1215                 opt = np->cork.opt;
1216                 transhdrlen = 0;
1217                 exthdrlen = 0;
1218                 dst_exthdrlen = 0;
1219                 mtu = cork->fragsize;
1220         }
1221
1222         hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1223
             /*
              * fragheaderlen: IPv6 header, rt6i_nfheader_len and opt_nflen.
              * maxfraglen: fragheaderlen plus the remaining MTU rounded down
              * to a multiple of 8, less the size of one fragment header.
              */
1224         fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1225                         (opt ? opt->opt_nflen : 0);
1226         maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
1227
             /* Refuse data that would push the total past IPV6_MAXPLEN. */
1228         if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
1229                 if (cork->length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
1230                         ipv6_local_error(sk, EMSGSIZE, fl6, mtu-exthdrlen);
1231                         return -EMSGSIZE;
1232                 }
1233         }
1234
1235         /* For UDP, check if TX timestamp is enabled */
1236         if (sk->sk_type == SOCK_DGRAM)
1237                 sock_tx_timestamp(sk, &tx_flags);
1238
1239         /*
1240          * Let's try using as much space as possible.
1241          * Use MTU if total length of the message fits into the MTU.
1242          * Otherwise, we need to reserve fragment header and
1243          * fragment alignment (= 8-15 octets, in total).
1244          *
1245          * Note that we may need to "move" the data from the tail
1246          * of the buffer to the new fragment when we split
1247          * the message.
1248          *
1249          * FIXME: It may be fragmented into multiple chunks
1250          *        at once if non-fragmentable extension headers
1251          *        are too large.
1252          * --yoshfuji
1253          */
1254
1255         cork->length += length;
1256         if (length > mtu) {
1257                 int proto = sk->sk_protocol;
                     /* dontfrag: report the MTU and fail rather than split the message. */
1258                 if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)){
1259                         ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen);
1260                         return -EMSGSIZE;
1261                 }
1262
                     /* UDP on a device advertising NETIF_F_UFO: hand over to the UFO helper. */
1263                 if (proto == IPPROTO_UDP &&
1264                     (rt->dst.dev->features & NETIF_F_UFO)) {
1265
1266                         err = ip6_ufo_append_data(sk, getfrag, from, length,
1267                                                   hh_len, fragheaderlen,
1268                                                   transhdrlen, mtu, flags, rt);
1269                         if (err)
1270                                 goto error;
1271                         return 0;
1272                 }
1273         }
1274
             /* Empty queue: jump straight into the allocation code inside the loop. */
1275         if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
1276                 goto alloc_new_skb;
1277
1278         while (length > 0) {
1279                 /* Check if the remaining data fits into current packet. */
                     /*
                      * Room is measured against mtu when the whole corked length
                      * fits and IPCORK_ALLFRAG is clear, else against maxfraglen.
                      */
1280                 copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1281                 if (copy < length)
1282                         copy = maxfraglen - skb->len;
1283
1284                 if (copy <= 0) {
1285                         char *data;
1286                         unsigned int datalen;
1287                         unsigned int fraglen;
1288                         unsigned int fraggap;
1289                         unsigned int alloclen;
1290 alloc_new_skb:
1291                         /* There's no room in the current skb */
                             /*
                              * fraggap: bytes by which the current skb exceeds
                              * maxfraglen; they are moved into the new skb below.
                              */
1292                         if (skb)
1293                                 fraggap = skb->len - maxfraglen;
1294                         else
1295                                 fraggap = 0;
1296                         /* update mtu and maxfraglen if necessary */
1297                         if (skb == NULL || skb_prev == NULL)
1298                                 ip6_append_data_mtu(&mtu, &maxfraglen,
1299                                                     fragheaderlen, skb, rt,
1300                                                     np->pmtudisc ==
1301                                                     IPV6_PMTUDISC_PROBE);
1302
1303                         skb_prev = skb;
1304
1305                         /*
1306                          * If remaining data exceeds the mtu,
1307                          * we know we need more fragment(s).
1308                          */
1309                         datalen = length + fraggap;
1310
1311                         if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1312                                 datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
                             /*
                              * With MSG_MORE on a device without NETIF_F_SG a full
                              * mtu is allocated; otherwise just what this skb needs.
                              */
1313                         if ((flags & MSG_MORE) &&
1314                             !(rt->dst.dev->features&NETIF_F_SG))
1315                                 alloclen = mtu;
1316                         else
1317                                 alloclen = datalen + fragheaderlen;
1318
1319                         alloclen += dst_exthdrlen;
1320
1321                         if (datalen != length + fraggap) {
1322                                 /*
1323                                  * this is not the last fragment, the trailer
1324                                  * space is regarded as data space.
1325                                  */
1326                                 datalen += rt->dst.trailer_len;
1327                         }
1328
1329                         alloclen += rt->dst.trailer_len;
1330                         fraglen = datalen + fragheaderlen;
1331
1332                         /*
1333                          * We just reserve space for fragment header.
1334                          * Note: this may be overallocation if the message
1335                          * (without MSG_MORE) fits into the MTU.
1336                          */
1337                         alloclen += sizeof(struct frag_hdr);
1338
                             /*
                              * An skb that carries the transport header is allocated
                              * with sock_alloc_send_skb(); later ones use
                              * sock_wmalloc(), and only while sk_wmem_alloc is at
                              * most twice sk_sndbuf.
                              */
1339                         if (transhdrlen) {
1340                                 skb = sock_alloc_send_skb(sk,
1341                                                 alloclen + hh_len,
1342                                                 (flags & MSG_DONTWAIT), &err);
1343                         } else {
1344                                 skb = NULL;
1345                                 if (atomic_read(&sk->sk_wmem_alloc) <=
1346                                     2 * sk->sk_sndbuf)
1347                                         skb = sock_wmalloc(sk,
1348                                                            alloclen + hh_len, 1,
1349                                                            sk->sk_allocation);
1350                                 if (unlikely(skb == NULL))
1351                                         err = -ENOBUFS;
1352                                 else {
1353                                         /* Only the initial fragment
1354                                          * is time stamped.
1355                                          */
1356                                         tx_flags = 0;
1357                                 }
1358                         }
1359                         if (skb == NULL)
1360                                 goto error;
1361                         /*
1362                          *      Fill in the control structures
1363                          */
1364                         skb->protocol = htons(ETH_P_IPV6);
1365                         skb->ip_summed = CHECKSUM_NONE;
1366                         skb->csum = 0;
1367                         /* reserve for fragmentation and ipsec header */
1368                         skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
1369                                     dst_exthdrlen);
1370
1371                         if (sk->sk_type == SOCK_DGRAM)
1372                                 skb_shinfo(skb)->tx_flags = tx_flags;
1373
1374                         /*
1375                          *      Find where to start putting bytes
1376                          */
1377                         data = skb_put(skb, fraglen);
1378                         skb_set_network_header(skb, exthdrlen);
1379                         data += fragheaderlen;
1380                         skb->transport_header = (skb->network_header +
1381                                                  fragheaderlen);
                             /*
                              * Move the excess bytes of the previous skb into this
                              * one, fix up both checksums and trim the previous skb
                              * back to maxfraglen.
                              */
1382                         if (fraggap) {
1383                                 skb->csum = skb_copy_and_csum_bits(
1384                                         skb_prev, maxfraglen,
1385                                         data + transhdrlen, fraggap, 0);
1386                                 skb_prev->csum = csum_sub(skb_prev->csum,
1387                                                           skb->csum);
1388                                 data += fraggap;
1389                                 pskb_trim_unique(skb_prev, maxfraglen);
1390                         }
1391                         copy = datalen - transhdrlen - fraggap;
1392
1393                         if (copy < 0) {
1394                                 err = -EINVAL;
1395                                 kfree_skb(skb);
1396                                 goto error;
1397                         } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1398                                 err = -EFAULT;
1399                                 kfree_skb(skb);
1400                                 goto error;
1401                         }
1402
1403                         offset += copy;
1404                         length -= datalen - fraggap;
                             /* Header space is only accounted for in the first new skb. */
1405                         transhdrlen = 0;
1406                         exthdrlen = 0;
1407                         dst_exthdrlen = 0;
1408
1409                         /*
1410                          * Put the packet on the pending queue
1411                          */
1412                         __skb_queue_tail(&sk->sk_write_queue, skb);
1413                         continue;
1414                 }
1415
1416                 if (copy > length)
1417                         copy = length;
1418
                     /*
                      * Without NETIF_F_SG the data goes into the linear area of
                      * the skb; with it, into the socket's page fragment.
                      */
1419                 if (!(rt->dst.dev->features&NETIF_F_SG)) {
1420                         unsigned int off;
1421
1422                         off = skb->len;
1423                         if (getfrag(from, skb_put(skb, copy),
1424                                                 offset, copy, off, skb) < 0) {
                                     /* Undo the skb_put() done for the failed copy. */
1425                                 __skb_trim(skb, off);
1426                                 err = -EFAULT;
1427                                 goto error;
1428                         }
1429                 } else {
1430                         int i = skb_shinfo(skb)->nr_frags;
1431                         struct page_frag *pfrag = sk_page_frag(sk);
1432
1433                         err = -ENOMEM;
1434                         if (!sk_page_frag_refill(sk, pfrag))
1435                                 goto error;
1436
                             /*
                              * Start a new frag slot unless the data can be merged
                              * into the last one; fail with -EMSGSIZE when all
                              * MAX_SKB_FRAGS slots are in use.
                              */
1437                         if (!skb_can_coalesce(skb, i, pfrag->page,
1438                                               pfrag->offset)) {
1439                                 err = -EMSGSIZE;
1440                                 if (i == MAX_SKB_FRAGS)
1441                                         goto error;
1442
1443                                 __skb_fill_page_desc(skb, i, pfrag->page,
1444                                                      pfrag->offset, 0);
1445                                 skb_shinfo(skb)->nr_frags = ++i;
1446                                 get_page(pfrag->page);
1447                         }
1448                         copy = min_t(int, copy, pfrag->size - pfrag->offset);
1449                         if (getfrag(from,
1450                                     page_address(pfrag->page) + pfrag->offset,
1451                                     offset, copy, skb->len, skb) < 0)
1452                                 goto error_efault;
1453
                             /* Account for the bytes just copied into the page fragment. */
1454                         pfrag->offset += copy;
1455                         skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1456                         skb->len += copy;
1457                         skb->data_len += copy;
1458                         skb->truesize += copy;
1459                         atomic_add(copy, &sk->sk_wmem_alloc);
1460                 }
1461                 offset += copy;
1462                 length -= copy;
1463         }
1464
1465         return 0;
1466
1467 error_efault:
1468         err = -EFAULT;
1469 error:
             /* "length" now holds the bytes that were not queued; take them back out. */
1470         cork->length -= length;
1471         IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1472         return err;
1473 }
1474 EXPORT_SYMBOL_GPL(ip6_append_data);
1475
1476 static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1477 {
1478         if (np->cork.opt) {
1479                 kfree(np->cork.opt->dst0opt);
1480                 kfree(np->cork.opt->dst1opt);
1481                 kfree(np->cork.opt->hopopt);
1482                 kfree(np->cork.opt->srcrt);
1483                 kfree(np->cork.opt);
1484                 np->cork.opt = NULL;
1485         }
1486
1487         if (inet->cork.base.dst) {
1488                 dst_release(inet->cork.base.dst);
1489                 inet->cork.base.dst = NULL;
1490                 inet->cork.base.flags &= ~IPCORK_ALLFRAG;
1491         }
1492         memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1493 }
1494
1495 int ip6_push_pending_frames(struct sock *sk)
1496 {
1497         struct sk_buff *skb, *tmp_skb;
1498         struct sk_buff **tail_skb;
1499         struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1500         struct inet_sock *inet = inet_sk(sk);
1501         struct ipv6_pinfo *np = inet6_sk(sk);
1502         struct net *net = sock_net(sk);
1503         struct ipv6hdr *hdr;
1504         struct ipv6_txoptions *opt = np->cork.opt;
1505         struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst;
1506         struct flowi6 *fl6 = &inet->cork.fl.u.ip6;
1507         unsigned char proto = fl6->flowi6_proto;
1508         int err = 0;
1509
1510         if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1511                 goto out;
1512         tail_skb = &(skb_shinfo(skb)->frag_list);
1513
1514         /* move skb->data to ip header from ext header */
1515         if (skb->data < skb_network_header(skb))
1516                 __skb_pull(skb, skb_network_offset(skb));
1517         while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1518                 __skb_pull(tmp_skb, skb_network_header_len(skb));
1519                 *tail_skb = tmp_skb;
1520                 tail_skb = &(tmp_skb->next);
1521                 skb->len += tmp_skb->len;
1522                 skb->data_len += tmp_skb->len;
1523                 skb->truesize += tmp_skb->truesize;
1524                 tmp_skb->destructor = NULL;
1525                 tmp_skb->sk = NULL;
1526         }
1527
1528         /* Allow local fragmentation. */
1529         if (np->pmtudisc < IPV6_PMTUDISC_DO)
1530                 skb->local_df = 1;
1531
1532         *final_dst = fl6->daddr;
1533         __skb_pull(skb, skb_network_header_len(skb));
1534         if (opt && opt->opt_flen)
1535                 ipv6_push_frag_opts(skb, opt, &proto);
1536         if (opt && opt->opt_nflen)
1537                 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1538
1539         skb_push(skb, sizeof(struct ipv6hdr));
1540         skb_reset_network_header(skb);
1541         hdr = ipv6_hdr(skb);
1542
1543         ip6_flow_hdr(hdr, np->cork.tclass, fl6->flowlabel);
1544         hdr->hop_limit = np->cork.hop_limit;
1545         hdr->nexthdr = proto;
1546         hdr->saddr = fl6->saddr;
1547         hdr->daddr = *final_dst;
1548
1549         skb->priority = sk->sk_priority;
1550         skb->mark = sk->sk_mark;
1551
1552         skb_dst_set(skb, dst_clone(&rt->dst));
1553         IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
1554         if (proto == IPPROTO_ICMPV6) {
1555                 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1556
1557                 ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
1558                 ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
1559         }
1560
1561         err = ip6_local_out(skb);
1562         if (err) {
1563                 if (err > 0)
1564                         err = net_xmit_errno(err);
1565                 if (err)
1566                         goto error;
1567         }
1568
1569 out:
1570         ip6_cork_release(inet, np);
1571         return err;
1572 error:
1573         IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1574         goto out;
1575 }
1576 EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1577
1578 void ip6_flush_pending_frames(struct sock *sk)
1579 {
1580         struct sk_buff *skb;
1581
1582         while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1583                 if (skb_dst(skb))
1584                         IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1585                                       IPSTATS_MIB_OUTDISCARDS);
1586                 kfree_skb(skb);
1587         }
1588
1589         ip6_cork_release(inet_sk(sk), inet6_sk(sk));
1590 }
1591 EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);