Merge remote-tracking branch 'lsk/v3.10/topic/arm64-cpuidle' into linux-linaro-lsk
[firefly-linux-kernel-4.4.55.git] / net / ipv6 / ip6_output.c
1 /*
2  *      IPv6 output functions
3  *      Linux INET6 implementation
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      Based on linux/net/ipv4/ip_output.c
9  *
10  *      This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  *
15  *      Changes:
16  *      A.N.Kuznetsov   :       arithmetic in fragmentation.
17  *                              extension headers are implemented.
18  *                              route changes now work.
19  *                              ip6_forward does not confuse sniffers.
20  *                              etc.
21  *
22  *      H. von Brand    :       Added missing #include <linux/string.h>
23  *      Imran Patel     :       frag id should be in NBO
24  *      Kazunori MIYAZAWA @USAGI
25  *                      :       add ip6_append_data and related functions
26  *                              for datagram xmit
27  */
28
29 #include <linux/errno.h>
30 #include <linux/kernel.h>
31 #include <linux/string.h>
32 #include <linux/socket.h>
33 #include <linux/net.h>
34 #include <linux/netdevice.h>
35 #include <linux/if_arp.h>
36 #include <linux/in6.h>
37 #include <linux/tcp.h>
38 #include <linux/route.h>
39 #include <linux/module.h>
40 #include <linux/slab.h>
41
42 #include <linux/netfilter.h>
43 #include <linux/netfilter_ipv6.h>
44
45 #include <net/sock.h>
46 #include <net/snmp.h>
47
48 #include <net/ipv6.h>
49 #include <net/ndisc.h>
50 #include <net/protocol.h>
51 #include <net/ip6_route.h>
52 #include <net/addrconf.h>
53 #include <net/rawv6.h>
54 #include <net/icmp.h>
55 #include <net/xfrm.h>
56 #include <net/checksum.h>
57 #include <linux/mroute6.h>
58
59 int __ip6_local_out(struct sk_buff *skb)
60 {
61         int len;
62
63         len = skb->len - sizeof(struct ipv6hdr);
64         if (len > IPV6_MAXPLEN)
65                 len = 0;
66         ipv6_hdr(skb)->payload_len = htons(len);
67
68         return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
69                        skb_dst(skb)->dev, dst_output);
70 }
71
/* Run the LOCAL_OUT hook via __ip6_local_out() and, when the hook
 * accepts the packet (returns 1), pass it on to dst_output().
 * Any other value is the hook's verdict/error and is returned as-is.
 */
int ip6_local_out(struct sk_buff *skb)
{
	int err = __ip6_local_out(skb);

	if (likely(err == 1))
		err = dst_output(skb);
	return err;
}
EXPORT_SYMBOL_GPL(ip6_local_out);
83
/* Final IPv6 transmit step: handle multicast loopback/scoping, resolve
 * the L2 neighbour for the route's next hop, and hand the packet to the
 * neighbour output path.  Returns the neighbour output result, 0 when
 * the packet was (deliberately) dropped, or -EINVAL when no neighbour
 * entry could be created.
 */
static int ip6_finish_output2(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	struct neighbour *neigh;
	struct in6_addr *nexthop;
	int ret;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		/* Loop a copy of the packet back to local listeners when
		 * multicast loopback is enabled on the socket and either a
		 * multicast-routing socket wants unforwarded packets or this
		 * host has joined the group on the outgoing device.
		 */
		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
		    ((mroute6_socket(dev_net(dev), skb) &&
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					newskb, NULL, newskb->dev,
					dev_loopback_xmit);

			/* hop_limit 0: never put the original on the wire;
			 * the loopback clone above is all that's delivered.
			 */
			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(dev_net(dev), idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
				skb->len);

		/* Interface/node-local scoped multicast must not leave the
		 * host on a non-loopback device.
		 */
		if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
		    IPV6_ADDR_SCOPE_NODELOCAL &&
		    !(dev->flags & IFF_LOOPBACK)) {
			kfree_skb(skb);
			return 0;
		}
	}

	/* Resolve (or create) the neighbour entry for the next hop under
	 * rcu_read_lock_bh(), then let the neighbour layer transmit.
	 */
	rcu_read_lock_bh();
	nexthop = rt6_nexthop((struct rt6_info *)dst);
	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
	if (unlikely(!neigh))
		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
	if (!IS_ERR(neigh)) {
		ret = dst_neigh_output(dst, neigh, skb);
		rcu_read_unlock_bh();
		return ret;
	}
	rcu_read_unlock_bh();

	IP6_INC_STATS(dev_net(dst->dev),
		      ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}
149
150 static int ip6_finish_output(struct sk_buff *skb)
151 {
152         if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
153             dst_allfrag(skb_dst(skb)) ||
154             (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
155                 return ip6_fragment(skb, ip6_finish_output2);
156         else
157                 return ip6_finish_output2(skb);
158 }
159
160 int ip6_output(struct sk_buff *skb)
161 {
162         struct net_device *dev = skb_dst(skb)->dev;
163         struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
164         if (unlikely(idev->cnf.disable_ipv6)) {
165                 IP6_INC_STATS(dev_net(dev), idev,
166                               IPSTATS_MIB_OUTDISCARDS);
167                 kfree_skb(skb);
168                 return 0;
169         }
170
171         return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
172                             ip6_finish_output,
173                             !(IP6CB(skb)->flags & IP6SKB_REROUTED));
174 }
175
176 /*
177  *      xmit an sk_buff (used by TCP, SCTP and DCCP)
178  */
179
/*
 * ip6_xmit - push extension headers and the IPv6 header, then transmit
 * @sk:     sending socket (supplies hop limit, priority and mark)
 * @skb:    payload buffer; consumed on every return path
 * @fl6:    flow (saddr/daddr, flow label, protocol)
 * @opt:    extension headers to prepend, may be NULL
 * @tclass: traffic class for the IPv6 header
 *
 * Returns the LOCAL_OUT hook result on success, -ENOBUFS when headroom
 * reallocation fails, -EMSGSIZE when the packet exceeds the MTU and may
 * not be sent oversized.
 */
int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
	     struct ipv6_txoptions *opt, int tclass)
{
	struct net *net = sock_net(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl6->daddr;
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr;
	u8  proto = fl6->flowi6_proto;
	int seg_len = skb->len;
	int hlimit = -1;
	u32 mtu;

	if (opt) {
		unsigned int head_room;

		/* First: exthdrs may take lots of space (~8K for now)
		   MAX_HEADER is not enough.
		 */
		head_room = opt->opt_nflen + opt->opt_flen;
		seg_len += head_room;
		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

		if (skb_headroom(skb) < head_room) {
			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
			if (skb2 == NULL) {
				IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return -ENOBUFS;
			}
			consume_skb(skb);
			skb = skb2;
			/* charge the reallocated buffer to the socket */
			skb_set_owner_w(skb, sk);
		}
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);
		if (opt->opt_nflen)
			/* takes &first_hop so the next hop may be rewritten */
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	ip6_flow_hdr(hdr, tclass, fl6->flowlabel);

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	hdr->saddr = fl6->saddr;
	hdr->daddr = *first_hop;

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	mtu = dst_mtu(dst);
	/* Fits the MTU, the socket allows oversized frames (local_df),
	 * or GSO will segment it later: hand it to LOCAL_OUT.
	 */
	if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_OUT, skb->len);
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
			       dst->dev, dst_output);
	}

	/* Too big and not allowed out: report EMSGSIZE to the socket. */
	skb->dev = dst->dev;
	ipv6_local_error(sk, EMSGSIZE, fl6, mtu);
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}

EXPORT_SYMBOL(ip6_xmit);
261
/* Deliver a Router Alert packet to every raw socket registered in
 * ip6_ra_chain for RA value @sel.  All matching sockets but the last
 * receive clones; the last one consumes the original skb (avoids one
 * clone).  Returns 1 when the packet was consumed, 0 when no socket
 * matched and the caller still owns @skb.
 */
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		/* honour device binding: unbound sockets match anything */
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			if (last) {
				/* clone failure just skips this listener */
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}
290
/* Classify a to-be-forwarded packet whose destination is a proxied
 * neighbour entry.  Returns 1 to divert the packet to local input
 * (unicast NDISC messages for the proxied address), -1 to discard it
 * (link-local destination; sender is signalled via dst_link_failure),
 * or 0 to forward it normally.
 */
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	__be16 frag_off;
	int offset;

	/* Find the transport header behind any extension headers. */
	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		/* need at least the ICMPv6 type octet in linear data */
		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For reaction involving unicast neighbor discovery
			 * message destined to the proxied address, pass it to
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}
342
/* Final step of the FORWARD path (invoked from the netfilter hook):
 * hand the already-validated packet to the route's output function.
 */
static inline int ip6_forward_finish(struct sk_buff *skb)
{
	return dst_output(skb);
}
347
348 static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
349 {
350         if (skb->len <= mtu || skb->local_df)
351                 return false;
352
353         if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
354                 return true;
355
356         if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
357                 return false;
358
359         return true;
360 }
361
/*
 * ip6_forward - forward a received IPv6 packet towards its destination
 *
 * Performs the router duties: verifies forwarding is enabled and
 * permitted (xfrm policy, packet type, source address scope), delivers
 * Router Alert packets to registered sockets, emits the required
 * ICMPv6 errors (Time Exceeded, Packet Too Big, Destination
 * Unreachable) and redirects, decrements the hop limit and hands the
 * packet to the netfilter FORWARD hook.  The skb is consumed on every
 * path; returns 0 on success or diversion, a negative errno on drop.
 */
int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	u32 mtu;

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	/* LRO-merged skbs must not be forwarded. */
	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	/* Only forward packets addressed to us at the link layer. */
	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	skb_forward_csum(skb);

	/*
	 *	We DO NOT make any processing on
	 *	RA packets, pushing them to user level AS IS
	 *	without any WARRANTY that application will be able
	 *	to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not end-node, so that if packet contains
	 *	AH/ESP, we cannot make anything.
	 *	Defragmentation also would be mistake, RA packets
	 *	cannot be fragmented, because there is no warranty
	 *	that different fragments will go along one path. --ANK
	 */
	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			IP6_INC_STATS(net, ip6_dst_idev(dst),
				      IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	/* xfrm6_route_forward() may have switched the attached route */
	dst = skb_dst(skb);

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
	 */
	if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct inet_peer *peer;
		struct rt6_info *rt;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if (rt->rt6i_flags & RTF_GATEWAY)
			target = &rt->rt6i_gateway;
		else
			target = &hdr->daddr;

		peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (inet_peer_xrlim_allow(peer, 1*HZ))
			ndisc_send_redirect(skb, target);
		if (peer)
			inet_putpeer(peer);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}

	mtu = dst_mtu(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if (ip6_pkt_too_big(skb, mtu)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	/* Make the header writable before mangling the hop limit. */
	if (skb_cow(skb, dst->dev->hard_header_len)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}
518
519 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
520 {
521         to->pkt_type = from->pkt_type;
522         to->priority = from->priority;
523         to->protocol = from->protocol;
524         skb_dst_drop(to);
525         skb_dst_set(to, dst_clone(skb_dst(from)));
526         to->dev = from->dev;
527         to->mark = from->mark;
528
529 #ifdef CONFIG_NET_SCHED
530         to->tc_index = from->tc_index;
531 #endif
532         nf_copy(to, from);
533 #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
534         to->nf_trace = from->nf_trace;
535 #endif
536         skb_copy_secmark(to, from);
537 }
538
/*
 * ip6_fragment - split an oversized packet into fragments and send each
 * via @output.
 *
 * Fast path: when the skb already carries a frag list with suitable
 * geometry, the fragment headers are spliced in place and the existing
 * buffers are transmitted as-is.  Slow path: a fresh skb is allocated
 * and filled for every fragment.  @skb is consumed on every return
 * path; returns 0 on success or a negative errno.
 */
int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info*)skb_dst(skb);
	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
	struct ipv6hdr *tmp_hdr;
	struct frag_hdr *fh;
	unsigned int mtu, hlen, left, len;
	int hroom, troom;
	__be32 frag_id = 0;
	int ptr, offset = 0, err=0;
	u8 *prevhdr, nexthdr = 0;
	struct net *net = dev_net(skb_dst(skb)->dev);

	/* hlen = length of the unfragmentable part (up to the first
	 * fragmentable extension header); prevhdr points at the nexthdr
	 * byte that must be rewritten to NEXTHDR_FRAGMENT.
	 */
	hlen = ip6_find_1stfragopt(skb, &prevhdr);
	nexthdr = *prevhdr;

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb is not generated by a local socket.
	 */
	if (unlikely(!skb->local_df && skb->len > mtu) ||
		     (IP6CB(skb)->frag_max_size &&
		      IP6CB(skb)->frag_max_size > mtu)) {
		if (skb->sk && dst_allfrag(skb_dst(skb)))
			sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

		skb->dev = skb_dst(skb)->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	/* honour a smaller per-socket fragment size, when set */
	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	/* mtu now = payload bytes available per fragment */
	mtu -= hlen + sizeof(struct frag_hdr);

	if (skb_has_frag_list(skb)) {
		int first_len = skb_pagelen(skb);
		struct sk_buff *frag2;

		/* Fast path is only usable when the head fits the MTU,
		 * is 8-byte aligned, and nothing is cloned.
		 */
		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < hlen)
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			/* transfer socket accounting to each fragment */
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = 0;
		offset = 0;
		frag = skb_shinfo(skb)->frag_list;
		skb_frag_list_init(skb);
		/* BUILD HEADER */

		*prevhdr = NEXTHDR_FRAGMENT;
		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
		if (!tmp_hdr) {
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			return -ENOMEM;
		}

		/* splice a fragment header between the unfragmentable
		 * part and the first fragment's payload
		 */
		__skb_pull(skb, hlen);
		fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
		__skb_push(skb, hlen);
		skb_reset_network_header(skb);
		memcpy(skb_network_header(skb), tmp_hdr, hlen);

		ipv6_select_ident(fh, rt);
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->frag_off = htons(IP6_MF);
		frag_id = fh->identification;

		first_len = skb_pagelen(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->len = first_len;
		ipv6_hdr(skb)->payload_len = htons(first_len -
						   sizeof(struct ipv6hdr));

		/* balanced by ip6_rt_put() on both exit paths below */
		dst_hold(&rt->dst);

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				skb_reset_transport_header(frag);
				fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
				memcpy(skb_network_header(frag), tmp_hdr,
				       hlen);
				offset += skb->len - hlen - sizeof(struct frag_hdr);
				fh->nexthdr = nexthdr;
				fh->reserved = 0;
				fh->frag_off = htons(offset);
				if (frag->next != NULL)
					fh->frag_off |= htons(IP6_MF);
				fh->identification = frag_id;
				ipv6_hdr(frag)->payload_len =
						htons(frag->len -
						      sizeof(struct ipv6hdr));
				ip6_copy_metadata(frag, skb);
			}

			err = output(skb);
			if(!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		kfree(tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
				      IPSTATS_MIB_FRAGOKS);
			ip6_rt_put(rt);
			return 0;
		}

		/* transmit failed: free the fragments not yet sent */
		while (frag) {
			skb = frag->next;
			kfree_skb(frag);
			frag = skb;
		}

		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
			      IPSTATS_MIB_FRAGFAILS);
		ip6_rt_put(rt);
		return err;

slow_path_clean:
		/* undo the socket-ownership transfer performed above */
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

slow_path:
	if ((skb->ip_summed == CHECKSUM_PARTIAL) &&
	    skb_checksum_help(skb))
		goto fail;

	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;			/* Where to start from */

	/*
	 *	Fragment the datagram.
	 */

	*prevhdr = NEXTHDR_FRAGMENT;
	hroom = LL_RESERVED_SPACE(rt->dst.dev);
	troom = rt->dst.dev->needed_tailroom;

	/*
	 *	Keep copying data until we run out.
	 */
	while(left > 0)	{
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left)	{
			len &= ~7;
		}
		/*
		 *	Allocate buffer.
		 */

		if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
				      hroom + troom, GFP_ATOMIC)) == NULL) {
			NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip6_copy_metadata(frag, skb);
		skb_reserve(frag, hroom);
		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
		skb_reset_network_header(frag);
		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
		frag->transport_header = (frag->network_header + hlen +
					  sizeof(struct frag_hdr));

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */
		if (skb->sk)
			skb_set_owner_w(frag, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */
		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

		/*
		 *	Build fragment header.
		 */
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		/* pick one identification for the whole series, reuse it */
		if (!frag_id) {
			ipv6_select_ident(fh, rt);
			frag_id = fh->identification;
		} else
			fh->identification = frag_id;

		/*
		 *	Copy a block of the IP datagram.
		 */
		if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
			BUG();
		left -= len;

		fh->frag_off = htons(offset);
		if (left > 0)
			fh->frag_off |= htons(IP6_MF);
		ipv6_hdr(frag)->payload_len = htons(frag->len -
						    sizeof(struct ipv6hdr));

		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		err = output(frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	consume_skb(skb);
	return err;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}
824
825 static inline int ip6_rt_check(const struct rt6key *rt_key,
826                                const struct in6_addr *fl_addr,
827                                const struct in6_addr *addr_cache)
828 {
829         return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
830                 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
831 }
832
/*
 * ip6_sk_dst_check - validate a socket's cached dst against a flow
 * @sk:  socket owning the cached route
 * @dst: cached dst entry (may be NULL)
 * @fl6: flow the route must still match
 *
 * Returns @dst if it may be reused for @fl6, or NULL (after releasing
 * the reference) when the cached entry is of the wrong family, no
 * longer matches the destination/source, or was bound to another
 * output interface.
 */
static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  const struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt;

	if (!dst)
		goto out;

	/* A cached entry of another address family cannot serve an
	 * IPv6 flow; drop the reference and force a fresh lookup.
	 */
	if (dst->ops->family != AF_INET6) {
		dst_release(dst);
		return NULL;
	}

	rt = (struct rt6_info *)dst;
	/* Yes, checking route validity in not connected
	 * case is not very simple. Take into account,
	 * that we do not support routing by source, TOS,
	 * and MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If route was host route,
	 *    check that cached destination is current.
	 *    If it is network route, we still may
	 *    check its validity using saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save whole address now,
	 *    (because main consumer of this service
	 *    is tcp, which has not this problem),
	 *    so that the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
	    (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}
878
/*
 * ip6_dst_lookup_tail - common tail of the IPv6 route lookup paths
 * @sk:  socket requesting the route
 * @dst: in/out; may already hold a validated cached route, NULL forces
 *       a fresh routing lookup
 * @fl6: flow to route; saddr is filled in here if unspecified
 *
 * Returns 0 on success.  On failure a negative errno is returned and
 * *@dst is released and set to NULL.
 */
static int ip6_dst_lookup_tail(struct sock *sk,
			       struct dst_entry **dst, struct flowi6 *fl6)
{
	struct net *net = sock_net(sk);
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	struct neighbour *n;
	struct rt6_info *rt;
#endif
	int err;

	if (*dst == NULL)
		*dst = ip6_route_output(net, sk, fl6);

	if ((err = (*dst)->error))
		goto out_err_release;

	/* No source address given: derive one from the chosen route,
	 * honouring the socket's source-address preferences.
	 */
	if (ipv6_addr_any(&fl6->saddr)) {
		struct rt6_info *rt = (struct rt6_info *) *dst;
		err = ip6_route_get_saddr(net, rt, &fl6->daddr,
					  sk ? inet6_sk(sk)->srcprefs : 0,
					  &fl6->saddr);
		if (err)
			goto out_err_release;
	}

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	rt = (struct rt6_info *) *dst;
	rcu_read_lock_bh();
	n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt));
	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
	rcu_read_unlock_bh();

	if (err) {
		struct inet6_ifaddr *ifp;
		struct flowi6 fl_gw6;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
				      (*dst)->dev, 1);

		/* Only redirect to the default router while our source
		 * address is still optimistic (DAD not yet completed).
		 */
		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw6);
			if ((err = (*dst)->error))
				goto out_err_release;
		}
	}
#endif

	return 0;

out_err_release:
	if (err == -ENETUNREACH)
		IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	dst_release(*dst);
	*dst = NULL;
	return err;
}
955
956 /**
957  *      ip6_dst_lookup - perform route lookup on flow
958  *      @sk: socket which provides route info
959  *      @dst: pointer to dst_entry * for result
960  *      @fl6: flow to lookup
961  *
962  *      This function performs a route lookup on the given flow.
963  *
964  *      It returns zero on success, or a standard errno code on error.
965  */
966 int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6)
967 {
968         *dst = NULL;
969         return ip6_dst_lookup_tail(sk, dst, fl6);
970 }
971 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
972
973 /**
974  *      ip6_dst_lookup_flow - perform route lookup on flow with ipsec
975  *      @sk: socket which provides route info
976  *      @fl6: flow to lookup
977  *      @final_dst: final destination address for ipsec lookup
978  *      @can_sleep: we are in a sleepable context
979  *
980  *      This function performs a route lookup on the given flow.
981  *
982  *      It returns a valid dst pointer on success, or a pointer encoded
983  *      error code.
984  */
985 struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
986                                       const struct in6_addr *final_dst,
987                                       bool can_sleep)
988 {
989         struct dst_entry *dst = NULL;
990         int err;
991
992         err = ip6_dst_lookup_tail(sk, &dst, fl6);
993         if (err)
994                 return ERR_PTR(err);
995         if (final_dst)
996                 fl6->daddr = *final_dst;
997         if (can_sleep)
998                 fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;
999
1000         return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
1001 }
1002 EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1003
1004 /**
1005  *      ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
1006  *      @sk: socket which provides the dst cache and route info
1007  *      @fl6: flow to lookup
1008  *      @final_dst: final destination address for ipsec lookup
1009  *      @can_sleep: we are in a sleepable context
1010  *
1011  *      This function performs a route lookup on the given flow with the
1012  *      possibility of using the cached route in the socket if it is valid.
1013  *      It will take the socket dst lock when operating on the dst cache.
1014  *      As a result, this function can only be used in process context.
1015  *
1016  *      It returns a valid dst pointer on success, or a pointer encoded
1017  *      error code.
1018  */
1019 struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1020                                          const struct in6_addr *final_dst,
1021                                          bool can_sleep)
1022 {
1023         struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1024         int err;
1025
1026         dst = ip6_sk_dst_check(sk, dst, fl6);
1027
1028         err = ip6_dst_lookup_tail(sk, &dst, fl6);
1029         if (err)
1030                 return ERR_PTR(err);
1031         if (final_dst)
1032                 fl6->daddr = *final_dst;
1033         if (can_sleep)
1034                 fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;
1035
1036         return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
1037 }
1038 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
1039
/*
 * ip6_ufo_append_data - append data for UDP fragmentation offload
 *
 * Builds (on first call) a single skb carrying the whole UDP datagram
 * and marks it for GSO so the device fragments it; subsequent calls
 * only append further payload to that skb's frag list.
 *
 * Returns 0 on success or a negative errno from allocation / copy-in.
 */
static inline int ip6_ufo_append_data(struct sock *sk,
			int getfrag(void *from, char *to, int offset, int len,
			int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
			int transhdrlen, int mtu,unsigned int flags,
			struct rt6_info *rt)

{
	struct sk_buff *skb;
	int err;

	/* There is support for UDP large send offload by network
	 * device, so create one single skb packet containing complete
	 * udp datagram
	 */
	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
		struct frag_hdr fhdr;

		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);
		if (skb == NULL)
			return err;

		/* reserve space for Hardware header */
		skb_reserve(skb, hh_len);

		/* create space for UDP/IP header */
		skb_put(skb,fragheaderlen + transhdrlen);

		/* initialize network header pointer */
		skb_reset_network_header(skb);

		/* initialize protocol header pointer */
		skb->transport_header = skb->network_header + fragheaderlen;

		/* checksum is finished by the device on transmit */
		skb->ip_summed = CHECKSUM_PARTIAL;
		skb->csum = 0;

		/* Specify the length of each IPv6 datagram fragment.
		 * It has to be a multiple of 8.
		 */
		skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
					     sizeof(struct frag_hdr)) & ~7;
		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
		/* All device-built fragments share one fragment ID. */
		ipv6_select_ident(&fhdr, rt);
		skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
		__skb_queue_tail(&sk->sk_write_queue, skb);
	}

	return skb_append_datato_frags(sk, skb, getfrag, from,
				       (length - transhdrlen));
}
1093
1094 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1095                                                gfp_t gfp)
1096 {
1097         return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1098 }
1099
1100 static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1101                                                 gfp_t gfp)
1102 {
1103         return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1104 }
1105
1106 static void ip6_append_data_mtu(unsigned int *mtu,
1107                                 int *maxfraglen,
1108                                 unsigned int fragheaderlen,
1109                                 struct sk_buff *skb,
1110                                 struct rt6_info *rt,
1111                                 unsigned int orig_mtu)
1112 {
1113         if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
1114                 if (skb == NULL) {
1115                         /* first fragment, reserve header_len */
1116                         *mtu = orig_mtu - rt->dst.header_len;
1117
1118                 } else {
1119                         /*
1120                          * this fragment is not first, the headers
1121                          * space is regarded as data space.
1122                          */
1123                         *mtu = orig_mtu;
1124                 }
1125                 *maxfraglen = ((*mtu - fragheaderlen) & ~7)
1126                               + fragheaderlen - sizeof(struct frag_hdr);
1127         }
1128 }
1129
/*
 * ip6_append_data - append user data to the socket's pending send queue
 * @sk:          socket to append to
 * @getfrag:     callback that copies @copy bytes of user data into the skb
 * @from:        opaque cookie passed to @getfrag
 * @length:      number of payload bytes to append in this call
 * @transhdrlen: transport header length (non-zero only on the first call)
 * @hlimit:      hop limit to record in the cork
 * @tclass:      traffic class to record in the cork
 * @opt:         IPv6 tx options (duplicated into the cork on first call)
 * @fl6:         flow describing the destination
 * @rt:          route to send over
 * @flags:       MSG_* flags (MSG_MORE, MSG_DONTWAIT, MSG_PROBE)
 * @dontfrag:    if set, refuse to fragment UDP/RAW payloads over the MTU
 *
 * First call on an empty write queue sets up the cork (options, route,
 * MTU); later calls reuse the corked state.  Data is packed into skbs
 * no larger than maxfraglen so ip6_push_pending_frames() can emit them.
 * Returns 0 on success or a negative errno.
 */
int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
	int offset, int len, int odd, struct sk_buff *skb),
	void *from, int length, int transhdrlen,
	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
	struct rt6_info *rt, unsigned int flags, int dontfrag)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct inet_cork *cork;
	struct sk_buff *skb, *skb_prev = NULL;
	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu;
	int exthdrlen;
	int dst_exthdrlen;
	int hh_len;
	int copy;
	int err;
	int offset = 0;
	__u8 tx_flags = 0;

	if (flags&MSG_PROBE)
		return 0;
	cork = &inet->cork.base;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		if (opt) {
			if (WARN_ON(np->cork.opt))
				return -EINVAL;

			/* Options are duplicated into the cork so they
			 * survive until the frames are finally pushed;
			 * ip6_cork_release() frees them.
			 */
			np->cork.opt = kzalloc(opt->tot_len, sk->sk_allocation);
			if (unlikely(np->cork.opt == NULL))
				return -ENOBUFS;

			np->cork.opt->tot_len = opt->tot_len;
			np->cork.opt->opt_flen = opt->opt_flen;
			np->cork.opt->opt_nflen = opt->opt_nflen;

			np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
							    sk->sk_allocation);
			if (opt->dst0opt && !np->cork.opt->dst0opt)
				return -ENOBUFS;

			np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
							    sk->sk_allocation);
			if (opt->dst1opt && !np->cork.opt->dst1opt)
				return -ENOBUFS;

			np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
							   sk->sk_allocation);
			if (opt->hopopt && !np->cork.opt->hopopt)
				return -ENOBUFS;

			np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
							    sk->sk_allocation);
			if (opt->srcrt && !np->cork.opt->srcrt)
				return -ENOBUFS;

			/* need source address above miyazawa*/
		}
		dst_hold(&rt->dst);
		cork->dst = &rt->dst;
		inet->cork.fl.u.ip6 = *fl6;
		np->cork.hop_limit = hlimit;
		np->cork.tclass = tclass;
		/* For xfrm tunnels use the outer dst MTU, otherwise the
		 * path MTU; IPV6_PMTUDISC_PROBE uses the device MTU.
		 */
		if (rt->dst.flags & DST_XFRM_TUNNEL)
			mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
			      rt->dst.dev->mtu : dst_mtu(&rt->dst);
		else
			mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
			      rt->dst.dev->mtu : dst_mtu(rt->dst.path);
		if (np->frag_size < mtu) {
			if (np->frag_size)
				mtu = np->frag_size;
		}
		cork->fragsize = mtu;
		if (dst_allfrag(rt->dst.path))
			cork->flags |= IPCORK_ALLFRAG;
		cork->length = 0;
		exthdrlen = (opt ? opt->opt_flen : 0);
		length += exthdrlen;
		transhdrlen += exthdrlen;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	} else {
		/* Already corked: restore state recorded on the first call. */
		rt = (struct rt6_info *)cork->dst;
		fl6 = &inet->cork.fl.u.ip6;
		opt = np->cork.opt;
		transhdrlen = 0;
		exthdrlen = 0;
		dst_exthdrlen = 0;
		mtu = cork->fragsize;
	}
	orig_mtu = mtu;

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);

	/* Reject datagrams whose total size exceeds the IPv6 payload limit. */
	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
		if (cork->length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
			ipv6_local_error(sk, EMSGSIZE, fl6, mtu-exthdrlen);
			return -EMSGSIZE;
		}
	}

	/* For UDP, check if TX timestamp is enabled */
	if (sk->sk_type == SOCK_DGRAM)
		sock_tx_timestamp(sk, &tx_flags);

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octects, in total).
	 *
	 * Note that we may need to "move" the data from the tail of
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	/* IPV6_DONTFRAG: report the path MTU instead of fragmenting. */
	if ((length > mtu) && dontfrag && (sk->sk_protocol == IPPROTO_UDP ||
					   sk->sk_protocol == IPPROTO_RAW)) {
		ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen);
		return -EMSGSIZE;
	}

	skb = skb_peek_tail(&sk->sk_write_queue);
	cork->length += length;
	/* UDP fragmentation offload path: hand the whole datagram to
	 * the device as one GSO skb.
	 */
	if (((length > mtu) ||
	     (skb && skb_has_frags(skb))) &&
	    (sk->sk_protocol == IPPROTO_UDP) &&
	    (rt->dst.dev->features & NETIF_F_UFO)) {
		err = ip6_ufo_append_data(sk, getfrag, from, length,
					  hh_len, fragheaderlen,
					  transhdrlen, mtu, flags, rt);
		if (err)
			goto error;
		return 0;
	}

	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
alloc_new_skb:
			/* There's no room in the current skb */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (skb == NULL || skb_prev == NULL)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt,
						    orig_mtu);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			alloclen += dst_exthdrlen;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			alloclen += rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				/* Not the first fragment: don't block, and
				 * stay within twice the send buffer.
				 */
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(skb == NULL))
					err = -ENOBUFS;
				else {
					/* Only the initial fragment
					 * is time stamped.
					 */
					tx_flags = 0;
				}
			}
			if (skb == NULL)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->ip_summed = CHECKSUM_NONE;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			if (sk->sk_type == SOCK_DGRAM)
				skb_shinfo(skb)->tx_flags = tx_flags;

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				/* Move the 8-byte-alignment overhang from the
				 * previous skb into this one, fixing up the
				 * running checksums of both.
				 */
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			copy = datalen - transhdrlen - fraggap;

			if (copy < 0) {
				err = -EINVAL;
				kfree_skb(skb);
				goto error;
			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG)) {
			/* No scatter-gather: copy into the skb's linear area. */
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			/* Scatter-gather: copy into the per-socket page
			 * fragment, coalescing with the last frag if possible.
			 */
			int i = skb_shinfo(skb)->nr_frags;
			struct page_frag *pfrag = sk_page_frag(sk);

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}

	return 0;

error_efault:
	err = -EFAULT;
error:
	/* Undo the optimistic length accounting done before the loop. */
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}
EXPORT_SYMBOL_GPL(ip6_append_data);
1475
1476 static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1477 {
1478         if (np->cork.opt) {
1479                 kfree(np->cork.opt->dst0opt);
1480                 kfree(np->cork.opt->dst1opt);
1481                 kfree(np->cork.opt->hopopt);
1482                 kfree(np->cork.opt->srcrt);
1483                 kfree(np->cork.opt);
1484                 np->cork.opt = NULL;
1485         }
1486
1487         if (inet->cork.base.dst) {
1488                 dst_release(inet->cork.base.dst);
1489                 inet->cork.base.dst = NULL;
1490                 inet->cork.base.flags &= ~IPCORK_ALLFRAG;
1491         }
1492         memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1493 }
1494
/*
 * ip6_push_pending_frames - emit the corked data queued by ip6_append_data()
 * @sk: socket whose write queue is flushed
 *
 * Coalesces every queued skb into one packet (trailing skbs become the
 * head skb's frag_list), pushes extension headers and the IPv6 header,
 * and hands the result to ip6_local_out().  Always releases the cork.
 * Returns 0 on success or a negative errno.
 */
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = np->cork.opt;
	struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst;
	struct flowi6 *fl6 = &inet->cork.fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;
	int err = 0;

	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	/* Chain every remaining queued skb onto the head skb's frag_list,
	 * transferring length/truesize accounting and ownership.
	 */
	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	if (np->pmtudisc < IPV6_PMTUDISC_DO)
		skb->local_df = 1;

	/* Keep a copy of the final destination: pushing non-fragmentable
	 * options (e.g. a routing header) may rewrite it.
	 */
	*final_dst = fl6->daddr;
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	ip6_flow_hdr(hdr, np->cork.tclass, fl6->flowlabel);
	hdr->hop_limit = np->cork.hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
	}

	err = ip6_local_out(skb);
	if (err) {
		if (err > 0)
			err = net_xmit_errno(err);
		if (err)
			goto error;
	}

out:
	ip6_cork_release(inet, np);
	return err;
error:
	IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	goto out;
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1577
1578 void ip6_flush_pending_frames(struct sock *sk)
1579 {
1580         struct sk_buff *skb;
1581
1582         while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1583                 if (skb_dst(skb))
1584                         IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1585                                       IPSTATS_MIB_OUTDISCARDS);
1586                 kfree_skb(skb);
1587         }
1588
1589         ip6_cork_release(inet_sk(sk), inet6_sk(sk));
1590 }
1591 EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);