Merge branch 'v3.10/topic/gator' of git://git.linaro.org/kernel/linux-linaro-stable...
[firefly-linux-kernel-4.4.55.git] / net / ipv6 / ip6_output.c
1 /*
2  *      IPv6 output functions
3  *      Linux INET6 implementation
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      Based on linux/net/ipv4/ip_output.c
9  *
10  *      This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  *
15  *      Changes:
16  *      A.N.Kuznetsov   :       arithmetics in fragmentation.
17  *                              extension headers are implemented.
18  *                              route changes now work.
19  *                              ip6_forward does not confuse sniffers.
20  *                              etc.
21  *
22  *      H. von Brand    :       Added missing #include <linux/string.h>
23  *      Imran Patel     :       frag id should be in NBO
24  *      Kazunori MIYAZAWA @USAGI
25  *                      :       add ip6_append_data and related functions
26  *                              for datagram xmit
27  */
28
29 #include <linux/errno.h>
30 #include <linux/kernel.h>
31 #include <linux/string.h>
32 #include <linux/socket.h>
33 #include <linux/net.h>
34 #include <linux/netdevice.h>
35 #include <linux/if_arp.h>
36 #include <linux/in6.h>
37 #include <linux/tcp.h>
38 #include <linux/route.h>
39 #include <linux/module.h>
40 #include <linux/slab.h>
41
42 #include <linux/netfilter.h>
43 #include <linux/netfilter_ipv6.h>
44
45 #include <net/sock.h>
46 #include <net/snmp.h>
47
48 #include <net/ipv6.h>
49 #include <net/ndisc.h>
50 #include <net/protocol.h>
51 #include <net/ip6_route.h>
52 #include <net/addrconf.h>
53 #include <net/rawv6.h>
54 #include <net/icmp.h>
55 #include <net/xfrm.h>
56 #include <net/checksum.h>
57 #include <linux/mroute6.h>
58
59 int __ip6_local_out(struct sk_buff *skb)
60 {
61         int len;
62
63         len = skb->len - sizeof(struct ipv6hdr);
64         if (len > IPV6_MAXPLEN)
65                 len = 0;
66         ipv6_hdr(skb)->payload_len = htons(len);
67
68         return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
69                        skb_dst(skb)->dev, dst_output);
70 }
71
/* Run the LOCAL_OUT hook and, when it accepts the packet (verdict 1),
 * hand the skb to the route's output function.
 */
int ip6_local_out(struct sk_buff *skb)
{
	int ret = __ip6_local_out(skb);

	if (likely(ret == 1))
		ret = dst_output(skb);

	return ret;
}
EXPORT_SYMBOL_GPL(ip6_local_out);
83
/*
 * ip6_finish_output2 - last transmit step: multicast loopback handling
 * and neighbour resolution, then hand the skb to the neighbour output
 * path.
 *
 * Returns the neighbour output result, 0 when the packet was fully
 * consumed here (looped back or dropped multicast), or -EINVAL when no
 * neighbour entry could be created for the next hop.
 */
static int ip6_finish_output2(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	struct neighbour *neigh;
	struct in6_addr *nexthop;
	int ret;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		/* Loop a copy back to local listeners when the socket wants
		 * multicast loopback and either a multicast-router socket
		 * exists (and the packet was not already forwarded) or this
		 * host is a member of the destination group.
		 */
		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
		    ((mroute6_socket(dev_net(dev), skb) &&
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					newskb, NULL, newskb->dev,
					dev_loopback_xmit);

			/* hop_limit 0 means loopback delivery only: the
			 * original must never reach the wire.
			 */
			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(dev_net(dev), idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
				skb->len);

		/* Node-local scope multicast must never leave the node. */
		if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
		    IPV6_ADDR_SCOPE_NODELOCAL &&
		    !(dev->flags & IFF_LOOPBACK)) {
			kfree_skb(skb);
			return 0;
		}
	}

	/* Resolve (or create) the neighbour entry for the next hop under
	 * rcu_read_lock_bh so the noref lookup stays valid across output.
	 */
	rcu_read_lock_bh();
	nexthop = rt6_nexthop((struct rt6_info *)dst);
	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
	if (unlikely(!neigh))
		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
	if (!IS_ERR(neigh)) {
		ret = dst_neigh_output(dst, neigh, skb);
		rcu_read_unlock_bh();
		return ret;
	}
	rcu_read_unlock_bh();

	IP6_INC_STATS(dev_net(dst->dev),
		      ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}
149
150 static int ip6_finish_output(struct sk_buff *skb)
151 {
152         if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
153             dst_allfrag(skb_dst(skb)) ||
154             (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
155                 return ip6_fragment(skb, ip6_finish_output2);
156         else
157                 return ip6_finish_output2(skb);
158 }
159
160 int ip6_output(struct sk_buff *skb)
161 {
162         struct net_device *dev = skb_dst(skb)->dev;
163         struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
164         if (unlikely(idev->cnf.disable_ipv6)) {
165                 IP6_INC_STATS(dev_net(dev), idev,
166                               IPSTATS_MIB_OUTDISCARDS);
167                 kfree_skb(skb);
168                 return 0;
169         }
170
171         return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
172                             ip6_finish_output,
173                             !(IP6CB(skb)->flags & IP6SKB_REROUTED));
174 }
175
176 /*
177  *      xmit an sk_buff (used by TCP, SCTP and DCCP)
178  */
179
/*
 * ip6_xmit - build the IPv6 header (plus any extension headers from
 * @opt) on @skb and pass it to the netfilter LOCAL_OUT hook.
 *
 * @sk:     sending socket; supplies hop-limit policy, priority and mark
 * @skb:    payload with the transport header already in place
 * @fl6:    flow key providing saddr/daddr, flow label and protocol
 * @opt:    optional IPv6 extension headers to push, may be NULL
 * @tclass: traffic class for the IPv6 header
 *
 * Returns the netfilter verdict on success, -ENOBUFS when headroom
 * reallocation fails, or -EMSGSIZE when the packet exceeds the MTU and
 * must not be sent (the error is also reported to the socket).
 */
int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
	     struct ipv6_txoptions *opt, int tclass)
{
	struct net *net = sock_net(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl6->daddr;
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr;
	u8  proto = fl6->flowi6_proto;
	int seg_len = skb->len;
	int hlimit = -1;
	u32 mtu;

	if (opt) {
		unsigned int head_room;

		/* First: exthdrs may take lots of space (~8K for now)
		   MAX_HEADER is not enough.
		 */
		head_room = opt->opt_nflen + opt->opt_flen;
		seg_len += head_room;
		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

		if (skb_headroom(skb) < head_room) {
			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
			if (skb2 == NULL) {
				IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return -ENOBUFS;
			}
			consume_skb(skb);
			skb = skb2;
			/* re-own the copy so socket write-memory accounting
			 * stays correct
			 */
			skb_set_owner_w(skb, sk);
		}
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);
		if (opt->opt_nflen)
			/* may rewrite first_hop (e.g. via a routing header) */
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		/* no per-socket hop limit configured: use the route's */
		hlimit = ip6_dst_hoplimit(dst);

	ip6_flow_hdr(hdr, tclass, fl6->flowlabel);

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	hdr->saddr = fl6->saddr;
	hdr->daddr = *first_hop;

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_OUT, skb->len);
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
			       dst->dev, dst_output);
	}

	/* Too big for the path MTU and not allowed to fragment locally:
	 * report EMSGSIZE to the socket and drop.
	 */
	skb->dev = dst->dev;
	ipv6_local_error(sk, EMSGSIZE, fl6, mtu);
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}

EXPORT_SYMBOL(ip6_xmit);
261
/*
 * Deliver a Router Alert packet to every raw socket registered on
 * ip6_ra_chain with matching selector @sel.  Each matching socket except
 * the last receives a clone; the last one consumes @skb itself.
 *
 * Returns 1 when the packet was delivered (ownership of @skb passed on),
 * 0 when no socket matched and the caller keeps ownership.
 */
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		/* match the RA selector and, if the socket is bound to a
		 * device, the ingress interface
		 */
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			if (last) {
				/* previous match gets a clone; a failed
				 * clone is silently skipped (best effort)
				 */
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}
290
/*
 * Decide how to handle a packet whose destination this router proxies
 * NDISC for.
 *
 * Returns 1 when the packet is a unicast neighbour-discovery ICMPv6
 * message that must be handed to local input, 0 to forward it normally,
 * or -1 when it must be dropped (link-local destinations cannot be
 * proxied).
 */
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	__be16 frag_off;
	int offset;

	/* Walk past any extension headers to find the transport proto. */
	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		/* ensure at least the ICMPv6 type octet is linear */
		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icm6_type_switch: icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For reaction involving unicast neighbor discovery
			 * message destined to the proxied address, pass it to
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}
342
/* Final step of ip6_forward() after the netfilter FORWARD hook: hand the
 * skb to the route's output function.
 */
static inline int ip6_forward_finish(struct sk_buff *skb)
{
	return dst_output(skb);
}
347
348 static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
349 {
350         if (skb->len <= mtu || skb->local_df)
351                 return false;
352
353         if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
354                 return true;
355
356         if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
357                 return false;
358
359         return true;
360 }
361
/*
 * ip6_forward - forward a received IPv6 packet out through the route
 * attached to it.
 *
 * Performs forwarding sanity checks (forwarding enabled, xfrm policy,
 * hop limit, source-address class, path MTU), delivers router-alert
 * packets to registered sockets, handles NDISC proxying, generates
 * redirects when packet re-exits the ingress device, then decrements
 * the hop limit and passes the packet through the netfilter FORWARD
 * hook.
 *
 * Consumes @skb on every path.  Returns 0 on success or when the packet
 * was diverted elsewhere, a negative errno when it was dropped.
 */
int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	u32 mtu;

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	/* LRO-merged frames no longer have on-wire geometry and must not
	 * be forwarded.
	 */
	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	/* only forward frames that were actually addressed to us at L2 */
	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	skb_forward_csum(skb);

	/*
	 *	We DO NOT make any processing on
	 *	RA packets, pushing them to user level AS IS
	 *	without any WARRANTY that application will be able
	 *	to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not end-node, so that if packet contains
	 *	AH/ESP, we cannot make anything.
	 *	Defragmentation also would be mistake, RA packets
	 *	cannot be fragmented, because there is no warranty
	 *	that different fragments will go along one path. --ANK
	 */
	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			IP6_INC_STATS(net, ip6_dst_idev(dst),
				      IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	/* xfrm may have attached a different route; re-read it */
	dst = skb_dst(skb);

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
	 */
	if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct inet_peer *peer;
		struct rt6_info *rt;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if (rt->rt6i_flags & RTF_GATEWAY)
			target = &rt->rt6i_gateway;
		else
			target = &hdr->daddr;

		peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (inet_peer_xrlim_allow(peer, 1*HZ))
			ndisc_send_redirect(skb, target);
		if (peer)
			inet_putpeer(peer);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}

	mtu = dst_mtu(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if (ip6_pkt_too_big(skb, mtu)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	/* ensure a private, writable copy before mutating the header */
	if (skb_cow(skb, dst->dev->hard_header_len)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}
518
519 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
520 {
521         to->pkt_type = from->pkt_type;
522         to->priority = from->priority;
523         to->protocol = from->protocol;
524         skb_dst_drop(to);
525         skb_dst_set(to, dst_clone(skb_dst(from)));
526         to->dev = from->dev;
527         to->mark = from->mark;
528
529 #ifdef CONFIG_NET_SCHED
530         to->tc_index = from->tc_index;
531 #endif
532         nf_copy(to, from);
533 #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
534         to->nf_trace = from->nf_trace;
535 #endif
536         skb_copy_secmark(to, from);
537 }
538
/*
 * ip6_fragment - split @skb into fragments no larger than the path MTU
 * and emit each one through @output.
 *
 * Fast path: when the skb carries a well-formed frag list, the existing
 * sub-skbs are converted into fragments in place.  Otherwise the slow
 * path allocates one fresh skb per fragment and copies the payload.
 *
 * Consumes @skb on every path.  Returns 0 on success or a negative
 * errno on failure.
 */
int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info*)skb_dst(skb);
	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
	struct ipv6hdr *tmp_hdr;
	struct frag_hdr *fh;
	unsigned int mtu, hlen, left, len;
	int hroom, troom;
	__be32 frag_id = 0;
	int ptr, offset = 0, err=0;
	u8 *prevhdr, nexthdr = 0;
	struct net *net = dev_net(skb_dst(skb)->dev);

	/* hlen = size of the unfragmentable part repeated in each frag */
	hlen = ip6_find_1stfragopt(skb, &prevhdr);
	nexthdr = *prevhdr;

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb it not generated by a local socket.
	 */
	if (unlikely(!skb->local_df && skb->len > mtu) ||
		     (IP6CB(skb)->frag_max_size &&
		      IP6CB(skb)->frag_max_size > mtu)) {
		if (skb->sk && dst_allfrag(skb_dst(skb)))
			sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

		skb->dev = skb_dst(skb)->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	/* honour a smaller per-socket fragment size when configured */
	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	/* from here on, mtu is the payload budget of each fragment */
	mtu -= hlen + sizeof(struct frag_hdr);

	if (skb_has_frag_list(skb)) {
		int first_len = skb_pagelen(skb);
		struct sk_buff *frag2;

		/* fast path needs an 8-byte-aligned, MTU-fitting, uncloned
		 * head; otherwise fall back to copying
		 */
		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < hlen)
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = 0;
		offset = 0;
		frag = skb_shinfo(skb)->frag_list;
		skb_frag_list_init(skb);
		/* BUILD HEADER */

		*prevhdr = NEXTHDR_FRAGMENT;
		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
		if (!tmp_hdr) {
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			return -ENOMEM;
		}

		/* open a gap for the fragment header between the
		 * unfragmentable part and the first fragment's payload
		 */
		__skb_pull(skb, hlen);
		fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
		__skb_push(skb, hlen);
		skb_reset_network_header(skb);
		memcpy(skb_network_header(skb), tmp_hdr, hlen);

		ipv6_select_ident(fh, rt);
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->frag_off = htons(IP6_MF);
		frag_id = fh->identification;

		first_len = skb_pagelen(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->len = first_len;
		ipv6_hdr(skb)->payload_len = htons(first_len -
						   sizeof(struct ipv6hdr));

		dst_hold(&rt->dst);

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				skb_reset_transport_header(frag);
				fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
				memcpy(skb_network_header(frag), tmp_hdr,
				       hlen);
				offset += skb->len - hlen - sizeof(struct frag_hdr);
				fh->nexthdr = nexthdr;
				fh->reserved = 0;
				fh->frag_off = htons(offset);
				if (frag->next != NULL)
					fh->frag_off |= htons(IP6_MF);
				fh->identification = frag_id;
				ipv6_hdr(frag)->payload_len =
						htons(frag->len -
						      sizeof(struct ipv6hdr));
				ip6_copy_metadata(frag, skb);
			}

			err = output(skb);
			if(!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		kfree(tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
				      IPSTATS_MIB_FRAGOKS);
			ip6_rt_put(rt);
			return 0;
		}

		/* an output failed: free the unsent remainder of the list */
		while (frag) {
			skb = frag->next;
			kfree_skb(frag);
			frag = skb;
		}

		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
			      IPSTATS_MIB_FRAGFAILS);
		ip6_rt_put(rt);
		return err;

slow_path_clean:
		/* undo the ownership transfer done during validation */
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

slow_path:
	/* a partial checksum cannot survive payload copying; finish it */
	if ((skb->ip_summed == CHECKSUM_PARTIAL) &&
	    skb_checksum_help(skb))
		goto fail;

	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;			/* Where to start from */

	/*
	 *	Fragment the datagram.
	 */

	*prevhdr = NEXTHDR_FRAGMENT;
	hroom = LL_RESERVED_SPACE(rt->dst.dev);
	troom = rt->dst.dev->needed_tailroom;

	/*
	 *	Keep copying data until we run out.
	 */
	while(left > 0)	{
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left)	{
			len &= ~7;
		}
		/*
		 *	Allocate buffer.
		 */

		if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
				      hroom + troom, GFP_ATOMIC)) == NULL) {
			NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip6_copy_metadata(frag, skb);
		skb_reserve(frag, hroom);
		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
		skb_reset_network_header(frag);
		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
		frag->transport_header = (frag->network_header + hlen +
					  sizeof(struct frag_hdr));

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */
		if (skb->sk)
			skb_set_owner_w(frag, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */
		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

		/*
		 *	Build fragment header.
		 */
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		if (!frag_id) {
			ipv6_select_ident(fh, rt);
			frag_id = fh->identification;
		} else
			fh->identification = frag_id;

		/*
		 *	Copy a block of the IP datagram.
		 */
		if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
			BUG();
		left -= len;

		fh->frag_off = htons(offset);
		if (left > 0)
			fh->frag_off |= htons(IP6_MF);
		ipv6_hdr(frag)->payload_len = htons(frag->len -
						    sizeof(struct ipv6hdr));

		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		err = output(frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	consume_skb(skb);
	return err;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}
824
825 static inline int ip6_rt_check(const struct rt6key *rt_key,
826                                const struct in6_addr *fl_addr,
827                                const struct in6_addr *addr_cache)
828 {
829         return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
830                 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
831 }
832
/*
 * ip6_sk_dst_check - validate a socket-cached dst against a flow
 * @sk:  socket owning the cached route
 * @dst: candidate dst from the socket cache (may be NULL)
 * @fl6: flow the cached route must still match
 *
 * Returns @dst if it is still usable for @fl6; otherwise releases it
 * and returns NULL so the caller falls back to a fresh route lookup.
 */
static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  const struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt;

	if (!dst)
		goto out;

	/* A non-IPv6 entry in the cache cannot be reused for this flow;
	 * drop our reference and force a new lookup.
	 */
	if (dst->ops->family != AF_INET6) {
		dst_release(dst);
		return NULL;
	}

	rt = (struct rt6_info *)dst;
	/* Yes, checking route validity in not connected
	 * case is not very simple. Take into account,
	 * that we do not support routing by source, TOS,
	 * and MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If route was host route,
	 *    check that cached destination is current.
	 *    If it is network route, we still may
	 *    check its validity using saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save whole address now,
	 *    (because main consumer of this service
	 *    is tcp, which has not this problem),
	 *    so that the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
	    (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}
878
/*
 * ip6_dst_lookup_tail - common tail for the flow route lookup helpers
 * @sk:  socket the lookup is performed for
 * @dst: in/out; when *@dst is NULL a routing table lookup is done,
 *       otherwise the supplied entry is reused
 * @fl6: flow to route; when the caller left @fl6->saddr unspecified a
 *       source address is selected into it
 *
 * Returns 0 on success.  On failure a negative errno is returned and
 * *@dst is released and set to NULL.
 */
static int ip6_dst_lookup_tail(struct sock *sk,
			       struct dst_entry **dst, struct flowi6 *fl6)
{
	struct net *net = sock_net(sk);
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	struct neighbour *n;
	struct rt6_info *rt;
#endif
	int err;

	if (*dst == NULL)
		*dst = ip6_route_output(net, sk, fl6);

	/* ip6_route_output() never returns NULL; errors are carried in
	 * the dst itself.
	 */
	if ((err = (*dst)->error))
		goto out_err_release;

	if (ipv6_addr_any(&fl6->saddr)) {
		struct rt6_info *rt = (struct rt6_info *) *dst;
		err = ip6_route_get_saddr(net, rt, &fl6->daddr,
					  sk ? inet6_sk(sk)->srcprefs : 0,
					  &fl6->saddr);
		if (err)
			goto out_err_release;
	}

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	rt = (struct rt6_info *) *dst;
	rcu_read_lock_bh();
	n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt));
	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
	rcu_read_unlock_bh();

	if (err) {
		struct inet6_ifaddr *ifp;
		struct flowi6 fl_gw6;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			/* Re-route with an unspecified daddr so the lookup
			 * resolves via the default route.
			 */
			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw6);
			if ((err = (*dst)->error))
				goto out_err_release;
		}
	}
#endif

	return 0;

out_err_release:
	if (err == -ENETUNREACH)
		IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	dst_release(*dst);
	*dst = NULL;
	return err;
}
955
/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl6: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *	Any value the caller left in *@dst is ignored: *@dst is cleared
 *	first, forcing a fresh routing decision.
 *
 *	It returns zero on success, or a standard errno code on error;
 *	on failure *@dst is left NULL.
 */
int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6)
{
	*dst = NULL;
	return ip6_dst_lookup_tail(sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);
972
973 /**
974  *      ip6_dst_lookup_flow - perform route lookup on flow with ipsec
975  *      @sk: socket which provides route info
976  *      @fl6: flow to lookup
977  *      @final_dst: final destination address for ipsec lookup
978  *      @can_sleep: we are in a sleepable context
979  *
980  *      This function performs a route lookup on the given flow.
981  *
982  *      It returns a valid dst pointer on success, or a pointer encoded
983  *      error code.
984  */
985 struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
986                                       const struct in6_addr *final_dst,
987                                       bool can_sleep)
988 {
989         struct dst_entry *dst = NULL;
990         int err;
991
992         err = ip6_dst_lookup_tail(sk, &dst, fl6);
993         if (err)
994                 return ERR_PTR(err);
995         if (final_dst)
996                 fl6->daddr = *final_dst;
997         if (can_sleep)
998                 fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;
999
1000         return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
1001 }
1002 EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1003
1004 /**
1005  *      ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
1006  *      @sk: socket which provides the dst cache and route info
1007  *      @fl6: flow to lookup
1008  *      @final_dst: final destination address for ipsec lookup
1009  *      @can_sleep: we are in a sleepable context
1010  *
1011  *      This function performs a route lookup on the given flow with the
1012  *      possibility of using the cached route in the socket if it is valid.
1013  *      It will take the socket dst lock when operating on the dst cache.
1014  *      As a result, this function can only be used in process context.
1015  *
1016  *      It returns a valid dst pointer on success, or a pointer encoded
1017  *      error code.
1018  */
1019 struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1020                                          const struct in6_addr *final_dst,
1021                                          bool can_sleep)
1022 {
1023         struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1024         int err;
1025
1026         dst = ip6_sk_dst_check(sk, dst, fl6);
1027
1028         err = ip6_dst_lookup_tail(sk, &dst, fl6);
1029         if (err)
1030                 return ERR_PTR(err);
1031         if (final_dst)
1032                 fl6->daddr = *final_dst;
1033         if (can_sleep)
1034                 fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;
1035
1036         return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
1037 }
1038 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
1039
/*
 * ip6_ufo_append_data - queue datagram data as one large UFO skb
 *
 * Used when the device advertises UDP fragmentation offload: instead
 * of fragmenting in software, keep the complete datagram in a single
 * skb and let the GSO layer/NIC split it on transmit.  On the first
 * call of a cork cycle a header skb is allocated and primed with the
 * gso parameters; subsequent data is appended to its page frags.
 * Returns 0 or a negative errno.
 */
static inline int ip6_ufo_append_data(struct sock *sk,
			int getfrag(void *from, char *to, int offset, int len,
			int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
			int transhdrlen, int mtu,unsigned int flags,
			struct rt6_info *rt)

{
	struct sk_buff *skb;
	int err;

	/* There is support for UDP large send offload by network
	 * device, so create one single skb packet containing complete
	 * udp datagram
	 */
	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
		struct frag_hdr fhdr;

		/* NOTE(review): the extra 20 bytes of headroom appear to
		 * be slack beyond the link + frag + transport headers —
		 * confirm before changing.
		 */
		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);
		if (skb == NULL)
			return err;

		/* reserve space for Hardware header */
		skb_reserve(skb, hh_len);

		/* create space for UDP/IP header */
		skb_put(skb,fragheaderlen + transhdrlen);

		/* initialize network header pointer */
		skb_reset_network_header(skb);

		/* initialize protocol header pointer */
		skb->transport_header = skb->network_header + fragheaderlen;

		skb->ip_summed = CHECKSUM_PARTIAL;
		skb->csum = 0;

		/* Specify the length of each IPv6 datagram fragment.
		 * It has to be a multiple of 8.
		 */
		skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
					     sizeof(struct frag_hdr)) & ~7;
		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
		/* Choose the fragment id up front and store it on the skb
		 * for the offload path to use.
		 */
		ipv6_select_ident(&fhdr, rt);
		skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
		__skb_queue_tail(&sk->sk_write_queue, skb);
	}

	return skb_append_datato_frags(sk, skb, getfrag, from,
				       (length - transhdrlen));
}
1093
1094 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1095                                                gfp_t gfp)
1096 {
1097         return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1098 }
1099
1100 static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1101                                                 gfp_t gfp)
1102 {
1103         return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1104 }
1105
/*
 * ip6_append_data_mtu - recompute *mtu and *maxfraglen for the next skb
 *
 * Only adjusts anything when the route is not an XFRM tunnel: the
 * first fragment must leave room for the dst's header_len, while
 * later fragments may use the device MTU (when the socket probes
 * PMTU itself, @pmtuprobe) or the path MTU otherwise.
 */
static void ip6_append_data_mtu(unsigned int *mtu,
				int *maxfraglen,
				unsigned int fragheaderlen,
				struct sk_buff *skb,
				struct rt6_info *rt,
				bool pmtuprobe)
{
	if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
		if (skb == NULL) {
			/* first fragment, reserve header_len */
			*mtu = *mtu - rt->dst.header_len;

		} else {
			/*
			 * this fragment is not first, the headers
			 * space is regarded as data space.
			 */
			*mtu = min(*mtu, pmtuprobe ?
				   rt->dst.dev->mtu :
				   dst_mtu(rt->dst.path));
		}
		/* Fragment payload must be a multiple of 8 octets, and
		 * room is reserved for the fragment header itself.
		 */
		*maxfraglen = ((*mtu - fragheaderlen) & ~7)
			      + fragheaderlen - sizeof(struct frag_hdr);
	}
}
1131
/**
 *	ip6_append_data - append payload to the corked queue of a socket
 *	@sk: socket to send on
 *	@getfrag: caller callback that copies payload bytes into the skb
 *	@from: opaque cookie handed back to @getfrag
 *	@length: number of payload bytes to append in this call
 *	@transhdrlen: transport header length; non-zero only on the first
 *		call of a cork cycle
 *	@hlimit: hop limit to save in the cork state
 *	@tclass: traffic class to save in the cork state
 *	@opt: IPv6 tx options, duplicated into the cork on the first call
 *	@fl6: flow the data will be sent on
 *	@rt: route for the flow
 *	@flags: MSG_* flags (MSG_MORE, MSG_DONTWAIT, MSG_PROBE)
 *	@dontfrag: when set, refuse to fragment UDP/RAW and report the
 *		MTU to the socket via ipv6_local_rxpmtu() instead
 *
 *	Builds (or extends) the chain of skbs on sk_write_queue that
 *	ip6_push_pending_frames() later glues together and transmits.
 *	Returns 0 on success or a negative errno.
 */
int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
	int offset, int len, int odd, struct sk_buff *skb),
	void *from, int length, int transhdrlen,
	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
	struct rt6_info *rt, unsigned int flags, int dontfrag)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct inet_cork *cork;
	struct sk_buff *skb, *skb_prev = NULL;
	unsigned int maxfraglen, fragheaderlen, mtu;
	int exthdrlen;
	int dst_exthdrlen;
	int hh_len;
	int copy;
	int err;
	int offset = 0;
	__u8 tx_flags = 0;

	if (flags&MSG_PROBE)
		return 0;
	cork = &inet->cork.base;
	/* An empty write queue means this is the first call of a cork
	 * cycle: capture options, route, flow and MTU into the cork.
	 */
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		if (opt) {
			if (WARN_ON(np->cork.opt))
				return -EINVAL;

			np->cork.opt = kzalloc(opt->tot_len, sk->sk_allocation);
			if (unlikely(np->cork.opt == NULL))
				return -ENOBUFS;

			np->cork.opt->tot_len = opt->tot_len;
			np->cork.opt->opt_flen = opt->opt_flen;
			np->cork.opt->opt_nflen = opt->opt_nflen;

			np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
							    sk->sk_allocation);
			if (opt->dst0opt && !np->cork.opt->dst0opt)
				return -ENOBUFS;

			np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
							    sk->sk_allocation);
			if (opt->dst1opt && !np->cork.opt->dst1opt)
				return -ENOBUFS;

			np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
							   sk->sk_allocation);
			if (opt->hopopt && !np->cork.opt->hopopt)
				return -ENOBUFS;

			np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
							    sk->sk_allocation);
			if (opt->srcrt && !np->cork.opt->srcrt)
				return -ENOBUFS;

			/* need source address above miyazawa*/
		}
		dst_hold(&rt->dst);
		cork->dst = &rt->dst;
		inet->cork.fl.u.ip6 = *fl6;
		np->cork.hop_limit = hlimit;
		np->cork.tclass = tclass;
		if (rt->dst.flags & DST_XFRM_TUNNEL)
			mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
			      rt->dst.dev->mtu : dst_mtu(&rt->dst);
		else
			mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
			      rt->dst.dev->mtu : dst_mtu(rt->dst.path);
		/* A socket-level frag_size, if set, caps the MTU. */
		if (np->frag_size < mtu) {
			if (np->frag_size)
				mtu = np->frag_size;
		}
		cork->fragsize = mtu;
		if (dst_allfrag(rt->dst.path))
			cork->flags |= IPCORK_ALLFRAG;
		cork->length = 0;
		exthdrlen = (opt ? opt->opt_flen : 0);
		length += exthdrlen;
		transhdrlen += exthdrlen;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	} else {
		/* Follow-up call: everything comes from the cork state. */
		rt = (struct rt6_info *)cork->dst;
		fl6 = &inet->cork.fl.u.ip6;
		opt = np->cork.opt;
		transhdrlen = 0;
		exthdrlen = 0;
		dst_exthdrlen = 0;
		mtu = cork->fragsize;
	}

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);

	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
		if (cork->length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
			ipv6_local_error(sk, EMSGSIZE, fl6, mtu-exthdrlen);
			return -EMSGSIZE;
		}
	}

	/* For UDP, check if TX timestamp is enabled */
	if (sk->sk_type == SOCK_DGRAM)
		sock_tx_timestamp(sk, &tx_flags);

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octects, in total).
	 *
	 * Note that we may need to "move" the data from the tail of
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	if ((length > mtu) && dontfrag && (sk->sk_protocol == IPPROTO_UDP ||
					   sk->sk_protocol == IPPROTO_RAW)) {
		ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen);
		return -EMSGSIZE;
	}

	skb = skb_peek_tail(&sk->sk_write_queue);
	cork->length += length;
	/* Prefer UDP fragmentation offload when the device supports it
	 * and either the payload exceeds the MTU or the queued skb
	 * already carries a frag list.
	 */
	if (((length > mtu) ||
	     (skb && skb_has_frags(skb))) &&
	    (sk->sk_protocol == IPPROTO_UDP) &&
	    (rt->dst.dev->features & NETIF_F_UFO)) {
		err = ip6_ufo_append_data(sk, getfrag, from, length,
					  hh_len, fragheaderlen,
					  transhdrlen, mtu, flags, rt);
		if (err)
			goto error;
		return 0;
	}

	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
alloc_new_skb:
			/* There's no room in the current skb */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (skb == NULL || skb_prev == NULL)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt,
						    np->pmtudisc ==
						    IPV6_PMTUDISC_PROBE);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			alloclen += dst_exthdrlen;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			alloclen += rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				/* Later fragments must not block; only
				 * allocate while under twice the sndbuf.
				 */
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(skb == NULL))
					err = -ENOBUFS;
				else {
					/* Only the initial fragment
					 * is time stamped.
					 */
					tx_flags = 0;
				}
			}
			if (skb == NULL)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->ip_summed = CHECKSUM_NONE;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			if (sk->sk_type == SOCK_DGRAM)
				skb_shinfo(skb)->tx_flags = tx_flags;

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				/* Move the overhang of the previous skb into
				 * this one, keeping both checksums correct.
				 */
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			copy = datalen - transhdrlen - fraggap;

			if (copy < 0) {
				err = -EINVAL;
				kfree_skb(skb);
				goto error;
			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG)) {
			/* No scatter-gather: copy into the linear area. */
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			/* Scatter-gather: copy into the socket's page frag
			 * and attach (or coalesce with) a skb fragment.
			 */
			int i = skb_shinfo(skb)->nr_frags;
			struct page_frag *pfrag = sk_page_frag(sk);

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}

	return 0;

error_efault:
	err = -EFAULT;
error:
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}
EXPORT_SYMBOL_GPL(ip6_append_data);
1477
1478 static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1479 {
1480         if (np->cork.opt) {
1481                 kfree(np->cork.opt->dst0opt);
1482                 kfree(np->cork.opt->dst1opt);
1483                 kfree(np->cork.opt->hopopt);
1484                 kfree(np->cork.opt->srcrt);
1485                 kfree(np->cork.opt);
1486                 np->cork.opt = NULL;
1487         }
1488
1489         if (inet->cork.base.dst) {
1490                 dst_release(inet->cork.base.dst);
1491                 inet->cork.base.dst = NULL;
1492                 inet->cork.base.flags &= ~IPCORK_ALLFRAG;
1493         }
1494         memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1495 }
1496
/*
 * ip6_push_pending_frames - coalesce the corked queue and transmit it
 * @sk: socket whose sk_write_queue holds the pending fragments
 *
 * Glues all queued skbs into one packet (tail skbs become the frag
 * list of the first), prepends the extension headers saved at cork
 * time and the IPv6 header, and hands the result to ip6_local_out().
 * The cork state is released on all paths.  Returns 0 or a negative
 * errno.
 */
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = np->cork.opt;
	struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst;
	struct flowi6 *fl6 = &inet->cork.fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;
	int err = 0;

	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	/* Chain every remaining queued skb onto the first one's frag
	 * list and fold their sizes into it; ownership moves to the
	 * head skb, so clear each tail skb's destructor and sk.
	 */
	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	if (np->pmtudisc < IPV6_PMTUDISC_DO)
		skb->local_df = 1;

	/* ipv6_push_nfrag_opts() may rewrite final_dst (it receives a
	 * pointer to the pointer), so the header's daddr is taken from
	 * it only after the options are pushed.
	 */
	*final_dst = fl6->daddr;
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	ip6_flow_hdr(hdr, np->cork.tclass, fl6->flowlabel);
	hdr->hop_limit = np->cork.hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
	}

	err = ip6_local_out(skb);
	if (err) {
		if (err > 0)
			err = net_xmit_errno(err);
		if (err)
			goto error;
	}

out:
	ip6_cork_release(inet, np);
	return err;
error:
	IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	goto out;
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1579
1580 void ip6_flush_pending_frames(struct sock *sk)
1581 {
1582         struct sk_buff *skb;
1583
1584         while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1585                 if (skb_dst(skb))
1586                         IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1587                                       IPSTATS_MIB_OUTDISCARDS);
1588                 kfree_skb(skb);
1589         }
1590
1591         ip6_cork_release(inet_sk(sk), inet6_sk(sk));
1592 }
1593 EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);