net: ipv6: mld: introduce mld_{gq, ifc, dad}_stop_timer functions
[firefly-linux-kernel-4.4.55.git] / net / ipv6 / ip6_output.c
1 /*
2  *      IPv6 output functions
3  *      Linux INET6 implementation
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      Based on linux/net/ipv4/ip_output.c
9  *
10  *      This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  *
15  *      Changes:
16  *      A.N.Kuznetsov   :       airthmetics in fragmentation.
17  *                              extension headers are implemented.
18  *                              route changes now work.
19  *                              ip6_forward does not confuse sniffers.
20  *                              etc.
21  *
22  *      H. von Brand    :       Added missing #include <linux/string.h>
23  *      Imran Patel     :       frag id should be in NBO
24  *      Kazunori MIYAZAWA @USAGI
25  *                      :       add ip6_append_data and related functions
26  *                              for datagram xmit
27  */
28
29 #include <linux/errno.h>
30 #include <linux/kernel.h>
31 #include <linux/string.h>
32 #include <linux/socket.h>
33 #include <linux/net.h>
34 #include <linux/netdevice.h>
35 #include <linux/if_arp.h>
36 #include <linux/in6.h>
37 #include <linux/tcp.h>
38 #include <linux/route.h>
39 #include <linux/module.h>
40 #include <linux/slab.h>
41
42 #include <linux/netfilter.h>
43 #include <linux/netfilter_ipv6.h>
44
45 #include <net/sock.h>
46 #include <net/snmp.h>
47
48 #include <net/ipv6.h>
49 #include <net/ndisc.h>
50 #include <net/protocol.h>
51 #include <net/ip6_route.h>
52 #include <net/addrconf.h>
53 #include <net/rawv6.h>
54 #include <net/icmp.h>
55 #include <net/xfrm.h>
56 #include <net/checksum.h>
57 #include <linux/mroute6.h>
58
/*
 * Final transmit step: resolve (or create) the neighbour entry for the
 * route's next hop and hand the skb to the neighbour output path.
 * Multicast destinations may first be looped back to local listeners
 * and have their scope checked.
 *
 * Returns the neighbour output result, 0 when the packet was consumed
 * here (looped back or discarded), or -EINVAL if no neighbour entry
 * could be created.
 */
static int ip6_finish_output2(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	struct neighbour *neigh;
	struct in6_addr *nexthop;
	int ret;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		/* Loop a copy back to local listeners when the socket asks
		 * for multicast loopback and either the mrouter wants the
		 * packet (and it was not already forwarded) or a local
		 * socket has joined this group on @dev.
		 */
		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
		    ((mroute6_socket(dev_net(dev), skb) &&
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					newskb, NULL, newskb->dev,
					dev_loopback_xmit);

			/* hop_limit 0 means "deliver locally only": the
			 * loopback clone above is all that is wanted. */
			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(dev_net(dev), idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
				skb->len);

		/* Node-local (interface-local) scope multicast must never
		 * leave the node via a non-loopback device. */
		if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
		    IPV6_ADDR_SCOPE_NODELOCAL &&
		    !(dev->flags & IFF_LOOPBACK)) {
			kfree_skb(skb);
			return 0;
		}
	}

	/* Neighbour lookup is lockless under rcu_read_lock_bh(); the
	 * entry must not be used after the matching unlock. */
	rcu_read_lock_bh();
	nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
	if (unlikely(!neigh))
		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
	if (!IS_ERR(neigh)) {
		ret = dst_neigh_output(dst, neigh, skb);
		rcu_read_unlock_bh();
		return ret;
	}
	rcu_read_unlock_bh();

	IP6_INC_STATS_BH(dev_net(dst->dev),
			 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}
124
125 static int ip6_finish_output(struct sk_buff *skb)
126 {
127         if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
128             dst_allfrag(skb_dst(skb)))
129                 return ip6_fragment(skb, ip6_finish_output2);
130         else
131                 return ip6_finish_output2(skb);
132 }
133
134 int ip6_output(struct sk_buff *skb)
135 {
136         struct net_device *dev = skb_dst(skb)->dev;
137         struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
138         if (unlikely(idev->cnf.disable_ipv6)) {
139                 IP6_INC_STATS(dev_net(dev), idev,
140                               IPSTATS_MIB_OUTDISCARDS);
141                 kfree_skb(skb);
142                 return 0;
143         }
144
145         return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
146                             ip6_finish_output,
147                             !(IP6CB(skb)->flags & IP6SKB_REROUTED));
148 }
149
150 /*
151  *      xmit an sk_buff (used by TCP, SCTP and DCCP)
152  */
153
/*
 * Transmit a fully built upper-layer segment (used by TCP, SCTP and
 * DCCP): push any extension headers from @opt, build the IPv6 header
 * from @fl6/@tclass, and hand the packet to the LOCAL_OUT netfilter
 * hook.
 *
 * Returns the NF_HOOK verdict on success, -ENOBUFS if headroom
 * reallocation fails, or -EMSGSIZE when the packet exceeds the path
 * MTU and may not be sent.
 */
int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
	     struct ipv6_txoptions *opt, int tclass)
{
	struct net *net = sock_net(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl6->daddr;
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr;
	u8  proto = fl6->flowi6_proto;
	int seg_len = skb->len;
	int hlimit = -1;
	u32 mtu;

	if (opt) {
		unsigned int head_room;

		/* First: exthdrs may take lots of space (~8K for now)
		   MAX_HEADER is not enough.
		 */
		head_room = opt->opt_nflen + opt->opt_flen;
		seg_len += head_room;
		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

		if (skb_headroom(skb) < head_room) {
			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
			if (skb2 == NULL) {
				IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return -ENOBUFS;
			}
			consume_skb(skb);
			skb = skb2;
			/* Re-charge the new buffer to the socket so wmem
			 * accounting stays correct. */
			skb_set_owner_w(skb, sk);
		}
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);
		if (opt->opt_nflen)
			/* May redirect first_hop to a routing-header hop. */
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		/* Socket gave no hop limit (-1): fall back to the route's. */
		hlimit = ip6_dst_hoplimit(dst);

	ip6_flow_hdr(hdr, tclass, fl6->flowlabel);

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	hdr->saddr = fl6->saddr;
	hdr->daddr = *first_hop;

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_OUT, skb->len);
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
			       dst->dev, dst_output);
	}

	/* Over-MTU and not allowed to fragment locally: report EMSGSIZE
	 * back to the socket and drop. */
	skb->dev = dst->dev;
	ipv6_local_error(sk, EMSGSIZE, fl6, mtu);
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}
233
234 EXPORT_SYMBOL(ip6_xmit);
235
236 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
237 {
238         struct ip6_ra_chain *ra;
239         struct sock *last = NULL;
240
241         read_lock(&ip6_ra_lock);
242         for (ra = ip6_ra_chain; ra; ra = ra->next) {
243                 struct sock *sk = ra->sk;
244                 if (sk && ra->sel == sel &&
245                     (!sk->sk_bound_dev_if ||
246                      sk->sk_bound_dev_if == skb->dev->ifindex)) {
247                         if (last) {
248                                 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
249                                 if (skb2)
250                                         rawv6_rcv(last, skb2);
251                         }
252                         last = sk;
253                 }
254         }
255
256         if (last) {
257                 rawv6_rcv(last, skb);
258                 read_unlock(&ip6_ra_lock);
259                 return 1;
260         }
261         read_unlock(&ip6_ra_lock);
262         return 0;
263 }
264
265 static int ip6_forward_proxy_check(struct sk_buff *skb)
266 {
267         struct ipv6hdr *hdr = ipv6_hdr(skb);
268         u8 nexthdr = hdr->nexthdr;
269         __be16 frag_off;
270         int offset;
271
272         if (ipv6_ext_hdr(nexthdr)) {
273                 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
274                 if (offset < 0)
275                         return 0;
276         } else
277                 offset = sizeof(struct ipv6hdr);
278
279         if (nexthdr == IPPROTO_ICMPV6) {
280                 struct icmp6hdr *icmp6;
281
282                 if (!pskb_may_pull(skb, (skb_network_header(skb) +
283                                          offset + 1 - skb->data)))
284                         return 0;
285
286                 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
287
288                 switch (icmp6->icmp6_type) {
289                 case NDISC_ROUTER_SOLICITATION:
290                 case NDISC_ROUTER_ADVERTISEMENT:
291                 case NDISC_NEIGHBOUR_SOLICITATION:
292                 case NDISC_NEIGHBOUR_ADVERTISEMENT:
293                 case NDISC_REDIRECT:
294                         /* For reaction involving unicast neighbor discovery
295                          * message destined to the proxied address, pass it to
296                          * input function.
297                          */
298                         return 1;
299                 default:
300                         break;
301                 }
302         }
303
304         /*
305          * The proxying router can't forward traffic sent to a link-local
306          * address, so signal the sender and discard the packet. This
307          * behavior is clarified by the MIPv6 specification.
308          */
309         if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
310                 dst_link_failure(skb);
311                 return -1;
312         }
313
314         return 0;
315 }
316
/* Continuation after the NF_INET_FORWARD hook accepts the packet:
 * simply hand it to the destination entry's output routine. */
static inline int ip6_forward_finish(struct sk_buff *skb)
{
	return dst_output(skb);
}
321
/*
 * Forward a received IPv6 packet: run policy/sanity checks, honour
 * Router Alert delivery, enforce and decrement the hop limit, handle
 * NDP proxying, possibly emit a Redirect, enforce the egress MTU, and
 * finally pass the packet through the FORWARD netfilter hook.
 *
 * Consumes the skb on every path.  Returns the hook verdict, 0 when
 * the packet was diverted (RA chain / local input), or a negative
 * errno when it was dropped.
 */
int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	u32 mtu;

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	/* LRO-merged skbs must not be forwarded (their geometry lies). */
	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	/* Only packets unicast to us at L2 are candidates for forwarding. */
	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	skb_forward_csum(skb);

	/*
	 *	We DO NOT make any processing on
	 *	RA packets, pushing them to user level AS IS
	 *	without ane WARRANTY that application will be able
	 *	to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not end-node, so that if packet contains
	 *	AH/ESP, we cannot make anything.
	 *	Defragmentation also would be mistake, RA packets
	 *	cannot be fragmented, because there is no warranty
	 *	that different fragments will go along one path. --ANK
	 */
	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			IP6_INC_STATS(net, ip6_dst_idev(dst),
				      IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	/* xfrm6_route_forward() may have replaced the dst; re-read it. */
	dst = skb_dst(skb);

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
	 */
	if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct inet_peer *peer;
		struct rt6_info *rt;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if (rt->rt6i_flags & RTF_GATEWAY)
			target = &rt->rt6i_gateway;
		else
			target = &hdr->daddr;

		peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (inet_peer_xrlim_allow(peer, 1*HZ))
			ndisc_send_redirect(skb, target);
		if (peer)
			inet_putpeer(peer);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}

	/* RFC-mandated floor: never report an MTU below IPV6_MIN_MTU. */
	mtu = dst_mtu(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if ((!skb->local_df && skb->len > mtu && !skb_is_gso(skb)) ||
	    (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	/* skb_cow() may have reallocated the header; re-read it. */
	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}
479
480 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
481 {
482         to->pkt_type = from->pkt_type;
483         to->priority = from->priority;
484         to->protocol = from->protocol;
485         skb_dst_drop(to);
486         skb_dst_set(to, dst_clone(skb_dst(from)));
487         to->dev = from->dev;
488         to->mark = from->mark;
489
490 #ifdef CONFIG_NET_SCHED
491         to->tc_index = from->tc_index;
492 #endif
493         nf_copy(to, from);
494 #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
495         to->nf_trace = from->nf_trace;
496 #endif
497         skb_copy_secmark(to, from);
498 }
499
/*
 * Split @skb into MTU-sized IPv6 fragments and emit each one via
 * @output.  A fast path reuses an existing, well-formed frag list in
 * place; otherwise a slow path allocates and copies each fragment.
 * Consumes @skb on every path (success and failure).
 *
 * Returns 0 on success, -EMSGSIZE when fragmentation is forbidden for
 * this packet, or a negative errno from allocation / @output failure.
 */
int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info*)skb_dst(skb);
	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
	struct ipv6hdr *tmp_hdr;
	struct frag_hdr *fh;
	unsigned int mtu, hlen, left, len;
	int hroom, troom;
	__be32 frag_id = 0;
	int ptr, offset = 0, err=0;
	u8 *prevhdr, nexthdr = 0;
	struct net *net = dev_net(skb_dst(skb)->dev);

	/* hlen = length of the unfragmentable part; prevhdr points at
	 * the nexthdr byte to be rewritten to NEXTHDR_FRAGMENT. */
	hlen = ip6_find_1stfragopt(skb, &prevhdr);
	nexthdr = *prevhdr;

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb it not generated by a local socket.
	 */
	if (unlikely(!skb->local_df && skb->len > mtu) ||
		     (IP6CB(skb)->frag_max_size &&
		      IP6CB(skb)->frag_max_size > mtu)) {
		if (skb->sk && dst_allfrag(skb_dst(skb)))
			sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

		skb->dev = skb_dst(skb)->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	/* Honour a smaller per-socket IPV6_MTU, if one is set. */
	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	/* From here on, mtu is the payload budget per fragment. */
	mtu -= hlen + sizeof(struct frag_hdr);

	/* Fast path: the skb already carries a frag list whose pieces
	 * have fragment-compatible geometry — convert it in place. */
	if (skb_has_frag_list(skb)) {
		int first_len = skb_pagelen(skb);
		struct sk_buff *frag2;

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < hlen)
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			if (skb->sk) {
				/* Transfer socket memory charge to each
				 * fragment (undone in slow_path_clean). */
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = 0;
		offset = 0;
		frag = skb_shinfo(skb)->frag_list;
		skb_frag_list_init(skb);
		/* BUILD HEADER */

		*prevhdr = NEXTHDR_FRAGMENT;
		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
		if (!tmp_hdr) {
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			return -ENOMEM;
		}

		/* Insert the fragment header between the unfragmentable
		 * part and the payload of the first fragment. */
		__skb_pull(skb, hlen);
		fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
		__skb_push(skb, hlen);
		skb_reset_network_header(skb);
		memcpy(skb_network_header(skb), tmp_hdr, hlen);

		ipv6_select_ident(fh, rt);
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->frag_off = htons(IP6_MF);
		frag_id = fh->identification;

		first_len = skb_pagelen(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->len = first_len;
		ipv6_hdr(skb)->payload_len = htons(first_len -
						   sizeof(struct ipv6hdr));

		/* Extra reference; dropped via ip6_rt_put() below. */
		dst_hold(&rt->dst);

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				skb_reset_transport_header(frag);
				fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
				memcpy(skb_network_header(frag), tmp_hdr,
				       hlen);
				offset += skb->len - hlen - sizeof(struct frag_hdr);
				fh->nexthdr = nexthdr;
				fh->reserved = 0;
				fh->frag_off = htons(offset);
				if (frag->next != NULL)
					fh->frag_off |= htons(IP6_MF);
				fh->identification = frag_id;
				ipv6_hdr(frag)->payload_len =
						htons(frag->len -
						      sizeof(struct ipv6hdr));
				ip6_copy_metadata(frag, skb);
			}

			err = output(skb);
			if(!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		kfree(tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
				      IPSTATS_MIB_FRAGOKS);
			ip6_rt_put(rt);
			return 0;
		}

		/* @output failed mid-stream: free the unsent fragments. */
		while (frag) {
			skb = frag->next;
			kfree_skb(frag);
			frag = skb;
		}

		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
			      IPSTATS_MIB_FRAGFAILS);
		ip6_rt_put(rt);
		return err;

slow_path_clean:
		/* Undo the ownership transfer done above for fragments
		 * processed before the geometry check failed. */
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

slow_path:
	/* Checksum must be finalized before the payload is split. */
	if ((skb->ip_summed == CHECKSUM_PARTIAL) &&
	    skb_checksum_help(skb))
		goto fail;

	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;			/* Where to start from */

	/*
	 *	Fragment the datagram.
	 */

	*prevhdr = NEXTHDR_FRAGMENT;
	hroom = LL_RESERVED_SPACE(rt->dst.dev);
	troom = rt->dst.dev->needed_tailroom;

	/*
	 *	Keep copying data until we run out.
	 */
	while(left > 0)	{
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left)	{
			len &= ~7;
		}
		/*
		 *	Allocate buffer.
		 */

		if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
				      hroom + troom, GFP_ATOMIC)) == NULL) {
			NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip6_copy_metadata(frag, skb);
		skb_reserve(frag, hroom);
		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
		skb_reset_network_header(frag);
		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
		frag->transport_header = (frag->network_header + hlen +
					  sizeof(struct frag_hdr));

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */
		if (skb->sk)
			skb_set_owner_w(frag, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */
		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

		/*
		 *	Build fragment header.
		 */
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		if (!frag_id) {
			/* First fragment: pick the shared identification. */
			ipv6_select_ident(fh, rt);
			frag_id = fh->identification;
		} else
			fh->identification = frag_id;

		/*
		 *	Copy a block of the IP datagram.
		 */
		if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
			BUG();
		left -= len;

		fh->frag_off = htons(offset);
		if (left > 0)
			fh->frag_off |= htons(IP6_MF);
		ipv6_hdr(frag)->payload_len = htons(frag->len -
						    sizeof(struct ipv6hdr));

		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		err = output(frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	consume_skb(skb);
	return err;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}
785
786 static inline int ip6_rt_check(const struct rt6key *rt_key,
787                                const struct in6_addr *fl_addr,
788                                const struct in6_addr *addr_cache)
789 {
790         return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
791                 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
792 }
793
/*
 * ip6_sk_dst_check - validate a socket-cached dst entry against a flow.
 *
 * Returns @dst if it can still be used for @fl6; otherwise releases the
 * reference and returns NULL so the caller performs a fresh route lookup.
 * A NULL @dst passes straight through.
 */
static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  const struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt;

	if (!dst)
		goto out;

	/* A cached dst of another family can never satisfy an IPv6 flow;
	 * drop the reference and force a re-lookup. */
	if (dst->ops->family != AF_INET6) {
		dst_release(dst);
		return NULL;
	}

	rt = (struct rt6_info *)dst;
	/* Yes, checking route validity in not connected
	 * case is not very simple. Take into account,
	 * that we do not support routing by source, TOS,
	 * and MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If route was host route,
	 *    check that cached destination is current.
	 *    If it is network route, we still may
	 *    check its validity using saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save whole address now,
	 *    (because main consumer of this service
	 *    is tcp, which has not this problem),
	 *    so that the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
	    (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}
839
/*
 * ip6_dst_lookup_tail - core of the flow route lookup.
 *
 * On entry *@dst may already hold a (socket-cached) route; if it is NULL
 * a fresh routing lookup is performed for @fl6.  If the flow's source
 * address is still unspecified it is filled in from the chosen route.
 * Returns 0 on success; on failure releases *@dst, sets it to NULL and
 * returns a negative errno.
 */
static int ip6_dst_lookup_tail(struct sock *sk,
			       struct dst_entry **dst, struct flowi6 *fl6)
{
	struct net *net = sock_net(sk);
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	struct neighbour *n;
	struct rt6_info *rt;
#endif
	int err;

	if (*dst == NULL)
		*dst = ip6_route_output(net, sk, fl6);

	if ((err = (*dst)->error))
		goto out_err_release;

	if (ipv6_addr_any(&fl6->saddr)) {
		/* No source address chosen yet: derive one from the route,
		 * honouring the socket's address-selection preferences. */
		struct rt6_info *rt = (struct rt6_info *) *dst;
		err = ip6_route_get_saddr(net, rt, &fl6->daddr,
					  sk ? inet6_sk(sk)->srcprefs : 0,
					  &fl6->saddr);
		if (err)
			goto out_err_release;
	}

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	rt = (struct rt6_info *) *dst;
	rcu_read_lock_bh();
	n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt, &fl6->daddr));
	/* err doubles as a flag here: nexthop neighbour exists but is not
	 * yet VALID -> consider redirecting via the default router. */
	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
	rcu_read_unlock_bh();

	if (err) {
		struct inet6_ifaddr *ifp;
		struct flowi6 fl_gw6;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			/* An unspecified daddr makes the lookup select the
			 * default route. */
			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw6);
			if ((err = (*dst)->error))
				goto out_err_release;
		}
	}
#endif

	return 0;

out_err_release:
	if (err == -ENETUNREACH)
		IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	dst_release(*dst);
	*dst = NULL;
	return err;
}
916
917 /**
918  *      ip6_dst_lookup - perform route lookup on flow
919  *      @sk: socket which provides route info
920  *      @dst: pointer to dst_entry * for result
921  *      @fl6: flow to lookup
922  *
923  *      This function performs a route lookup on the given flow.
924  *
925  *      It returns zero on success, or a standard errno code on error.
926  */
927 int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6)
928 {
929         *dst = NULL;
930         return ip6_dst_lookup_tail(sk, dst, fl6);
931 }
932 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
933
934 /**
935  *      ip6_dst_lookup_flow - perform route lookup on flow with ipsec
936  *      @sk: socket which provides route info
937  *      @fl6: flow to lookup
938  *      @final_dst: final destination address for ipsec lookup
939  *      @can_sleep: we are in a sleepable context
940  *
941  *      This function performs a route lookup on the given flow.
942  *
943  *      It returns a valid dst pointer on success, or a pointer encoded
944  *      error code.
945  */
946 struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
947                                       const struct in6_addr *final_dst,
948                                       bool can_sleep)
949 {
950         struct dst_entry *dst = NULL;
951         int err;
952
953         err = ip6_dst_lookup_tail(sk, &dst, fl6);
954         if (err)
955                 return ERR_PTR(err);
956         if (final_dst)
957                 fl6->daddr = *final_dst;
958         if (can_sleep)
959                 fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;
960
961         return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
962 }
963 EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
964
965 /**
966  *      ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
967  *      @sk: socket which provides the dst cache and route info
968  *      @fl6: flow to lookup
969  *      @final_dst: final destination address for ipsec lookup
970  *      @can_sleep: we are in a sleepable context
971  *
972  *      This function performs a route lookup on the given flow with the
973  *      possibility of using the cached route in the socket if it is valid.
974  *      It will take the socket dst lock when operating on the dst cache.
975  *      As a result, this function can only be used in process context.
976  *
977  *      It returns a valid dst pointer on success, or a pointer encoded
978  *      error code.
979  */
980 struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
981                                          const struct in6_addr *final_dst,
982                                          bool can_sleep)
983 {
984         struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
985         int err;
986
987         dst = ip6_sk_dst_check(sk, dst, fl6);
988
989         err = ip6_dst_lookup_tail(sk, &dst, fl6);
990         if (err)
991                 return ERR_PTR(err);
992         if (final_dst)
993                 fl6->daddr = *final_dst;
994         if (can_sleep)
995                 fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;
996
997         return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
998 }
999 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
1000
/*
 * ip6_ufo_append_data - queue outgoing data as one large UFO skb.
 *
 * Used when the device supports UDP fragmentation offload: the whole
 * datagram is built as a single skb carrying page fragments, and the
 * actual IPv6 fragmentation is deferred to GSO/hardware via gso_size.
 * Returns 0 on success or a negative errno.
 */
static inline int ip6_ufo_append_data(struct sock *sk,
			int getfrag(void *from, char *to, int offset, int len,
			int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
			int transhdrlen, int mtu,unsigned int flags,
			struct rt6_info *rt)

{
	struct sk_buff *skb;
	int err;

	/* There is support for UDP large send offload by network
	 * device, so create one single skb packet containing complete
	 * udp datagram
	 */
	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);
		if (skb == NULL)
			return err;

		/* reserve space for Hardware header */
		skb_reserve(skb, hh_len);

		/* create space for UDP/IP header */
		skb_put(skb,fragheaderlen + transhdrlen);

		/* initialize network header pointer */
		skb_reset_network_header(skb);

		/* initialize protocol header pointer */
		skb->transport_header = skb->network_header + fragheaderlen;

		skb->ip_summed = CHECKSUM_PARTIAL;
		skb->csum = 0;
	}

	/* Append the payload as page fragments; only the bytes beyond the
	 * transport header come from @from here. */
	err = skb_append_datato_frags(sk,skb, getfrag, from,
				      (length - transhdrlen));
	if (!err) {
		struct frag_hdr fhdr;

		/* Specify the length of each IPv6 datagram fragment.
		 * It has to be a multiple of 8.
		 */
		skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
					     sizeof(struct frag_hdr)) & ~7;
		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
		/* Choose the fragment identification up front so every
		 * offloaded fragment shares it. */
		ipv6_select_ident(&fhdr, rt);
		skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
		__skb_queue_tail(&sk->sk_write_queue, skb);

		return 0;
	}
	/* There is not enough support do UPD LSO,
	 * so follow normal path
	 */
	/* NOTE(review): when skb was obtained via skb_peek_tail() it is
	 * still linked on sk_write_queue, yet it is freed here on failure —
	 * looks suspicious; confirm against upstream history. */
	kfree_skb(skb);

	return err;
}
1063
1064 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1065                                                gfp_t gfp)
1066 {
1067         return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1068 }
1069
1070 static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1071                                                 gfp_t gfp)
1072 {
1073         return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1074 }
1075
/*
 * ip6_append_data_mtu - recompute *mtu and *maxfraglen when starting a
 * new fragment in ip6_append_data().
 *
 * Outside an XFRM tunnel, the first fragment (@skb == NULL) reserves the
 * dst's header_len out of the MTU, while later fragments treat that
 * header space as payload; with PMTU probing the device MTU is used
 * instead of the path MTU.  *maxfraglen is re-derived so that fragment
 * payloads stay a multiple of 8 octets, leaving room for the fragment
 * header.  Inside an XFRM tunnel both values are left untouched.
 */
static void ip6_append_data_mtu(unsigned int *mtu,
				int *maxfraglen,
				unsigned int fragheaderlen,
				struct sk_buff *skb,
				struct rt6_info *rt,
				bool pmtuprobe)
{
	if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
		if (skb == NULL) {
			/* first fragment, reserve header_len */
			*mtu = *mtu - rt->dst.header_len;

		} else {
			/*
			 * this fragment is not first, the headers
			 * space is regarded as data space.
			 */
			*mtu = min(*mtu, pmtuprobe ?
				   rt->dst.dev->mtu :
				   dst_mtu(rt->dst.path));
		}
		*maxfraglen = ((*mtu - fragheaderlen) & ~7)
			      + fragheaderlen - sizeof(struct frag_hdr);
	}
}
1101
/**
 *	ip6_append_data - queue data on a (possibly corked) socket
 *	@sk: socket sending the data
 *	@getfrag: callback that copies user data into the skb
 *	@from: opaque cookie handed to @getfrag
 *	@length: number of payload bytes to append
 *	@transhdrlen: transport header length (non-zero only on first call)
 *	@hlimit: hop limit for the outgoing packets
 *	@tclass: traffic class
 *	@opt: IPv6 extension-header options (may be NULL)
 *	@fl6: flow describing the destination
 *	@rt: route to use
 *	@flags: MSG_* flags (MSG_MORE, MSG_DONTWAIT, MSG_PROBE)
 *	@dontfrag: for UDP/RAW, fail with EMSGSIZE rather than fragment
 *
 *	Builds or extends the per-socket write queue of pending fragments;
 *	ip6_push_pending_frames() later assembles and transmits them.
 *	Returns 0 on success or a negative errno.
 */
int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
	int offset, int len, int odd, struct sk_buff *skb),
	void *from, int length, int transhdrlen,
	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
	struct rt6_info *rt, unsigned int flags, int dontfrag)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct inet_cork *cork;
	struct sk_buff *skb, *skb_prev = NULL;
	unsigned int maxfraglen, fragheaderlen, mtu;
	int exthdrlen;
	int dst_exthdrlen;
	int hh_len;
	int copy;
	int err;
	int offset = 0;
	__u8 tx_flags = 0;

	/* MSG_PROBE means "do a path probe, send nothing". */
	if (flags&MSG_PROBE)
		return 0;
	cork = &inet->cork.base;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		if (opt) {
			if (WARN_ON(np->cork.opt))
				return -EINVAL;

			/* Deep-copy the options into the cork so they outlive
			 * the caller; ip6_cork_release() frees them.  Note
			 * partially-filled cork options on ENOBUFS below are
			 * cleaned up by that same release path. */
			np->cork.opt = kzalloc(opt->tot_len, sk->sk_allocation);
			if (unlikely(np->cork.opt == NULL))
				return -ENOBUFS;

			np->cork.opt->tot_len = opt->tot_len;
			np->cork.opt->opt_flen = opt->opt_flen;
			np->cork.opt->opt_nflen = opt->opt_nflen;

			np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
							    sk->sk_allocation);
			if (opt->dst0opt && !np->cork.opt->dst0opt)
				return -ENOBUFS;

			np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
							    sk->sk_allocation);
			if (opt->dst1opt && !np->cork.opt->dst1opt)
				return -ENOBUFS;

			np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
							   sk->sk_allocation);
			if (opt->hopopt && !np->cork.opt->hopopt)
				return -ENOBUFS;

			np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
							    sk->sk_allocation);
			if (opt->srcrt && !np->cork.opt->srcrt)
				return -ENOBUFS;

			/* need source address above miyazawa*/
		}
		dst_hold(&rt->dst);
		cork->dst = &rt->dst;
		inet->cork.fl.u.ip6 = *fl6;
		np->cork.hop_limit = hlimit;
		np->cork.tclass = tclass;
		/* Choose the MTU for this cork: device MTU when probing PMTU,
		 * else the (path) MTU of the route. */
		if (rt->dst.flags & DST_XFRM_TUNNEL)
			mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
			      rt->dst.dev->mtu : dst_mtu(&rt->dst);
		else
			mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
			      rt->dst.dev->mtu : dst_mtu(rt->dst.path);
		/* A user-set frag_size may shrink (never grow) the MTU. */
		if (np->frag_size < mtu) {
			if (np->frag_size)
				mtu = np->frag_size;
		}
		cork->fragsize = mtu;
		if (dst_allfrag(rt->dst.path))
			cork->flags |= IPCORK_ALLFRAG;
		cork->length = 0;
		/* On the first call the extension-header length is billed as
		 * part of both payload and transport header space. */
		exthdrlen = (opt ? opt->opt_flen : 0);
		length += exthdrlen;
		transhdrlen += exthdrlen;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	} else {
		/* Subsequent call on a corked socket: reuse the state that
		 * was captured when the cork was set up. */
		rt = (struct rt6_info *)cork->dst;
		fl6 = &inet->cork.fl.u.ip6;
		opt = np->cork.opt;
		transhdrlen = 0;
		exthdrlen = 0;
		dst_exthdrlen = 0;
		mtu = cork->fragsize;
	}

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	/* Largest on-wire fragment: payload rounded down to 8 octets, with
	 * room left for the fragment header. */
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);

	/* Reject datagrams whose total payload would exceed the 64 KiB
	 * IPv6 payload limit. */
	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
		if (cork->length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
			ipv6_local_error(sk, EMSGSIZE, fl6, mtu-exthdrlen);
			return -EMSGSIZE;
		}
	}

	/* For UDP, check if TX timestamp is enabled */
	if (sk->sk_type == SOCK_DGRAM)
		sock_tx_timestamp(sk, &tx_flags);

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octects, in total).
	 *
	 * Note that we may need to "move" the data from the tail of
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	cork->length += length;
	if (length > mtu) {
		int proto = sk->sk_protocol;
		/* IPV6_DONTFRAG: report the MTU to the app instead of
		 * fragmenting. */
		if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)){
			ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen);
			return -EMSGSIZE;
		}

		/* Oversized UDP on a UFO-capable device: hand the whole
		 * datagram off for offloaded fragmentation. */
		if (proto == IPPROTO_UDP &&
		    (rt->dst.dev->features & NETIF_F_UFO)) {

			err = ip6_ufo_append_data(sk, getfrag, from, length,
						  hh_len, fragheaderlen,
						  transhdrlen, mtu, flags, rt);
			if (err)
				goto error;
			return 0;
		}
	}

	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
alloc_new_skb:
			/* There's no room in the current skb */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (skb == NULL || skb_prev == NULL)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt,
						    np->pmtudisc ==
						    IPV6_PMTUDISC_PROBE);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			alloclen += dst_exthdrlen;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			alloclen += rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			if (transhdrlen) {
				/* First fragment: may block for memory. */
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				/* Later fragments: best-effort, bounded by
				 * twice the send buffer. */
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(skb == NULL))
					err = -ENOBUFS;
				else {
					/* Only the initial fragment
					 * is time stamped.
					 */
					tx_flags = 0;
				}
			}
			if (skb == NULL)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->ip_summed = CHECKSUM_NONE;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			if (sk->sk_type == SOCK_DGRAM)
				skb_shinfo(skb)->tx_flags = tx_flags;

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				/* Move the overhang of the previous skb into
				 * this one, fixing up both checksums. */
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			copy = datalen - transhdrlen - fraggap;

			if (copy < 0) {
				err = -EINVAL;
				kfree_skb(skb);
				goto error;
			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG)) {
			/* No scatter-gather: copy into the linear area. */
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			/* Scatter-gather: append into the socket's shared
			 * page frag, coalescing with the last frag when
			 * possible. */
			int i = skb_shinfo(skb)->nr_frags;
			struct page_frag *pfrag = sk_page_frag(sk);

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}

	return 0;

error_efault:
	err = -EFAULT;
error:
	/* Undo the optimistic accounting done before the loop. */
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}
EXPORT_SYMBOL_GPL(ip6_append_data);
1447
1448 static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1449 {
1450         if (np->cork.opt) {
1451                 kfree(np->cork.opt->dst0opt);
1452                 kfree(np->cork.opt->dst1opt);
1453                 kfree(np->cork.opt->hopopt);
1454                 kfree(np->cork.opt->srcrt);
1455                 kfree(np->cork.opt);
1456                 np->cork.opt = NULL;
1457         }
1458
1459         if (inet->cork.base.dst) {
1460                 dst_release(inet->cork.base.dst);
1461                 inet->cork.base.dst = NULL;
1462                 inet->cork.base.flags &= ~IPCORK_ALLFRAG;
1463         }
1464         memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1465 }
1466
/**
 *	ip6_push_pending_frames - assemble and transmit queued fragments
 *	@sk: socket whose pending write queue is flushed
 *
 *	Coalesces every skb queued by ip6_append_data() into one packet
 *	(trailing skbs become the frag_list of the first), prepends any
 *	extension headers and the IPv6 header, and hands the result to
 *	ip6_local_out().  The cork state is released on every exit path.
 *	Returns 0 or a negative errno.
 */
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = np->cork.opt;
	struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst;
	struct flowi6 *fl6 = &inet->cork.fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;
	int err = 0;

	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	/* Chain the remaining queued skbs onto the head skb's frag_list,
	 * transferring their byte counts; the head skb takes over the
	 * truesize accounting, so the chained skbs lose their destructor
	 * and socket back-pointer. */
	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	if (np->pmtudisc < IPV6_PMTUDISC_DO)
		skb->local_df = 1;

	/* ipv6_push_nfrag_opts() may rewrite final_dst when a routing
	 * header is present; keep the user's destination for the header. */
	*final_dst = fl6->daddr;
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	ip6_flow_hdr(hdr, np->cork.tclass, fl6->flowlabel);
	hdr->hop_limit = np->cork.hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
	}

	err = ip6_local_out(skb);
	if (err) {
		/* Positive return codes are qdisc verdicts; translate them
		 * to an errno (or success) via net_xmit_errno(). */
		if (err > 0)
			err = net_xmit_errno(err);
		if (err)
			goto error;
	}

out:
	ip6_cork_release(inet, np);
	return err;
error:
	IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	goto out;
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1549
1550 void ip6_flush_pending_frames(struct sock *sk)
1551 {
1552         struct sk_buff *skb;
1553
1554         while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1555                 if (skb_dst(skb))
1556                         IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1557                                       IPSTATS_MIB_OUTDISCARDS);
1558                 kfree_skb(skb);
1559         }
1560
1561         ip6_cork_release(inet_sk(sk), inet6_sk(sk));
1562 }
1563 EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);