ip_tunnels: extend iptunnel_xmit()
[firefly-linux-kernel-4.4.55.git] / net / ipv4 / ip_tunnel.c
1 /*
2  * Copyright (c) 2013 Nicira, Inc.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of version 2 of the GNU General Public
6  * License as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program; if not, write to the Free Software
15  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16  * 02110-1301, USA
17  */
18
19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21 #include <linux/capability.h>
22 #include <linux/module.h>
23 #include <linux/types.h>
24 #include <linux/kernel.h>
25 #include <linux/slab.h>
26 #include <linux/uaccess.h>
27 #include <linux/skbuff.h>
28 #include <linux/netdevice.h>
29 #include <linux/in.h>
30 #include <linux/tcp.h>
31 #include <linux/udp.h>
32 #include <linux/if_arp.h>
33 #include <linux/mroute.h>
34 #include <linux/init.h>
35 #include <linux/in6.h>
36 #include <linux/inetdevice.h>
37 #include <linux/igmp.h>
38 #include <linux/netfilter_ipv4.h>
39 #include <linux/etherdevice.h>
40 #include <linux/if_ether.h>
41 #include <linux/if_vlan.h>
42 #include <linux/rculist.h>
43
44 #include <net/sock.h>
45 #include <net/ip.h>
46 #include <net/icmp.h>
47 #include <net/protocol.h>
48 #include <net/ip_tunnels.h>
49 #include <net/arp.h>
50 #include <net/checksum.h>
51 #include <net/dsfield.h>
52 #include <net/inet_ecn.h>
53 #include <net/xfrm.h>
54 #include <net/net_namespace.h>
55 #include <net/netns/generic.h>
56 #include <net/rtnetlink.h>
57
58 #if IS_ENABLED(CONFIG_IPV6)
59 #include <net/ipv6.h>
60 #include <net/ip6_fib.h>
61 #include <net/ip6_route.h>
62 #endif
63
64 static unsigned int ip_tunnel_hash(struct ip_tunnel_net *itn,
65                                    __be32 key, __be32 remote)
66 {
67         return hash_32((__force u32)key ^ (__force u32)remote,
68                          IP_TNL_HASH_BITS);
69 }
70
71 /* Often modified stats are per cpu, other are shared (netdev->stats) */
/* Often modified stats are per cpu, other are shared (netdev->stats) */
struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev,
						struct rtnl_link_stats64 *tot)
{
	int i;

	/* Fold the per-cpu RX/TX counters into @tot.  Each CPU's counters
	 * are protected by a u64_stats seqcount: retry the copy whenever a
	 * writer raced with us, so we never report a torn 64-bit value on
	 * 32-bit hosts.
	 */
	for_each_possible_cpu(i) {
		const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
		u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
		unsigned int start;

		do {
			start = u64_stats_fetch_begin_bh(&tstats->syncp);
			rx_packets = tstats->rx_packets;
			tx_packets = tstats->tx_packets;
			rx_bytes = tstats->rx_bytes;
			tx_bytes = tstats->tx_bytes;
		} while (u64_stats_fetch_retry_bh(&tstats->syncp, start));

		tot->rx_packets += rx_packets;
		tot->tx_packets += tx_packets;
		tot->rx_bytes   += rx_bytes;
		tot->tx_bytes   += tx_bytes;
	}

	/* The rarely-updated error counters live in the shared netdev
	 * stats and are copied over verbatim.
	 */
	tot->multicast = dev->stats.multicast;

	tot->rx_crc_errors = dev->stats.rx_crc_errors;
	tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
	tot->rx_length_errors = dev->stats.rx_length_errors;
	tot->rx_frame_errors = dev->stats.rx_frame_errors;
	tot->rx_errors = dev->stats.rx_errors;

	tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
	tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
	tot->tx_dropped = dev->stats.tx_dropped;
	tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
	tot->tx_errors = dev->stats.tx_errors;

	tot->collisions  = dev->stats.collisions;

	return tot;
}
EXPORT_SYMBOL_GPL(ip_tunnel_get_stats64);
115
116 static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
117                                 __be16 flags, __be32 key)
118 {
119         if (p->i_flags & TUNNEL_KEY) {
120                 if (flags & TUNNEL_KEY)
121                         return key == p->i_key;
122                 else
123                         /* key expected, none present */
124                         return false;
125         } else
126                 return !(flags & TUNNEL_KEY);
127 }
128
129 /* Fallback tunnel: no source, no destination, no key, no options
130
131    Tunnel hash table:
132    We require exact key match i.e. if a key is present in packet
133    it will match only tunnel with the same key; if it is not present,
134    it will match only keyless tunnel.
135
136    All keysless packets, if not matched configured keyless tunnels
137    will match fallback tunnel.
138    Given src, dst and key, find appropriate for input tunnel.
139 */
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
				   int link, __be16 flags,
				   __be32 remote, __be32 local,
				   __be32 key)
{
	unsigned int hash;
	struct ip_tunnel *t, *cand = NULL;
	struct hlist_head *head;

	/* Pass 1: bucket hashed on (key, remote); require an exact
	 * (saddr, daddr) match.  A tunnel on the right link wins outright;
	 * otherwise remember it as a candidate.
	 */
	hash = ip_tunnel_hash(itn, key, remote);
	head = &itn->tunnels[hash];

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local != t->parms.iph.saddr ||
		    remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else
			cand = t;
	}

	/* Pass 2: same bucket, but accept tunnels with a wildcard local
	 * address (only daddr must match).
	 */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	/* Pass 3: bucket hashed with remote = 0 (tunnels configured without
	 * a unicast daddr).  Match on our local address, or on a multicast
	 * destination the tunnel is joined to.
	 */
	hash = ip_tunnel_hash(itn, key, 0);
	head = &itn->tunnels[hash];

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if ((local != t->parms.iph.saddr &&
		     (local != t->parms.iph.daddr ||
		      !ipv4_is_multicast(local))) ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	/* Pass 4: key-only match in the remote = 0 bucket, skipped when the
	 * caller explicitly asked for keyless matching.
	 */
	if (flags & TUNNEL_NO_KEY)
		goto skip_key_lookup;

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (t->parms.i_key != key ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

skip_key_lookup:
	/* Fall back to the best wrong-link candidate, then to the per-netns
	 * fallback device if it is up; otherwise no tunnel matches.
	 */
	if (cand)
		return cand;

	if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
		return netdev_priv(itn->fb_tunnel_dev);


	return NULL;
}
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
225
226 static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
227                                     struct ip_tunnel_parm *parms)
228 {
229         unsigned int h;
230         __be32 remote;
231
232         if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
233                 remote = parms->iph.daddr;
234         else
235                 remote = 0;
236
237         h = ip_tunnel_hash(itn, parms->i_key, remote);
238         return &itn->tunnels[h];
239 }
240
241 static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
242 {
243         struct hlist_head *head = ip_bucket(itn, &t->parms);
244
245         hlist_add_head_rcu(&t->hash_node, head);
246 }
247
/* Unhash a tunnel; hlist_del_init_rcu() leaves the node reinitialized so it
 * can be re-added later (see ip_tunnel_update()).
 */
static void ip_tunnel_del(struct ip_tunnel *t)
{
	hlist_del_init_rcu(&t->hash_node);
}
252
253 static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
254                                         struct ip_tunnel_parm *parms,
255                                         int type)
256 {
257         __be32 remote = parms->iph.daddr;
258         __be32 local = parms->iph.saddr;
259         __be32 key = parms->i_key;
260         int link = parms->link;
261         struct ip_tunnel *t = NULL;
262         struct hlist_head *head = ip_bucket(itn, parms);
263
264         hlist_for_each_entry_rcu(t, head, hash_node) {
265                 if (local == t->parms.iph.saddr &&
266                     remote == t->parms.iph.daddr &&
267                     key == t->parms.i_key &&
268                     link == t->parms.link &&
269                     type == t->dev->type)
270                         break;
271         }
272         return t;
273 }
274
/* Allocate and register a tunnel net_device under RTNL.
 *
 * The device name comes from @parms->name when set; otherwise it is built
 * from the link ops kind plus a "%d" template that register_netdevice()
 * expands to a unique index.  Returns the new device or an ERR_PTR().
 */
static struct net_device *__ip_tunnel_create(struct net *net,
					     const struct rtnl_link_ops *ops,
					     struct ip_tunnel_parm *parms)
{
	int err;
	struct ip_tunnel *tunnel;
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else {
		/* Need room for the kind, "%d" and the terminating NUL. */
		if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
			err = -E2BIG;
			goto failed;
		}
		strlcpy(name, ops->kind, IFNAMSIZ);
		strncat(name, "%d", 2);
	}

	ASSERT_RTNL();
	dev = alloc_netdev(ops->priv_size, name, ops->setup);
	if (!dev) {
		err = -ENOMEM;
		goto failed;
	}
	dev_net_set(dev, net);

	dev->rtnl_link_ops = ops;

	/* Stash the requested parameters before registration so the setup
	 * callbacks see a fully-initialized tunnel.
	 */
	tunnel = netdev_priv(dev);
	tunnel->parms = *parms;

	err = register_netdevice(dev);
	if (err)
		goto failed_free;

	return dev;

failed_free:
	free_netdev(dev);
failed:
	return ERR_PTR(err);
}
319
320 static inline struct rtable *ip_route_output_tunnel(struct net *net,
321                                                     struct flowi4 *fl4,
322                                                     int proto,
323                                                     __be32 daddr, __be32 saddr,
324                                                     __be32 key, __u8 tos, int oif)
325 {
326         memset(fl4, 0, sizeof(*fl4));
327         fl4->flowi4_oif = oif;
328         fl4->daddr = daddr;
329         fl4->saddr = saddr;
330         fl4->flowi4_tos = tos;
331         fl4->flowi4_proto = proto;
332         fl4->fl4_gre_key = key;
333         return ip_route_output_key(net, fl4);
334 }
335
/* Bind the tunnel to an underlying device and compute its usable MTU.
 *
 * When a fixed destination is configured, a route lookup picks the likely
 * output device; otherwise the configured link index is used.  The result
 * sizes dev->needed_headroom and the returned MTU (caller assigns it).
 */
static int ip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and needed_headroom */
	if (iph->daddr) {
		struct flowi4 fl4;
		struct rtable *rt;

		rt = ip_route_output_tunnel(dev_net(dev), &fl4,
					    tunnel->parms.iph.protocol,
					    iph->daddr, iph->saddr,
					    tunnel->parms.o_key,
					    RT_TOS(iph->tos),
					    tunnel->parms.link);
		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			ip_rt_put(rt);
		}
		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;
	}

	/* No route (or no daddr): fall back to the explicitly bound link. */
	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = tdev->mtu;
	}
	dev->iflink = tunnel->parms.link;

	/* Reserve room for our own headers on top of the lower device's. */
	dev->needed_headroom = t_hlen + hlen;
	mtu -= (dev->hard_header_len + t_hlen);

	/* 68 is the minimum IPv4 MTU required by RFC 791. */
	if (mtu < 68)
		mtu = 68;

	return mtu;
}
383
384 static struct ip_tunnel *ip_tunnel_create(struct net *net,
385                                           struct ip_tunnel_net *itn,
386                                           struct ip_tunnel_parm *parms)
387 {
388         struct ip_tunnel *nt, *fbt;
389         struct net_device *dev;
390
391         BUG_ON(!itn->fb_tunnel_dev);
392         fbt = netdev_priv(itn->fb_tunnel_dev);
393         dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
394         if (IS_ERR(dev))
395                 return NULL;
396
397         dev->mtu = ip_tunnel_bind_dev(dev);
398
399         nt = netdev_priv(dev);
400         ip_tunnel_add(itn, nt);
401         return nt;
402 }
403
/* Receive path for a decapsulated tunnel packet.
 *
 * @tpi carries the parsed tunnel header (flags, key, seq).  Validates the
 * checksum/sequence expectations against the tunnel config, strips the
 * tunnel header, decapsulates ECN, updates per-cpu stats and hands the
 * packet to the GRO cell.  Always consumes @skb; returns 0.
 */
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, bool log_ecn_error)
{
	struct pcpu_tstats *tstats;
	const struct iphdr *iph = ip_hdr(skb);
	int err;

	secpath_reset(skb);

	skb->protocol = tpi->proto;

	/* Strip the tunnel header and fix up the pulled checksum. */
	skb->mac_header = skb->network_header;
	__pskb_pull(skb, tunnel->hlen);
	skb_postpull_rcsum(skb, skb_transport_header(skb), tunnel->hlen);
#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		/* Looped back packet, drop it! */
		if (rt_is_output_route(skb_rtable(skb)))
			goto drop;
		tunnel->dev->stats.multicast++;
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	/* Checksum presence must match the tunnel configuration exactly. */
	if ((!(tpi->flags&TUNNEL_CSUM) &&  (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
	     ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		goto drop;
	}

	/* Enforce in-order delivery when sequencing is enabled; the signed
	 * difference handles sequence-number wraparound.
	 */
	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
		if (!(tpi->flags&TUNNEL_SEQ) ||
		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	/* Warning: All skb pointers will be invalidated! */
	if (tunnel->dev->type == ARPHRD_ETHER) {
		if (!pskb_may_pull(skb, ETH_HLEN)) {
			tunnel->dev->stats.rx_length_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}

		/* Re-read the outer header: pskb_may_pull may reallocate. */
		iph = ip_hdr(skb);
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	}

	skb->pkt_type = PACKET_HOST;
	__skb_tunnel_rx(skb, tunnel->dev);

	skb_reset_network_header(skb);
	/* Propagate the outer header's ECN marks to the inner packet;
	 * err > 1 means the combination is invalid and the packet must go.
	 */
	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					&iph->saddr, iph->tos);
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			goto drop;
		}
	}

	tstats = this_cpu_ptr(tunnel->dev->tstats);
	u64_stats_update_begin(&tstats->syncp);
	tstats->rx_packets++;
	tstats->rx_bytes += skb->len;
	u64_stats_update_end(&tstats->syncp);

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
488
/* Transmit path: encapsulate @skb in an outer IPv4 header and send it.
 *
 * @tnl_params is the template outer header from the tunnel configuration;
 * @protocol is the outer IP protocol number.  Handles NBMA destination
 * resolution, TOS/TTL inheritance, path-MTU enforcement for inner IPv4 and
 * IPv6 packets, then hands off to iptunnel_xmit().  Always consumes @skb.
 */
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		    const struct iphdr *tnl_params, const u8 protocol)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *inner_iph;
	struct flowi4 fl4;
	u8     tos, ttl;
	__be16 df;
	struct rtable *rt;		/* Route to the other host */
	unsigned int max_headroom;	/* The extra header space needed */
	__be32 dst;
	int mtu;
	int err;

	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);

	dst = tnl_params->daddr;
	if (dst == 0) {
		/* NBMA tunnel: no fixed destination, derive the outer daddr
		 * from the inner packet's routing information.
		 */

		if (skb_dst(skb) == NULL) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}

		if (skb->protocol == htons(ETH_P_IP)) {
			rt = skb_rtable(skb);
			dst = rt_nexthop(rt, inner_iph->daddr);
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			const struct in6_addr *addr6;
			struct neighbour *neigh;
			bool do_tx_error_icmp;
			int addr_type;

			/* Extract an IPv4-compatible address from the IPv6
			 * next hop; anything else is unroutable over this
			 * tunnel and triggers a link-failure ICMP.
			 */
			neigh = dst_neigh_lookup(skb_dst(skb),
						 &ipv6_hdr(skb)->daddr);
			if (neigh == NULL)
				goto tx_error;

			addr6 = (const struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				do_tx_error_icmp = true;
			else {
				do_tx_error_icmp = false;
				dst = addr6->s6_addr32[3];
			}
			neigh_release(neigh);
			if (do_tx_error_icmp)
				goto tx_error_icmp;
		}
#endif
		else
			goto tx_error;
	}

	/* Low bit of the configured TOS means "inherit from inner packet". */
	tos = tnl_params->tos;
	if (tos & 0x1) {
		tos &= ~0x1;
		if (skb->protocol == htons(ETH_P_IP))
			tos = inner_iph->tos;
		else if (skb->protocol == htons(ETH_P_IPV6))
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
	}

	rt = ip_route_output_tunnel(dev_net(dev), &fl4,
				    tunnel->parms.iph.protocol,
				    dst, tnl_params->saddr,
				    tunnel->parms.o_key,
				    RT_TOS(tos),
				    tunnel->parms.link);
	if (IS_ERR(rt)) {
		dev->stats.tx_carrier_errors++;
		goto tx_error;
	}
	/* A route back through ourselves would loop forever. */
	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}
	df = tnl_params->frag_off;

	/* With DF set, the usable MTU is the outer path MTU minus our own
	 * encapsulation overhead; otherwise use the inner dst (or device).
	 */
	if (df)
		mtu = dst_mtu(&rt->dst) - dev->hard_header_len
					- sizeof(struct iphdr);
	else
		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

	if (skb_dst(skb))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		df |= (inner_iph->frag_off&htons(IP_DF));

		/* Inner DF packet too big for the tunnel path: tell the
		 * sender with FRAG_NEEDED instead of fragmenting.
		 */
		if (!skb_is_gso(skb) &&
		    (inner_iph->frag_off&htons(IP_DF)) &&
		     mtu < ntohs(inner_iph->tot_len)) {
			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			ip_rt_put(rt);
			goto tx_error;
		}
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);

		/* Record the reduced MTU on host routes (or fixed-daddr
		 * tunnels) so later IPv6 senders see it.
		 */
		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
		    mtu >= IPV6_MIN_MTU) {
			if ((tunnel->parms.iph.daddr &&
			    !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
			}
		}

		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
		    mtu < skb->len) {
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			ip_rt_put(rt);
			goto tx_error;
		}
	}
#endif

	/* Propagate recent ICMP errors back to local senders for a while
	 * (err_count is armed by the tunnel's error handler).
	 */
	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;

			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	/* TTL 0 in the config means "inherit from the inner packet". */
	ttl = tnl_params->ttl;
	if (ttl == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	/* Grow headroom lazily; skb_cow_head may reallocate the header. */
	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
			+ rt->dst.header_len;
	if (max_headroom > dev->needed_headroom) {
		dev->needed_headroom = max_headroom;
		if (skb_cow_head(skb, dev->needed_headroom)) {
			dev->stats.tx_dropped++;
			dev_kfree_skb(skb);
			return;
		}
	}

	err = iptunnel_xmit(dev_net(dev), rt, skb,
			    fl4.saddr, fl4.daddr, protocol,
			    ip_tunnel_ecn_encap(tos, inner_iph, skb), ttl, df);
	iptunnel_xmit_stats(err, &dev->stats, dev->tstats);

	return;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
	dst_link_failure(skb);
#endif
tx_error:
	dev->stats.tx_errors++;
	dev_kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
672
/* Apply new parameters to an existing tunnel.
 *
 * The tunnel must be unhashed before the addresses/keys change and
 * re-hashed afterwards, because those fields determine its bucket.
 * A changed link re-binds the lower device and optionally updates the MTU.
 */
static void ip_tunnel_update(struct ip_tunnel_net *itn,
			     struct ip_tunnel *t,
			     struct net_device *dev,
			     struct ip_tunnel_parm *p,
			     bool set_mtu)
{
	ip_tunnel_del(t);
	t->parms.iph.saddr = p->iph.saddr;
	t->parms.iph.daddr = p->iph.daddr;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;
	if (dev->type != ARPHRD_ETHER) {
		/* Non-Ethernet tunnels expose the endpoints as hw addrs. */
		memcpy(dev->dev_addr, &p->iph.saddr, 4);
		memcpy(dev->broadcast, &p->iph.daddr, 4);
	}
	ip_tunnel_add(itn, t);

	/* These fields don't affect hashing and can change in place. */
	t->parms.iph.ttl = p->iph.ttl;
	t->parms.iph.tos = p->iph.tos;
	t->parms.iph.frag_off = p->iph.frag_off;

	if (t->parms.link != p->link) {
		int mtu;

		t->parms.link = p->link;
		mtu = ip_tunnel_bind_dev(dev);
		if (set_mtu)
			dev->mtu = mtu;
	}
	netdev_state_change(dev);
}
704
/* Shared SIOC{GET,ADD,CHG,DEL}TUNNEL ioctl implementation for IP tunnels.
 *
 * @p is the user-supplied parameter block (already copied in by the
 * caller); for SIOCGETTUNNEL it is filled with the tunnel's parameters.
 * Returns 0 or a negative errno.
 */
int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
	int err = 0;
	struct ip_tunnel *t;
	struct net *net = dev_net(dev);
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

	BUG_ON(!itn->fb_tunnel_dev);
	switch (cmd) {
	case SIOCGETTUNNEL:
		/* On the fallback device, look up by the given parameters;
		 * on a concrete tunnel, report that device itself.
		 */
		t = NULL;
		if (dev == itn->fb_tunnel_dev)
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
		if (t == NULL)
			t = netdev_priv(dev);
		memcpy(p, &t->parms, sizeof(*p));
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;
		/* Normalize: a TTL implies DF, and keys are only meaningful
		 * when the corresponding KEY flag is set.
		 */
		if (p->iph.ttl)
			p->iph.frag_off |= htons(IP_DF);
		if (!(p->i_flags&TUNNEL_KEY))
			p->i_key = 0;
		if (!(p->o_flags&TUNNEL_KEY))
			p->o_key = 0;

		t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);

		if (!t && (cmd == SIOCADDTUNNEL))
			t = ip_tunnel_create(net, itn, p);

		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				/* Params already belong to another device. */
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned int nflags = 0;

				/* The new destination must not change the
				 * device's broadcast/point-to-point nature.
				 */
				if (ipv4_is_multicast(p->iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p->iph.daddr)
					nflags = IFF_POINTOPOINT;

				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}

				t = netdev_priv(dev);
			}
		}

		if (t) {
			err = 0;
			ip_tunnel_update(itn, t, dev, p, true);
		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;

		if (dev == itn->fb_tunnel_dev) {
			/* Deleting via the fallback device: resolve the
			 * target tunnel, but never the fallback itself.
			 */
			err = -ENOENT;
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (t == NULL)
				goto done;
			err = -EPERM;
			if (t == netdev_priv(itn->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
798
799 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
800 {
801         struct ip_tunnel *tunnel = netdev_priv(dev);
802         int t_hlen = tunnel->hlen + sizeof(struct iphdr);
803
804         if (new_mtu < 68 ||
805             new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
806                 return -EINVAL;
807         dev->mtu = new_mtu;
808         return 0;
809 }
810 EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
811
/* net_device destructor: release the GRO cells and per-cpu stats that the
 * tunnel setup allocated, then free the device itself.
 */
static void ip_tunnel_dev_free(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	gro_cells_destroy(&tunnel->gro_cells);
	free_percpu(dev->tstats);
	free_netdev(dev);
}
820
821 void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
822 {
823         struct net *net = dev_net(dev);
824         struct ip_tunnel *tunnel = netdev_priv(dev);
825         struct ip_tunnel_net *itn;
826
827         itn = net_generic(net, tunnel->ip_tnl_net_id);
828
829         if (itn->fb_tunnel_dev != dev) {
830                 ip_tunnel_del(netdev_priv(dev));
831                 unregister_netdevice_queue(dev, head);
832         }
833 }
834 EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
835
836 int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
837                                   struct rtnl_link_ops *ops, char *devname)
838 {
839         struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
840         struct ip_tunnel_parm parms;
841
842         itn->tunnels = kzalloc(IP_TNL_HASH_SIZE * sizeof(struct hlist_head), GFP_KERNEL);
843         if (!itn->tunnels)
844                 return -ENOMEM;
845
846         if (!ops) {
847                 itn->fb_tunnel_dev = NULL;
848                 return 0;
849         }
850         memset(&parms, 0, sizeof(parms));
851         if (devname)
852                 strlcpy(parms.name, devname, IFNAMSIZ);
853
854         rtnl_lock();
855         itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
856         rtnl_unlock();
857         if (IS_ERR(itn->fb_tunnel_dev)) {
858                 kfree(itn->tunnels);
859                 return PTR_ERR(itn->fb_tunnel_dev);
860         }
861
862         return 0;
863 }
864 EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
865
866 static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head)
867 {
868         int h;
869
870         for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
871                 struct ip_tunnel *t;
872                 struct hlist_node *n;
873                 struct hlist_head *thead = &itn->tunnels[h];
874
875                 hlist_for_each_entry_safe(t, n, thead, hash_node)
876                         unregister_netdevice_queue(t->dev, head);
877         }
878         if (itn->fb_tunnel_dev)
879                 unregister_netdevice_queue(itn->fb_tunnel_dev, head);
880 }
881
/* ip_tunnel_delete_net - per-net exit for a tunnel type.
 *
 * Collects every tunnel device (including the fallback device) on a
 * local list under RTNL, unregisters them in one batch so netdev
 * notifications coalesce, then frees the hash table allocated by
 * ip_tunnel_init_net().
 */
void ip_tunnel_delete_net(struct ip_tunnel_net *itn)
{
	LIST_HEAD(list);

	rtnl_lock();
	ip_tunnel_destroy(itn, &list);
	unregister_netdevice_many(&list);
	rtnl_unlock();
	kfree(itn->tunnels);
}
EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
893
/* ip_tunnel_newlink - rtnl newlink handler shared by tunnel drivers.
 *
 * Rejects creation if a tunnel with identical parameters already
 * exists in this net, otherwise registers @dev, gives Ethernet-style
 * tunnels a random MAC when userspace supplied none, binds the device
 * to its route/underlay to size the MTU (honoring an explicit
 * IFLA_MTU), and inserts the tunnel into the per-net hash.
 *
 * Returns 0 on success or a negative errno from register_netdevice().
 * Caller holds RTNL.
 */
int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
		      struct ip_tunnel_parm *p)
{
	struct ip_tunnel *nt;
	struct net *net = dev_net(dev);
	struct ip_tunnel_net *itn;
	int mtu;
	int err;

	nt = netdev_priv(dev);
	itn = net_generic(net, nt->ip_tnl_net_id);

	/* Refuse duplicates: same parms + same ARPHRD type. */
	if (ip_tunnel_find(itn, p, dev->type))
		return -EEXIST;

	nt->parms = *p;
	err = register_netdevice(dev);
	if (err)
		goto out;

	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
		eth_hw_addr_random(dev);

	mtu = ip_tunnel_bind_dev(dev);
	/* An explicit IFLA_MTU from userspace wins over the bound MTU. */
	if (!tb[IFLA_MTU])
		dev->mtu = mtu;

	ip_tunnel_add(itn, nt);

out:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
927
928 int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
929                          struct ip_tunnel_parm *p)
930 {
931         struct ip_tunnel *t, *nt;
932         struct net *net = dev_net(dev);
933         struct ip_tunnel *tunnel = netdev_priv(dev);
934         struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
935
936         if (dev == itn->fb_tunnel_dev)
937                 return -EINVAL;
938
939         nt = netdev_priv(dev);
940
941         t = ip_tunnel_find(itn, p, dev->type);
942
943         if (t) {
944                 if (t->dev != dev)
945                         return -EEXIST;
946         } else {
947                 t = nt;
948
949                 if (dev->type != ARPHRD_ETHER) {
950                         unsigned int nflags = 0;
951
952                         if (ipv4_is_multicast(p->iph.daddr))
953                                 nflags = IFF_BROADCAST;
954                         else if (p->iph.daddr)
955                                 nflags = IFF_POINTOPOINT;
956
957                         if ((dev->flags ^ nflags) &
958                             (IFF_POINTOPOINT | IFF_BROADCAST))
959                                 return -EINVAL;
960                 }
961         }
962
963         ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
964         return 0;
965 }
966 EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
967
968 int ip_tunnel_init(struct net_device *dev)
969 {
970         struct ip_tunnel *tunnel = netdev_priv(dev);
971         struct iphdr *iph = &tunnel->parms.iph;
972         int err;
973
974         dev->destructor = ip_tunnel_dev_free;
975         dev->tstats = alloc_percpu(struct pcpu_tstats);
976         if (!dev->tstats)
977                 return -ENOMEM;
978
979         err = gro_cells_init(&tunnel->gro_cells, dev);
980         if (err) {
981                 free_percpu(dev->tstats);
982                 return err;
983         }
984
985         tunnel->dev = dev;
986         strcpy(tunnel->parms.name, dev->name);
987         iph->version            = 4;
988         iph->ihl                = 5;
989
990         return 0;
991 }
992 EXPORT_SYMBOL_GPL(ip_tunnel_init);
993
994 void ip_tunnel_uninit(struct net_device *dev)
995 {
996         struct net *net = dev_net(dev);
997         struct ip_tunnel *tunnel = netdev_priv(dev);
998         struct ip_tunnel_net *itn;
999
1000         itn = net_generic(net, tunnel->ip_tnl_net_id);
1001         /* fb_tunnel_dev will be unregisted in net-exit call. */
1002         if (itn->fb_tunnel_dev != dev)
1003                 ip_tunnel_del(netdev_priv(dev));
1004 }
1005 EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1006
/* Do least required initialization, rest of init is done in tunnel_init call */
/* Records which per-net registry (net_generic id) this tunnel belongs
 * to, so later calls can locate the right ip_tunnel_net.
 */
void ip_tunnel_setup(struct net_device *dev, int net_id)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	tunnel->ip_tnl_net_id = net_id;
}
EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1014
1015 MODULE_LICENSE("GPL");