2 * Copyright (c) 2013 Nicira, Inc.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
21 #include <linux/capability.h>
22 #include <linux/module.h>
23 #include <linux/types.h>
24 #include <linux/kernel.h>
25 #include <linux/slab.h>
26 #include <linux/uaccess.h>
27 #include <linux/skbuff.h>
28 #include <linux/netdevice.h>
30 #include <linux/tcp.h>
31 #include <linux/udp.h>
32 #include <linux/if_arp.h>
33 #include <linux/mroute.h>
34 #include <linux/init.h>
35 #include <linux/in6.h>
36 #include <linux/inetdevice.h>
37 #include <linux/igmp.h>
38 #include <linux/netfilter_ipv4.h>
39 #include <linux/etherdevice.h>
40 #include <linux/if_ether.h>
41 #include <linux/if_vlan.h>
42 #include <linux/rculist.h>
43 #include <linux/err.h>
48 #include <net/protocol.h>
49 #include <net/ip_tunnels.h>
51 #include <net/checksum.h>
52 #include <net/dsfield.h>
53 #include <net/inet_ecn.h>
55 #include <net/net_namespace.h>
56 #include <net/netns/generic.h>
57 #include <net/rtnetlink.h>
60 #if IS_ENABLED(CONFIG_IPV6)
62 #include <net/ip6_fib.h>
63 #include <net/ip6_route.h>
/*
 * Reduce a tunnel key and a remote address to a hash value: the two 32-bit
 * quantities are XOR-ed together and the result is passed to hash_32().
 * Callers in this file use the result as an index into itn->tunnels[].
 * NOTE(review): the second hash_32() argument (the number of hash bits) is on
 * a line missing from this listing - confirm against the full source.
 */
66 static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
68 return hash_32((__force u32)key ^ (__force u32)remote,
/*
 * Store @dst in the cache slot @idst. The swap is done with xchg(), which
 * returns the previously cached entry in old_dst. Callers in this file pass
 * a per-CPU element of t->dst_cache as @idst.
 * NOTE(review): what is done with @saddr and with old_dst happens on lines
 * missing from this listing (presumably idst->saddr is updated and the old
 * entry is released) - confirm against the full source.
 */
72 static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
73 struct dst_entry *dst, __be32 saddr)
75 struct dst_entry *old_dst;
78 old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
/*
 * Cache @dst and @saddr for tunnel @t in the dst_cache element obtained with
 * raw_cpu_ptr(), i.e. the slot belonging to the CPU executing the call.
 */
83 static noinline void tunnel_dst_set(struct ip_tunnel *t,
84 struct dst_entry *dst, __be32 saddr)
86 __tunnel_dst_set(raw_cpu_ptr(t->dst_cache), dst, saddr);
/*
 * Clear the current CPU's cached route for @t by storing a NULL dst and a
 * zero source address through tunnel_dst_set().
 */
89 static void tunnel_dst_reset(struct ip_tunnel *t)
91 tunnel_dst_set(t, NULL, 0);
/*
 * Clear the cached route of tunnel @t on every possible CPU by storing a
 * NULL dst and zero source address in each per-CPU dst_cache element.
 * Exported for use by other modules.
 */
94 void ip_tunnel_dst_reset_all(struct ip_tunnel *t)
98 for_each_possible_cpu(i)
99 __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL, 0);
101 EXPORT_SYMBOL(ip_tunnel_dst_reset_all);
/*
 * Fetch the route cached for tunnel @t in the current CPU's dst_cache slot.
 *
 * The cached pointer is read with rcu_dereference() and a reference is taken
 * with atomic_inc_not_zero(). The entry is accepted when it is not marked
 * obsolete, or when its ops->check() callback (called with @cookie) returns
 * non-zero; in that case the cached source address is written to *@saddr.
 * The (possibly NULL) dst is returned cast to struct rtable.
 *
 * NOTE(review): the RCU read-side locking, the handling of a failed
 * reference grab and the handling of a stale entry are on lines missing from
 * this listing - confirm against the full source.
 */
103 static struct rtable *tunnel_rtable_get(struct ip_tunnel *t,
104 u32 cookie, __be32 *saddr)
106 struct ip_tunnel_dst *idst;
107 struct dst_entry *dst;
110 idst = raw_cpu_ptr(t->dst_cache);
111 dst = rcu_dereference(idst->dst);
112 if (dst && !atomic_inc_not_zero(&dst->__refcnt))
115 if (!dst->obsolete || dst->ops->check(dst, cookie)) {
116 *saddr = idst->saddr;
124 return (struct rtable *)dst;
/*
 * Decide whether a packet's key information (@flags, @key) is compatible
 * with the tunnel parameters @p.
 *
 * If the tunnel has TUNNEL_KEY set in i_flags and the packet also carries a
 * key, the result is whether @key equals p->i_key. The last return statement
 * yields true only for packets without TUNNEL_KEY.
 * NOTE(review): the result for "tunnel expects a key, packet has none" and
 * the else line guarding the final return are missing from this listing
 * (presumably false, and the key-less-tunnel branch) - confirm.
 */
127 static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
128 __be16 flags, __be32 key)
130 if (p->i_flags & TUNNEL_KEY) {
131 if (flags & TUNNEL_KEY)
132 return key == p->i_key;
134 /* key expected, none present */
137 return !(flags & TUNNEL_KEY);
140 /* Fallback tunnel: no source, no destination, no key, no options
143 We require exact key match i.e. if a key is present in packet
144 it will match only tunnel with the same key; if it is not present,
145 it will match only keyless tunnel.
147 All keyless packets, if not matched by configured keyless tunnels,
148 will match fallback tunnel.
149 Given src, dst and key, find the appropriate tunnel for input.
151 struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
152 int link, __be16 flags,
153 __be32 remote, __be32 local,
157 struct ip_tunnel *t, *cand = NULL;
158 struct hlist_head *head;
/*
 * NOTE(review): this listing omits the statements executed when a test in
 * the loops below fails or succeeds (presumably "continue", "return t" and
 * "cand = t"); the comments describe only the visible conditions.
 *
 * Passes 1 and 2 walk the bucket selected by hashing (key, remote).
 * Pass 1 tests the tunnel's saddr against @local, its daddr against @remote,
 * and IFF_UP on its device, then the key and the link index.
 */
160 hash = ip_tunnel_hash(key, remote);
161 head = &itn->tunnels[hash];
163 hlist_for_each_entry_rcu(t, head, hash_node) {
164 if (local != t->parms.iph.saddr ||
165 remote != t->parms.iph.daddr ||
166 !(t->dev->flags & IFF_UP))
169 if (!ip_tunnel_key_match(&t->parms, flags, key))
172 if (t->parms.link == link)
/* Pass 2: same bucket; tests daddr against @remote and saddr against 0. */
178 hlist_for_each_entry_rcu(t, head, hash_node) {
179 if (remote != t->parms.iph.daddr ||
180 t->parms.iph.saddr != 0 ||
181 !(t->dev->flags & IFF_UP))
184 if (!ip_tunnel_key_match(&t->parms, flags, key))
187 if (t->parms.link == link)
/*
 * Passes 3 and 4 walk the bucket selected by hashing (key, 0).
 * Pass 3's address test combines two cases: saddr equal to @local with a
 * zero daddr, or daddr equal to @local where @local is multicast.
 */
193 hash = ip_tunnel_hash(key, 0);
194 head = &itn->tunnels[hash];
196 hlist_for_each_entry_rcu(t, head, hash_node) {
197 if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
198 (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
201 if (!(t->dev->flags & IFF_UP))
204 if (!ip_tunnel_key_match(&t->parms, flags, key))
207 if (t->parms.link == link)
/* Pass 4 is bypassed entirely when the packet is flagged TUNNEL_NO_KEY. */
213 if (flags & TUNNEL_NO_KEY)
214 goto skip_key_lookup;
/* Pass 4: tests i_key against @key, and both saddr and daddr against 0. */
216 hlist_for_each_entry_rcu(t, head, hash_node) {
217 if (t->parms.i_key != key ||
218 t->parms.iph.saddr != 0 ||
219 t->parms.iph.daddr != 0 ||
220 !(t->dev->flags & IFF_UP))
223 if (t->parms.link == link)
/* Last visible choice: the fallback device, if it exists and is IFF_UP. */
233 if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
234 return netdev_priv(itn->fb_tunnel_dev);
239 EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
/*
 * Return the hash chain in itn->tunnels[] that corresponds to tunnel
 * parameters @parms.
 *
 * The hash is computed from an input key and a remote address. A non-zero,
 * non-multicast parms->iph.daddr is used as the remote address.
 * NOTE(review): the remote value used otherwise, and the statement executed
 * for a VTI tunnel without TUNNEL_KEY, are on lines missing from this listing
 * (presumably remote = 0 and i_key = 0) - confirm against the full source.
 */
241 static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
242 struct ip_tunnel_parm *parms)
246 __be32 i_key = parms->i_key;
248 if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
249 remote = parms->iph.daddr;
253 if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
256 h = ip_tunnel_hash(i_key, remote);
257 return &itn->tunnels[h];
/*
 * Insert tunnel @t at the head of the hash chain chosen by ip_bucket() for
 * its current parameters, using the RCU-aware list insertion.
 */
260 static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
262 struct hlist_head *head = ip_bucket(itn, &t->parms);
264 hlist_add_head_rcu(&t->hash_node, head);
/*
 * Unlink tunnel @t from its hash chain with hlist_del_init_rcu(), which also
 * re-initialises the node.
 */
267 static void ip_tunnel_del(struct ip_tunnel *t)
269 hlist_del_init_rcu(&t->hash_node);
/*
 * Look for an existing tunnel whose configuration matches @parms exactly.
 *
 * Only the chain selected by ip_bucket() is searched. A tunnel matches when
 * its source address, destination address, link index and device type all
 * equal the requested ones and ip_tunnel_key_match() accepts the requested
 * flags and key.
 * NOTE(review): the declaration of the "type" parameter and the statements
 * that end the loop and return the result are on lines missing from this
 * listing - confirm against the full source.
 */
272 static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
273 struct ip_tunnel_parm *parms,
276 __be32 remote = parms->iph.daddr;
277 __be32 local = parms->iph.saddr;
278 __be32 key = parms->i_key;
279 __be16 flags = parms->i_flags;
280 int link = parms->link;
281 struct ip_tunnel *t = NULL;
282 struct hlist_head *head = ip_bucket(itn, parms);
284 hlist_for_each_entry_rcu(t, head, hash_node) {
285 if (local == t->parms.iph.saddr &&
286 remote == t->parms.iph.daddr &&
287 link == t->parms.link &&
288 type == t->dev->type &&
289 ip_tunnel_key_match(&t->parms, flags, key))
/*
 * Allocate and register a tunnel network device in @net.
 *
 * The device name is either copied from parms->name or built from ops->kind
 * with a "%d" suffix appended; the length test against IFNAMSIZ - 3 is
 * presumably there to leave room for that suffix and the terminator. The
 * device is allocated with ops->priv_size and ops->setup, tied to @net and
 * @ops, and its private ip_tunnel receives a copy of *@parms before
 * register_netdevice() is called.
 *
 * NOTE(review): the conditions selecting between the two naming paths, all
 * error handling and the return statement are on lines missing from this
 * listing. No validation of the caller-supplied parms->name is visible here,
 * and strncat() is bounded by the literal's own length rather than by the
 * space left in "name" - confirm both against the full source.
 */
295 static struct net_device *__ip_tunnel_create(struct net *net,
296 const struct rtnl_link_ops *ops,
297 struct ip_tunnel_parm *parms)
300 struct ip_tunnel *tunnel;
301 struct net_device *dev;
305 strlcpy(name, parms->name, IFNAMSIZ);
307 if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
311 strlcpy(name, ops->kind, IFNAMSIZ);
312 strncat(name, "%d", 2);
316 dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
321 dev_net_set(dev, net);
323 dev->rtnl_link_ops = ops;
325 tunnel = netdev_priv(dev);
326 tunnel->parms = *parms;
329 err = register_netdevice(dev);
/*
 * Prepare the IPv4 flow key @fl4 for a tunnel route lookup: the structure is
 * zeroed, then the output interface, TOS, protocol and GRE key fields are
 * filled in from the arguments.
 * NOTE(review): the declaration of the "proto" parameter and the use of
 * @daddr and @saddr are on lines missing from this listing (presumably they
 * are stored in fl4->daddr / fl4->saddr) - confirm against the full source.
 */
341 static inline void init_tunnel_flow(struct flowi4 *fl4,
343 __be32 daddr, __be32 saddr,
344 __be32 key, __u8 tos, int oif)
346 memset(fl4, 0, sizeof(*fl4));
347 fl4->flowi4_oif = oif;
350 fl4->flowi4_tos = tos;
351 fl4->flowi4_proto = proto;
352 fl4->fl4_gre_key = key;
/*
 * Work out the MTU and headroom requirements of tunnel device @dev.
 *
 * A flow is built from the tunnel's configured IP header (protocol,
 * addresses, output key, TOS, link) and routed with ip_route_output_key();
 * the resulting route is stored in the tunnel's dst cache. Devices that are
 * not ARPHRD_ETHER get IFF_POINTOPOINT. If no underlying device was found
 * and parms.link is set, the device with that index is looked up instead.
 * dev->needed_headroom becomes t_hlen + hlen, and the local mtu (starting at
 * ETH_DATA_LEN) is reduced by dev->hard_header_len + t_hlen, where t_hlen is
 * the tunnel header length plus sizeof(struct iphdr).
 *
 * Callers in this file use the return value as the device MTU.
 * NOTE(review): the conditions guarding the route lookup, the assignment of
 * tdev from the route, any adjustment of mtu from tdev and the return
 * statement are on lines missing from this listing - confirm.
 */
355 static int ip_tunnel_bind_dev(struct net_device *dev)
357 struct net_device *tdev = NULL;
358 struct ip_tunnel *tunnel = netdev_priv(dev);
359 const struct iphdr *iph;
360 int hlen = LL_MAX_HEADER;
361 int mtu = ETH_DATA_LEN;
362 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
364 iph = &tunnel->parms.iph;
366 /* Guess output device to choose reasonable mtu and needed_headroom */
371 init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
372 iph->saddr, tunnel->parms.o_key,
373 RT_TOS(iph->tos), tunnel->parms.link);
374 rt = ip_route_output_key(tunnel->net, &fl4);
378 tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
381 if (dev->type != ARPHRD_ETHER)
382 dev->flags |= IFF_POINTOPOINT;
385 if (!tdev && tunnel->parms.link)
386 tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
389 hlen = tdev->hard_header_len + tdev->needed_headroom;
393 dev->needed_headroom = t_hlen + hlen;
394 mtu -= (dev->hard_header_len + t_hlen);
/*
 * Create a new tunnel from @parms in namespace @net.
 *
 * The device is created with the rtnl_link_ops of the fallback device (whose
 * presence is asserted with BUG_ON), its MTU is set from
 * ip_tunnel_bind_dev(), and its private ip_tunnel is added to @itn's hash.
 * A failure from __ip_tunnel_create() is passed on via ERR_CAST().
 * NOTE(review): the condition guarding the ERR_CAST() return and the final
 * return statement are on lines missing from this listing - confirm.
 */
402 static struct ip_tunnel *ip_tunnel_create(struct net *net,
403 struct ip_tunnel_net *itn,
404 struct ip_tunnel_parm *parms)
406 struct ip_tunnel *nt;
407 struct net_device *dev;
409 BUG_ON(!itn->fb_tunnel_dev);
410 dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
412 return ERR_CAST(dev);
414 dev->mtu = ip_tunnel_bind_dev(dev);
416 nt = netdev_priv(dev);
417 ip_tunnel_add(itn, nt);
/*
 * Receive path for a decapsulated packet @skb that was matched to @tunnel.
 * @tpi carries the parsed tunnel header information (flags, sequence number).
 *
 * NOTE(review): the drop/goto statements after each error counter, the
 * conditions around the ECN handling that use @log_ecn_error, and the return
 * statements are on lines missing from this listing; the comments below
 * describe only the visible checks.
 */
421 int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
422 const struct tnl_ptk_info *tpi, bool log_ecn_error)
424 struct pcpu_sw_netstats *tstats;
425 const struct iphdr *iph = ip_hdr(skb);
/* Multicast outer destination: count it and mark the packet as broadcast. */
428 #ifdef CONFIG_NET_IPGRE_BROADCAST
429 if (ipv4_is_multicast(iph->daddr)) {
430 tunnel->dev->stats.multicast++;
431 skb->pkt_type = PACKET_BROADCAST;
/*
 * The packet's TUNNEL_CSUM flag must agree with the tunnel's configured
 * i_flags; a mismatch in either direction is counted as a CRC error.
 */
435 if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
436 ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
437 tunnel->dev->stats.rx_crc_errors++;
438 tunnel->dev->stats.rx_errors++;
/*
 * With TUNNEL_SEQ configured, a packet without a sequence number, or with
 * one that is behind the expected i_seqno (signed 32-bit difference), is
 * counted as a FIFO error. Otherwise the next expected number is recorded.
 */
442 if (tunnel->parms.i_flags&TUNNEL_SEQ) {
443 if (!(tpi->flags&TUNNEL_SEQ) ||
444 (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
445 tunnel->dev->stats.rx_fifo_errors++;
446 tunnel->dev->stats.rx_errors++;
449 tunnel->i_seqno = ntohl(tpi->seq) + 1;
452 skb_reset_network_header(skb);
/* Apply ECN decapsulation using the outer header; problems are logged/counted. */
454 err = IP_ECN_decapsulate(iph, skb);
457 net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
458 &iph->saddr, iph->tos);
460 ++tunnel->dev->stats.rx_frame_errors;
461 ++tunnel->dev->stats.rx_errors;
/* Account the packet in this CPU's software statistics. */
466 tstats = this_cpu_ptr(tunnel->dev->tstats);
467 u64_stats_update_begin(&tstats->syncp);
468 tstats->rx_packets++;
469 tstats->rx_bytes += skb->len;
470 u64_stats_update_end(&tstats->syncp);
/* Scrub with the cross-netns flag set when tunnel->net differs from the device's netns. */
472 skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
/* Ethernet-type tunnel devices get their protocol from the inner Ethernet header. */
474 if (tunnel->dev->type == ARPHRD_ETHER) {
475 skb->protocol = eth_type_trans(skb, tunnel->dev);
476 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
478 skb->dev = tunnel->dev;
/* Hand the packet to the tunnel's GRO cells for delivery. */
481 gro_cells_receive(&tunnel->gro_cells, skb);
488 EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
/*
 * Compute the header length added by the encapsulation described by @e.
 *
 * The ops registered for e->type are fetched from iptun_encaps[] with
 * rcu_dereference() and, if they provide an encap_hlen callback, that
 * callback supplies the length.
 * NOTE(review): the values returned for TUNNEL_ENCAP_NONE and for an
 * out-of-range type, the RCU read-side locking and the final return are on
 * lines missing from this listing - confirm against the full source.
 */
490 static int ip_encap_hlen(struct ip_tunnel_encap *e)
492 const struct ip_tunnel_encap_ops *ops;
495 if (e->type == TUNNEL_ENCAP_NONE)
498 if (e->type >= MAX_IPTUN_ENCAP_OPS)
502 ops = rcu_dereference(iptun_encaps[e->type]);
503 if (likely(ops && ops->encap_hlen))
504 hlen = ops->encap_hlen(e);
/*
 * Table of encapsulation operations, indexed by encapsulation type (it is
 * read in this file as iptun_encaps[e->type] through rcu_dereference()).
 */
510 const struct ip_tunnel_encap_ops __rcu *
511 iptun_encaps[MAX_IPTUN_ENCAP_OPS] __read_mostly;
/*
 * Register encapsulation operations @ops under a numeric slot.
 *
 * A slot number at or above MAX_IPTUN_ENCAP_OPS is rejected. The slot is
 * claimed with cmpxchg() and the return value is derived from the negated
 * result of that call.
 * NOTE(review): the declaration of the "num" parameter, the error value for
 * an out-of-range slot, and the remaining cmpxchg() arguments and return
 * expression are on lines missing from this listing - confirm.
 */
513 int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
516 if (num >= MAX_IPTUN_ENCAP_OPS)
519 return !cmpxchg((const struct ip_tunnel_encap_ops **)
523 EXPORT_SYMBOL(ip_tunnel_encap_add_ops);
/*
 * Unregister encapsulation operations @ops from a numeric slot.
 *
 * A slot number at or above MAX_IPTUN_ENCAP_OPS is rejected. The slot is
 * cleared with cmpxchg(..., ops, NULL); ret is 0 when the previous value was
 * @ops (i.e. the slot really held these ops) and -1 otherwise.
 * NOTE(review): the declaration of the "num" parameter, the error value for
 * an out-of-range slot, the cmpxchg() target, and whatever follows before
 * ret is returned are on lines missing from this listing - confirm.
 */
525 int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
530 if (num >= MAX_IPTUN_ENCAP_OPS)
533 ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
535 ops, NULL) == ops) ? 0 : -1;
541 EXPORT_SYMBOL(ip_tunnel_encap_del_ops);
/*
 * Configure the encapsulation of tunnel @t from @ipencap.
 *
 * t->encap is cleared first, the encapsulation header length is obtained
 * from ip_encap_hlen(), and then type, source port, destination port and
 * flags are copied. t->encap_hlen is set to the computed length and t->hlen
 * to the sum of the encapsulation and tunnel header lengths.
 * NOTE(review): the handling of a failing ip_encap_hlen() and the return
 * statement are on lines missing from this listing - confirm.
 */
543 int ip_tunnel_encap_setup(struct ip_tunnel *t,
544 struct ip_tunnel_encap *ipencap)
548 memset(&t->encap, 0, sizeof(t->encap));
550 hlen = ip_encap_hlen(ipencap);
554 t->encap.type = ipencap->type;
555 t->encap.sport = ipencap->sport;
556 t->encap.dport = ipencap->dport;
557 t->encap.flags = ipencap->flags;
559 t->encap_hlen = hlen;
560 t->hlen = t->encap_hlen + t->tun_hlen;
564 EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
/*
 * Apply tunnel @t's configured encapsulation to @skb.
 *
 * The ops registered for t->encap.type are fetched with rcu_dereference()
 * and, if they provide a build_header callback, it is invoked with the skb,
 * the encapsulation settings, and pointers to @protocol and @fl4; its result
 * is stored in ret. The caller in this file treats a negative result as an
 * error.
 * NOTE(review): the values returned for TUNNEL_ENCAP_NONE and for an
 * out-of-range type, the RCU read-side locking and the final return are on
 * lines missing from this listing - confirm against the full source.
 */
566 int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
567 u8 *protocol, struct flowi4 *fl4)
569 const struct ip_tunnel_encap_ops *ops;
572 if (t->encap.type == TUNNEL_ENCAP_NONE)
575 if (t->encap.type >= MAX_IPTUN_ENCAP_OPS)
579 ops = rcu_dereference(iptun_encaps[t->encap.type]);
580 if (likely(ops && ops->build_header))
581 ret = ops->build_header(skb, &t->encap, protocol, fl4);
586 EXPORT_SYMBOL(ip_tunnel_encap);
/*
 * Path-MTU handling for a packet about to be sent through tunnel @dev over
 * route @rt.
 *
 * Two ways of computing mtu are visible: from the route's MTU minus the
 * device link header, an IPv4 header and the tunnel header; or from the
 * skb's own dst MTU (falling back to dev->mtu). The value is pushed to the
 * skb's dst via its update_pmtu operation. For an IPv4 payload an ICMP
 * "fragmentation needed" message carrying mtu is sent; for an IPv6 payload
 * the route's RTAX_MTU metric may be lowered and an ICMPv6 "packet too big"
 * message is sent. pkt_size is the skb length minus tunnel and link headers.
 *
 * NOTE(review): the condition choosing between the two mtu computations
 * (presumably @df), the size comparisons completing the "if" conditions, and
 * all return statements are on lines missing from this listing - confirm.
 */
588 static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
589 struct rtable *rt, __be16 df,
590 const struct iphdr *inner_iph)
592 struct ip_tunnel *tunnel = netdev_priv(dev);
593 int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
597 mtu = dst_mtu(&rt->dst) - dev->hard_header_len
598 - sizeof(struct iphdr) - tunnel->hlen;
600 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
603 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
/* IPv4 payload: non-GSO packets with DF set may trigger ICMP_FRAG_NEEDED. */
605 if (skb->protocol == htons(ETH_P_IP)) {
606 if (!skb_is_gso(skb) &&
607 (inner_iph->frag_off & htons(IP_DF)) &&
609 memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
610 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
614 #if IS_ENABLED(CONFIG_IPV6)
615 else if (skb->protocol == htons(ETH_P_IPV6)) {
616 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
618 if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
619 mtu >= IPV6_MIN_MTU) {
620 if ((tunnel->parms.iph.daddr &&
621 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
622 rt6->rt6i_dst.plen == 128) {
623 rt6->rt6i_flags |= RTF_MODIFIED;
624 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
628 if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
630 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
/*
 * Transmit @skb through tunnel device @dev.
 * @tnl_params supplies the outer IPv4 header template (daddr, saddr, tos,
 * ttl, frag_off) and @protocol the outer IP protocol number.
 *
 * NOTE(review): many lines of this function are missing from this listing -
 * several local declarations, the conditions that select the branches below
 * (e.g. on a zero destination, tos or ttl), all goto/label lines and the
 * error exits. The comments describe only what the visible lines do.
 */
638 void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
639 const struct iphdr *tnl_params, u8 protocol)
641 struct ip_tunnel *tunnel = netdev_priv(dev);
642 const struct iphdr *inner_iph;
646 struct rtable *rt; /* Route to the other host */
647 unsigned int max_headroom; /* The extra header space needed */
652 inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
/* "connected" means the tunnel has a fixed, non-zero configured destination. */
653 connected = (tunnel->parms.iph.daddr != 0);
655 dst = tnl_params->daddr;
660 dev->stats.tx_fifo_errors++;
/* The outer destination can be derived from the inner packet's next hop. */
664 if (skb->protocol == htons(ETH_P_IP)) {
665 rt = skb_rtable(skb);
666 dst = rt_nexthop(rt, inner_iph->daddr);
668 #if IS_ENABLED(CONFIG_IPV6)
669 else if (skb->protocol == htons(ETH_P_IPV6)) {
670 const struct in6_addr *addr6;
671 struct neighbour *neigh;
672 bool do_tx_error_icmp;
675 neigh = dst_neigh_lookup(skb_dst(skb),
676 &ipv6_hdr(skb)->daddr);
680 addr6 = (const struct in6_addr *)&neigh->primary_key;
681 addr_type = ipv6_addr_type(addr6);
683 if (addr_type == IPV6_ADDR_ANY) {
684 addr6 = &ipv6_hdr(skb)->daddr;
685 addr_type = ipv6_addr_type(addr6);
688 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
689 do_tx_error_icmp = true;
691 do_tx_error_icmp = false;
692 dst = addr6->s6_addr32[3];
694 neigh_release(neigh);
695 if (do_tx_error_icmp)
/* Outer TOS: taken from the template, or from the inner IPv4/IPv6 header. */
705 tos = tnl_params->tos;
708 if (skb->protocol == htons(ETH_P_IP)) {
709 tos = inner_iph->tos;
711 } else if (skb->protocol == htons(ETH_P_IPV6)) {
712 tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
/* Build the flow key for the outer route lookup, then apply encapsulation. */
717 init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
718 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);
720 if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
/*
 * Only connected tunnels consult the cached route; a fresh lookup with
 * ip_route_output_key() and a cache update are visible below (their guarding
 * conditions are not shown). A failed lookup is counted as a carrier error.
 */
723 rt = connected ? tunnel_rtable_get(tunnel, 0, &fl4.saddr) : NULL;
726 rt = ip_route_output_key(tunnel->net, &fl4);
729 dev->stats.tx_carrier_errors++;
733 tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
/* A route that leads back to the tunnel device itself is counted as a collision. */
736 if (rt->dst.dev == dev) {
738 dev->stats.collisions++;
742 if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph)) {
/*
 * While err_count is positive and the IPTUNNEL_ERR_TIMEO window since
 * err_time has not elapsed, link failure is reported on the skb's dst;
 * otherwise the counter is cleared.
 */
747 if (tunnel->err_count > 0) {
748 if (time_before(jiffies,
749 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
752 memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
753 dst_link_failure(skb);
755 tunnel->err_count = 0;
/* Outer TTL: template value, inner TTL/hop limit, or the route's hop limit. */
758 tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
759 ttl = tnl_params->ttl;
761 if (skb->protocol == htons(ETH_P_IP))
762 ttl = inner_iph->ttl;
763 #if IS_ENABLED(CONFIG_IPV6)
764 else if (skb->protocol == htons(ETH_P_IPV6))
765 ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
768 ttl = ip4_dst_hoplimit(&rt->dst);
/* Outer DF: template value, plus the inner packet's DF bit for IPv4 payloads. */
771 df = tnl_params->frag_off;
772 if (skb->protocol == htons(ETH_P_IP))
773 df |= (inner_iph->frag_off&htons(IP_DF));
/*
 * Grow dev->needed_headroom if this route needs more than currently
 * recorded, then make sure the skb head has that much writable room.
 */
775 max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
776 + rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
777 if (max_headroom > dev->needed_headroom)
778 dev->needed_headroom = max_headroom;
780 if (skb_cow_head(skb, dev->needed_headroom)) {
782 dev->stats.tx_dropped++;
/* Send the packet and account the result in the device statistics. */
787 err = iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol,
788 tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
789 iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
793 #if IS_ENABLED(CONFIG_IPV6)
795 dst_link_failure(skb);
798 dev->stats.tx_errors++;
801 EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
/*
 * Apply new parameters @p to an existing tunnel.
 *
 * Addresses and keys are copied into t->parms; for devices that are not
 * ARPHRD_ETHER the first four bytes of dev_addr and broadcast are set from
 * the new source and destination addresses. The tunnel is (re)inserted into
 * @itn's hash, then ttl, tos and frag_off are copied. If the link index
 * changed, it is stored and ip_tunnel_bind_dev() is run again to recompute
 * the MTU. Finally the cached routes are cleared on all CPUs and a device
 * state-change notification is issued.
 *
 * NOTE(review): the declarations of the tunnel parameter "t" and of the last
 * parameter (callers pass a boolean there), the removal from the old hash
 * chain before ip_tunnel_add(), and the use of the recomputed mtu are on
 * lines missing from this listing - confirm against the full source.
 */
803 static void ip_tunnel_update(struct ip_tunnel_net *itn,
805 struct net_device *dev,
806 struct ip_tunnel_parm *p,
810 t->parms.iph.saddr = p->iph.saddr;
811 t->parms.iph.daddr = p->iph.daddr;
812 t->parms.i_key = p->i_key;
813 t->parms.o_key = p->o_key;
814 if (dev->type != ARPHRD_ETHER) {
815 memcpy(dev->dev_addr, &p->iph.saddr, 4);
816 memcpy(dev->broadcast, &p->iph.daddr, 4);
818 ip_tunnel_add(itn, t);
820 t->parms.iph.ttl = p->iph.ttl;
821 t->parms.iph.tos = p->iph.tos;
822 t->parms.iph.frag_off = p->iph.frag_off;
824 if (t->parms.link != p->link) {
827 t->parms.link = p->link;
828 mtu = ip_tunnel_bind_dev(dev);
832 ip_tunnel_dst_reset_all(t);
833 netdev_state_change(dev);
/*
 * Handle a tunnel configuration ioctl @cmd issued on @dev with parameters @p.
 *
 * Three sections are visible. The first looks up a tunnel (by parameters
 * when @dev is the fallback device) and copies its parameters back into @p.
 * The second requires CAP_NET_ADMIN in the netns's user namespace, forces
 * IP_DF into the requested frag_off, looks for a matching tunnel, and either
 * creates one (SIOCADDTUNNEL) or updates an existing one (SIOCCHGTUNNEL on a
 * non-fallback device), after checking that the point-to-point/broadcast
 * flags implied by the new destination agree with the device flags.
 * The third also requires CAP_NET_ADMIN and unregisters a device, never the
 * fallback device's own tunnel.
 *
 * NOTE(review): the switch statement and its case labels, the error codes,
 * the statements executed for key-less non-VTI parameters, and the return
 * are on lines missing from this listing - confirm against the full source.
 */
836 int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
839 struct ip_tunnel *t = netdev_priv(dev);
840 struct net *net = t->net;
841 struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);
843 BUG_ON(!itn->fb_tunnel_dev);
846 if (dev == itn->fb_tunnel_dev) {
847 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
849 t = netdev_priv(dev);
851 memcpy(p, &t->parms, sizeof(*p));
857 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
860 p->iph.frag_off |= htons(IP_DF);
861 if (!(p->i_flags & VTI_ISVTI)) {
862 if (!(p->i_flags & TUNNEL_KEY))
864 if (!(p->o_flags & TUNNEL_KEY))
868 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
870 if (cmd == SIOCADDTUNNEL) {
872 t = ip_tunnel_create(net, itn, p);
873 err = PTR_ERR_OR_ZERO(t);
880 if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
887 unsigned int nflags = 0;
889 if (ipv4_is_multicast(p->iph.daddr))
890 nflags = IFF_BROADCAST;
891 else if (p->iph.daddr)
892 nflags = IFF_POINTOPOINT;
894 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
899 t = netdev_priv(dev);
905 ip_tunnel_update(itn, t, dev, p, true);
913 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
916 if (dev == itn->fb_tunnel_dev) {
918 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
922 if (t == netdev_priv(itn->fb_tunnel_dev))
926 unregister_netdevice(dev);
937 EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
/*
 * Validate a requested MTU @new_mtu for tunnel device @dev.
 *
 * The visible upper bound is 0xFFF8 minus the device's link header length
 * and t_hlen, where t_hlen is the tunnel header length plus
 * sizeof(struct iphdr).
 * NOTE(review): the lower bound, the error return and the assignment of the
 * new MTU are on lines missing from this listing - confirm.
 */
939 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
941 struct ip_tunnel *tunnel = netdev_priv(dev);
942 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
945 new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
950 EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
/*
 * Release the resources attached to tunnel device @dev: the GRO cells, the
 * per-CPU dst cache and the per-CPU statistics. ip_tunnel_init() installs
 * this function as dev->destructor.
 * NOTE(review): any final freeing of the net_device itself would be on a
 * line missing from this listing - confirm against the full source.
 */
952 static void ip_tunnel_dev_free(struct net_device *dev)
954 struct ip_tunnel *tunnel = netdev_priv(dev);
956 gro_cells_destroy(&tunnel->gro_cells);
957 free_percpu(tunnel->dst_cache);
958 free_percpu(dev->tstats);
/*
 * Delete-link handler: unless @dev is the namespace's fallback device, the
 * tunnel is removed from the hash and the device is queued on @head for
 * unregistration. The fallback device is left untouched here.
 */
962 void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
964 struct ip_tunnel *tunnel = netdev_priv(dev);
965 struct ip_tunnel_net *itn;
967 itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
969 if (itn->fb_tunnel_dev != dev) {
970 ip_tunnel_del(netdev_priv(dev));
971 unregister_netdevice_queue(dev, head);
974 EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
/*
 * Return a network namespace associated with tunnel device @dev.
 * NOTE(review): the return statement is on a line missing from this listing
 * (presumably it returns tunnel->net) - confirm against the full source.
 */
976 struct net *ip_tunnel_get_link_net(const struct net_device *dev)
978 struct ip_tunnel *tunnel = netdev_priv(dev);
982 EXPORT_SYMBOL(ip_tunnel_get_link_net);
/*
 * Return the link index stored in the tunnel's parameters (parms.link) for
 * tunnel device @dev.
 */
984 int ip_tunnel_get_iflink(const struct net_device *dev)
986 struct ip_tunnel *tunnel = netdev_priv(dev);
988 return tunnel->parms.link;
990 EXPORT_SYMBOL(ip_tunnel_get_iflink);
/*
 * Per-namespace initialisation of the tunnel state identified by
 * @ip_tnl_net_id in @net.
 *
 * All hash chains are initialised, then a fallback device named @devname is
 * created with @ops from zeroed parameters. On success the device is marked
 * NETIF_F_NETNS_LOCAL, its MTU is computed and it is added to the hash.
 * Returns 0, or the error encoded in fb_tunnel_dev.
 *
 * NOTE(review): a line resetting fb_tunnel_dev to NULL is visible, but the
 * condition under which it (and possibly an early return) applies, and the
 * locking around device creation, are on lines missing from this listing -
 * confirm against the full source.
 */
992 int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
993 struct rtnl_link_ops *ops, char *devname)
995 struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
996 struct ip_tunnel_parm parms;
999 for (i = 0; i < IP_TNL_HASH_SIZE; i++)
1000 INIT_HLIST_HEAD(&itn->tunnels[i]);
1003 itn->fb_tunnel_dev = NULL;
1007 memset(&parms, 0, sizeof(parms));
1009 strlcpy(parms.name, devname, IFNAMSIZ);
1012 itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
1013 /* FB netdevice is special: we have one, and only one per netns.
1014 * Allowing to move it to another netns is clearly unsafe.
1016 if (!IS_ERR(itn->fb_tunnel_dev)) {
1017 itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
1018 itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
1019 ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
1023 return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
1025 EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
/*
 * Queue every tunnel device belonging to @itn on @head for unregistration.
 *
 * First, all devices in the fallback device's namespace whose rtnl_link_ops
 * equal @ops are queued. Then every hash chain is walked and tunnels whose
 * device lives in a different namespace are queued as well.
 */
1027 static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
1028 struct rtnl_link_ops *ops)
1030 struct net *net = dev_net(itn->fb_tunnel_dev);
1031 struct net_device *dev, *aux;
1034 for_each_netdev_safe(net, dev, aux)
1035 if (dev->rtnl_link_ops == ops)
1036 unregister_netdevice_queue(dev, head);
1038 for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
1039 struct ip_tunnel *t;
1040 struct hlist_node *n;
1041 struct hlist_head *thead = &itn->tunnels[h];
1043 hlist_for_each_entry_safe(t, n, thead, hash_node)
1044 /* If dev is in the same netns, it has already
1045 * been added to the list by the previous loop.
1047 if (!net_eq(dev_net(t->dev), net))
1048 unregister_netdevice_queue(t->dev, head);
/*
 * Per-namespace teardown: collect all tunnel devices of @itn on a local list
 * via ip_tunnel_destroy() and unregister them in one batch.
 * NOTE(review): the list declaration and any locking around these calls are
 * on lines missing from this listing - confirm against the full source.
 */
1052 void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
1057 ip_tunnel_destroy(itn, &list, ops);
1058 unregister_netdevice_many(&list);
1061 EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
/*
 * New-link handler for tunnel device @dev with netlink attributes @tb and
 * tunnel parameters @p.
 *
 * An existing tunnel with the same parameters and device type is checked for
 * first. The device is then registered; an ARPHRD_ETHER device without an
 * IFLA_ADDRESS attribute gets a random hardware address; the MTU is computed
 * with ip_tunnel_bind_dev(); and the tunnel is added to the hash.
 *
 * NOTE(review): the error returned when a duplicate exists, the copying of
 * @p into the tunnel, the use of the computed mtu and the return statement
 * are on lines missing from this listing - confirm against the full source.
 */
1063 int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
1064 struct ip_tunnel_parm *p)
1066 struct ip_tunnel *nt;
1067 struct net *net = dev_net(dev);
1068 struct ip_tunnel_net *itn;
1072 nt = netdev_priv(dev);
1073 itn = net_generic(net, nt->ip_tnl_net_id);
1075 if (ip_tunnel_find(itn, p, dev->type))
1080 err = register_netdevice(dev);
1084 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1085 eth_hw_addr_random(dev);
1087 mtu = ip_tunnel_bind_dev(dev);
1091 ip_tunnel_add(itn, nt);
1096 EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
/*
 * Change-link handler for tunnel device @dev with netlink attributes @tb and
 * new parameters @p.
 *
 * The fallback device is special-cased. A tunnel matching the new parameters
 * is looked up; for devices that are not ARPHRD_ETHER the
 * point-to-point/broadcast flags implied by the new destination address
 * (broadcast for multicast, point-to-point for any other non-zero address)
 * are compared with the device flags. The update itself is done by
 * ip_tunnel_update(), whose last argument is true when no IFLA_MTU attribute
 * was supplied.
 *
 * NOTE(review): the error codes, the handling of the lookup result and the
 * return statement are on lines missing from this listing - confirm.
 */
1098 int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
1099 struct ip_tunnel_parm *p)
1101 struct ip_tunnel *t;
1102 struct ip_tunnel *tunnel = netdev_priv(dev);
1103 struct net *net = tunnel->net;
1104 struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
1106 if (dev == itn->fb_tunnel_dev)
1109 t = ip_tunnel_find(itn, p, dev->type);
1117 if (dev->type != ARPHRD_ETHER) {
1118 unsigned int nflags = 0;
1120 if (ipv4_is_multicast(p->iph.daddr))
1121 nflags = IFF_BROADCAST;
1122 else if (p->iph.daddr)
1123 nflags = IFF_POINTOPOINT;
1125 if ((dev->flags ^ nflags) &
1126 (IFF_POINTOPOINT | IFF_BROADCAST))
1131 ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
1134 EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
/*
 * Device initialisation for tunnel device @dev.
 *
 * Installs ip_tunnel_dev_free() as the destructor, allocates the per-CPU
 * statistics and the per-CPU dst cache, and initialises the GRO cells. Each
 * failure path frees what was allocated before it. On success the tunnel
 * records the device's namespace and copies the device name into
 * parms.name.
 *
 * NOTE(review): the error codes, the use of the local "iph" pointer and the
 * return statement are on lines missing from this listing. The name is
 * copied with an unbounded strcpy(); presumably both buffers are IFNAMSIZ
 * bytes - confirm against the structure definitions.
 */
1136 int ip_tunnel_init(struct net_device *dev)
1138 struct ip_tunnel *tunnel = netdev_priv(dev);
1139 struct iphdr *iph = &tunnel->parms.iph;
1142 dev->destructor = ip_tunnel_dev_free;
1143 dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
1147 tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
1148 if (!tunnel->dst_cache) {
1149 free_percpu(dev->tstats);
1153 err = gro_cells_init(&tunnel->gro_cells, dev);
1155 free_percpu(tunnel->dst_cache);
1156 free_percpu(dev->tstats);
1161 tunnel->net = dev_net(dev);
1162 strcpy(tunnel->parms.name, dev->name);
1168 EXPORT_SYMBOL_GPL(ip_tunnel_init);
/*
 * Device teardown for tunnel device @dev: remove the tunnel from the hash
 * (except for the fallback device) and clear its cached routes on all CPUs.
 */
1170 void ip_tunnel_uninit(struct net_device *dev)
1172 struct ip_tunnel *tunnel = netdev_priv(dev);
1173 struct net *net = tunnel->net;
1174 struct ip_tunnel_net *itn;
1176 itn = net_generic(net, tunnel->ip_tnl_net_id);
1177 /* fb_tunnel_dev will be unregistered in net-exit call. */
1178 if (itn->fb_tunnel_dev != dev)
1179 ip_tunnel_del(netdev_priv(dev));
1181 ip_tunnel_dst_reset_all(tunnel);
1183 EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1185 /* Do least required initialization, rest of init is done in tunnel_init call */
/*
 * The only thing recorded here is @net_id, stored as the tunnel's
 * ip_tnl_net_id; other functions in this file pass that id to net_generic()
 * to find the per-namespace tunnel state.
 */
1186 void ip_tunnel_setup(struct net_device *dev, int net_id)
1188 struct ip_tunnel *tunnel = netdev_priv(dev);
1189 tunnel->ip_tnl_net_id = net_id;
1191 EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1193 MODULE_LICENSE("GPL");