/*
 * Copyright (c) 2013 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/rculist.h>
#include <linux/err.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ip_tunnels.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/udp.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#endif
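
/* Tunnels live in a per-netns hash table (struct ip_tunnel_net), whose
 * buckets are indexed by a hash over the tunnel key and the remote
 * address, computed by ip_tunnel_hash() below.
 */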
static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
{
	return hash_32((__force u32)key ^ (__force u32)remote,
		       IP_TNL_HASH_BITS);
}
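
/* Per-cpu cached route handling.  Each tunnel keeps a per-cpu
 * ip_tunnel_dst cache; the helpers below swap a new dst_entry into a
 * slot (taking a reference and releasing the old one) or clear all
 * slots when the tunnel parameters change.
 */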
static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
			     struct dst_entry *dst, __be32 saddr)
{
	struct dst_entry *old_dst;

	dst_clone(dst);
	old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
	dst_release(old_dst);
	idst->saddr = saddr;
}
static noinline void tunnel_dst_set(struct ip_tunnel *t,
			   struct dst_entry *dst, __be32 saddr)
{
	__tunnel_dst_set(raw_cpu_ptr(t->dst_cache), dst, saddr);
}

static void tunnel_dst_reset(struct ip_tunnel *t)
{
	tunnel_dst_set(t, NULL, 0);
}
void ip_tunnel_dst_reset_all(struct ip_tunnel *t)
{
	int i;

	for_each_possible_cpu(i)
		__tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL, 0);
}
EXPORT_SYMBOL(ip_tunnel_dst_reset_all);
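
/* Fetch the cached route for the current cpu under RCU.  The entry is
 * returned only if a reference can still be taken and the dst is not
 * obsolete; otherwise the slot is reset and NULL is returned so the
 * caller falls back to a full route lookup.
 */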
static struct rtable *tunnel_rtable_get(struct ip_tunnel *t,
					u32 cookie, __be32 *saddr)
{
	struct ip_tunnel_dst *idst;
	struct dst_entry *dst;

	rcu_read_lock();
	idst = raw_cpu_ptr(t->dst_cache);
	dst = rcu_dereference(idst->dst);
	if (dst && !atomic_inc_not_zero(&dst->__refcnt))
		dst = NULL;
	if (dst) {
		if (!dst->obsolete || dst->ops->check(dst, cookie)) {
			*saddr = idst->saddr;
		} else {
			tunnel_dst_reset(t);
			dst_release(dst);
			dst = NULL;
		}
	}
	rcu_read_unlock();
	return (struct rtable *)dst;
}
static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
				__be16 flags, __be32 key)
{
	if (p->i_flags & TUNNEL_KEY) {
		if (flags & TUNNEL_KEY)
			return key == p->i_key;
		else
			/* key expected, none present */
			return false;
	} else
		return !(flags & TUNNEL_KEY);
}
/* Fallback tunnel: no source, no destination, no key, no options

   Tunnel hash table:
   We require an exact key match, i.e. if a key is present in the packet
   it will match only a tunnel with the same key; if it is not present,
   it will match only a keyless tunnel.

   All keyless packets, if not matched against a configured keyless tunnel,
   will match the fallback tunnel.

   Given src, dst and key, find the appropriate tunnel for the input packet.
*/
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
				   int link, __be16 flags,
				   __be32 remote, __be32 local,
				   __be32 key)
{
	unsigned int hash;
	struct ip_tunnel *t, *cand = NULL;
	struct hlist_head *head;

	hash = ip_tunnel_hash(key, remote);
	head = &itn->tunnels[hash];

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local != t->parms.iph.saddr ||
		    remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else
			cand = t;
	}

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (remote != t->parms.iph.daddr ||
		    t->parms.iph.saddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	hash = ip_tunnel_hash(key, 0);
	head = &itn->tunnels[hash];

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
		    (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
			continue;

		if (!(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	if (flags & TUNNEL_NO_KEY)
		goto skip_key_lookup;

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (t->parms.i_key != key ||
		    t->parms.iph.saddr != 0 ||
		    t->parms.iph.daddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

skip_key_lookup:
	if (cand)
		return cand;

	if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
		return netdev_priv(itn->fb_tunnel_dev);

	return NULL;
}
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
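
/* Pick the hash bucket a tunnel belongs to.  The bucket is keyed on the
 * remote address (unless it is unset or multicast) and on i_key, except
 * for VTI tunnels configured without TUNNEL_KEY, which hash with a zero
 * key.
 */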
static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
				    struct ip_tunnel_parm *parms)
{
	unsigned int h;
	__be32 remote;
	__be32 i_key = parms->i_key;

	if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
		remote = parms->iph.daddr;
	else
		remote = 0;

	if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
		i_key = 0;

	h = ip_tunnel_hash(i_key, remote);
	return &itn->tunnels[h];
}
static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	struct hlist_head *head = ip_bucket(itn, &t->parms);

	hlist_add_head_rcu(&t->hash_node, head);
}

static void ip_tunnel_del(struct ip_tunnel *t)
{
	hlist_del_init_rcu(&t->hash_node);
}
static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
					struct ip_tunnel_parm *parms,
					int type)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	__be32 key = parms->i_key;
	__be16 flags = parms->i_flags;
	int link = parms->link;
	struct ip_tunnel *t = NULL;
	struct hlist_head *head = ip_bucket(itn, parms);

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local == t->parms.iph.saddr &&
		    remote == t->parms.iph.daddr &&
		    link == t->parms.link &&
		    type == t->dev->type &&
		    ip_tunnel_key_match(&t->parms, flags, key))
			break;
	}
	return t;
}
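
/* Allocate and register a tunnel net_device.  If no name was supplied
 * in the parameters, one is generated from ops->kind plus a "%d"
 * template (e.g. "gre%d"), which the core expands to a free index.
 */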
static struct net_device *__ip_tunnel_create(struct net *net,
					     const struct rtnl_link_ops *ops,
					     struct ip_tunnel_parm *parms)
{
	int err;
	struct ip_tunnel *tunnel;
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else {
		if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
			err = -E2BIG;
			goto failed;
		}
		strlcpy(name, ops->kind, IFNAMSIZ);
		strncat(name, "%d", 2);
	}

	ASSERT_RTNL();
	dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
	if (!dev) {
		err = -ENOMEM;
		goto failed;
	}
	dev_net_set(dev, net);

	dev->rtnl_link_ops = ops;

	tunnel = netdev_priv(dev);
	tunnel->parms = *parms;
	tunnel->net = net;

	err = register_netdevice(dev);
	if (err)
		goto failed_free;

	return dev;

failed_free:
	free_netdev(dev);
failed:
	return ERR_PTR(err);
}
static inline void init_tunnel_flow(struct flowi4 *fl4,
				    int proto,
				    __be32 daddr, __be32 saddr,
				    __be32 key, __u8 tos, int oif)
{
	memset(fl4, 0, sizeof(*fl4));
	fl4->flowi4_oif = oif;
	fl4->daddr = daddr;
	fl4->saddr = saddr;
	fl4->flowi4_tos = tos;
	fl4->flowi4_proto = proto;
	fl4->fl4_gre_key = key;
}
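
/* Guess the output device for the tunnel in order to derive a sensible
 * MTU and needed_headroom: route towards the configured destination if
 * there is one, otherwise fall back to the device bound via parms.link.
 * Returns the MTU the tunnel device should use.
 */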
static int ip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and needed_headroom */
	if (iph->daddr) {
		struct flowi4 fl4;
		struct rtable *rt;

		init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
				 iph->saddr, tunnel->parms.o_key,
				 RT_TOS(iph->tos), tunnel->parms.link);
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
			ip_rt_put(rt);
		}
		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = tdev->mtu;
	}
	dev->iflink = tunnel->parms.link;

	dev->needed_headroom = t_hlen + hlen;
	mtu -= (dev->hard_header_len + t_hlen);

	if (mtu < 68)
		mtu = 68;

	return mtu;
}
static struct ip_tunnel *ip_tunnel_create(struct net *net,
					  struct ip_tunnel_net *itn,
					  struct ip_tunnel_parm *parms)
{
	struct ip_tunnel *nt;
	struct net_device *dev;

	BUG_ON(!itn->fb_tunnel_dev);
	dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
	if (IS_ERR(dev))
		return ERR_CAST(dev);

	dev->mtu = ip_tunnel_bind_dev(dev);

	nt = netdev_priv(dev);
	ip_tunnel_add(itn, nt);
	return nt;
}
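
/* Receive path shared by the IPv4 tunnel drivers (e.g. ipip, gre, vti).
 * Validates the checksum and sequence-number flags against the tunnel
 * configuration, performs ECN decapsulation, updates per-cpu stats and
 * finally hands the packet to the tunnel device's GRO cell.
 */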
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, bool log_ecn_error)
{
	struct pcpu_sw_netstats *tstats;
	const struct iphdr *iph = ip_hdr(skb);
	int err;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		tunnel->dev->stats.multicast++;
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
	     ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		goto drop;
	}

	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
		if (!(tpi->flags&TUNNEL_SEQ) ||
		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	skb_reset_network_header(skb);

	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					&iph->saddr, iph->tos);
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			goto drop;
		}
	}

	tstats = this_cpu_ptr(tunnel->dev->tstats);
	u64_stats_update_begin(&tstats->syncp);
	tstats->rx_packets++;
	tstats->rx_bytes += skb->len;
	u64_stats_update_end(&tstats->syncp);

	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));

	if (tunnel->dev->type == ARPHRD_ETHER) {
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	} else {
		skb->dev = tunnel->dev;
	}

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
static int ip_encap_hlen(struct ip_tunnel_encap *e)
{
	switch (e->type) {
	case TUNNEL_ENCAP_NONE:
		return 0;
	case TUNNEL_ENCAP_FOU:
		return sizeof(struct udphdr);
	case TUNNEL_ENCAP_GUE:
		return sizeof(struct udphdr) + sizeof(struct guehdr);
	default:
		return -EINVAL;
	}
}
int ip_tunnel_encap_setup(struct ip_tunnel *t,
			  struct ip_tunnel_encap *ipencap)
{
	int hlen;

	memset(&t->encap, 0, sizeof(t->encap));

	hlen = ip_encap_hlen(ipencap);
	if (hlen < 0)
		return hlen;

	t->encap.type = ipencap->type;
	t->encap.sport = ipencap->sport;
	t->encap.dport = ipencap->dport;
	t->encap.flags = ipencap->flags;

	t->encap_hlen = hlen;
	t->hlen = t->encap_hlen + t->tun_hlen;

	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
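
/* Build the outer UDP header used by FOU ("foo over UDP") and GUE
 * encapsulation.  The source port is either the configured one or a
 * flow-based port from udp_flow_src_port(); for GUE a generic UDP
 * encapsulation header carrying the inner protocol is added as well.
 */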
static int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
			    size_t hdr_len, u8 *protocol, struct flowi4 *fl4)
{
	struct udphdr *uh;
	__be16 sport;
	bool csum = !!(e->flags & TUNNEL_ENCAP_FLAG_CSUM);
	int type = csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;

	skb = iptunnel_handle_offloads(skb, csum, type);

	if (IS_ERR(skb))
		return PTR_ERR(skb);

	/* Get length and hash before making space in skb */

	sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev),
					       skb, 0, 0, false);

	skb_push(skb, hdr_len);

	skb_reset_transport_header(skb);
	uh = udp_hdr(skb);

	if (e->type == TUNNEL_ENCAP_GUE) {
		struct guehdr *guehdr = (struct guehdr *)&uh[1];

		guehdr->version = 0;
		guehdr->hlen = 0;
		guehdr->flags = 0;
		guehdr->next_hdr = *protocol;
	}

	uh->dest = e->dport;
	uh->source = sport;
	uh->len = htons(skb->len);
	uh->check = 0;
	udp_set_csum(!(e->flags & TUNNEL_ENCAP_FLAG_CSUM), skb,
		     fl4->saddr, fl4->daddr, skb->len);

	*protocol = IPPROTO_UDP;

	return 0;
}
int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
		    u8 *protocol, struct flowi4 *fl4)
{
	switch (t->encap.type) {
	case TUNNEL_ENCAP_NONE:
		return 0;
	case TUNNEL_ENCAP_FOU:
	case TUNNEL_ENCAP_GUE:
		return fou_build_header(skb, &t->encap, t->encap_hlen,
					protocol, fl4);
	default:
		return -EINVAL;
	}
}
EXPORT_SYMBOL(ip_tunnel_encap);
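
/* Check whether the packet fits the path MTU of the tunnel route and,
 * if not, send the appropriate "fragmentation needed" / "packet too
 * big" ICMP error back to the sender.  Returns -E2BIG when the packet
 * must be dropped.
 */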
static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
			    struct rtable *rt, __be16 df)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
	int mtu;

	if (df)
		mtu = dst_mtu(&rt->dst) - dev->hard_header_len
					- sizeof(struct iphdr) - tunnel->hlen;
	else
		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

	if (skb_dst(skb))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		if (!skb_is_gso(skb) &&
		    (df & htons(IP_DF)) && mtu < pkt_size) {
			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			return -E2BIG;
		}
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);

		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
			   mtu >= IPV6_MIN_MTU) {
			if ((tunnel->parms.iph.daddr &&
			     !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
			}
		}

		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
		    mtu < pkt_size) {
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			return -E2BIG;
		}
	}
#endif
	return 0;
}
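
/* Common transmit path for IPv4 tunnels: resolve the outer destination
 * (including the NBMA case where it comes from the inner headers), look
 * up or reuse a cached route, enforce PMTU, derive the outer TOS, TTL
 * and DF bit from the tunnel parameters or the inner packet, and emit
 * the packet via iptunnel_xmit().
 */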
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		    const struct iphdr *tnl_params, u8 protocol)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *inner_iph;
	struct flowi4 fl4;
	u8     tos, ttl;
	__be16 df;
	struct rtable *rt;		/* Route to the other host */
	unsigned int max_headroom;	/* The extra header space needed */
	__be32 dst;
	int err;
	bool connected;

	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	connected = (tunnel->parms.iph.daddr != 0);

	dst = tnl_params->daddr;
	if (dst == 0) {
		/* NBMA tunnel */

		if (skb_dst(skb) == NULL) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}

		if (skb->protocol == htons(ETH_P_IP)) {
			rt = skb_rtable(skb);
			dst = rt_nexthop(rt, inner_iph->daddr);
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			const struct in6_addr *addr6;
			struct neighbour *neigh;
			bool do_tx_error_icmp;
			int addr_type;

			neigh = dst_neigh_lookup(skb_dst(skb),
						 &ipv6_hdr(skb)->daddr);
			if (neigh == NULL)
				goto tx_error;

			addr6 = (const struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				do_tx_error_icmp = true;
			else {
				do_tx_error_icmp = false;
				dst = addr6->s6_addr32[3];
			}
			neigh_release(neigh);
			if (do_tx_error_icmp)
				goto tx_error_icmp;
		}
#endif
		else
			goto tx_error;

		connected = false;
	}

	tos = tnl_params->tos;
	if (tos & 0x1) {
		tos &= ~0x1;
		if (skb->protocol == htons(ETH_P_IP)) {
			tos = inner_iph->tos;
			connected = false;
		} else if (skb->protocol == htons(ETH_P_IPV6)) {
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
			connected = false;
		}
	}

	init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
			 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);

	if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
		goto tx_error;

	rt = connected ? tunnel_rtable_get(tunnel, 0, &fl4.saddr) : NULL;

	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (IS_ERR(rt)) {
			dev->stats.tx_carrier_errors++;
			goto tx_error;
		}
		if (connected)
			tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
	}

	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;

			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = tnl_params->ttl;
	if (ttl == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	df = tnl_params->frag_off;
	if (skb->protocol == htons(ETH_P_IP))
		df |= (inner_iph->frag_off&htons(IP_DF));

	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
			+ rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
	if (max_headroom > dev->needed_headroom)
		dev->needed_headroom = max_headroom;

	if (skb_cow_head(skb, dev->needed_headroom)) {
		ip_rt_put(rt);
		dev->stats.tx_dropped++;
		kfree_skb(skb);
		return;
	}

	err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr, protocol,
			    tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
	iptunnel_xmit_stats(err, &dev->stats, dev->tstats);

	return;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
	dst_link_failure(skb);
#endif
tx_error:
	dev->stats.tx_errors++;
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
static void ip_tunnel_update(struct ip_tunnel_net *itn,
			     struct ip_tunnel *t,
			     struct net_device *dev,
			     struct ip_tunnel_parm *p,
			     bool set_mtu)
{
	ip_tunnel_del(t);
	t->parms.iph.saddr = p->iph.saddr;
	t->parms.iph.daddr = p->iph.daddr;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;
	if (dev->type != ARPHRD_ETHER) {
		memcpy(dev->dev_addr, &p->iph.saddr, 4);
		memcpy(dev->broadcast, &p->iph.daddr, 4);
	}
	ip_tunnel_add(itn, t);

	t->parms.iph.ttl = p->iph.ttl;
	t->parms.iph.tos = p->iph.tos;
	t->parms.iph.frag_off = p->iph.frag_off;

	if (t->parms.link != p->link) {
		int mtu;

		t->parms.link = p->link;
		mtu = ip_tunnel_bind_dev(dev);
		if (set_mtu)
			dev->mtu = mtu;
	}
	ip_tunnel_dst_reset_all(t);
	netdev_state_change(dev);
}
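
/* ioctl backend used by the tunnel drivers for SIOCGETTUNNEL,
 * SIOCADDTUNNEL, SIOCCHGTUNNEL and SIOCDELTUNNEL.  Add, change and
 * delete require CAP_NET_ADMIN in the tunnel's user namespace; the
 * fallback device can be reconfigured but never deleted here.
 */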
int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
	int err = 0;
	struct ip_tunnel *t = netdev_priv(dev);
	struct net *net = t->net;
	struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);

	BUG_ON(!itn->fb_tunnel_dev);
	switch (cmd) {
	case SIOCGETTUNNEL:
		if (dev == itn->fb_tunnel_dev) {
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (t == NULL)
				t = netdev_priv(dev);
		}
		memcpy(p, &t->parms, sizeof(*p));
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;
		if (p->iph.ttl)
			p->iph.frag_off |= htons(IP_DF);
		if (!(p->i_flags & VTI_ISVTI)) {
			if (!(p->i_flags & TUNNEL_KEY))
				p->i_key = 0;
			if (!(p->o_flags & TUNNEL_KEY))
				p->o_key = 0;
		}

		t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);

		if (cmd == SIOCADDTUNNEL) {
			if (!t) {
				t = ip_tunnel_create(net, itn, p);
				err = PTR_ERR_OR_ZERO(t);
				break;
			}

			err = -EEXIST;
			break;
		}
		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned int nflags = 0;

				if (ipv4_is_multicast(p->iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p->iph.daddr)
					nflags = IFF_POINTOPOINT;

				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}

				t = netdev_priv(dev);
			}
		}

		if (t) {
			err = 0;
			ip_tunnel_update(itn, t, dev, p, true);
		} else
			err = -ENOENT;
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;

		if (dev == itn->fb_tunnel_dev) {
			err = -ENOENT;
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (t == NULL)
				goto done;
			err = -EPERM;
			if (t == netdev_priv(itn->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	if (new_mtu < 68 ||
	    new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
		return -EINVAL;
	dev->mtu = new_mtu;
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
static void ip_tunnel_dev_free(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	gro_cells_destroy(&tunnel->gro_cells);
	free_percpu(tunnel->dst_cache);
	free_percpu(dev->tstats);
	free_netdev(dev);
}
void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn;

	itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);

	if (itn->fb_tunnel_dev != dev) {
		ip_tunnel_del(netdev_priv(dev));
		unregister_netdevice_queue(dev, head);
	}
}
EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
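
/* Per-netns initialization: set up the hash table and, when an ops is
 * given, create the fallback ("fb") tunnel device, which is pinned to
 * its namespace with NETIF_F_NETNS_LOCAL.
 */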
int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
		       struct rtnl_link_ops *ops, char *devname)
{
	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
	struct ip_tunnel_parm parms;
	unsigned int i;

	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
		INIT_HLIST_HEAD(&itn->tunnels[i]);

	if (!ops) {
		itn->fb_tunnel_dev = NULL;
		return 0;
	}

	memset(&parms, 0, sizeof(parms));
	if (devname)
		strlcpy(parms.name, devname, IFNAMSIZ);

	rtnl_lock();
	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
	/* FB netdevice is special: we have one, and only one per netns.
	 * Allowing to move it to another netns is clearly unsafe.
	 */
	if (!IS_ERR(itn->fb_tunnel_dev)) {
		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
		itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
	}
	rtnl_unlock();

	return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
			      struct rtnl_link_ops *ops)
{
	struct net *net = dev_net(itn->fb_tunnel_dev);
	struct net_device *dev, *aux;
	int h;

	for_each_netdev_safe(net, dev, aux)
		if (dev->rtnl_link_ops == ops)
			unregister_netdevice_queue(dev, head);

	for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
		struct ip_tunnel *t;
		struct hlist_node *n;
		struct hlist_head *thead = &itn->tunnels[h];

		hlist_for_each_entry_safe(t, n, thead, hash_node)
			/* If dev is in the same netns, it has already
			 * been added to the list by the previous loop.
			 */
			if (!net_eq(dev_net(t->dev), net))
				unregister_netdevice_queue(t->dev, head);
	}
}
void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
{
	LIST_HEAD(list);

	rtnl_lock();
	ip_tunnel_destroy(itn, &list, ops);
	unregister_netdevice_many(&list);
	rtnl_unlock();
}
EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
		      struct ip_tunnel_parm *p)
{
	struct ip_tunnel *nt;
	struct net *net = dev_net(dev);
	struct ip_tunnel_net *itn;
	int mtu;
	int err;

	nt = netdev_priv(dev);
	itn = net_generic(net, nt->ip_tnl_net_id);

	if (ip_tunnel_find(itn, p, dev->type))
		return -EEXIST;

	nt->net = net;
	nt->parms = *p;
	err = register_netdevice(dev);
	if (err)
		goto out;

	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
		eth_hw_addr_random(dev);

	mtu = ip_tunnel_bind_dev(dev);
	if (!tb[IFLA_MTU])
		dev->mtu = mtu;

	ip_tunnel_add(itn, nt);

out:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
			 struct ip_tunnel_parm *p)
{
	struct ip_tunnel *t;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

	if (dev == itn->fb_tunnel_dev)
		return -EINVAL;

	t = ip_tunnel_find(itn, p, dev->type);

	if (t) {
		if (t->dev != dev)
			return -EEXIST;
	} else {
		t = tunnel;

		if (dev->type != ARPHRD_ETHER) {
			unsigned int nflags = 0;

			if (ipv4_is_multicast(p->iph.daddr))
				nflags = IFF_BROADCAST;
			else if (p->iph.daddr)
				nflags = IFF_POINTOPOINT;

			if ((dev->flags ^ nflags) &
			    (IFF_POINTOPOINT | IFF_BROADCAST))
				return -EINVAL;
		}
	}

	ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
int ip_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;
	int err;

	dev->destructor	= ip_tunnel_dev_free;
	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
	if (!dev->tstats)
		return -ENOMEM;

	tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
	if (!tunnel->dst_cache) {
		free_percpu(dev->tstats);
		return -ENOMEM;
	}

	err = gro_cells_init(&tunnel->gro_cells, dev);
	if (err) {
		free_percpu(tunnel->dst_cache);
		free_percpu(dev->tstats);
		return err;
	}

	tunnel->dev = dev;
	tunnel->net = dev_net(dev);
	strcpy(tunnel->parms.name, dev->name);
	iph->version		= 4;
	iph->ihl		= 5;

	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_init);
void ip_tunnel_uninit(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn;

	itn = net_generic(net, tunnel->ip_tnl_net_id);
	/* fb_tunnel_dev will be unregistered in the net-exit call. */
	if (itn->fb_tunnel_dev != dev)
		ip_tunnel_del(netdev_priv(dev));

	ip_tunnel_dst_reset_all(tunnel);
}
EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
/* Do the least required initialization; the rest is done in the tunnel_init call */
void ip_tunnel_setup(struct net_device *dev, int net_id)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	tunnel->ip_tnl_net_id = net_id;
}
EXPORT_SYMBOL_GPL(ip_tunnel_setup);
MODULE_LICENSE("GPL");