2 * Linux NET3: GRE over IP protocol decoder.
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
13 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
15 #include <linux/capability.h>
16 #include <linux/module.h>
17 #include <linux/types.h>
18 #include <linux/kernel.h>
19 #include <linux/slab.h>
20 #include <asm/uaccess.h>
21 #include <linux/skbuff.h>
22 #include <linux/netdevice.h>
24 #include <linux/tcp.h>
25 #include <linux/udp.h>
26 #include <linux/if_arp.h>
27 #include <linux/mroute.h>
28 #include <linux/init.h>
29 #include <linux/in6.h>
30 #include <linux/inetdevice.h>
31 #include <linux/igmp.h>
32 #include <linux/netfilter_ipv4.h>
33 #include <linux/etherdevice.h>
34 #include <linux/if_ether.h>
39 #include <net/protocol.h>
40 #include <net/ip_tunnels.h>
42 #include <net/checksum.h>
43 #include <net/dsfield.h>
44 #include <net/inet_ecn.h>
46 #include <net/net_namespace.h>
47 #include <net/netns/generic.h>
48 #include <net/rtnetlink.h>
51 #if IS_ENABLED(CONFIG_IPV6)
53 #include <net/ip6_fib.h>
54 #include <net/ip6_route.h>
61 1. The most important issue is detecting local dead loops.
62 They would cause complete host lockup in transmit, which
63 would be "resolved" by stack overflow or, if queueing is enabled,
64 with infinite looping in net_bh.
66 We cannot track such dead loops during route installation,
67 it is an infeasible task. The most general solution would be
68 to keep skb->encapsulation counter (sort of local ttl),
69 and silently drop packet when it expires. It is a good
70 solution, but it supposes maintaining new variable in ALL
71 skb, even if no tunneling is used.
73 Current solution: xmit_recursion breaks dead loops. This is a percpu
74 counter, since when we enter the first ndo_xmit(), cpu migration is
75 forbidden. We force an exit if this counter reaches RECURSION_LIMIT
77 2. Networking dead loops would not kill routers, but would really
78 kill network. IP hop limit plays role of "t->recursion" in this case,
79 if we copy it from packet being encapsulated to upper header.
80 It is very good solution, but it introduces two problems:
82 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
83 do not work over tunnels.
84 - traceroute does not work. I planned to relay ICMP from tunnel,
85 so that this problem would be solved and traceroute output
86 would be even more informative. This idea appeared to be wrong:
87 only Linux complies with rfc1812 now (yes, guys, Linux is the only
88 true router now :-)), all routers (at least, in neighbourhood of mine)
89 return only 8 bytes of payload. It is the end.
91 Hence, if we want OSPF to work or traceroute to say something reasonable,
92 we should search for another solution.
94 One of them is to parse packet trying to detect inner encapsulation
95 made by our node. It is difficult or even impossible, especially,
96 taking into account fragmentation. To be short, ttl is not a solution at all.
98 Current solution: The solution was UNEXPECTEDLY SIMPLE.
99 We force DF flag on tunnels with preconfigured hop limit,
100 that is ALL. :-) Well, it does not remove the problem completely,
101 but exponential growth of network traffic is changed to linear
102 (branches, that exceed pmtu are pruned) and tunnel mtu
103 rapidly degrades to value <68, where looping stops.
104 Yes, it is not good if there exists a router in the loop,
105 which does not force DF, even when encapsulating packets have DF set.
106 But it is not our problem! Nobody could accuse us, we made
107 all that we could make. Even if it is your gated who injected
108 fatal route to network, even if it were you who configured
109 fatal static route: you are innocent. :-)
/* Module parameter (mode 0644, default true) controlling whether packets
 * received with corrupted ECN are logged. ipgre_rcv() passes the current
 * value on to ip_tunnel_rcv().
 */
114 static bool log_ecn_error = true;
115 module_param(log_ecn_error, bool, 0644);
116 MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
/* Forward declarations needed by the ops tables, plus the two per-netns ids.
 * ipgre_net_id selects the table of plain GRE tunnels; gre_tap_net_id selects
 * the table used when the GRE payload protocol is ETH_P_TEB (gretap).
 */
118 static struct rtnl_link_ops ipgre_link_ops __read_mostly;
119 static int ipgre_tunnel_init(struct net_device *dev);
121 static int ipgre_net_id __read_mostly;
122 static int gre_tap_net_id __read_mostly;
/* Compute a folded checksum result for skb, driven by skb->ip_summed.
 * For CHECKSUM_COMPLETE the already-known skb->csum is folded; the other
 * visible path recomputes it with __skb_checksum_complete() and marks the skb
 * CHECKSUM_COMPLETE. parse_gre_header() tests the result for non-zero.
 * NOTE(review): presumably non-zero means a checksum mismatch - confirm.
 */
124 static __sum16 check_checksum(struct sk_buff *skb)
128 switch (skb->ip_summed) {
129 case CHECKSUM_COMPLETE:
130 csum = csum_fold(skb->csum);
138 csum = __skb_checksum_complete(skb);
139 skb->ip_summed = CHECKSUM_COMPLETE;
/* Derive the GRE header length from the TUNNEL_CSUM, TUNNEL_KEY and
 * TUNNEL_SEQ bits of o_flags. Used for both received headers
 * (parse_gre_header) and the per-tunnel transmit length (__gre_tunnel_init).
 * NOTE(review): presumably a 4-byte base plus 4 bytes per option - confirm.
 */
146 static int ip_gre_calc_hlen(__be16 o_flags)
150 if (o_flags&TUNNEL_CSUM)
152 if (o_flags&TUNNEL_KEY)
154 if (o_flags&TUNNEL_SEQ)
/* Parse the GRE header located ip_hdrlen(skb) bytes past the network header.
 *
 * @skb:      packet being inspected
 * @tpi:      filled with the tunnel flags (converted from GRE wire flags) and
 *            the payload protocol; key/seq handling follows the GRE_KEY and
 *            GRE_SEQ tests
 * @csum_err: checksum-error indicator for the caller (ipgre_err() reads it)
 * @hdr_len:  receives the header length computed by ip_gre_calc_hlen()
 *
 * Visible steps: make sure the base header can be pulled, test for
 * GRE_VERSION/GRE_ROUTING flags, compute the full header length and pull it,
 * re-derive the header pointer after that second pull, verify the checksum
 * when GRE_CSUM is set, then handle key and sequence options. A flag-less
 * ETH_P_WCCP packet is re-labelled as ETH_P_IP; if the first option byte is
 * not an IPv4 version nibble (0x4), a further pull is attempted.
 *
 * ipgre_rcv() treats a negative return as failure; ipgre_err() treats any
 * non-zero return as failure.
 */
159 static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
160 bool *csum_err, int *hdr_len)
162 unsigned int ip_hlen = ip_hdrlen(skb);
163 const struct gre_base_hdr *greh;
166 if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr))))
169 greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen);
170 if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING)))
173 tpi->flags = gre_flags_to_tnl_flags(greh->flags);
174 *hdr_len = ip_gre_calc_hlen(tpi->flags);
176 if (!pskb_may_pull(skb, *hdr_len))
179 greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen);
181 tpi->proto = greh->protocol;
183 options = (__be32 *)(greh + 1);
184 if (greh->flags & GRE_CSUM) {
185 if (check_checksum(skb)) {
192 if (greh->flags & GRE_KEY) {
198 if (unlikely(greh->flags & GRE_SEQ)) {
204 /* WCCP version 1 and 2 protocol decoding.
205 * - Change protocol to IP
206 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
208 if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) {
209 tpi->proto = htons(ETH_P_IP);
210 if ((*(u8 *)options & 0xF0) != 0x40) {
212 if (!pskb_may_pull(skb, *hdr_len))
/* ICMP error handler for GRE (installed as ipgre_protocol.err_handler).
 *
 * @skb:  the ICMP error packet; skb->data is read as the embedded IP header
 * @info: ICMP-specific value, forwarded to ipv4_update_pmtu()
 *
 * Visible flow: parse the GRE header (a parse failure matters only when it
 * is not a checksum error), filter on ICMP type and code, then look up the
 * tunnel. The gretap table is used when the GRE protocol is ETH_P_TEB,
 * otherwise the plain GRE table. The lookup passes daddr before saddr, the
 * reverse of ipgre_rcv(), since the embedded header is one we sent.
 * ICMP_FRAG_NEEDED feeds ipv4_update_pmtu() and ICMP_REDIRECT feeds
 * ipv4_redirect(). Further checks cover tunnels with a zero or multicast
 * destination and TIME_EXCEEDED on tunnels with ttl 0. Finally the
 * IPTUNNEL_ERR_TIMEO window around t->err_time is tested and t->err_time is
 * set to jiffies.
 * NOTE(review): the action taken on each check (return, counter update) is
 * not documented here - confirm against the full function.
 */
220 static void ipgre_err(struct sk_buff *skb, u32 info)
223 /* All the routers (except for Linux) return only
224 8 bytes of packet payload. It means, that precise relaying of
225 ICMP in the real Internet is absolutely infeasible.
227 Moreover, Cisco "wise men" put GRE key to the third word
228 in GRE header. It makes impossible maintaining even soft
229 state for keyed GRE tunnels with enabled checksum. Tell
232 Well, I wonder, rfc1812 was written by Cisco employee,
233 what the hell these idiots break standards established
236 struct net *net = dev_net(skb->dev);
237 struct ip_tunnel_net *itn;
238 const struct iphdr *iph;
239 const int type = icmp_hdr(skb)->type;
240 const int code = icmp_hdr(skb)->code;
242 struct tnl_ptk_info tpi;
244 bool csum_err = false;
246 if (parse_gre_header(skb, &tpi, &csum_err, &hdr_len)) {
247 if (!csum_err) /* ignore csum errors. */
253 case ICMP_PARAMETERPROB:
256 case ICMP_DEST_UNREACH:
259 case ICMP_PORT_UNREACH:
260 /* Impossible event. */
263 /* All others are translated to HOST_UNREACH.
264 rfc2003 contains "deep thoughts" about NET_UNREACH,
265 I believe they are just ether pollution. --ANK
270 case ICMP_TIME_EXCEEDED:
271 if (code != ICMP_EXC_TTL)
279 if (tpi.proto == htons(ETH_P_TEB))
280 itn = net_generic(net, gre_tap_net_id);
282 itn = net_generic(net, ipgre_net_id);
284 iph = (const struct iphdr *)skb->data;
285 t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi.flags,
286 iph->daddr, iph->saddr, tpi.key);
291 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
292 ipv4_update_pmtu(skb, dev_net(skb->dev), info,
293 t->parms.link, 0, IPPROTO_GRE, 0);
296 if (type == ICMP_REDIRECT) {
297 ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
301 if (t->parms.iph.daddr == 0 ||
302 ipv4_is_multicast(t->parms.iph.daddr))
305 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
308 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
312 t->err_time = jiffies;
/* Receive handler for GRE packets (installed as ipgre_protocol.handler).
 *
 * Parses the GRE header, taking a negative result from parse_gre_header()
 * as failure. The gretap tunnel table is chosen for ETH_P_TEB payloads and
 * the plain GRE table otherwise. The tunnel is looked up by incoming ifindex,
 * flags, source/destination address and key, and the packet is handed to
 * ip_tunnel_rcv() together with log_ecn_error. An ICMP port-unreachable is
 * also sent from this function.
 * NOTE(review): presumably only when no tunnel matches - confirm.
 */
315 static int ipgre_rcv(struct sk_buff *skb)
317 struct net *net = dev_net(skb->dev);
318 struct ip_tunnel_net *itn;
319 const struct iphdr *iph;
320 struct ip_tunnel *tunnel;
321 struct tnl_ptk_info tpi;
323 bool csum_err = false;
325 if (parse_gre_header(skb, &tpi, &csum_err, &hdr_len) < 0)
328 if (tpi.proto == htons(ETH_P_TEB))
329 itn = net_generic(net, gre_tap_net_id);
331 itn = net_generic(net, ipgre_net_id);
334 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi.flags,
335 iph->saddr, iph->daddr, tpi.key);
338 ip_tunnel_rcv(tunnel, skb, &tpi, hdr_len, log_ecn_error);
341 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
/* Adjust the offload state of skb before GRE encapsulation.
 *
 * - GSO packets are uncloned (GFP_ATOMIC) and tagged with SKB_GSO_GRE.
 * - CHECKSUM_PARTIAL packets on a tunnel with TUNNEL_CSUM get their checksum
 *   resolved in software via skb_checksum_help().
 * - Any other packet that is not CHECKSUM_PARTIAL is set to CHECKSUM_NONE.
 *
 * Returns a struct sk_buff pointer that the xmit paths assign back to skb.
 * NOTE(review): the error-path return value is not documented here - confirm.
 */
347 static struct sk_buff *handle_offloads(struct ip_tunnel *tunnel, struct sk_buff *skb)
351 if (skb_is_gso(skb)) {
352 err = skb_unclone(skb, GFP_ATOMIC);
355 skb_shinfo(skb)->gso_type |= SKB_GSO_GRE;
357 } else if (skb->ip_summed == CHECKSUM_PARTIAL &&
358 tunnel->parms.o_flags&TUNNEL_CSUM) {
359 err = skb_checksum_help(skb);
362 } else if (skb->ip_summed != CHECKSUM_PARTIAL)
363 skb->ip_summed = CHECKSUM_NONE;
/* Push hdr_len bytes onto skb and write the GRE header described by tpi.
 *
 * The base header gets the wire-format flags and the payload protocol.
 * When any of TUNNEL_KEY, TUNNEL_CSUM or TUNNEL_SEQ is set, ptr starts at the
 * last 32-bit word of the header and the options are tested in the order
 * sequence, key, checksum. The checksum is computed with skb_checksum() and
 * is skipped for packets tagged SKB_GSO_GRE.
 *
 * Returns the skb; __gre_xmit() treats NULL as a drop.
 */
372 static struct sk_buff *gre_build_header(struct sk_buff *skb,
373 const struct tnl_ptk_info *tpi,
376 struct gre_base_hdr *greh;
378 skb_push(skb, hdr_len);
380 greh = (struct gre_base_hdr *)skb->data;
381 greh->flags = tnl_flags_to_gre_flags(tpi->flags);
382 greh->protocol = tpi->proto;
384 if (tpi->flags&(TUNNEL_KEY|TUNNEL_CSUM|TUNNEL_SEQ)) {
385 __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4);
387 if (tpi->flags&TUNNEL_SEQ) {
391 if (tpi->flags&TUNNEL_KEY) {
395 if (tpi->flags&TUNNEL_CSUM &&
396 !(skb_shinfo(skb)->gso_type & SKB_GSO_GRE)) {
398 *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0,
/* Common transmit tail shared by ipgre_xmit() and gre_tap_xmit().
 *
 * Marks the skb as encapsulated (resetting the inner header offsets the
 * first time), fills a tnl_ptk_info from the tunnel's output flags and key,
 * and sets tpi.seq from tunnel->o_seqno under the TUNNEL_SEQ test. It then
 * pushes the GRE header with gre_build_header() using tunnel->hlen and hands
 * the packet to ip_tunnel_xmit() with tnl_params as the outer IP template.
 * A NULL result from gre_build_header() is counted in dev->stats.tx_dropped.
 */
406 static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
407 const struct iphdr *tnl_params,
410 struct ip_tunnel *tunnel = netdev_priv(dev);
411 struct tnl_ptk_info tpi;
413 if (likely(!skb->encapsulation)) {
414 skb_reset_inner_headers(skb);
415 skb->encapsulation = 1;
418 tpi.flags = tunnel->parms.o_flags;
420 tpi.key = tunnel->parms.o_key;
421 if (tunnel->parms.o_flags & TUNNEL_SEQ)
423 tpi.seq = htonl(tunnel->o_seqno);
425 /* Push GRE header. */
426 skb = gre_build_header(skb, &tpi, tunnel->hlen);
427 if (unlikely(!skb)) {
428 dev->stats.tx_dropped++;
432 ip_tunnel_xmit(skb, dev, tnl_params);
/* ndo_start_xmit for plain GRE devices.
 *
 * After handle_offloads(), two cases are distinguished by dev->header_ops.
 * With header_ops set, the IP+GRE header built by ipgre_header() already
 * sits at skb->data. It is used as the outer IP template and then pulled
 * off again (tunnel->hlen + sizeof(struct iphdr)); headroom is ensured for
 * needed_headroom minus that amount. Otherwise the full needed_headroom is
 * ensured and the tunnel's configured parms.iph is the template.
 * The packet is then sent through __gre_xmit() with skb->protocol as the
 * GRE payload protocol. The failure path increments dev->stats.tx_dropped.
 */
435 static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
436 struct net_device *dev)
438 struct ip_tunnel *tunnel = netdev_priv(dev);
439 const struct iphdr *tnl_params;
441 skb = handle_offloads(tunnel, skb);
445 if (dev->header_ops) {
446 /* Need space for new headers */
447 if (skb_cow_head(skb, dev->needed_headroom -
448 (tunnel->hlen + sizeof(struct iphdr))))
451 tnl_params = (const struct iphdr *)skb->data;
453 /* Pull skb since ip_tunnel_xmit() needs skb->data pointing
456 skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
458 if (skb_cow_head(skb, dev->needed_headroom))
461 tnl_params = &tunnel->parms.iph;
464 __gre_xmit(skb, dev, tnl_params, skb->protocol);
471 dev->stats.tx_dropped++;
/* ndo_start_xmit for gretap devices.
 *
 * Runs handle_offloads(), ensures dev->needed_headroom of writable headroom,
 * and transmits through __gre_xmit() with the tunnel's configured IP header
 * as template and ETH_P_TEB as the GRE payload protocol. The failure path
 * increments dev->stats.tx_dropped.
 */
475 static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
476 struct net_device *dev)
478 struct ip_tunnel *tunnel = netdev_priv(dev);
480 skb = handle_offloads(tunnel, skb);
484 if (skb_cow_head(skb, dev->needed_headroom))
487 __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
494 dev->stats.tx_dropped++;
/* ndo_do_ioctl for plain GRE devices.
 *
 * Copies an ip_tunnel_parm from user space. For SIOCADDTUNNEL and
 * SIOCCHGTUNNEL it checks the IP template: version 4, protocol GRE, ihl 5,
 * no frag_off bits other than DF, and no GRE_VERSION/GRE_ROUTING bits in
 * either flag set. The GRE wire flags are converted to tunnel flags before
 * calling ip_tunnel_ioctl() and converted back before the result is copied
 * out to user space.
 * NOTE(review): the error codes returned on each failure are not documented
 * here - confirm against the full function.
 */
498 static int ipgre_tunnel_ioctl(struct net_device *dev,
499 struct ifreq *ifr, int cmd)
502 struct ip_tunnel_parm p;
504 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
506 if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
507 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
508 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
509 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
512 p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
513 p.o_flags = gre_flags_to_tnl_flags(p.o_flags);
515 err = ip_tunnel_ioctl(dev, &p, cmd);
519 p.i_flags = tnl_flags_to_gre_flags(p.i_flags);
520 p.o_flags = tnl_flags_to_gre_flags(p.o_flags);
522 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
527 /* Nice toy. Unfortunately, useless in real life :-)
528 It allows to construct virtual multiprotocol broadcast "LAN"
529 over the Internet, provided multicast routing is tuned.
532 I have no idea whether this bicycle was invented before me,
533 so that I had to set ARPHRD_IPGRE to a random value.
534 I have an impression, that Cisco could make something similar,
535 but this feature is apparently missing in IOS<=11.2(8).
537 I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
538 with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
540 ping -t 255 224.66.66.66
542 If nobody answers, mbone does not work.
544 ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
545 ip addr add 10.66.66.<somewhat>/24 dev Universe
547 ifconfig Universe add fe80::<Your_real_addr>/10
548 ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
551 ftp fec0:6666:6666::193.233.7.65
/* header_ops.create for GRE devices: prepend an outer IP + GRE header.
 *
 * Pushes t->hlen + sizeof(struct iphdr) bytes, copies the tunnel's
 * configured IP header into place, and writes the GRE flags and the payload
 * protocol (type, converted to network order). The 4-byte saddr and daddr
 * arguments overwrite the template addresses when they are copied in.
 *
 * Returns the pushed length on one path and its negation on the other.
 * NOTE(review): presumably the negative form signals that no destination
 * address is known yet - confirm.
 */
554 static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
556 const void *daddr, const void *saddr, unsigned int len)
558 struct ip_tunnel *t = netdev_priv(dev);
560 struct gre_base_hdr *greh;
562 iph = (struct iphdr *)skb_push(skb, t->hlen + sizeof(*iph));
563 greh = (struct gre_base_hdr *)(iph+1);
564 greh->flags = tnl_flags_to_gre_flags(t->parms.o_flags);
565 greh->protocol = htons(type);
567 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
569 /* Set the source hardware address. */
571 memcpy(&iph->saddr, saddr, 4);
573 memcpy(&iph->daddr, daddr, 4);
575 return t->hlen + sizeof(*iph);
577 return -(t->hlen + sizeof(*iph));
/* header_ops.parse for GRE devices: the "hardware address" of the sender is
 * the 4-byte IPv4 source address of the outer IP header found at the skb's
 * MAC header position; it is copied into haddr.
 */
580 static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
582 const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
583 memcpy(haddr, &iph->saddr, 4);
/* Link-layer header operations installed by ipgre_tunnel_init(). */
587 static const struct header_ops ipgre_header_ops = {
588 .create = ipgre_header,
589 .parse = ipgre_header_parse,
592 #ifdef CONFIG_NET_IPGRE_BROADCAST
/* ndo_open, built only with CONFIG_NET_IPGRE_BROADCAST.
 *
 * For a tunnel whose remote address is multicast, a route is resolved with
 * ip_route_output_gre(), t->mlink records an ifindex, and the multicast
 * group is joined through ip_mc_inc_group(). -EADDRNOTAVAIL is returned
 * after the route lookup and when the device has no in_device.
 * NOTE(review): confirm which device `dev` refers to at the join, and that
 * the first -EADDRNOTAVAIL is the route-lookup failure path.
 */
593 static int ipgre_open(struct net_device *dev)
595 struct ip_tunnel *t = netdev_priv(dev);
597 if (ipv4_is_multicast(t->parms.iph.daddr)) {
601 rt = ip_route_output_gre(dev_net(dev), &fl4,
605 RT_TOS(t->parms.iph.tos),
608 return -EADDRNOTAVAIL;
611 if (__in_dev_get_rtnl(dev) == NULL)
612 return -EADDRNOTAVAIL;
613 t->mlink = dev->ifindex;
614 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
/* ndo_stop counterpart of ipgre_open(): for a multicast tunnel with a
 * recorded t->mlink, look up that interface's in_device and leave the
 * multicast group with ip_mc_dec_group().
 */
619 static int ipgre_close(struct net_device *dev)
621 struct ip_tunnel *t = netdev_priv(dev);
623 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
624 struct in_device *in_dev;
625 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
627 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
/* net_device_ops for plain GRE devices. open/stop are only provided when
 * CONFIG_NET_IPGRE_BROADCAST is enabled; uninit, MTU change and statistics
 * are delegated to the generic ip_tunnel helpers.
 */
633 static const struct net_device_ops ipgre_netdev_ops = {
634 .ndo_init = ipgre_tunnel_init,
635 .ndo_uninit = ip_tunnel_uninit,
636 #ifdef CONFIG_NET_IPGRE_BROADCAST
637 .ndo_open = ipgre_open,
638 .ndo_stop = ipgre_close,
640 .ndo_start_xmit = ipgre_xmit,
641 .ndo_do_ioctl = ipgre_tunnel_ioctl,
642 .ndo_change_mtu = ip_tunnel_change_mtu,
643 .ndo_get_stats64 = ip_tunnel_get_stats64,
/* Feature mask that __gre_tunnel_init() ORs into both dev->features and
 * dev->hw_features; it starts with NETIF_F_SG.
 */
646 #define GRE_FEATURES (NETIF_F_SG | \
/* rtnl_link_ops.setup for "gre" links: install the GRE netdev ops and run
 * the generic ip_tunnel_setup() bound to ipgre_net_id.
 */
651 static void ipgre_tunnel_setup(struct net_device *dev)
653 dev->netdev_ops = &ipgre_netdev_ops;
654 ip_tunnel_setup(dev, ipgre_net_id);
/* Initialisation shared by GRE and gretap devices.
 *
 * Computes the tunnel's GRE header length from its output flags, forces the
 * outer IP protocol to IPPROTO_GRE, and sets default headroom and MTU
 * assuming an IP header plus a 4-byte GRE header. It then advertises
 * NETIF_F_NETNS_LOCAL and GRE_FEATURES. Only when TUNNEL_SEQ is not in use
 * are software GSO and NETIF_F_LLTX enabled as well.
 */
657 static void __gre_tunnel_init(struct net_device *dev)
659 struct ip_tunnel *tunnel;
661 tunnel = netdev_priv(dev);
662 tunnel->hlen = ip_gre_calc_hlen(tunnel->parms.o_flags);
663 tunnel->parms.iph.protocol = IPPROTO_GRE;
665 dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
666 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
668 dev->features |= NETIF_F_NETNS_LOCAL | GRE_FEATURES;
669 dev->hw_features |= GRE_FEATURES;
671 if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
672 /* TCP offload with GRE SEQ is not supported. */
673 dev->features |= NETIF_F_GSO_SOFTWARE;
674 dev->hw_features |= NETIF_F_GSO_SOFTWARE;
675 /* Can use a lockless transmit, unless we generate
678 dev->features |= NETIF_F_LLTX;
/* ndo_init for plain GRE devices.
 *
 * Runs the shared __gre_tunnel_init(), then uses the tunnel's local and
 * remote IPv4 addresses as the device address and broadcast address
 * (4 bytes each). The device is typed ARPHRD_IPGRE, flagged IFF_NOARP, and
 * has IFF_XMIT_DST_RELEASE cleared. With CONFIG_NET_IPGRE_BROADCAST a
 * multicast remote switches the flags to IFF_BROADCAST and installs
 * ipgre_header_ops; header_ops is installed on one further path as well.
 * Finishes with the generic ip_tunnel_init().
 * NOTE(review): the condition guarding the second header_ops assignment is
 * not documented here - confirm.
 */
682 static int ipgre_tunnel_init(struct net_device *dev)
684 struct ip_tunnel *tunnel = netdev_priv(dev);
685 struct iphdr *iph = &tunnel->parms.iph;
687 __gre_tunnel_init(dev);
689 memcpy(dev->dev_addr, &iph->saddr, 4);
690 memcpy(dev->broadcast, &iph->daddr, 4);
692 dev->type = ARPHRD_IPGRE;
693 dev->flags = IFF_NOARP;
694 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
698 #ifdef CONFIG_NET_IPGRE_BROADCAST
699 if (ipv4_is_multicast(iph->daddr)) {
702 dev->flags = IFF_BROADCAST;
703 dev->header_ops = &ipgre_header_ops;
707 dev->header_ops = &ipgre_header_ops;
709 return ip_tunnel_init(dev);
/* GRE protocol hooks registered under GREPROTO_CISCO by ipgre_init(). */
712 static const struct gre_protocol ipgre_protocol = {
713 .handler = ipgre_rcv,
714 .err_handler = ipgre_err,
/* Per-netns init: set up the plain GRE tunnel table for this namespace,
 * bound to ipgre_net_id and ipgre_link_ops.
 */
717 static int __net_init ipgre_init_net(struct net *net)
719 return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
/* Per-netns exit: tear down this namespace's plain GRE tunnel table. */
722 static void __net_exit ipgre_exit_net(struct net *net)
724 struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id);
725 ip_tunnel_delete_net(itn);
/* pernet_operations for plain GRE; each namespace gets a private
 * struct ip_tunnel_net of the given size.
 */
728 static struct pernet_operations ipgre_net_ops = {
729 .init = ipgre_init_net,
730 .exit = ipgre_exit_net,
732 .size = sizeof(struct ip_tunnel_net),
/* rtnl_link_ops.validate for "gre" links: collect the requested input and
 * output GRE flags (when the attributes are present) and test them for the
 * GRE_VERSION and GRE_ROUTING bits.
 * NOTE(review): presumably those bits are rejected with an error - confirm.
 */
735 static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
743 if (data[IFLA_GRE_IFLAGS])
744 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
745 if (data[IFLA_GRE_OFLAGS])
746 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
747 if (flags & (GRE_VERSION|GRE_ROUTING))
/* rtnl_link_ops.validate for "gretap" links.
 *
 * A supplied IFLA_ADDRESS must be ETH_ALEN bytes long and a valid Ethernet
 * address (-EADDRNOTAVAIL otherwise). A supplied IFLA_GRE_REMOTE is copied
 * into daddr for a further check. The remaining validation is delegated to
 * ipgre_tunnel_validate().
 * NOTE(review): the check applied to daddr is not documented here - confirm.
 */
753 static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
757 if (tb[IFLA_ADDRESS]) {
758 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
760 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
761 return -EADDRNOTAVAIL;
767 if (data[IFLA_GRE_REMOTE]) {
768 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
774 return ipgre_tunnel_validate(tb, data);
/* Translate IFLA_GRE_* netlink attributes into an ip_tunnel_parm.
 *
 * parms is zeroed first and its IP protocol set to IPPROTO_GRE. Each
 * attribute that is present then fills the matching field: link, input and
 * output flags (converted from GRE wire flags to tunnel flags), input and
 * output keys, local and remote addresses, TTL and TOS. The DF bit is set
 * in frag_off unless IFLA_GRE_PMTUDISC is present with a zero value.
 */
777 static void ipgre_netlink_parms(struct nlattr *data[], struct nlattr *tb[],
778 struct ip_tunnel_parm *parms)
780 memset(parms, 0, sizeof(*parms));
782 parms->iph.protocol = IPPROTO_GRE;
787 if (data[IFLA_GRE_LINK])
788 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
790 if (data[IFLA_GRE_IFLAGS])
791 parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));
793 if (data[IFLA_GRE_OFLAGS])
794 parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));
796 if (data[IFLA_GRE_IKEY])
797 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
799 if (data[IFLA_GRE_OKEY])
800 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
802 if (data[IFLA_GRE_LOCAL])
803 parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);
805 if (data[IFLA_GRE_REMOTE])
806 parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);
808 if (data[IFLA_GRE_TTL])
809 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
811 if (data[IFLA_GRE_TOS])
812 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
814 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
815 parms->iph.frag_off = htons(IP_DF);
/* ndo_init for gretap devices: shared GRE setup followed by the generic
 * ip_tunnel_init().
 */
818 static int gre_tap_init(struct net_device *dev)
820 __gre_tunnel_init(dev);
822 return ip_tunnel_init(dev);
/* net_device_ops for gretap devices: GRE-specific init and transmit,
 * Ethernet helpers for MAC address handling, and generic ip_tunnel helpers
 * for uninit, MTU change and statistics.
 */
825 static const struct net_device_ops gre_tap_netdev_ops = {
826 .ndo_init = gre_tap_init,
827 .ndo_uninit = ip_tunnel_uninit,
828 .ndo_start_xmit = gre_tap_xmit,
829 .ndo_set_mac_address = eth_mac_addr,
830 .ndo_validate_addr = eth_validate_addr,
831 .ndo_change_mtu = ip_tunnel_change_mtu,
832 .ndo_get_stats64 = ip_tunnel_get_stats64,
/* rtnl_link_ops.setup for "gretap" links: install the gretap netdev ops and
 * run the generic ip_tunnel_setup() bound to gre_tap_net_id.
 */
835 static void ipgre_tap_setup(struct net_device *dev)
838 dev->netdev_ops = &gre_tap_netdev_ops;
839 ip_tunnel_setup(dev, gre_tap_net_id);
/* rtnl_link_ops.newlink shared by gre and gretap: build the tunnel
 * parameters from the netlink attributes and create the tunnel through
 * ip_tunnel_newlink(), whose result is returned. src_net is not used in the
 * visible statements.
 */
842 static int ipgre_newlink(struct net *src_net, struct net_device *dev,
843 struct nlattr *tb[], struct nlattr *data[])
845 struct ip_tunnel_parm p;
847 ipgre_netlink_parms(data, tb, &p);
848 return ip_tunnel_newlink(dev, tb, &p);
/* rtnl_link_ops.changelink shared by gre and gretap: rebuild the tunnel
 * parameters from the netlink attributes and apply them through
 * ip_tunnel_changelink(), whose result is returned.
 */
851 static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
852 struct nlattr *data[])
854 struct ip_tunnel_parm p;
856 ipgre_netlink_parms(data, tb, &p);
857 return ip_tunnel_changelink(dev, tb, &p);
/* rtnl_link_ops.get_size: size estimate for the IFLA_GRE_* attributes of a
 * link; the per-attribute terms are labelled by the comments below.
 * NOTE(review): presumably one term per attribute written by
 * ipgre_fill_info() - confirm the two stay in sync.
 */
860 static size_t ipgre_get_size(const struct net_device *dev)
865 /* IFLA_GRE_IFLAGS */
867 /* IFLA_GRE_OFLAGS */
875 /* IFLA_GRE_REMOTE */
881 /* IFLA_GRE_PMTUDISC */
/* rtnl_link_ops.fill_info: dump the tunnel's parameters as IFLA_GRE_*
 * attributes into skb.
 *
 * Flags are converted back from tunnel flags to GRE wire flags, and
 * IFLA_GRE_PMTUDISC is reported as 1 when the DF bit is set in the template
 * header, else 0. Any nla_put_* failure jumps to the nla_put_failure label.
 */
886 static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
888 struct ip_tunnel *t = netdev_priv(dev);
889 struct ip_tunnel_parm *p = &t->parms;
891 if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
892 nla_put_be16(skb, IFLA_GRE_IFLAGS, tnl_flags_to_gre_flags(p->i_flags)) ||
893 nla_put_be16(skb, IFLA_GRE_OFLAGS, tnl_flags_to_gre_flags(p->o_flags)) ||
894 nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
895 nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
896 nla_put_be32(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
897 nla_put_be32(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
898 nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
899 nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
900 nla_put_u8(skb, IFLA_GRE_PMTUDISC,
901 !!(p->iph.frag_off & htons(IP_DF))))
902 goto nla_put_failure;
/* Netlink attribute policy for IFLA_GRE_*: integer attributes are typed,
 * while the local/remote addresses are length-checked against the size of
 * the IPv4 saddr/daddr fields.
 */
909 static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
910 [IFLA_GRE_LINK] = { .type = NLA_U32 },
911 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
912 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
913 [IFLA_GRE_IKEY] = { .type = NLA_U32 },
914 [IFLA_GRE_OKEY] = { .type = NLA_U32 },
915 [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
916 [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
917 [IFLA_GRE_TTL] = { .type = NLA_U8 },
918 [IFLA_GRE_TOS] = { .type = NLA_U8 },
919 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
/* rtnetlink link operations for plain GRE tunnels. Shares policy, newlink,
 * changelink, get_size and fill_info with ipgre_tap_ops; only setup and
 * validate are GRE-specific.
 */
922 static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
924 .maxtype = IFLA_GRE_MAX,
925 .policy = ipgre_policy,
926 .priv_size = sizeof(struct ip_tunnel),
927 .setup = ipgre_tunnel_setup,
928 .validate = ipgre_tunnel_validate,
929 .newlink = ipgre_newlink,
930 .changelink = ipgre_changelink,
931 .dellink = ip_tunnel_dellink,
932 .get_size = ipgre_get_size,
933 .fill_info = ipgre_fill_info,
/* rtnetlink link operations for gretap tunnels. Differs from ipgre_link_ops
 * only in its setup and validate callbacks.
 */
936 static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
938 .maxtype = IFLA_GRE_MAX,
939 .policy = ipgre_policy,
940 .priv_size = sizeof(struct ip_tunnel),
941 .setup = ipgre_tap_setup,
942 .validate = ipgre_tap_validate,
943 .newlink = ipgre_newlink,
944 .changelink = ipgre_changelink,
945 .dellink = ip_tunnel_dellink,
946 .get_size = ipgre_get_size,
947 .fill_info = ipgre_fill_info,
/* Per-netns init: set up the gretap tunnel table for this namespace, bound
 * to gre_tap_net_id and ipgre_tap_ops.
 */
950 static int __net_init ipgre_tap_init_net(struct net *net)
952 return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, NULL);
/* Per-netns exit: tear down this namespace's gretap tunnel table. */
955 static void __net_exit ipgre_tap_exit_net(struct net *net)
957 struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id);
958 ip_tunnel_delete_net(itn);
/* pernet_operations for gretap; the allocated id is stored in
 * gre_tap_net_id and each namespace gets a private struct ip_tunnel_net.
 */
961 static struct pernet_operations ipgre_tap_net_ops = {
962 .init = ipgre_tap_init_net,
963 .exit = ipgre_tap_exit_net,
964 .id = &gre_tap_net_id,
965 .size = sizeof(struct ip_tunnel_net),
/* Module init.
 *
 * Registers, in order: the GRE pernet device, the gretap pernet device, the
 * GRE protocol handler under GREPROTO_CISCO, the "gre" link ops and the
 * "gretap" link ops. The trailing statements undo these registrations in
 * reverse order and are reached through the error labels (add_proto_failed,
 * rtnl_link_failed).
 */
968 static int __init ipgre_init(void)
972 pr_info("GRE over IPv4 tunneling driver\n");
974 err = register_pernet_device(&ipgre_net_ops);
978 err = register_pernet_device(&ipgre_tap_net_ops);
982 err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
984 pr_info("%s: can't add protocol\n", __func__);
985 goto add_proto_failed;
988 err = rtnl_link_register(&ipgre_link_ops);
990 goto rtnl_link_failed;
992 err = rtnl_link_register(&ipgre_tap_ops);
999 rtnl_link_unregister(&ipgre_link_ops);
1001 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1003 unregister_pernet_device(&ipgre_tap_net_ops);
1005 unregister_pernet_device(&ipgre_net_ops);
/* Module exit: unregister everything ipgre_init() registered, in reverse
 * order. A failure to remove the GRE protocol handler is only logged.
 */
1009 static void __exit ipgre_fini(void)
1011 rtnl_link_unregister(&ipgre_tap_ops);
1012 rtnl_link_unregister(&ipgre_link_ops);
1013 if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0)
1014 pr_info("%s: can't remove protocol\n", __func__);
1015 unregister_pernet_device(&ipgre_tap_net_ops);
1016 unregister_pernet_device(&ipgre_net_ops);
/* Module entry/exit points, licence, and the aliases that let the module be
 * auto-loaded for "gre"/"gretap" link types and the gre0/gretap0 devices.
 */
1019 module_init(ipgre_init);
1020 module_exit(ipgre_fini);
1021 MODULE_LICENSE("GPL");
1022 MODULE_ALIAS_RTNL_LINK("gre");
1023 MODULE_ALIAS_RTNL_LINK("gretap");
1024 MODULE_ALIAS_NETDEV("gre0");
1025 MODULE_ALIAS_NETDEV("gretap0");