/*
 *	IP multicast routing support for mrouted 3.6/3.8
 *
 *	(c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *	  Linux Consultancy and Custom Driver Development
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Fixes:
 *	Michael Chastain	:	Incorrect size of copying.
 *	Alan Cox		:	Added the cache manager code
 *	Alan Cox		:	Fixed the clone/copy bug and device race.
 *	Mike McLagan		:	Routing by source
 *	Malcolm Beattie		:	Buffer handling fixes.
 *	Alexey Kuznetsov	:	Double buffer free and other fixes.
 *	SVR Anand		:	Fixed several multicast bugs and problems.
 *	Alexey Kuznetsov	:	Status, optimisations and more.
 *	Brad Parker		:	Better behaviour on mrouted upcall
 *	Carlos Picoto		:	PIMv1 Support
 *	Pavlin Ivanov Radoslavov:	PIMv2 Registers must checksum only PIM header
 *					Relax this requirement to work with older peers.
 */

#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/if_ether.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/route.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <linux/netfilter_ipv4.h>
#include <net/checksum.h>
#include <net/netlink.h>
#include <net/fib_rules.h>

#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
#define CONFIG_IP_PIMSM	1
#endif

struct mr_table {
	struct list_head	list;
#ifdef CONFIG_NET_NS
	struct net		*net;
#endif
	u32			id;
	struct sock __rcu	*mroute_sk;
	struct timer_list	ipmr_expire_timer;
	struct list_head	mfc_unres_queue;
	struct list_head	mfc_cache_array[MFC_LINES];
	struct vif_device	vif_table[MAXVIFS];
	int			maxvif;
	atomic_t		cache_resolve_queue_len;
	int			mroute_do_assert;
	int			mroute_do_pim;
#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
	int			mroute_reg_vif_num;
#endif
};

struct ipmr_rule {
	struct fib_rule		common;
};

struct ipmr_result {
	struct mr_table		*mrt;
};

/* Big lock, protecting vif table, mrt cache and mroute socket state.
 * Note that the changes are semaphored via rtnl_lock.
 */
static DEFINE_RWLOCK(mrt_lock);

/*
 *	Multicast router control variables
 */
#define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL)

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to Alan's original scheme. The hash table of resolved
 * entries is changed only in process context and protected
 * with the weak lock mrt_lock. The queue of unresolved entries is
 * protected with the strong spinlock mfc_unres_lock.
 *
 * In this case the data path is free of exclusive locks at all.
 */
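/* Read-side sketch (illustrative, not part of the original file): the
 * forwarding path only ever takes the read half of mrt_lock, so
 * concurrent lookups never exclude each other:
 *
 *	read_lock(&mrt_lock);
 *	cache = ipmr_cache_find(mrt, saddr, daddr);
 *	... forward via the resolved entry ...
 *	read_unlock(&mrt_lock);
 *
 * Writers (vif and MFC updates, process context) take
 * write_lock_bh(&mrt_lock); only the unresolved queue needs the
 * exclusive mfc_unres_lock on both paths.
 */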
static struct kmem_cache *mrt_cachep __read_mostly;

static struct mr_table *ipmr_new_table(struct net *net, u32 id);
static int ip_mr_forward(struct net *net, struct mr_table *mrt,
			 struct sk_buff *skb, struct mfc_cache *cache,
			 int local);
static int ipmr_cache_report(struct mr_table *mrt,
			     struct sk_buff *pkt, vifi_t vifi, int assert);
static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
			      struct mfc_cache *c, struct rtmsg *rtm);
static void ipmr_expire_process(unsigned long arg);

#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
#define ipmr_for_each_table(mrt, net) \
	list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list)

static struct mr_table *ipmr_get_table(struct net *net, u32 id)
{
	struct mr_table *mrt;

	ipmr_for_each_table(mrt, net) {
		if (mrt->id == id)
			return mrt;
	}
	return NULL;
}

static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
			   struct mr_table **mrt)
{
	struct ipmr_result res;
	struct fib_lookup_arg arg = { .result = &res, };
	int err;

	err = fib_rules_lookup(net->ipv4.mr_rules_ops, flp, 0, &arg);
	if (err < 0)
		return err;
	*mrt = res.mrt;
	return 0;
}

static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp,
			    int flags, struct fib_lookup_arg *arg)
{
	struct ipmr_result *res = arg->result;
	struct mr_table *mrt;

	switch (rule->action) {
	case FR_ACT_TO_TBL:
		break;
	case FR_ACT_UNREACHABLE:
		return -ENETUNREACH;
	case FR_ACT_PROHIBIT:
		return -EACCES;
	case FR_ACT_BLACKHOLE:
	default:
		return -EINVAL;
	}

	mrt = ipmr_get_table(rule->fr_net, rule->table);
	if (mrt == NULL)
		return -EAGAIN;
	res->mrt = mrt;
	return 0;
}

static int ipmr_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
{
	return 1;
}

static const struct nla_policy ipmr_rule_policy[FRA_MAX + 1] = {
	FRA_GENERIC_POLICY,
};

static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
			       struct fib_rule_hdr *frh, struct nlattr **tb)
{
	return 0;
}

static int ipmr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			     struct nlattr **tb)
{
	return 1;
}

static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
			  struct fib_rule_hdr *frh)
{
	frh->dst_len = 0;
	frh->src_len = 0;
	frh->tos     = 0;
	return 0;
}

static const struct fib_rules_ops __net_initdata ipmr_rules_ops_template = {
	.family		= RTNL_FAMILY_IPMR,
	.rule_size	= sizeof(struct ipmr_rule),
	.addr_size	= sizeof(u32),
	.action		= ipmr_rule_action,
	.match		= ipmr_rule_match,
	.configure	= ipmr_rule_configure,
	.compare	= ipmr_rule_compare,
	.default_pref	= fib_default_rule_pref,
	.fill		= ipmr_rule_fill,
	.nlgroup	= RTNLGRP_IPV4_RULE,
	.policy		= ipmr_rule_policy,
	.owner		= THIS_MODULE,
};

static int __net_init ipmr_rules_init(struct net *net)
{
	struct fib_rules_ops *ops;
	struct mr_table *mrt;
	int err;

	ops = fib_rules_register(&ipmr_rules_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);

	INIT_LIST_HEAD(&net->ipv4.mr_tables);

	mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
	if (mrt == NULL) {
		err = -ENOMEM;
		goto err1;
	}

	err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT, 0);
	if (err < 0)
		goto err2;

	net->ipv4.mr_rules_ops = ops;
	return 0;

err2:
	kfree(mrt);
err1:
	fib_rules_unregister(ops);
	return err;
}

static void __net_exit ipmr_rules_exit(struct net *net)
{
	struct mr_table *mrt, *next;

	list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) {
		list_del(&mrt->list);
		kfree(mrt);
	}
	fib_rules_unregister(net->ipv4.mr_rules_ops);
}
#else
#define ipmr_for_each_table(mrt, net) \
	for (mrt = net->ipv4.mrt; mrt; mrt = NULL)

static struct mr_table *ipmr_get_table(struct net *net, u32 id)
{
	return net->ipv4.mrt;
}

static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
			   struct mr_table **mrt)
{
	*mrt = net->ipv4.mrt;
	return 0;
}

static int __net_init ipmr_rules_init(struct net *net)
{
	net->ipv4.mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
	return net->ipv4.mrt ? 0 : -ENOMEM;
}

static void __net_exit ipmr_rules_exit(struct net *net)
{
	kfree(net->ipv4.mrt);
}
#endif

static struct mr_table *ipmr_new_table(struct net *net, u32 id)
{
	struct mr_table *mrt;
	unsigned int i;

	mrt = ipmr_get_table(net, id);
	if (mrt != NULL)
		return mrt;

	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
	if (mrt == NULL)
		return NULL;
	write_pnet(&mrt->net, net);
	mrt->id = id;

	/* Forwarding cache */
	for (i = 0; i < MFC_LINES; i++)
		INIT_LIST_HEAD(&mrt->mfc_cache_array[i]);

	INIT_LIST_HEAD(&mrt->mfc_unres_queue);

	setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
		    (unsigned long)mrt);

#ifdef CONFIG_IP_PIMSM
	mrt->mroute_reg_vif_num = -1;
#endif
#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
	list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
#endif
	return mrt;
}

/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */

static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
{
	struct net *net = dev_net(dev);

	dev_close(dev);

	dev = __dev_get_by_name(net, "tunl0");
	if (dev) {
		const struct net_device_ops *ops = dev->netdev_ops;
		struct ifreq ifr;
		struct ip_tunnel_parm p;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
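		/* ndo_do_ioctl() expects its ifreq data pointer to reference
		 * userspace memory; the get_fs()/set_fs(KERNEL_DS) dance
		 * below temporarily widens the address limit so that the
		 * kernel-space ip_tunnel_parm built above is accepted.
		 */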
		if (ops->ndo_do_ioctl) {
			mm_segment_t oldfs = get_fs();

			set_fs(KERNEL_DS);
			ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
			set_fs(oldfs);
		}
	}
}

static struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
{
	struct net_device *dev;

	dev = __dev_get_by_name(net, "tunl0");

	if (dev) {
		const struct net_device_ops *ops = dev->netdev_ops;
		int err;
		struct ifreq ifr;
		struct ip_tunnel_parm p;
		struct in_device *in_dev;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

		if (ops->ndo_do_ioctl) {
			mm_segment_t oldfs = get_fs();

			set_fs(KERNEL_DS);
			err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
			set_fs(oldfs);
		} else
			err = -EOPNOTSUPP;

		dev = NULL;

		if (err == 0 &&
		    (dev = __dev_get_by_name(net, p.name)) != NULL) {
			dev->flags |= IFF_MULTICAST;

			in_dev = __in_dev_get_rtnl(dev);
			if (in_dev == NULL)
				goto failure;

			ipv4_devconf_setall(in_dev);
			IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;

			if (dev_open(dev))
				goto failure;
			dev_hold(dev);
		}
	}
	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}

#ifdef CONFIG_IP_PIMSM

static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct mr_table *mrt;
	struct flowi fl = {
		.oif		= dev->ifindex,
		.iif		= skb->skb_iif,
		.mark		= skb->mark,
	};
	int err;

	err = ipmr_fib_lookup(net, &fl, &mrt);
	if (err < 0) {
		kfree_skb(skb);
		return err;
	}

	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ipmr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, IGMPMSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return NETDEV_TX_OK;
}

static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
};
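/* reg_vif_setup() below configures the synthetic PIM register device.
 * Note the MTU: ETH_DATA_LEN minus one IP header minus 8 bytes leaves
 * room for the outer IP header plus the 8-byte PIM register header
 * that encapsulation on this interface adds.
 */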
static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->destructor		= free_netdev;
	dev->features		|= NETIF_F_NETNS_LOCAL;
}

static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
{
	struct net_device *dev;
	struct in_device *in_dev;
	char name[IFNAMSIZ];

	if (mrt->id == RT_TABLE_DEFAULT)
		sprintf(name, "pimreg");
	else
		sprintf(name, "pimreg%u", mrt->id);

	dev = alloc_netdev(0, name, reg_vif_setup);
	if (dev == NULL)
		return NULL;

	dev_net_set(dev, net);

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}
	dev->iflink = 0;

	rcu_read_lock();
	if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
		rcu_read_unlock();
		goto failure;
	}

	ipv4_devconf_setall(in_dev);
	IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
	rcu_read_unlock();

	if (dev_open(dev))
		goto failure;

	dev_hold(dev);

	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}
#endif

/*
 * vif_delete: Delete a VIF entry
 * @notify: Set to 1, if the caller is a notifier_call
 */

static int vif_delete(struct mr_table *mrt, int vifi, int notify,
		      struct list_head *head)
{
	struct vif_device *v;
	struct net_device *dev;
	struct in_device *in_dev;

	if (vifi < 0 || vifi >= mrt->maxvif)
		return -EADDRNOTAVAIL;

	v = &mrt->vif_table[vifi];

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IP_PIMSM
	if (vifi == mrt->mroute_reg_vif_num)
		mrt->mroute_reg_vif_num = -1;
#endif

	if (vifi + 1 == mrt->maxvif) {
		int tmp;
		for (tmp = vifi - 1; tmp >= 0; tmp--) {
			if (VIF_EXISTS(mrt, tmp))
				break;
		}
		mrt->maxvif = tmp + 1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
		IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
		ip_rt_multicast_event(in_dev);
	}

	if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER) && !notify)
		unregister_netdevice_queue(dev, head);

	dev_put(dev);
	return 0;
}

static inline void ipmr_cache_free(struct mfc_cache *c)
{
	kmem_cache_free(mrt_cachep, c);
}

/* Destroy an unresolved cache entry, killing queued skbs
 * and reporting error to netlink readers.
 */

static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;
	struct nlmsgerr *e;

	atomic_dec(&mrt->cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			e = NLMSG_DATA(nlh);
			e->error = -ETIMEDOUT;
			memset(&e->msg, 0, sizeof(e->msg));

			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
		} else
			kfree_skb(skb);
	}

	ipmr_cache_free(c);
}

/* Timer process for the unresolved queue. */

static void ipmr_expire_process(unsigned long arg)
{
	struct mr_table *mrt = (struct mr_table *)arg;
	unsigned long now;
	unsigned long expires;
	struct mfc_cache *c, *next;

	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&mrt->ipmr_expire_timer, jiffies + HZ/10);
		return;
	}

	if (list_empty(&mrt->mfc_unres_queue))
		goto out;

	now = jiffies;
	expires = 10*HZ;

	list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			continue;
		}

		list_del(&c->list);
		ipmr_destroy_unres(mrt, c);
	}

	if (!list_empty(&mrt->mfc_unres_queue))
		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);

out:
	spin_unlock(&mfc_unres_lock);
}

/* Fill oifs list. It is called under write locked mrt_lock. */

static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache,
				   unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXVIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXVIFS);

	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
		if (VIF_EXISTS(mrt, vifi) &&
		    ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
}
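/* Worked example (illustrative): with ttls = { 1, 255, 3, 0, ... },
 * only vifs 0 and 2 are recorded, since a ttl of 0 or 255 means
 * "do not forward". The result is minvif = 0 and maxvif = 3, so the
 * forwarding loop in ip_mr_forward() scans exactly vifs [0, 3).
 */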
static int vif_add(struct net *net, struct mr_table *mrt,
		   struct vifctl *vifc, int mrtsock)
{
	int vifi = vifc->vifc_vifi;
	struct vif_device *v = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct in_device *in_dev;
	int err;

	/* Is vif busy ? */
	if (VIF_EXISTS(mrt, vifi))
		return -EADDRINUSE;

	switch (vifc->vifc_flags) {
#ifdef CONFIG_IP_PIMSM
	case VIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (mrt->mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ipmr_reg_vif(net, mrt);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case VIFF_TUNNEL:
		dev = ipmr_new_tunnel(net, vifc);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			ipmr_del_tunnel(dev, vifc);
			dev_put(dev);
			return err;
		}
		break;

	case VIFF_USE_IFINDEX:
	case 0:
		if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
			dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
			if (dev && __in_dev_get_rtnl(dev) == NULL) {
				dev_put(dev);
				return -EADDRNOTAVAIL;
			}
		} else
			dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);

		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
		dev_put(dev);
		return -EADDRNOTAVAIL;
	}
	IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
	ip_rt_multicast_event(in_dev);

	/*
	 * Fill in the VIF structures
	 */
	v->rate_limit = vifc->vifc_rate_limit;
	v->local = vifc->vifc_lcl_addr.s_addr;
	v->remote = vifc->vifc_rmt_addr.s_addr;
	v->flags = vifc->vifc_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold = vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER))
		v->link = dev->iflink;

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
#ifdef CONFIG_IP_PIMSM
	if (v->flags & VIFF_REGISTER)
		mrt->mroute_reg_vif_num = vifi;
#endif
	if (vifi + 1 > mrt->maxvif)
		mrt->maxvif = vifi + 1;
	write_unlock_bh(&mrt_lock);
	return 0;
}

static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
					 __be32 origin,
					 __be32 mcastgrp)
{
	int line = MFC_HASH(mcastgrp, origin);
	struct mfc_cache *c;

	list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
		if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp)
			return c;
	}
	return NULL;
}

/*
 * Allocate a multicast cache entry
 */
static struct mfc_cache *ipmr_cache_alloc(void)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);

	if (c)
		c->mfc_un.res.minvif = MAXVIFS;
	return c;
}

static struct mfc_cache *ipmr_cache_alloc_unres(void)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);

	if (c) {
		skb_queue_head_init(&c->mfc_un.unres.unresolved);
		c->mfc_un.unres.expires = jiffies + 10*HZ;
	}
	return c;
}

/*
 * A cache entry has gone from the queued (unresolved) to the resolved state.
 */

static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
			       struct mfc_cache *uc, struct mfc_cache *c)
{
	struct sk_buff *skb;
	struct nlmsgerr *e;

	/*
	 * Play the pending entries through our router
	 */

	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));

			if (__ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
				nlh->nlmsg_len = (skb_tail_pointer(skb) -
						  (u8 *)nlh);
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				e = NLMSG_DATA(nlh);
				e->error = -EMSGSIZE;
				memset(&e->msg, 0, sizeof(e->msg));
			}

			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
		} else
			ip_mr_forward(net, mrt, skb, c, 0);
	}
}

/*
 * Bounce a cache query up to mrouted. We could use netlink for this,
 * but mrouted expects the following bizarre scheme.
 *
 * Called under mrt_lock.
 */
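/* Upcall format (sketch, per struct igmpmsg in <linux/mroute.h>): the
 * skb queued to the daemon starts with an igmpmsg that overlays an IP
 * header; im_msgtype carries IGMPMSG_NOCACHE, IGMPMSG_WRONGVIF or
 * IGMPMSG_WHOLEPKT, im_vif the relevant vif index, and im_src/im_dst
 * the flow addresses. The daemon reads it from its IGMP raw socket.
 */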
static int ipmr_cache_report(struct mr_table *mrt,
			     struct sk_buff *pkt, vifi_t vifi, int assert)
{
	struct sk_buff *skb;
	const int ihl = ip_hdrlen(pkt);
	struct igmphdr *igmp;
	struct igmpmsg *msg;
	struct sock *mroute_sk;
	int ret;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
	else
#endif
		skb = alloc_skb(128, GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		 * Duplicate old header, fix ihl, length etc.
		 * And all this only to mangle msg->im_msgtype and
		 * to set msg->im_mbz to "mbz" :-)
		 */
		skb_push(skb, sizeof(struct iphdr));
		skb_reset_network_header(skb);
		skb_reset_transport_header(skb);
		msg = (struct igmpmsg *)skb_network_header(skb);
		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
		msg->im_msgtype = IGMPMSG_WHOLEPKT;
		msg->im_mbz = 0;
		msg->im_vif = mrt->mroute_reg_vif_num;
		ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
		ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
					     sizeof(struct iphdr));
	} else
#endif
	{
		/*
		 * Copy the IP header
		 */
		skb->network_header = skb->tail;
		skb_put(skb, ihl);
		skb_copy_to_linear_data(skb, pkt->data, ihl);
		ip_hdr(skb)->protocol = 0;	/* Flag to the kernel this is a route add */
		msg = (struct igmpmsg *)skb_network_header(skb);
		msg->im_vif = vifi;
		skb_dst_set(skb, dst_clone(skb_dst(pkt)));

		/*
		 * Add our header
		 */
		igmp = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
		igmp->type =
		msg->im_msgtype = assert;
		igmp->code = 0;
		ip_hdr(skb)->tot_len = htons(skb->len);	/* Fix the length */
		skb->transport_header = skb->network_header;
	}

	rcu_read_lock();
	mroute_sk = rcu_dereference(mrt->mroute_sk);
	if (mroute_sk == NULL) {
		rcu_read_unlock();
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 * Deliver to mrouted
	 */
	ret = sock_queue_rcv_skb(mroute_sk, skb);
	rcu_read_unlock();
	if (ret < 0) {
		if (net_ratelimit())
			printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
		kfree_skb(skb);
	}

	return ret;
}

/*
 * Queue a packet for resolution. It gets a locked cache entry!
 */

static int
ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
{
	bool found = false;
	int err;
	struct mfc_cache *c;
	const struct iphdr *iph = ip_hdr(skb);

	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(c, &mrt->mfc_unres_queue, list) {
		if (c->mfc_mcastgrp == iph->daddr &&
		    c->mfc_origin == iph->saddr) {
			found = true;
			break;
		}
	}

	if (!found) {
		/*
		 * Create a new entry if allowable
		 */
		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
		    (c = ipmr_cache_alloc_unres()) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/*
		 * Fill in the new cache entry
		 */
		c->mfc_parent = -1;
		c->mfc_origin = iph->saddr;
		c->mfc_mcastgrp = iph->daddr;

		/*
		 * Reflect first query at mrouted.
		 */
		err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
		if (err < 0) {
			/* If the report failed, throw the cache entry
			 * out.
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ipmr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&mrt->cache_resolve_queue_len);
		list_add(&c->list, &mrt->mfc_unres_queue);

		if (atomic_read(&mrt->cache_resolve_queue_len) == 1)
			mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires);
	}

	/*
	 * See if we can append the packet
	 */
	if (c->mfc_un.unres.unresolved.qlen > 3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}

/*
 * MFC cache manipulation by user space mroute daemon
 */

static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc)
{
	int line;
	struct mfc_cache *c, *next;

	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[line], list) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
			write_lock_bh(&mrt_lock);
			list_del(&c->list);
			write_unlock_bh(&mrt_lock);

			ipmr_cache_free(c);
			return 0;
		}
	}
	return -ENOENT;
}

static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
			struct mfcctl *mfc, int mrtsock)
{
	bool found = false;
	int line;
	struct mfc_cache *uc, *c;

	if (mfc->mfcc_parent >= MAXVIFS)
		return -ENFILE;

	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
			found = true;
			break;
		}
	}

	if (found) {
		write_lock_bh(&mrt_lock);
		c->mfc_parent = mfc->mfcc_parent;
		ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		return 0;
	}

	if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
		return -EINVAL;

	c = ipmr_cache_alloc();
	if (c == NULL)
		return -ENOMEM;

	c->mfc_origin = mfc->mfcc_origin.s_addr;
	c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
	c->mfc_parent = mfc->mfcc_parent;
	ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	write_lock_bh(&mrt_lock);
	list_add(&c->list, &mrt->mfc_cache_array[line]);
	write_unlock_bh(&mrt_lock);

	/*
	 * Check to see if we resolved a queued list. If so we
	 * need to send on the frames and tidy up.
	 */
	found = false;
	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(uc, &mrt->mfc_unres_queue, list) {
		if (uc->mfc_origin == c->mfc_origin &&
		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
			list_del(&uc->list);
			atomic_dec(&mrt->cache_resolve_queue_len);
			found = true;
			break;
		}
	}
	if (list_empty(&mrt->mfc_unres_queue))
		del_timer(&mrt->ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	if (found) {
		ipmr_cache_resolve(net, mrt, uc, c);
		ipmr_cache_free(uc);
	}
	return 0;
}

/*
 * Close the multicast socket, and clear the vif tables etc.
 */

static void mroute_clean_tables(struct mr_table *mrt)
{
	int i;
	LIST_HEAD(list);
	struct mfc_cache *c, *next;

	/*
	 * Shut down all active vif entries
	 */
	for (i = 0; i < mrt->maxvif; i++) {
		if (!(mrt->vif_table[i].flags & VIFF_STATIC))
			vif_delete(mrt, i, 0, &list);
	}
	unregister_netdevice_many(&list);

	/*
	 * Wipe the cache
	 */
	for (i = 0; i < MFC_LINES; i++) {
		list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) {
			if (c->mfc_flags & MFC_STATIC)
				continue;
			write_lock_bh(&mrt_lock);
			list_del(&c->list);
			write_unlock_bh(&mrt_lock);

			ipmr_cache_free(c);
		}
	}

	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
		spin_lock_bh(&mfc_unres_lock);
		list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
			list_del(&c->list);
			ipmr_destroy_unres(mrt, c);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}

/* called from ip_ra_control(), before an RCU grace period,
 * we don't need to call synchronize_rcu() here
 */
static void mrtsock_destruct(struct sock *sk)
{
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	rtnl_lock();
	ipmr_for_each_table(mrt, net) {
		if (sk == rtnl_dereference(mrt->mroute_sk)) {
			IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
			rcu_assign_pointer(mrt->mroute_sk, NULL);
			mroute_clean_tables(mrt);
		}
	}
	rtnl_unlock();
}

/*
 * Socket options and virtual interface manipulation. The whole
 * virtual interface system is a complete heap, but unfortunately
 * that's how BSD mrouted happens to think. Maybe one day with a proper
 * MOSPF/PIM router set up we can clean this up.
 */
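/* Userspace sketch (illustrative, not part of this file): a routing
 * daemon such as mrouted drives this interface roughly as
 *
 *	int s = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
 *	int one = 1;
 *	setsockopt(s, IPPROTO_IP, MRT_INIT, &one, sizeof(one));
 *
 *	struct vifctl vc = { .vifc_vifi = 0, .vifc_threshold = 1 };
 *	vc.vifc_lcl_addr.s_addr = inet_addr("192.0.2.1");
 *	setsockopt(s, IPPROTO_IP, MRT_ADD_VIF, &vc, sizeof(vc));
 *
 * and MRT_DONE on shutdown. MRT_INIT is rejected below unless the
 * socket really is SOCK_RAW/IPPROTO_IGMP.
 */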
int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
{
	int ret;
	struct vifctl vif;
	struct mfcctl mfc;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return -ENOENT;

	if (optname != MRT_INIT) {
		if (sk != rcu_dereference_raw(mrt->mroute_sk) &&
		    !capable(CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT_INIT:
		if (sk->sk_type != SOCK_RAW ||
		    inet_sk(sk)->inet_num != IPPROTO_IGMP)
			return -EOPNOTSUPP;
		if (optlen != sizeof(int))
			return -ENOPROTOOPT;

		rtnl_lock();
		if (rtnl_dereference(mrt->mroute_sk)) {
			rtnl_unlock();
			return -EADDRINUSE;
		}

		ret = ip_ra_control(sk, 1, mrtsock_destruct);
		if (ret == 0) {
			rcu_assign_pointer(mrt->mroute_sk, sk);
			IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
		}
		rtnl_unlock();
		return ret;
	case MRT_DONE:
		if (sk != rcu_dereference_raw(mrt->mroute_sk))
			return -EACCES;
		return ip_ra_control(sk, 0, NULL);
	case MRT_ADD_VIF:
	case MRT_DEL_VIF:
		if (optlen != sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.vifc_vifi >= MAXVIFS)
			return -ENFILE;
		rtnl_lock();
		if (optname == MRT_ADD_VIF) {
			ret = vif_add(net, mrt, &vif,
				      sk == rtnl_dereference(mrt->mroute_sk));
		} else {
			ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL);
		}
		rtnl_unlock();
		return ret;

		/*
		 * Manipulate the forwarding caches. These live
		 * in a sort of kernel/user symbiosis.
		 */
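		/* Illustrative MFC update (sketch): adding a route for
		 * (S, G) = (10.0.0.1, 239.1.1.1) that arrives on vif 0
		 * and is forwarded to vif 1 with TTL threshold 1:
		 *
		 *	struct mfcctl mc = { 0 };
		 *	mc.mfcc_origin.s_addr   = inet_addr("10.0.0.1");
		 *	mc.mfcc_mcastgrp.s_addr = inet_addr("239.1.1.1");
		 *	mc.mfcc_parent          = 0;
		 *	mc.mfcc_ttls[1]         = 1;
		 *	setsockopt(s, IPPROTO_IP, MRT_ADD_MFC, &mc, sizeof(mc));
		 */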
	case MRT_ADD_MFC:
	case MRT_DEL_MFC:
		if (optlen != sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		rtnl_lock();
		if (optname == MRT_DEL_MFC)
			ret = ipmr_mfc_delete(mrt, &mfc);
		else
			ret = ipmr_mfc_add(net, mrt, &mfc,
					   sk == rtnl_dereference(mrt->mroute_sk));
		rtnl_unlock();
		return ret;
		/*
		 * Control PIM assert.
		 */
	case MRT_ASSERT:
	{
		int v;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		mrt->mroute_do_assert = (v) ? 1 : 0;
		return 0;
	}
#ifdef CONFIG_IP_PIMSM
	case MRT_PIM:
	{
		int v;

		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		v = (v) ? 1 : 0;

		rtnl_lock();
		ret = 0;
		if (v != mrt->mroute_do_pim) {
			mrt->mroute_do_pim = v;
			mrt->mroute_do_assert = v;
		}
		rtnl_unlock();
		return ret;
	}
#endif
#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
	case MRT_TABLE:
	{
		u32 v;

		if (optlen != sizeof(u32))
			return -EINVAL;
		if (get_user(v, (u32 __user *)optval))
			return -EFAULT;

		rtnl_lock();
		ret = 0;
		if (sk == rtnl_dereference(mrt->mroute_sk)) {
			ret = -EBUSY;
		} else {
			if (!ipmr_new_table(net, v))
				ret = -ENOMEM;
			raw_sk(sk)->ipmr_table = v;
		}
		rtnl_unlock();
		return ret;
	}
#endif
	/*
	 * Spurious command, or MRT_VERSION which you cannot set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}

/*
 * Getsockopt support for the multicast routing system.
 */

int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
{
	int olr;
	int val;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return -ENOENT;

	if (optname != MRT_VERSION &&
#ifdef CONFIG_IP_PIMSM
	    optname != MRT_PIM &&
#endif
	    optname != MRT_ASSERT)
		return -ENOPROTOOPT;

	if (get_user(olr, optlen))
		return -EFAULT;

	olr = min_t(unsigned int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (put_user(olr, optlen))
		return -EFAULT;
	if (optname == MRT_VERSION)
		val = 0x0305;
#ifdef CONFIG_IP_PIMSM
	else if (optname == MRT_PIM)
		val = mrt->mroute_do_pim;
#endif
	else
		val = mrt->mroute_do_assert;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}

/*
 * The IP multicast ioctl support routines.
 */
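/* Userspace counter queries (sketch): the per-vif and per-flow
 * counters below are read with ioctls on the mrouted socket, e.g.
 *
 *	struct sioc_vif_req vr = { .vifi = 0 };
 *	ioctl(s, SIOCGETVIFCNT, &vr);	fills icount/ocount/ibytes/obytes
 *
 *	struct sioc_sg_req sr = { 0 };
 *	sr.src.s_addr = inet_addr("10.0.0.1");
 *	sr.grp.s_addr = inet_addr("239.1.1.1");
 *	ioctl(s, SIOCGETSGCNT, &sr);	fills pktcnt/bytecnt/wrong_if
 */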
int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req sr;
	struct sioc_vif_req vr;
	struct vif_device *vif;
	struct mfc_cache *c;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETVIFCNT:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.vifi >= mrt->maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &mrt->vif_table[vr.vifi];
		if (VIF_EXISTS(mrt, vr.vifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}

static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;
	struct net *net = dev_net(dev);
	struct mr_table *mrt;
	struct vif_device *v;
	int ct;
	LIST_HEAD(list);

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;

	ipmr_for_each_table(mrt, net) {
		v = &mrt->vif_table[0];
		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
			if (v->dev == dev)
				vif_delete(mrt, ct, 1, &list);
		}
	}
	unregister_netdevice_many(&list);
	return NOTIFY_DONE;
}

static struct notifier_block ip_mr_notifier = {
	.notifier_call = ipmr_device_event,
};

/*
 * Encapsulate a packet by attaching a valid IPIP header to it.
 * This avoids tunnel drivers and other mess and gives us the speed so
 * important for multicast video.
 */
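/* Resulting wire layout (sketch): ip_encap() prepends one outer IPv4
 * header in place, so a tunnelled frame leaves as
 *
 *	[ outer IP, proto IPPROTO_IPIP | original IP header | payload ]
 *
 * with the outer saddr/daddr taken from the vif's local and remote
 * tunnel endpoints.
 */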
static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
	struct iphdr *iph;
	struct iphdr *old_iph = ip_hdr(skb);

	skb_push(skb, sizeof(struct iphdr));
	skb->transport_header = skb->network_header;
	skb_reset_network_header(skb);
	iph = ip_hdr(skb);

	iph->version	= 4;
	iph->tos	= old_iph->tos;
	iph->ttl	= old_iph->ttl;
	iph->frag_off	= 0;
	iph->daddr	= daddr;
	iph->saddr	= saddr;
	iph->protocol	= IPPROTO_IPIP;
	iph->ihl	= 5;
	iph->tot_len	= htons(skb->len);
	ip_select_ident(iph, skb_dst(skb), NULL);
	ip_send_check(iph);

	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	nf_reset(skb);
}

static inline int ipmr_forward_finish(struct sk_buff *skb)
{
	struct ip_options *opt = &(IPCB(skb)->opt);

	IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);

	if (unlikely(opt->optlen))
		ip_forward_options(skb);

	return dst_output(skb);
}

/*
 * Processing handlers for ipmr_forward
 */

static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
			    struct sk_buff *skb, struct mfc_cache *c, int vifi)
{
	const struct iphdr *iph = ip_hdr(skb);
	struct vif_device *vif = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct rtable *rt;
	int encap = 0;

	if (vif->dev == NULL)
		goto out_free;

#ifdef CONFIG_IP_PIMSM
	if (vif->flags & VIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT);
		goto out_free;
	}
#endif

	if (vif->flags & VIFF_TUNNEL) {
		struct flowi fl = { .oif = vif->link,
				    .nl_u = { .ip4_u =
					      { .daddr = vif->remote,
						.saddr = vif->local,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(net, &rt, &fl))
			goto out_free;
		encap = sizeof(struct iphdr);
	} else {
		struct flowi fl = { .oif = vif->link,
				    .nl_u = { .ip4_u =
					      { .daddr = iph->daddr,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(net, &rt, &fl))
			goto out_free;
	}

	dev = rt->dst.dev;

	if (skb->len + encap > dst_mtu(&rt->dst) && (ntohs(iph->frag_off) & IP_DF)) {
		/* Do not fragment multicasts. Alas, IPv4 does not
		 * allow sending ICMP here, so the packets simply
		 * disappear into a black hole.
		 */

		IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
		ip_rt_put(rt);
		goto out_free;
	}

	encap += LL_RESERVED_SPACE(dev) + rt->dst.header_len;

	if (skb_cow(skb, encap)) {
		ip_rt_put(rt);
		goto out_free;
	}

	vif->pkt_out++;
	vif->bytes_out += skb->len;

	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);
	ip_decrease_ttl(ip_hdr(skb));

	/* FIXME: forward and output firewalls used to be called here.
	 * What do we do with netfilter? -- RR
	 */
	if (vif->flags & VIFF_TUNNEL) {
		ip_encap(skb, vif->local, vif->remote);
		/* FIXME: extra output firewall step used to be here. --RR */
		vif->dev->stats.tx_packets++;
		vif->dev->stats.tx_bytes += skb->len;
	}

	IPCB(skb)->flags |= IPSKB_FORWARDED;

	/*
	 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets
	 * locally not only before forwarding, but also after forwarding
	 * on all output interfaces. Clearly, if an mrouter runs a
	 * multicasting program, it should receive packets regardless of
	 * which interface the program joined on. Otherwise the program
	 * would have to join on all interfaces. On the other hand, a
	 * multihomed host (or router, but not mrouter) cannot join on
	 * more than one interface, as that would result in receiving
	 * duplicate packets.
	 */
	NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, skb, skb->dev, dev,
		ipmr_forward_finish);
	return;

out_free:
	kfree_skb(skb);
}

static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
{
	int ct;

	for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
		if (mrt->vif_table[ct].dev == dev)
			break;
	}
	return ct;
}

/* "local" means that we should preserve one skb (for local delivery) */

static int ip_mr_forward(struct net *net, struct mr_table *mrt,
			 struct sk_buff *skb, struct mfc_cache *cache,
			 int local)
{
	int psend = -1;
	int vif, ct;

	vif = cache->mfc_parent;
	cache->mfc_un.res.pkt++;
	cache->mfc_un.res.bytes += skb->len;

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (mrt->vif_table[vif].dev != skb->dev) {
		int true_vifi;

		if (skb_rtable(skb)->fl.iif == 0) {
			/* It is our own packet, looped back.
			 * Very complicated situation...
			 *
			 * The best workaround until routing daemons are
			 * fixed is not to redistribute a packet if it was
			 * sent through the wrong interface. It means that
			 * multicast applications WILL NOT work for
			 * (S,G) entries whose default multicast route
			 * points to the wrong oif. In any case, it is not
			 * a good idea to use multicasting applications on
			 * a router.
			 */
			goto dont_forward;
		}

		cache->mfc_un.res.wrong_if++;
		true_vifi = ipmr_find_vif(mrt, skb->dev);

		if (true_vifi >= 0 && mrt->mroute_do_assert &&
		    /* pimsm uses asserts, when switching from RPT to SPT,
		     * so that we cannot check that packet arrived on an
		     * oif. It is bad, but otherwise we would need to move
		     * a pretty large chunk of pimd into the kernel.
		     * Ough... --ANK
		     */
		    (mrt->mroute_do_pim ||
		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
			cache->mfc_un.res.last_assert = jiffies;
			ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
		}
		goto dont_forward;
	}

	mrt->vif_table[vif].pkt_in++;
	mrt->vif_table[vif].bytes_in += skb->len;

	/*
	 * Forward the frame
	 */
	for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
		if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ipmr_queue_xmit(net, mrt, skb2, cache,
							psend);
			}
			psend = ct;
		}
	}
	if (psend != -1) {
		if (local) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			if (skb2)
				ipmr_queue_xmit(net, mrt, skb2, cache, psend);
		} else {
			ipmr_queue_xmit(net, mrt, skb, cache, psend);
			return 0;
		}
	}

dont_forward:
	if (!local)
		kfree_skb(skb);
	return 0;
}

/*
 * Multicast packets for forwarding arrive here
 * Called with rcu_read_lock();
 */

int ip_mr_input(struct sk_buff *skb)
{
	struct mfc_cache *cache;
	struct net *net = dev_net(skb->dev);
	int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
	struct mr_table *mrt;
	int err;

	/* Packet is looped back after forward, it should not be
	 * forwarded a second time, but it can still be delivered locally.
	 */
	if (IPCB(skb)->flags & IPSKB_FORWARDED)
		goto dont_forward;

	err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt);
	if (err < 0) {
		kfree_skb(skb);
		return err;
	}

	if (!local) {
		if (IPCB(skb)->opt.router_alert) {
			if (ip_call_ra_chain(skb))
				return 0;
		} else if (ip_hdr(skb)->protocol == IPPROTO_IGMP) {
			/* IGMPv1 (and broken IGMPv2 implementations such
			 * as Cisco IOS <= 11.2(8)) do not put the router
			 * alert option into IGMP packets destined to
			 * routable groups. It is very bad, because it
			 * means that we can forward NO IGMP messages.
			 */
			struct sock *mroute_sk;

			mroute_sk = rcu_dereference(mrt->mroute_sk);
			if (mroute_sk) {
				nf_reset(skb);
				raw_rcv(mroute_sk, skb);
				return 0;
			}
		}
	}

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);

	/*
	 * No usable cache entry
	 */
	if (cache == NULL) {
		int vif;

		if (local) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			ip_local_deliver(skb);
			if (skb2 == NULL) {
				read_unlock(&mrt_lock);
				return -ENOBUFS;
			}
			skb = skb2;
		}

		vif = ipmr_find_vif(mrt, skb->dev);
		if (vif >= 0) {
			int err2 = ipmr_cache_unresolved(mrt, vif, skb);
			read_unlock(&mrt_lock);

			return err2;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip_mr_forward(net, mrt, skb, cache, local);

	read_unlock(&mrt_lock);

	if (local)
		return ip_local_deliver(skb);

	return 0;

dont_forward:
	if (local)
		return ip_local_deliver(skb);
	kfree_skb(skb);
	return 0;
}

#ifdef CONFIG_IP_PIMSM
/* called with rcu_read_lock() */
static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
		     unsigned int pimlen)
{
	struct net_device *reg_dev = NULL;
	struct iphdr *encap;

	encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
	/*
	 * Check that:
	 * a. packet is really destined to a multicast group
	 * b. packet is not a NULL-REGISTER
	 * c. packet is not truncated
	 */
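	/* Register message layout (sketch): what arrives here is
	 *
	 *	[ outer IP | PIM header, pimlen bytes | inner IP | data ]
	 *
	 * On success the skb is rewound so that the inner IP header
	 * becomes the network header before re-injection via netif_rx().
	 */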
	if (!ipv4_is_multicast(encap->daddr) ||
	    encap->tot_len == 0 ||
	    ntohs(encap->tot_len) + pimlen > skb->len)
		return 1;

	read_lock(&mrt_lock);
	if (mrt->mroute_reg_vif_num >= 0)
		reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev;
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		return 1;

	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->protocol = htons(ETH_P_IP);
	skb->ip_summed = CHECKSUM_NONE;
	skb->pkt_type = PACKET_HOST;

	skb_tunnel_rx(skb, reg_dev);

	netif_rx(skb);

	return NET_RX_SUCCESS;
}
#endif

#ifdef CONFIG_IP_PIMSM_V1
/*
 * Handle IGMP messages of PIMv1
 */

int pim_rcv_v1(struct sk_buff *skb)
{
	struct igmphdr *pim;
	struct net *net = dev_net(skb->dev);
	struct mr_table *mrt;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
		goto drop;

	pim = igmp_hdr(skb);

	if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
		goto drop;

	if (!mrt->mroute_do_pim ||
	    pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
		goto drop;

	if (__pim_rcv(mrt, skb, sizeof(*pim))) {
drop:
		kfree_skb(skb);
	}
	return 0;
}
#endif

#ifdef CONFIG_IP_PIMSM_V2
static int pim_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;
	struct net *net = dev_net(skb->dev);
	struct mr_table *mrt;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
		goto drop;

	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION << 4) | (PIM_REGISTER)) ||
	    (pim->flags & PIM_NULL_REGISTER) ||
	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
		goto drop;

	if (__pim_rcv(mrt, skb, sizeof(*pim))) {
drop:
		kfree_skb(skb);
	}
	return 0;
}
#endif

static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
			      struct mfc_cache *c, struct rtmsg *rtm)
{
	int ct;
	struct rtnexthop *nhp;
	u8 *b = skb_tail_pointer(skb);
	struct rtattr *mp_head;

	/* If cache is unresolved, don't try to parse IIF and OIF */
	if (c->mfc_parent >= MAXVIFS)
		return -ENOENT;

	if (VIF_EXISTS(mrt, c->mfc_parent))
		RTA_PUT(skb, RTA_IIF, 4, &mrt->vif_table[c->mfc_parent].dev->ifindex);

	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));

	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
				goto rtattr_failure;
			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
			nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex;
			nhp->rtnh_len = sizeof(*nhp);
		}
	}
	mp_head->rta_type = RTA_MULTIPATH;
	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
	rtm->rtm_type = RTN_MULTICAST;
	return 1;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -EMSGSIZE;
}

int ipmr_get_route(struct net *net,
		   struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
	int err;
	struct mr_table *mrt;
	struct mfc_cache *cache;
	struct rtable *rt = skb_rtable(skb);

	mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return -ENOENT;

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(mrt, rt->rt_src, rt->rt_dst);

	if (cache == NULL) {
		struct sk_buff *skb2;
		struct iphdr *iph;
		struct net_device *dev;
		int vif;

		if (nowait) {
			read_unlock(&mrt_lock);
			return -EAGAIN;
		}

		dev = skb->dev;
		if (dev == NULL || (vif = ipmr_find_vif(mrt, dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}
		skb2 = skb_clone(skb, GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		skb_push(skb2, sizeof(struct iphdr));
		skb_reset_network_header(skb2);
		iph = ip_hdr(skb2);
		iph->ihl = sizeof(struct iphdr) >> 2;
		iph->saddr = rt->rt_src;
		iph->daddr = rt->rt_dst;
		iph->version = 0;
		err = ipmr_cache_unresolved(mrt, vif, skb2);
		read_unlock(&mrt_lock);
		return err;
	}

	if (!nowait && (rtm->rtm_flags & RTM_F_NOTIFY))
		cache->mfc_flags |= MFC_NOTIFY;
	err = __ipmr_fill_mroute(mrt, skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}

static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
			    u32 pid, u32 seq, struct mfc_cache *c)
{
	struct nlmsghdr *nlh;
	struct rtmsg *rtm;

	nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI);
	if (nlh == NULL)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family   = RTNL_FAMILY_IPMR;
	rtm->rtm_dst_len  = 32;
	rtm->rtm_src_len  = 32;
	rtm->rtm_tos      = 0;
	rtm->rtm_table    = mrt->id;
	NLA_PUT_U32(skb, RTA_TABLE, mrt->id);
	rtm->rtm_type     = RTN_MULTICAST;
	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
	rtm->rtm_protocol = RTPROT_UNSPEC;
	rtm->rtm_flags    = 0;

	NLA_PUT_BE32(skb, RTA_SRC, c->mfc_origin);
	NLA_PUT_BE32(skb, RTA_DST, c->mfc_mcastgrp);

	if (__ipmr_fill_mroute(mrt, skb, c, rtm) < 0)
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct mr_table *mrt;
	struct mfc_cache *mfc;
	unsigned int t = 0, s_t;
	unsigned int h = 0, s_h;
	unsigned int e = 0, s_e;

	s_t = cb->args[0];
	s_h = cb->args[1];
	s_e = cb->args[2];

	read_lock(&mrt_lock);
	ipmr_for_each_table(mrt, net) {
		if (t < s_t)
			goto next_table;
		if (t > s_t)
			s_h = 0;
		for (h = s_h; h < MFC_LINES; h++) {
			list_for_each_entry(mfc, &mrt->mfc_cache_array[h], list) {
				if (e < s_e)
					goto next_entry;
				if (ipmr_fill_mroute(mrt, skb,
						     NETLINK_CB(cb->skb).pid,
						     cb->nlh->nlmsg_seq,
						     mfc) < 0)
					goto done;
next_entry:
				e++;
			}
			e = 0;
			s_e = 0;
		}
next_table:
		t++;
	}
done:
	read_unlock(&mrt_lock);

	cb->args[2] = e;
	cb->args[1] = h;
	cb->args[0] = t;

	return skb->len;
}

#ifdef CONFIG_PROC_FS
/*
 * The /proc interfaces to multicast routing:
 * /proc/net/ip_mr_cache and /proc/net/ip_mr_vif
 */
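/* Example /proc/net/ip_mr_vif output (illustrative values):
 *
 *	Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote
 *	 0 eth0            1500      10      1500      10 00000 0100000A 00000000
 *
 * matching the header and row format emitted by ipmr_vif_seq_show()
 * below.
 */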
struct ipmr_vif_iter {
	struct seq_net_private p;
	struct mr_table *mrt;
	int ct;
};

static struct vif_device *ipmr_vif_seq_idx(struct net *net,
					    struct ipmr_vif_iter *iter,
					    loff_t pos)
{
	struct mr_table *mrt = iter->mrt;

	for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
		if (!VIF_EXISTS(mrt, iter->ct))
			continue;
		if (pos-- == 0)
			return &mrt->vif_table[iter->ct];
	}
	return NULL;
}

static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return ERR_PTR(-ENOENT);

	iter->mrt = mrt;

	read_lock(&mrt_lock);
	return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}

static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt = iter->mrt;

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ipmr_vif_seq_idx(net, iter, 0);

	while (++iter->ct < mrt->maxvif) {
		if (!VIF_EXISTS(mrt, iter->ct))
			continue;
		return &mrt->vif_table[iter->ct];
	}
	return NULL;
}

static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}

static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct mr_table *mrt = iter->mrt;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
	} else {
		const struct vif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";

		seq_printf(seq,
			   "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
			   vif - mrt->vif_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags, vif->local, vif->remote);
	}
	return 0;
}

static const struct seq_operations ipmr_vif_seq_ops = {
	.start = ipmr_vif_seq_start,
	.next  = ipmr_vif_seq_next,
	.stop  = ipmr_vif_seq_stop,
	.show  = ipmr_vif_seq_show,
};

static int ipmr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_vif_seq_ops,
			    sizeof(struct ipmr_vif_iter));
}

static const struct file_operations ipmr_vif_fops = {
	.owner	 = THIS_MODULE,
	.open	 = ipmr_vif_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release_net,
};

struct ipmr_mfc_iter {
	struct seq_net_private p;
	struct mr_table *mrt;
	struct list_head *cache;
	int ct;
};

static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
					  struct ipmr_mfc_iter *it, loff_t pos)
{
	struct mr_table *mrt = it->mrt;
	struct mfc_cache *mfc;

	read_lock(&mrt_lock);
	for (it->ct = 0; it->ct < MFC_LINES; it->ct++) {
		it->cache = &mrt->mfc_cache_array[it->ct];
		list_for_each_entry(mfc, it->cache, list)
			if (pos-- == 0)
				return mfc;
	}
	read_unlock(&mrt_lock);

	spin_lock_bh(&mfc_unres_lock);
	it->cache = &mrt->mfc_unres_queue;
	list_for_each_entry(mfc, it->cache, list)
		if (pos-- == 0)
			return mfc;
	spin_unlock_bh(&mfc_unres_lock);

	it->cache = NULL;
	return NULL;
}

static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return ERR_PTR(-ENOENT);

	it->mrt = mrt;
	it->cache = NULL;
	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}

static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct mfc_cache *mfc = v;
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt = it->mrt;

	++*pos;

	if (v == SEQ_START_TOKEN)
		return ipmr_mfc_seq_idx(net, seq->private, 0);

	if (mfc->list.next != it->cache)
		return list_entry(mfc->list.next, struct mfc_cache, list);

	if (it->cache == &mrt->mfc_unres_queue)
		goto end_of_list;

	BUG_ON(it->cache != &mrt->mfc_cache_array[it->ct]);

	while (++it->ct < MFC_LINES) {
		it->cache = &mrt->mfc_cache_array[it->ct];
		if (list_empty(it->cache))
			continue;
		return list_first_entry(it->cache, struct mfc_cache, list);
	}

	/* exhausted cache_array, show unresolved */
	read_unlock(&mrt_lock);
	it->cache = &mrt->mfc_unres_queue;
	it->ct = 0;

	spin_lock_bh(&mfc_unres_lock);
	if (!list_empty(it->cache))
		return list_first_entry(it->cache, struct mfc_cache, list);

end_of_list:
	spin_unlock_bh(&mfc_unres_lock);
	it->cache = NULL;

	return NULL;
}

static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct mr_table *mrt = it->mrt;

	if (it->cache == &mrt->mfc_unres_queue)
		spin_unlock_bh(&mfc_unres_lock);
	else if (it->cache == &mrt->mfc_cache_array[it->ct])
		read_unlock(&mrt_lock);
}

static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
	} else {
		const struct mfc_cache *mfc = v;
		const struct ipmr_mfc_iter *it = seq->private;
		const struct mr_table *mrt = it->mrt;

		seq_printf(seq, "%08X %08X %-3hd",
			   (__force u32) mfc->mfc_mcastgrp,
			   (__force u32) mfc->mfc_origin,
			   mfc->mfc_parent);

		if (it->cache != &mrt->mfc_unres_queue) {
			seq_printf(seq, " %8lu %8lu %8lu",
				   mfc->mfc_un.res.pkt,
				   mfc->mfc_un.res.bytes,
				   mfc->mfc_un.res.wrong_if);
			for (n = mfc->mfc_un.res.minvif;
			     n < mfc->mfc_un.res.maxvif; n++) {
				if (VIF_EXISTS(mrt, n) &&
				    mfc->mfc_un.res.ttls[n] < 255)
					seq_printf(seq, " %2d:%-3d",
						   n, mfc->mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}

static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = ipmr_mfc_seq_next,
	.stop  = ipmr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};

static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
			    sizeof(struct ipmr_mfc_iter));
}

static const struct file_operations ipmr_mfc_fops = {
	.owner	 = THIS_MODULE,
	.open	 = ipmr_mfc_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release_net,
};
#endif

#ifdef CONFIG_IP_PIMSM_V2
static const struct net_protocol pim_protocol = {
	.handler	= pim_rcv,
	.netns_ok	= 1,
};
#endif

/*
 * Setup for IP multicast routing
 */
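/* Initialisation order (see ip_mr_init() below): the mfc kmem cache
 * first, then per-namespace state (tables and /proc files) via
 * register_pernet_subsys(), the netdevice notifier, the optional
 * PIMv2 protocol handler, and finally the RTNL dump hook. The error
 * path unwinds in exactly the reverse order.
 */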
static int __net_init ipmr_net_init(struct net *net)
{
	int err;

	err = ipmr_rules_init(net);
	if (err < 0)
		goto fail;

#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
		goto proc_vif_fail;
	if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
		goto proc_cache_fail;
#endif
	return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
	proc_net_remove(net, "ip_mr_vif");
proc_vif_fail:
	ipmr_rules_exit(net);
#endif
fail:
	return err;
}

static void __net_exit ipmr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(net, "ip_mr_cache");
	proc_net_remove(net, "ip_mr_vif");
#endif
	ipmr_rules_exit(net);
}

static struct pernet_operations ipmr_net_ops = {
	.init = ipmr_net_init,
	.exit = ipmr_net_exit,
};

int __init ip_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip_mrt_cache",
				       sizeof(struct mfc_cache),
				       0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
				       NULL);
	if (!mrt_cachep)
		return -ENOMEM;

	err = register_pernet_subsys(&ipmr_net_ops);
	if (err)
		goto reg_pernet_fail;

	err = register_netdevice_notifier(&ip_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_IP_PIMSM_V2
	if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
		printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n");
		err = -EAGAIN;
		goto add_proto_fail;
	}
#endif
	rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, NULL, ipmr_rtm_dumproute);
	return 0;

#ifdef CONFIG_IP_PIMSM_V2
add_proto_fail:
	unregister_netdevice_notifier(&ip_mr_notifier);
#endif
reg_notif_fail:
	unregister_pernet_subsys(&ipmr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}