net: Add optional SKB arg to dst_ops->neigh_lookup().

[firefly-linux-kernel-4.4.55.git] / net / ipv6 / route.c
diff --git a/net/ipv6/route.c b/net/ipv6/route.c

index 999a982ad3fd7d7abac40211b50320fc4c038109..4b581c675bb2ba1f162313f0e4d717fdeff4d2b9 100644 (file)
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -99,10 +99,7 @@ static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
         if (!(rt->dst.flags & DST_HOST))
                 return NULL;
  
-       if (!rt->rt6i_peer)
-               rt6_bind_peer(rt, 1);
-
-       peer = rt->rt6i_peer;
+       peer = rt6_get_peer_create(rt);
         if (peer) {
                 u32 *old_p = __DST_METRICS_PTR(old);
                 unsigned long prev, new;
@@ -123,21 +120,27 @@ static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
         return p;
  }
  
-static inline const void *choose_neigh_daddr(struct rt6_info *rt, const void *daddr)
+static inline const void *choose_neigh_daddr(struct rt6_info *rt,
+                                            struct sk_buff *skb,
+                                            const void *daddr)
  {
         struct in6_addr *p = &rt->rt6i_gateway;
  
         if (!ipv6_addr_any(p))
                 return (const void *) p;
+       else if (skb)
+               return &ipv6_hdr(skb)->daddr;
         return daddr;
  }
  
-static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
+static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
+                                         struct sk_buff *skb,
+                                         const void *daddr)
  {
         struct rt6_info *rt = (struct rt6_info *) dst;
         struct neighbour *n;
  
-       daddr = choose_neigh_daddr(rt, daddr);
+       daddr = choose_neigh_daddr(rt, skb, daddr);
         n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
         if (n)
                 return n;
@@ -261,16 +264,19 @@ static struct rt6_info ip6_blk_hole_entry_template = {
  #endif
  
  /* allocate dst with ip6_dst_ops */
-static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
+static inline struct rt6_info *ip6_dst_alloc(struct net *net,
                                              struct net_device *dev,
-                                            int flags)
+                                            int flags,
+                                            struct fib6_table *table)
  {
-       struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
+       struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
+                                       0, 0, flags);
  
-       if (rt)
+       if (rt) {
                 memset(&rt->rt6i_table, 0,
                        sizeof(*rt) - sizeof(struct dst_entry));
-
+               rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
+       }
         return rt;
  }
  
@@ -278,7 +284,6 @@ static void ip6_dst_destroy(struct dst_entry *dst)
  {
         struct rt6_info *rt = (struct rt6_info *)dst;
         struct inet6_dev *idev = rt->rt6i_idev;
-       struct inet_peer *peer = rt->rt6i_peer;
  
         if (!(rt->dst.flags & DST_HOST))
                 dst_destroy_metrics_generic(dst);
@@ -291,8 +296,8 @@ static void ip6_dst_destroy(struct dst_entry *dst)
         if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
                 dst_release(dst->from);
  
-       if (peer) {
-               rt->rt6i_peer = NULL;
+       if (rt6_has_peer(rt)) {
+               struct inet_peer *peer = rt6_peer_ptr(rt);
                 inet_putpeer(peer);
         }
  }
@@ -306,13 +311,20 @@ static u32 rt6_peer_genid(void)
  
  void rt6_bind_peer(struct rt6_info *rt, int create)
  {
+       struct inet_peer_base *base;
         struct inet_peer *peer;
  
-       peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
-       if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
-               inet_putpeer(peer);
-       else
-               rt->rt6i_peer_genid = rt6_peer_genid();
+       base = inetpeer_base_ptr(rt->_rt6i_peer);
+       if (!base)
+               return;
+
+       peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
+       if (peer) {
+               if (!rt6_set_peer(rt, peer))
+                       inet_putpeer(peer);
+               else
+                       rt->rt6i_peer_genid = rt6_peer_genid();
+       }
  }
  
  static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
@@ -931,6 +943,8 @@ struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
  {
         int flags = 0;
  
+       fl6->flowi6_iif = net->loopback_dev->ifindex;
+
         if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
                 flags |= RT6_LOOKUP_F_IFACE;
  
@@ -952,6 +966,7 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
         rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
         if (rt) {
                 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
+               rt6_init_peer(rt, net->ipv6.peers);
  
                 new = &rt->dst;
  
@@ -996,7 +1011,7 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
  
         if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
                 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
-                       if (!rt->rt6i_peer)
+                       if (!rt6_has_peer(rt))
                                 rt6_bind_peer(rt, 0);
                         rt->rt6i_peer_genid = rt6_peer_genid();
                 }
@@ -1042,7 +1057,10 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
  {
         struct rt6_info *rt6 = (struct rt6_info*)dst;
  
+       dst_confirm(dst);
         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
+               struct net *net = dev_net(dst->dev);
+
                 rt6->rt6i_flags |= RTF_MODIFIED;
                 if (mtu < IPV6_MIN_MTU) {
                         u32 features = dst_metric(dst, RTAX_FEATURES);
@@ -1051,9 +1069,39 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
                         dst_metric_set(dst, RTAX_FEATURES, features);
                 }
                 dst_metric_set(dst, RTAX_MTU, mtu);
+               rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
         }
  }
  
+void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
+                    int oif, u32 mark)
+{
+       const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
+       struct dst_entry *dst;
+       struct flowi6 fl6;
+
+       memset(&fl6, 0, sizeof(fl6));
+       fl6.flowi6_oif = oif;
+       fl6.flowi6_mark = mark;
+       fl6.flowi6_flags = FLOWI_FLAG_PRECOW_METRICS;
+       fl6.daddr = iph->daddr;
+       fl6.saddr = iph->saddr;
+       fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
+
+       dst = ip6_route_output(net, NULL, &fl6);
+       if (!dst->error)
+               ip6_rt_update_pmtu(dst, ntohl(mtu));
+       dst_release(dst);
+}
+EXPORT_SYMBOL_GPL(ip6_update_pmtu);
+
+void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
+{
+       ip6_update_pmtu(skb, sock_net(sk), mtu,
+                       sk->sk_bound_dev_if, sk->sk_mark);
+}
+EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
+
  static unsigned int ip6_default_advmss(const struct dst_entry *dst)
  {
         struct net_device *dev = dst->dev;
@@ -1110,7 +1158,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
         if (unlikely(!idev))
                 return ERR_PTR(-ENODEV);
  
-       rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
+       rt = ip6_dst_alloc(net, dev, 0, NULL);
         if (unlikely(!rt)) {
                 in6_dev_put(idev);
                 dst = ERR_PTR(-ENOMEM);
@@ -1120,7 +1168,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
         if (neigh)
                 neigh_hold(neigh);
         else {
-               neigh = ip6_neigh_lookup(&rt->dst, &fl6->daddr);
+               neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr);
                 if (IS_ERR(neigh)) {
                         in6_dev_put(idev);
                         dst_free(&rt->dst);
@@ -1292,7 +1340,7 @@ int ip6_route_add(struct fib6_config *cfg)
         if (!table)
                 goto out;
  
-       rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
+       rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
  
         if (!rt) {
                 err = -ENOMEM;
@@ -1696,116 +1744,6 @@ out:
         dst_release(&rt->dst);
  }
  
-/*
- *     Handle ICMP "packet too big" messages
- *     i.e. Path MTU discovery
- */
-
-static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
-                            struct net *net, u32 pmtu, int ifindex)
-{
-       struct rt6_info *rt, *nrt;
-       int allfrag = 0;
-again:
-       rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
-       if (!rt)
-               return;
-
-       if (rt6_check_expired(rt)) {
-               ip6_del_rt(rt);
-               goto again;
-       }
-
-       if (pmtu >= dst_mtu(&rt->dst))
-               goto out;
-
-       if (pmtu < IPV6_MIN_MTU) {
-               /*
-                * According to RFC2460, PMTU is set to the IPv6 Minimum Link
-                * MTU (1280) and a fragment header should always be included
-                * after a node receiving Too Big message reporting PMTU is
-                * less than the IPv6 Minimum Link MTU.
-                */
-               pmtu = IPV6_MIN_MTU;
-               allfrag = 1;
-       }
-
-       /* New mtu received -> path was valid.
-          They are sent only in response to data packets,
-          so that this nexthop apparently is reachable. --ANK
-        */
-       dst_confirm(&rt->dst);
-
-       /* Host route. If it is static, it would be better
-          not to override it, but add new one, so that
-          when cache entry will expire old pmtu
-          would return automatically.
-        */
-       if (rt->rt6i_flags & RTF_CACHE) {
-               dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
-               if (allfrag) {
-                       u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
-                       features |= RTAX_FEATURE_ALLFRAG;
-                       dst_metric_set(&rt->dst, RTAX_FEATURES, features);
-               }
-               rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
-               rt->rt6i_flags |= RTF_MODIFIED;
-               goto out;
-       }
-
-       /* Network route.
-          Two cases are possible:
-          1. It is connected route. Action: COW
-          2. It is gatewayed route or NONEXTHOP route. Action: clone it.
-        */
-       if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
-               nrt = rt6_alloc_cow(rt, daddr, saddr);
-       else
-               nrt = rt6_alloc_clone(rt, daddr);
-
-       if (nrt) {
-               dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
-               if (allfrag) {
-                       u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
-                       features |= RTAX_FEATURE_ALLFRAG;
-                       dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
-               }
-
-               /* According to RFC 1981, detecting PMTU increase shouldn't be
-                * happened within 5 mins, the recommended timer is 10 mins.
-                * Here this route expiration time is set to ip6_rt_mtu_expires
-                * which is 10 mins. After 10 mins the decreased pmtu is expired
-                * and detecting PMTU increase will be automatically happened.
-                */
-               rt6_update_expires(nrt, net->ipv6.sysctl.ip6_rt_mtu_expires);
-               nrt->rt6i_flags |= RTF_DYNAMIC;
-               ip6_ins_rt(nrt);
-       }
-out:
-       dst_release(&rt->dst);
-}
-
-void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
-                       struct net_device *dev, u32 pmtu)
-{
-       struct net *net = dev_net(dev);
-
-       /*
-        * RFC 1981 states that a node "MUST reduce the size of the packets it
-        * is sending along the path" that caused the Packet Too Big message.
-        * Since it's not possible in the general case to determine which
-        * interface was used to send the original packet, we update the MTU
-        * on the interface that will be used to send future packets. We also
-        * update the MTU on the interface that received the Packet Too Big in
-        * case the original packet was forced out that interface with
-        * SO_BINDTODEVICE or similar. This is the next best thing to the
-        * correct behaviour, which would be to update the MTU on all
-        * interfaces.
-        */
-       rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
-       rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
-}
-
  /*
   *     Misc support functions
   */
@@ -1814,8 +1752,8 @@ static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
                                     const struct in6_addr *dest)
  {
         struct net *net = dev_net(ort->dst.dev);
-       struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
-                                           ort->dst.dev, 0);
+       struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
+                                           ort->rt6i_table);
  
         if (rt) {
                 rt->dst.input = ort->dst.input;
@@ -2099,8 +2037,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
                                     bool anycast)
  {
         struct net *net = dev_net(idev->dev);
-       struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
-                                           net->loopback_dev, 0);
+       struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
         int err;
  
         if (!rt) {
@@ -2521,7 +2458,9 @@ static int rt6_fill_node(struct net *net,
         else
                 expires = INT_MAX;
  
-       peer = rt->rt6i_peer;
+       peer = NULL;
+       if (rt6_has_peer(rt))
+               peer = rt6_peer_ptr(rt);
         ts = tsage = 0;
         if (peer && peer->tcp_ts_stamp) {
                 ts = peer->tcp_ts;
@@ -2957,10 +2896,6 @@ static int __net_init ip6_route_net_init(struct net *net)
         net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
         net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
  
-#ifdef CONFIG_PROC_FS
-       proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
-       proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
-#endif
         net->ipv6.ip6_rt_gc_expire = 30*HZ;
  
         ret = 0;
@@ -2981,10 +2916,6 @@ out_ip6_dst_ops:
  
  static void __net_exit ip6_route_net_exit(struct net *net)
  {
-#ifdef CONFIG_PROC_FS
-       proc_net_remove(net, "ipv6_route");
-       proc_net_remove(net, "rt6_stats");
-#endif
         kfree(net->ipv6.ip6_null_entry);
  #ifdef CONFIG_IPV6_MULTIPLE_TABLES
         kfree(net->ipv6.ip6_prohibit_entry);
@@ -2993,11 +2924,58 @@ static void __net_exit ip6_route_net_exit(struct net *net)
         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
  }
  
+static int __net_init ip6_route_net_init_late(struct net *net)
+{
+#ifdef CONFIG_PROC_FS
+       proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
+       proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
+#endif
+       return 0;
+}
+
+static void __net_exit ip6_route_net_exit_late(struct net *net)
+{
+#ifdef CONFIG_PROC_FS
+       proc_net_remove(net, "ipv6_route");
+       proc_net_remove(net, "rt6_stats");
+#endif
+}
+
  static struct pernet_operations ip6_route_net_ops = {
         .init = ip6_route_net_init,
         .exit = ip6_route_net_exit,
  };
  
+static int __net_init ipv6_inetpeer_init(struct net *net)
+{
+       struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
+
+       if (!bp)
+               return -ENOMEM;
+       inet_peer_base_init(bp);
+       net->ipv6.peers = bp;
+       return 0;
+}
+
+static void __net_exit ipv6_inetpeer_exit(struct net *net)
+{
+       struct inet_peer_base *bp = net->ipv6.peers;
+
+       net->ipv6.peers = NULL;
+       inetpeer_invalidate_tree(bp);
+       kfree(bp);
+}
+
+static struct pernet_operations ipv6_inetpeer_ops = {
+       .init   =       ipv6_inetpeer_init,
+       .exit   =       ipv6_inetpeer_exit,
+};
+
+static struct pernet_operations ip6_route_net_late_ops = {
+       .init = ip6_route_net_init_late,
+       .exit = ip6_route_net_exit_late,
+};
+
  static struct notifier_block ip6_route_dev_notifier = {
         .notifier_call = ip6_route_dev_notify,
         .priority = 0,
@@ -3018,10 +2996,14 @@ int __init ip6_route_init(void)
         if (ret)
                 goto out_kmem_cache;
  
-       ret = register_pernet_subsys(&ip6_route_net_ops);
+       ret = register_pernet_subsys(&ipv6_inetpeer_ops);
         if (ret)
                 goto out_dst_entries;
  
+       ret = register_pernet_subsys(&ip6_route_net_ops);
+       if (ret)
+               goto out_register_inetpeer;
+
         ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
  
         /* Registering of the loopback is done before this portion of code,
@@ -3047,19 +3029,25 @@ int __init ip6_route_init(void)
         if (ret)
                 goto xfrm6_init;
  
+       ret = register_pernet_subsys(&ip6_route_net_late_ops);
+       if (ret)
+               goto fib6_rules_init;
+
         ret = -ENOBUFS;
         if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
             __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
             __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
-               goto fib6_rules_init;
+               goto out_register_late_subsys;
  
         ret = register_netdevice_notifier(&ip6_route_dev_notifier);
         if (ret)
-               goto fib6_rules_init;
+               goto out_register_late_subsys;
  
  out:
         return ret;
  
+out_register_late_subsys:
+       unregister_pernet_subsys(&ip6_route_net_late_ops);
  fib6_rules_init:
         fib6_rules_cleanup();
  xfrm6_init:
@@ -3068,6 +3056,8 @@ out_fib6_init:
         fib6_gc_cleanup();
  out_register_subsys:
         unregister_pernet_subsys(&ip6_route_net_ops);
+out_register_inetpeer:
+       unregister_pernet_subsys(&ipv6_inetpeer_ops);
  out_dst_entries:
         dst_entries_destroy(&ip6_dst_blackhole_ops);
  out_kmem_cache:
@@ -3078,9 +3068,11 @@ out_kmem_cache:
  void ip6_route_cleanup(void)
  {
         unregister_netdevice_notifier(&ip6_route_dev_notifier);
+       unregister_pernet_subsys(&ip6_route_net_late_ops);
         fib6_rules_cleanup();
         xfrm6_fini();
         fib6_gc_cleanup();
+       unregister_pernet_subsys(&ipv6_inetpeer_ops);
         unregister_pernet_subsys(&ip6_route_net_ops);
         dst_entries_destroy(&ip6_dst_blackhole_ops);
         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);