/*
 * net/core/netpoll.c - common framework for low-level network console,
 * dump, and debugger code
 *
 * Sep 8 2003  Matt Mackall <mpm@selenic.com>
 *
 * based on the netconsole code from:
 *
 * Copyright (C) 2001  Ingo Molnar <mingo@redhat.com>
 * Copyright (C) 2002  Red Hat, Inc.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/moduleparam.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/string.h>
#include <linux/if_arp.h>
#include <linux/inetdevice.h>
#include <linux/inet.h>
#include <linux/interrupt.h>
#include <linux/netpoll.h>
#include <linux/sched.h>
#include <linux/delay.h>
#include <linux/rcupdate.h>
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/if_vlan.h>
#include <net/tcp.h>
#include <net/udp.h>
#include <net/addrconf.h>
#include <net/ndisc.h>
#include <net/ip6_checksum.h>
#include <asm/unaligned.h>
#include <trace/events/napi.h>

/*
 * We maintain a small pool of fully-sized skbs, to make sure the
 * message gets out even in extreme OOM situations.
 */

#define MAX_UDP_CHUNK 1460
#define MAX_SKBS 32

static struct sk_buff_head skb_pool;

static atomic_t trapped;

DEFINE_STATIC_SRCU(netpoll_srcu);

#define USEC_PER_POLL	50
#define NETPOLL_RX_ENABLED  1
#define NETPOLL_RX_DROP     2

#define MAX_SKB_SIZE							\
	(sizeof(struct ethhdr) +					\
	 sizeof(struct iphdr) +						\
	 sizeof(struct udphdr) +					\
	 MAX_UDP_CHUNK)

static void zap_completion_queue(void);
static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo);
static void netpoll_async_cleanup(struct work_struct *work);

static unsigned int carrier_timeout = 4;
module_param(carrier_timeout, uint, 0644);

#define np_info(np, fmt, ...)				\
	pr_info("%s: " fmt, np->name, ##__VA_ARGS__)
#define np_err(np, fmt, ...)				\
	pr_err("%s: " fmt, np->name, ##__VA_ARGS__)
#define np_notice(np, fmt, ...)				\
	pr_notice("%s: " fmt, np->name, ##__VA_ARGS__)

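/* Drain the deferred transmit queue.  netpoll_send_skb_on_dev() parks
 * skbs it could not send immediately on npinfo->txq; this work item
 * retries them one at a time, and as soon as a queue is frozen/stopped
 * or a driver returns anything other than NETDEV_TX_OK it requeues the
 * skb at the head and reschedules itself (after HZ/10), so ordering is
 * preserved.
 */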
static void queue_process(struct work_struct *work)
{
	struct netpoll_info *npinfo =
		container_of(work, struct netpoll_info, tx_work.work);
	struct sk_buff *skb;
	unsigned long flags;

	while ((skb = skb_dequeue(&npinfo->txq))) {
		struct net_device *dev = skb->dev;
		const struct net_device_ops *ops = dev->netdev_ops;
		struct netdev_queue *txq;

		if (!netif_device_present(dev) || !netif_running(dev)) {
			__kfree_skb(skb);
			continue;
		}

		txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));

		local_irq_save(flags);
		__netif_tx_lock(txq, smp_processor_id());
		if (netif_xmit_frozen_or_stopped(txq) ||
		    ops->ndo_start_xmit(skb, dev) != NETDEV_TX_OK) {
			skb_queue_head(&npinfo->txq, skb);
			__netif_tx_unlock(txq);
			local_irq_restore(flags);

			schedule_delayed_work(&npinfo->tx_work, HZ/10);
			return;
		}
		__netif_tx_unlock(txq);
		local_irq_restore(flags);
	}
}

static __sum16 checksum_udp(struct sk_buff *skb, struct udphdr *uh,
			    unsigned short ulen, __be32 saddr, __be32 daddr)
{
	__wsum psum;

	if (uh->check == 0 || skb_csum_unnecessary(skb))
		return 0;

	psum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);

	if (skb->ip_summed == CHECKSUM_COMPLETE &&
	    !csum_fold(csum_add(psum, skb->csum)))
		return 0;

	skb->csum = psum;

	return __skb_checksum_complete(skb);
}

/*
 * Check whether delayed processing was scheduled for our NIC. If so,
 * we attempt to grab the poll lock and use ->poll() to pump the card.
 * If this fails, either we've recursed in ->poll() or it's already
 * running on another CPU.
 *
 * Note: we don't mask interrupts with this lock because we're using
 * trylock here and interrupts are already disabled in the softirq
 * case. Further, we test the poll_owner to avoid recursion on UP
 * systems where the lock doesn't exist.
 *
 * In cases where there is bi-directional communication, reading only
 * one message at a time can lead to packets being dropped by the
 * network adapter, forcing superfluous retries and possibly timeouts.
 * Thus, we set our budget to greater than 1.
 */
static int poll_one_napi(struct napi_struct *napi, int budget)
{
	int work;

	/* net_rx_action's ->poll() invocations and ours are
	 * synchronized by this test which is only made while
	 * holding the napi->poll_lock.
	 */
	if (!test_bit(NAPI_STATE_SCHED, &napi->state))
		return budget;

	set_bit(NAPI_STATE_NPSVC, &napi->state);

	work = napi->poll(napi, budget);
	WARN_ONCE(work > budget, "%pF exceeded budget in poll\n", napi->poll);
	trace_napi_poll(napi);

	clear_bit(NAPI_STATE_NPSVC, &napi->state);

	return budget - work;
}

static void poll_napi(struct net_device *dev, int budget)
{
	struct napi_struct *napi;

	list_for_each_entry(napi, &dev->napi_list, dev_list) {
		if (napi->poll_owner != smp_processor_id() &&
		    spin_trylock(&napi->poll_lock)) {
			budget = poll_one_napi(napi, budget);
			spin_unlock(&napi->poll_lock);
		}
	}
}

static void service_neigh_queue(struct netpoll_info *npi)
{
	if (npi) {
		struct sk_buff *skb;

		while ((skb = skb_dequeue(&npi->neigh_tx)))
			netpoll_neigh_reply(skb, npi);
	}
}

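/* Main polling entry point: have the driver service its own interrupt
 * work via ->ndo_poll_controller(), poll the device's NAPI contexts
 * with a fixed budget, and then service any trapped neighbour traffic
 * (ARP/NS).  For bonding slaves the queued neighbour skbs are first
 * re-targeted at the master device so replies go out via the bond.
 */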
static void netpoll_poll_dev(struct net_device *dev)
{
	const struct net_device_ops *ops;
	struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo);
	int budget = 16;

	/* Don't do any rx activity if the dev_lock semaphore is held;
	 * the dev_open/close paths use this to block netpoll activity
	 * while changing device state.
	 */
	if (down_trylock(&ni->dev_lock))
		return;

	if (!netif_running(dev)) {
		up(&ni->dev_lock);
		return;
	}

	ops = dev->netdev_ops;
	if (!ops->ndo_poll_controller) {
		up(&ni->dev_lock);
		return;
	}

	ni->rx_flags |= NETPOLL_RX_DROP;
	atomic_inc(&trapped);

	/* Process pending work on NIC */
	ops->ndo_poll_controller(dev);

	poll_napi(dev, budget);

	atomic_dec(&trapped);
	ni->rx_flags &= ~NETPOLL_RX_DROP;

	up(&ni->dev_lock);

	if (dev->flags & IFF_SLAVE) {
		if (ni) {
			struct net_device *bond_dev;
			struct sk_buff *skb;
			struct netpoll_info *bond_ni;

			bond_dev = netdev_master_upper_dev_get_rcu(dev);
			bond_ni = rcu_dereference_bh(bond_dev->npinfo);
			while ((skb = skb_dequeue(&ni->neigh_tx))) {
				skb->dev = bond_dev;
				skb_queue_tail(&bond_ni->neigh_tx, skb);
			}
		}
	}

	service_neigh_queue(ni);

	zap_completion_queue();
}

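/* netpoll_rx_disable()/netpoll_rx_enable() bracket device state changes:
 * the dev_open/close paths use them to hold dev_lock, which keeps
 * netpoll_poll_dev() (which only trylocks it) out of the way while the
 * device changes state.  The SRCU read section keeps the npinfo lookup
 * safe against a concurrent cleanup.
 */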
void netpoll_rx_disable(struct net_device *dev)
{
	struct netpoll_info *ni;
	int idx;

	might_sleep();
	idx = srcu_read_lock(&netpoll_srcu);
	ni = srcu_dereference(dev->npinfo, &netpoll_srcu);
	if (ni)
		down(&ni->dev_lock);
	srcu_read_unlock(&netpoll_srcu, idx);
}
EXPORT_SYMBOL(netpoll_rx_disable);

void netpoll_rx_enable(struct net_device *dev)
{
	struct netpoll_info *ni;

	rcu_read_lock();
	ni = rcu_dereference(dev->npinfo);
	if (ni)
		up(&ni->dev_lock);
	rcu_read_unlock();
}
EXPORT_SYMBOL(netpoll_rx_enable);

static void refill_skbs(void)
{
	struct sk_buff *skb;
	unsigned long flags;

	spin_lock_irqsave(&skb_pool.lock, flags);
	while (skb_pool.qlen < MAX_SKBS) {
		skb = alloc_skb(MAX_SKB_SIZE, GFP_ATOMIC);
		if (!skb)
			break;

		__skb_queue_tail(&skb_pool, skb);
	}
	spin_unlock_irqrestore(&skb_pool.lock, flags);
}

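/* Reap this CPU's softnet completion queue: while we are polling, the
 * NET_TX softirq that normally frees these skbs may not get a chance
 * to run.  skbs carrying a destructor are handed back to
 * dev_kfree_skb_any() (with a reference bump to keep the bookkeeping
 * balanced) so the destructor is honoured; plain skbs are freed on the
 * spot.
 */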
static void zap_completion_queue(void)
{
	unsigned long flags;
	struct softnet_data *sd = &get_cpu_var(softnet_data);

	if (sd->completion_queue) {
		struct sk_buff *clist;

		local_irq_save(flags);
		clist = sd->completion_queue;
		sd->completion_queue = NULL;
		local_irq_restore(flags);

		while (clist != NULL) {
			struct sk_buff *skb = clist;
			clist = clist->next;
			if (skb->destructor) {
				atomic_inc(&skb->users);
				dev_kfree_skb_any(skb); /* put this one back */
			} else {
				__kfree_skb(skb);
			}
		}
	}

	put_cpu_var(softnet_data);
}

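/* Allocate an skb for a netpoll frame: try a fresh atomic allocation
 * first, fall back to the pre-filled emergency pool, and if both fail
 * poll the device a few times in the hope that it completes (and
 * thereby frees) in-flight buffers before giving up.
 */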
static struct sk_buff *find_skb(struct netpoll *np, int len, int reserve)
{
	int count = 0;
	struct sk_buff *skb;

	zap_completion_queue();
	refill_skbs();
repeat:

	skb = alloc_skb(len, GFP_ATOMIC);
	if (!skb)
		skb = skb_dequeue(&skb_pool);

	if (!skb) {
		if (++count < 10) {
			netpoll_poll_dev(np->dev);
			goto repeat;
		}
		return NULL;
	}

	atomic_set(&skb->users, 1);
	skb_reserve(skb, reserve);
	return skb;
}

static int netpoll_owner_active(struct net_device *dev)
{
	struct napi_struct *napi;

	list_for_each_entry(napi, &dev->napi_list, dev_list) {
		if (napi->poll_owner == smp_processor_id())
			return 1;
	}
	return 0;
}

/* call with IRQ disabled */
void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
			     struct net_device *dev)
{
	int status = NETDEV_TX_BUSY;
	unsigned long tries;
	const struct net_device_ops *ops = dev->netdev_ops;
	/* It is up to the caller to keep npinfo alive. */
	struct netpoll_info *npinfo;

	WARN_ON_ONCE(!irqs_disabled());

	npinfo = rcu_dereference_bh(np->dev->npinfo);
	if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) {
		__kfree_skb(skb);
		return;
	}

	/* don't get messages out of order, and no recursion */
	if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) {
		struct netdev_queue *txq;

		txq = netdev_pick_tx(dev, skb, NULL);

		/* try until next clock tick */
		for (tries = jiffies_to_usecs(1)/USEC_PER_POLL;
		     tries > 0; --tries) {
			if (__netif_tx_trylock(txq)) {
				if (!netif_xmit_stopped(txq)) {
					if (vlan_tx_tag_present(skb) &&
					    !vlan_hw_offload_capable(netif_skb_features(skb),
								     skb->vlan_proto)) {
						skb = __vlan_put_tag(skb, skb->vlan_proto, vlan_tx_tag_get(skb));
						if (unlikely(!skb)) {
							/* This is actually a packet drop, but we
							 * don't want the code at the end of this
							 * function to try and re-queue a NULL skb.
							 */
							status = NETDEV_TX_OK;
							goto unlock_txq;
						}
						skb->vlan_tci = 0;
					}

					status = ops->ndo_start_xmit(skb, dev);
					if (status == NETDEV_TX_OK)
						txq_trans_update(txq);
				}
			unlock_txq:
				__netif_tx_unlock(txq);

				if (status == NETDEV_TX_OK)
					break;

			}

			/* tickle device: maybe there is some cleanup */
			netpoll_poll_dev(np->dev);

			udelay(USEC_PER_POLL);
		}

		WARN_ONCE(!irqs_disabled(),
			"netpoll_send_skb_on_dev(): %s enabled interrupts in poll (%pF)\n",
			dev->name, ops->ndo_start_xmit);

	}

	if (status != NETDEV_TX_OK) {
		skb_queue_tail(&npinfo->txq, skb);
		schedule_delayed_work(&npinfo->tx_work, 0);
	}
}
EXPORT_SYMBOL(netpoll_send_skb_on_dev);

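/* Build a complete Ethernet/IP(v6)/UDP frame around the caller's
 * payload and push it out through netpoll_send_skb().  Clients such as
 * netconsole hand each message chunk straight to
 * netpoll_send_udp(np, msg, len); the emergency pool sizing
 * (MAX_SKB_SIZE) assumes payloads of at most MAX_UDP_CHUNK bytes.
 */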
void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
{
	int total_len, ip_len, udp_len;
	struct sk_buff *skb;
	struct udphdr *udph;
	struct iphdr *iph;
	struct ethhdr *eth;
	static atomic_t ip_ident;
	struct ipv6hdr *ip6h;

	udp_len = len + sizeof(*udph);
	if (np->ipv6)
		ip_len = udp_len + sizeof(*ip6h);
	else
		ip_len = udp_len + sizeof(*iph);

	total_len = ip_len + LL_RESERVED_SPACE(np->dev);

	skb = find_skb(np, total_len + np->dev->needed_tailroom,
		       total_len - len);
	if (!skb)
		return;

	skb_copy_to_linear_data(skb, msg, len);
	skb_put(skb, len);

	skb_push(skb, sizeof(*udph));
	skb_reset_transport_header(skb);
	udph = udp_hdr(skb);
	udph->source = htons(np->local_port);
	udph->dest = htons(np->remote_port);
	udph->len = htons(udp_len);

	if (np->ipv6) {
		udph->check = 0;
		udph->check = csum_ipv6_magic(&np->local_ip.in6,
					      &np->remote_ip.in6,
					      udp_len, IPPROTO_UDP,
					      csum_partial(udph, udp_len, 0));
		if (udph->check == 0)
			udph->check = CSUM_MANGLED_0;

		skb_push(skb, sizeof(*ip6h));
		skb_reset_network_header(skb);
		ip6h = ipv6_hdr(skb);

		/* ip6h->version = 6; ip6h->priority = 0; */
		put_unaligned(0x60, (unsigned char *)ip6h);
		ip6h->flow_lbl[0] = 0;
		ip6h->flow_lbl[1] = 0;
		ip6h->flow_lbl[2] = 0;

		ip6h->payload_len = htons(sizeof(struct udphdr) + len);
		ip6h->nexthdr = IPPROTO_UDP;
		ip6h->hop_limit = 32;
		ip6h->saddr = np->local_ip.in6;
		ip6h->daddr = np->remote_ip.in6;

		eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
		skb_reset_mac_header(skb);
		skb->protocol = eth->h_proto = htons(ETH_P_IPV6);
	} else {
		udph->check = 0;
		udph->check = csum_tcpudp_magic(np->local_ip.ip,
						np->remote_ip.ip,
						udp_len, IPPROTO_UDP,
						csum_partial(udph, udp_len, 0));
		if (udph->check == 0)
			udph->check = CSUM_MANGLED_0;

		skb_push(skb, sizeof(*iph));
		skb_reset_network_header(skb);
		iph = ip_hdr(skb);

		/* iph->version = 4; iph->ihl = 5; */
		put_unaligned(0x45, (unsigned char *)iph);
		iph->tos      = 0;
		put_unaligned(htons(ip_len), &(iph->tot_len));
		iph->id       = htons(atomic_inc_return(&ip_ident));
		iph->frag_off = 0;
		iph->ttl      = 64;
		iph->protocol = IPPROTO_UDP;
		iph->check    = 0;
		put_unaligned(np->local_ip.ip, &(iph->saddr));
		put_unaligned(np->remote_ip.ip, &(iph->daddr));
		iph->check    = ip_fast_csum((unsigned char *)iph, iph->ihl);

		eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
		skb_reset_mac_header(skb);
		skb->protocol = eth->h_proto = htons(ETH_P_IP);
	}

	ether_addr_copy(eth->h_source, np->dev->dev_addr);
	ether_addr_copy(eth->h_dest, np->remote_mac);

	skb->dev = np->dev;

	netpoll_send_skb(np, skb);
}
EXPORT_SYMBOL(netpoll_send_udp);

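/* Answer ARP requests and IPv6 neighbour solicitations on behalf of
 * the netpoll clients attached to this device.  While we are trapping
 * packets the normal stack never sees them, so without these replies
 * the remote host could not (re)resolve our hardware address.
 */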
static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo)
{
	int size, type = ARPOP_REPLY;
	__be32 sip, tip;
	unsigned char *sha;
	struct sk_buff *send_skb;
	struct netpoll *np, *tmp;
	unsigned long flags;
	int hlen, tlen;
	int hits = 0, proto;

	if (!netpoll_rx_processing(npinfo))
		return;

	/* Before checking the packet, we do some early
	   inspection of whether this is interesting at all */
	spin_lock_irqsave(&npinfo->rx_lock, flags);
	list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
		if (np->dev == skb->dev)
			hits++;
	}
	spin_unlock_irqrestore(&npinfo->rx_lock, flags);

	/* No netpoll struct is using this dev */
	if (!hits)
		return;

	proto = ntohs(eth_hdr(skb)->h_proto);
	if (proto == ETH_P_ARP) {
		struct arphdr *arp;
		unsigned char *arp_ptr;
		/* No arp on this interface */
		if (skb->dev->flags & IFF_NOARP)
			return;

		if (!pskb_may_pull(skb, arp_hdr_len(skb->dev)))
			return;

		skb_reset_network_header(skb);
		skb_reset_transport_header(skb);
		arp = arp_hdr(skb);

		if ((arp->ar_hrd != htons(ARPHRD_ETHER) &&
		     arp->ar_hrd != htons(ARPHRD_IEEE802)) ||
		    arp->ar_pro != htons(ETH_P_IP) ||
		    arp->ar_op != htons(ARPOP_REQUEST))
			return;

		arp_ptr = (unsigned char *)(arp + 1);
		/* save the location of the src hw addr */
		sha = arp_ptr;
		arp_ptr += skb->dev->addr_len;
		memcpy(&sip, arp_ptr, 4);
		arp_ptr += 4;
		/* If we actually cared about dst hw addr,
		   it would get copied here */
		arp_ptr += skb->dev->addr_len;
		memcpy(&tip, arp_ptr, 4);

		/* Should we ignore arp? */
		if (ipv4_is_loopback(tip) || ipv4_is_multicast(tip))
			return;

		size = arp_hdr_len(skb->dev);

		spin_lock_irqsave(&npinfo->rx_lock, flags);
		list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
			if (tip != np->local_ip.ip)
				continue;

			hlen = LL_RESERVED_SPACE(np->dev);
			tlen = np->dev->needed_tailroom;
			send_skb = find_skb(np, size + hlen + tlen, hlen);
			if (!send_skb)
				continue;

			skb_reset_network_header(send_skb);
			arp = (struct arphdr *) skb_put(send_skb, size);
			send_skb->dev = skb->dev;
			send_skb->protocol = htons(ETH_P_ARP);

			/* Fill the device header for the ARP frame */
			if (dev_hard_header(send_skb, skb->dev, ETH_P_ARP,
					    sha, np->dev->dev_addr,
					    send_skb->len) < 0) {
				kfree_skb(send_skb);
				continue;
			}

			/*
			 * Fill out the arp protocol part.
			 *
			 * we only support ethernet device type,
			 * which (according to RFC 1390) should
			 * always equal 1 (Ethernet).
			 */

			arp->ar_hrd = htons(np->dev->type);
			arp->ar_pro = htons(ETH_P_IP);
			arp->ar_hln = np->dev->addr_len;
			arp->ar_pln = 4;
			arp->ar_op = htons(type);

			arp_ptr = (unsigned char *)(arp + 1);
			memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len);
			arp_ptr += np->dev->addr_len;
			memcpy(arp_ptr, &tip, 4);
			arp_ptr += 4;
			memcpy(arp_ptr, sha, np->dev->addr_len);
			arp_ptr += np->dev->addr_len;
			memcpy(arp_ptr, &sip, 4);

			netpoll_send_skb(np, send_skb);

			/* If there are several rx_skb_hooks for the same
			 * address we're fine by sending a single reply
			 */
			break;
		}
		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
	} else if (proto == ETH_P_IPV6) {
#if IS_ENABLED(CONFIG_IPV6)
		struct nd_msg *msg;
		u8 *lladdr = NULL;
		struct ipv6hdr *hdr;
		struct icmp6hdr *icmp6h;
		const struct in6_addr *saddr;
		const struct in6_addr *daddr;
		struct inet6_dev *in6_dev = NULL;
		struct in6_addr *target;

		in6_dev = in6_dev_get(skb->dev);
		if (!in6_dev || !in6_dev->cnf.accept_ra)
			return;

		if (!pskb_may_pull(skb, skb->len))
			return;

		msg = (struct nd_msg *)skb_transport_header(skb);

		__skb_push(skb, skb->data - skb_transport_header(skb));

		if (ipv6_hdr(skb)->hop_limit != 255)
			return;
		if (msg->icmph.icmp6_code != 0)
			return;
		if (msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION)
			return;

		saddr = &ipv6_hdr(skb)->saddr;
		daddr = &ipv6_hdr(skb)->daddr;

		size = sizeof(struct icmp6hdr) + sizeof(struct in6_addr);

		spin_lock_irqsave(&npinfo->rx_lock, flags);
		list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
			if (!ipv6_addr_equal(daddr, &np->local_ip.in6))
				continue;

			hlen = LL_RESERVED_SPACE(np->dev);
			tlen = np->dev->needed_tailroom;
			send_skb = find_skb(np, size + hlen + tlen, hlen);
			if (!send_skb)
				continue;

			send_skb->protocol = htons(ETH_P_IPV6);
			send_skb->dev = skb->dev;

			skb_reset_network_header(send_skb);
			hdr = (struct ipv6hdr *) skb_put(send_skb, sizeof(struct ipv6hdr));
			*(__be32 *)hdr = htonl(0x60000000);
			hdr->payload_len = htons(size);
			hdr->nexthdr = IPPROTO_ICMPV6;
			hdr->hop_limit = 255;
			hdr->saddr = *saddr;
			hdr->daddr = *daddr;

			icmp6h = (struct icmp6hdr *) skb_put(send_skb, sizeof(struct icmp6hdr));
			icmp6h->icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT;
			icmp6h->icmp6_router = 0;
			icmp6h->icmp6_solicited = 1;

			target = (struct in6_addr *) skb_put(send_skb, sizeof(struct in6_addr));
			*target = msg->target;
			icmp6h->icmp6_cksum = csum_ipv6_magic(saddr, daddr, size,
							      IPPROTO_ICMPV6,
							      csum_partial(icmp6h,
									   size, 0));

			if (dev_hard_header(send_skb, skb->dev, ETH_P_IPV6,
					    lladdr, np->dev->dev_addr,
					    send_skb->len) < 0) {
				kfree_skb(send_skb);
				continue;
			}

			netpoll_send_skb(np, send_skb);

			/* If there are several rx_skb_hooks for the same
			 * address, we're fine by sending a single reply
			 */
			break;
		}
		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
#endif
	}
}

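/* Return true if this skb is an IPv6 neighbour solicitation.  Such
 * packets are trapped onto the neigh_tx queue in __netpoll_rx() so
 * they can be answered by netpoll_neigh_reply(), just like ARP
 * requests.
 */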
static bool pkt_is_ns(struct sk_buff *skb)
{
	struct nd_msg *msg;
	struct ipv6hdr *hdr;

	if (skb->protocol != htons(ETH_P_IPV6))
		return false;
	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + sizeof(struct nd_msg)))
		return false;

	msg = (struct nd_msg *)skb_transport_header(skb);
	__skb_push(skb, skb->data - skb_transport_header(skb));
	hdr = ipv6_hdr(skb);

	if (hdr->nexthdr != IPPROTO_ICMPV6)
		return false;
	if (hdr->hop_limit != 255)
		return false;
	if (msg->icmph.icmp6_code != 0)
		return false;
	if (msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION)
		return false;

	return true;
}

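/* Receive-path hook, called from the core rx path for devices that
 * have netpoll state attached.  Returns 1 if the packet was consumed
 * (trapped for neighbour handling, delivered to a matching client's
 * rx_skb_hook, or dropped because we are trapping), 0 to let the
 * normal stack process it.
 */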
int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
{
	int proto, len, ulen, data_len;
	int hits = 0, offset;
	const struct iphdr *iph;
	struct udphdr *uh;
	struct netpoll *np, *tmp;
	u16 source;

	if (!netpoll_rx_processing(npinfo))
		goto out;

	if (skb->dev->type != ARPHRD_ETHER)
		goto out;

	/* check if netpoll clients need ARP */
	if (skb->protocol == htons(ETH_P_ARP) && atomic_read(&trapped)) {
		skb_queue_tail(&npinfo->neigh_tx, skb);
		return 1;
	} else if (pkt_is_ns(skb) && atomic_read(&trapped)) {
		skb_queue_tail(&npinfo->neigh_tx, skb);
		return 1;
	}

	if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
		skb = vlan_untag(skb);
		if (unlikely(!skb))
			goto out;
	}

	proto = ntohs(eth_hdr(skb)->h_proto);
	if (proto != ETH_P_IP && proto != ETH_P_IPV6)
		goto out;
	if (skb->pkt_type == PACKET_OTHERHOST)
		goto out;
	if (skb_shared(skb))
		goto out;

	if (proto == ETH_P_IP) {
		if (!pskb_may_pull(skb, sizeof(struct iphdr)))
			goto out;
		iph = (struct iphdr *)skb->data;
		if (iph->ihl < 5 || iph->version != 4)
			goto out;
		if (!pskb_may_pull(skb, iph->ihl*4))
			goto out;
		iph = (struct iphdr *)skb->data;
		if (ip_fast_csum((u8 *)iph, iph->ihl) != 0)
			goto out;

		len = ntohs(iph->tot_len);
		if (skb->len < len || len < iph->ihl*4)
			goto out;

		/*
		 * Our transport medium may have padded the buffer out.
		 * Now we trim to the true length of the frame.
		 */
		if (pskb_trim_rcsum(skb, len))
			goto out;

		iph = (struct iphdr *)skb->data;
		if (iph->protocol != IPPROTO_UDP)
			goto out;

		len -= iph->ihl*4;
		uh = (struct udphdr *)(((char *)iph) + iph->ihl*4);
		offset = (unsigned char *)(uh + 1) - skb->data;
		ulen = ntohs(uh->len);
		data_len = skb->len - offset;
		source = ntohs(uh->source);

		if (ulen != len)
			goto out;
		if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr))
			goto out;
		list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
			if (np->local_ip.ip && np->local_ip.ip != iph->daddr)
				continue;
			if (np->remote_ip.ip && np->remote_ip.ip != iph->saddr)
				continue;
			if (np->local_port && np->local_port != ntohs(uh->dest))
				continue;

			np->rx_skb_hook(np, source, skb, offset, data_len);
			hits++;
		}
	} else {
#if IS_ENABLED(CONFIG_IPV6)
		const struct ipv6hdr *ip6h;

		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
			goto out;
		ip6h = (struct ipv6hdr *)skb->data;
		if (ip6h->version != 6)
			goto out;
		len = ntohs(ip6h->payload_len);
		if (!len)
			goto out;
		if (len + sizeof(struct ipv6hdr) > skb->len)
			goto out;
		if (pskb_trim_rcsum(skb, len + sizeof(struct ipv6hdr)))
			goto out;
		ip6h = ipv6_hdr(skb);
		if (!pskb_may_pull(skb, sizeof(struct udphdr)))
			goto out;
		uh = udp_hdr(skb);
		offset = (unsigned char *)(uh + 1) - skb->data;
		ulen = ntohs(uh->len);
		data_len = skb->len - offset;
		source = ntohs(uh->source);
		if (ulen != skb->len)
			goto out;
		if (udp6_csum_init(skb, uh, IPPROTO_UDP))
			goto out;
		list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
			if (!ipv6_addr_equal(&np->local_ip.in6, &ip6h->daddr))
				continue;
			if (!ipv6_addr_equal(&np->remote_ip.in6, &ip6h->saddr))
				continue;
			if (np->local_port && np->local_port != ntohs(uh->dest))
				continue;

			np->rx_skb_hook(np, source, skb, offset, data_len);
			hits++;
		}
#endif
	}

	if (!hits)
		goto out;

	kfree_skb(skb);
	return 1;

out:
	if (atomic_read(&trapped)) {
		kfree_skb(skb);
		return 1;
	}

	return 0;
}

void netpoll_print_options(struct netpoll *np)
{
	np_info(np, "local port %d\n", np->local_port);
	if (np->ipv6)
		np_info(np, "local IPv6 address %pI6c\n", &np->local_ip.in6);
	else
		np_info(np, "local IPv4 address %pI4\n", &np->local_ip.ip);
	np_info(np, "interface '%s'\n", np->dev_name);
	np_info(np, "remote port %d\n", np->remote_port);
	if (np->ipv6)
		np_info(np, "remote IPv6 address %pI6c\n", &np->remote_ip.in6);
	else
		np_info(np, "remote IPv4 address %pI4\n", &np->remote_ip.ip);
	np_info(np, "remote ethernet address %pM\n", np->remote_mac);
}
EXPORT_SYMBOL(netpoll_print_options);

static int netpoll_parse_ip_addr(const char *str, union inet_addr *addr)
{
	const char *end;

	if (!strchr(str, ':') &&
	    in4_pton(str, -1, (void *)addr, -1, &end) > 0) {
		if (!*end)
			return 0;
	}
	if (in6_pton(str, -1, addr->in6.s6_addr, -1, &end) > 0) {
#if IS_ENABLED(CONFIG_IPV6)
		if (!*end)
			return 1;
#else
		return -1;
#endif
	}
	return -1;
}

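/* Parse a netpoll configuration string of the form used by netconsole:
 *
 *	[src-port]@[src-ip]/[dev],[tgt-port]@<tgt-ip>/[tgt-macaddr]
 *
 * e.g. "6665@10.0.0.1/eth0,6666@10.0.0.2/00:11:22:33:44:55".  Omitted
 * fields keep whatever defaults the caller pre-loaded into *np.
 * Returns 0 on success and -1 on a malformed string.
 */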
int netpoll_parse_options(struct netpoll *np, char *opt)
{
	char *cur = opt, *delim;
	int ipv6;
	bool ipversion_set = false;

	if (*cur != '@') {
		if ((delim = strchr(cur, '@')) == NULL)
			goto parse_failed;
		*delim = 0;
		if (kstrtou16(cur, 10, &np->local_port))
			goto parse_failed;
		cur = delim;
	}
	cur++;

	if (*cur != '/') {
		ipversion_set = true;
		if ((delim = strchr(cur, '/')) == NULL)
			goto parse_failed;
		*delim = 0;
		ipv6 = netpoll_parse_ip_addr(cur, &np->local_ip);
		if (ipv6 < 0)
			goto parse_failed;
		else
			np->ipv6 = (bool)ipv6;
		cur = delim;
	}
	cur++;

	if (*cur != ',') {
		/* parse out dev name */
		if ((delim = strchr(cur, ',')) == NULL)
			goto parse_failed;
		*delim = 0;
		strlcpy(np->dev_name, cur, sizeof(np->dev_name));
		cur = delim;
	}
	cur++;

	if (*cur != '@') {
		/* dst port */
		if ((delim = strchr(cur, '@')) == NULL)
			goto parse_failed;
		*delim = 0;
		if (*cur == ' ' || *cur == '\t')
			np_info(np, "warning: whitespace is not allowed\n");
		if (kstrtou16(cur, 10, &np->remote_port))
			goto parse_failed;
		cur = delim;
	}
	cur++;

	/* dst ip */
	if ((delim = strchr(cur, '/')) == NULL)
		goto parse_failed;
	*delim = 0;
	ipv6 = netpoll_parse_ip_addr(cur, &np->remote_ip);
	if (ipv6 < 0)
		goto parse_failed;
	else if (ipversion_set && np->ipv6 != (bool)ipv6)
		goto parse_failed;
	else
		np->ipv6 = (bool)ipv6;
	cur = delim + 1;

	if (*cur != 0) {
		/* MAC address */
		if (!mac_pton(cur, np->remote_mac))
			goto parse_failed;
	}

	netpoll_print_options(np);

	return 0;

 parse_failed:
	np_info(np, "couldn't parse config at '%s'!\n", cur);
	return -1;
}
EXPORT_SYMBOL(netpoll_parse_options);

int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp)
{
	struct netpoll_info *npinfo;
	const struct net_device_ops *ops;
	unsigned long flags;
	int err;

	np->dev = ndev;
	strlcpy(np->dev_name, ndev->name, IFNAMSIZ);
	INIT_WORK(&np->cleanup_work, netpoll_async_cleanup);

	if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) ||
	    !ndev->netdev_ops->ndo_poll_controller) {
		np_err(np, "%s doesn't support polling, aborting\n",
		       np->dev_name);
		err = -ENOTSUPP;
		goto out;
	}

	if (!ndev->npinfo) {
		npinfo = kmalloc(sizeof(*npinfo), gfp);
		if (!npinfo) {
			err = -ENOMEM;
			goto out;
		}

		npinfo->rx_flags = 0;
		INIT_LIST_HEAD(&npinfo->rx_np);

		spin_lock_init(&npinfo->rx_lock);
		sema_init(&npinfo->dev_lock, 1);
		skb_queue_head_init(&npinfo->neigh_tx);
		skb_queue_head_init(&npinfo->txq);
		INIT_DELAYED_WORK(&npinfo->tx_work, queue_process);

		atomic_set(&npinfo->refcnt, 1);

		ops = np->dev->netdev_ops;
		if (ops->ndo_netpoll_setup) {
			err = ops->ndo_netpoll_setup(ndev, npinfo, gfp);
			if (err)
				goto free_npinfo;
		}
	} else {
		npinfo = rtnl_dereference(ndev->npinfo);
		atomic_inc(&npinfo->refcnt);
	}

	npinfo->netpoll = np;

	if (np->rx_skb_hook) {
		spin_lock_irqsave(&npinfo->rx_lock, flags);
		npinfo->rx_flags |= NETPOLL_RX_ENABLED;
		list_add_tail(&np->rx, &npinfo->rx_np);
		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
	}

	/* last thing to do is link it to the net device structure */
	rcu_assign_pointer(ndev->npinfo, npinfo);

	return 0;

free_npinfo:
	kfree(npinfo);
out:
	return err;
}
EXPORT_SYMBOL_GPL(__netpoll_setup);

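/* Full setup entry point for clients.  A minimal usage sketch (the
 * identifiers and addresses below are illustrative, not part of this
 * file):
 *
 *	static struct netpoll np = { .name = "mylogger" };
 *	char opt[] = "6665@10.0.0.1/eth0,6666@10.0.0.2/00:11:22:33:44:55";
 *
 *	if (netpoll_parse_options(&np, opt))
 *		return -EINVAL;
 *	if (netpoll_setup(&np))
 *		return -ENODEV;
 *	netpoll_send_udp(&np, "hello\n", 6);
 *
 * netpoll_setup() resolves the device, forces it up if necessary,
 * picks a local address when none was given, and then calls
 * __netpoll_setup() under the rtnl lock.
 */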
int netpoll_setup(struct netpoll *np)
{
	struct net_device *ndev = NULL;
	struct in_device *in_dev;
	int err;

	rtnl_lock();
	if (np->dev_name[0]) {
		struct net *net = current->nsproxy->net_ns;
		ndev = __dev_get_by_name(net, np->dev_name);
	}
	if (!ndev) {
		np_err(np, "%s doesn't exist, aborting\n", np->dev_name);
		err = -ENODEV;
		goto unlock;
	}
	dev_hold(ndev);

	if (netdev_master_upper_dev_get(ndev)) {
		np_err(np, "%s is a slave device, aborting\n", np->dev_name);
		err = -EBUSY;
		goto put;
	}

	if (!netif_running(ndev)) {
		unsigned long atmost, atleast;

		np_info(np, "device %s not up yet, forcing it\n", np->dev_name);

		err = dev_open(ndev);

		if (err) {
			np_err(np, "failed to open %s\n", ndev->name);
			goto put;
		}

		rtnl_unlock();
		atleast = jiffies + HZ/10;
		atmost = jiffies + carrier_timeout * HZ;
		while (!netif_carrier_ok(ndev)) {
			if (time_after(jiffies, atmost)) {
				np_notice(np, "timeout waiting for carrier\n");
				break;
			}
			msleep(1);
		}

		/* If carrier appears to come up instantly, we don't
		 * trust it and pause so that we don't pump all our
		 * queued console messages into the bitbucket.
		 */

		if (time_before(jiffies, atleast)) {
			np_notice(np, "carrier detect appears untrustworthy, waiting 4 seconds\n");
			msleep(4000);
		}
		rtnl_lock();
	}

	if (!np->local_ip.ip) {
		if (!np->ipv6) {
			in_dev = __in_dev_get_rtnl(ndev);

			if (!in_dev || !in_dev->ifa_list) {
				np_err(np, "no IP address for %s, aborting\n",
				       np->dev_name);
				err = -EDESTADDRREQ;
				goto put;
			}

			np->local_ip.ip = in_dev->ifa_list->ifa_local;
			np_info(np, "local IP %pI4\n", &np->local_ip.ip);
		} else {
#if IS_ENABLED(CONFIG_IPV6)
			struct inet6_dev *idev;

			err = -EDESTADDRREQ;
			idev = __in6_dev_get(ndev);
			if (idev) {
				struct inet6_ifaddr *ifp;

				read_lock_bh(&idev->lock);
				list_for_each_entry(ifp, &idev->addr_list, if_list) {
					if (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)
						continue;
					np->local_ip.in6 = ifp->addr;
					err = 0;
					break;
				}
				read_unlock_bh(&idev->lock);
			}
			if (err) {
				np_err(np, "no IPv6 address for %s, aborting\n",
				       np->dev_name);
				goto put;
			} else {
				np_info(np, "local IPv6 %pI6c\n", &np->local_ip.in6);
			}
#else
			np_err(np, "IPv6 is not supported on %s, aborting\n",
			       np->dev_name);
			err = -EINVAL;
			goto put;
#endif
		}
	}

	/* fill up the skb queue */
	refill_skbs();

	err = __netpoll_setup(np, ndev, GFP_KERNEL);
	if (err)
		goto put;

	rtnl_unlock();
	return 0;

put:
	dev_put(ndev);
unlock:
	rtnl_unlock();
	return err;
}
EXPORT_SYMBOL(netpoll_setup);

static int __init netpoll_init(void)
{
	skb_queue_head_init(&skb_pool);
	return 0;
}
core_initcall(netpoll_init);

static void rcu_cleanup_netpoll_info(struct rcu_head *rcu_head)
{
	struct netpoll_info *npinfo =
			container_of(rcu_head, struct netpoll_info, rcu);

	skb_queue_purge(&npinfo->neigh_tx);
	skb_queue_purge(&npinfo->txq);

	/* we can't call cancel_delayed_work_sync here, as we are in softirq */
	cancel_delayed_work(&npinfo->tx_work);

	/* clean after last, unfinished work */
	__skb_queue_purge(&npinfo->txq);
	/* now cancel it again */
	cancel_delayed_work(&npinfo->tx_work);
	kfree(npinfo);
}

void __netpoll_cleanup(struct netpoll *np)
{
	struct netpoll_info *npinfo;
	unsigned long flags;

	/* Both netpoll_cleanup() and netpoll_async_cleanup() call us
	 * with the rtnl lock held, so rtnl_dereference() is the right
	 * accessor for npinfo here.
	 */
	npinfo = rtnl_dereference(np->dev->npinfo);
	if (!npinfo)
		return;

	if (!list_empty(&npinfo->rx_np)) {
		spin_lock_irqsave(&npinfo->rx_lock, flags);
		list_del(&np->rx);
		if (list_empty(&npinfo->rx_np))
			npinfo->rx_flags &= ~NETPOLL_RX_ENABLED;
		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
	}

	synchronize_srcu(&netpoll_srcu);

	if (atomic_dec_and_test(&npinfo->refcnt)) {
		const struct net_device_ops *ops;

		ops = np->dev->netdev_ops;
		if (ops->ndo_netpoll_cleanup)
			ops->ndo_netpoll_cleanup(np->dev);

		rcu_assign_pointer(np->dev->npinfo, NULL);
		call_rcu_bh(&npinfo->rcu, rcu_cleanup_netpoll_info);
	}
}
EXPORT_SYMBOL_GPL(__netpoll_cleanup);

static void netpoll_async_cleanup(struct work_struct *work)
{
	struct netpoll *np = container_of(work, struct netpoll, cleanup_work);

	rtnl_lock();
	__netpoll_cleanup(np);
	rtnl_unlock();
	kfree(np);
}

void __netpoll_free_async(struct netpoll *np)
{
	schedule_work(&np->cleanup_work);
}
EXPORT_SYMBOL_GPL(__netpoll_free_async);

void netpoll_cleanup(struct netpoll *np)
{
	rtnl_lock();
	if (!np->dev)
		goto out;
	__netpoll_cleanup(np);
	dev_put(np->dev);
	np->dev = NULL;
out:
	rtnl_unlock();
}
EXPORT_SYMBOL(netpoll_cleanup);

int netpoll_trap(void)
{
	return atomic_read(&trapped);
}
EXPORT_SYMBOL(netpoll_trap);

void netpoll_set_trap(int trap)
{
	if (trap)
		atomic_inc(&trapped);
	else
		atomic_dec(&trapped);
}
EXPORT_SYMBOL(netpoll_set_trap);