/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, see <http://www.gnu.org/licenses/>.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 */
/* Prefix every pr_*() message emitted from this file with the module name. */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
22 #include <linux/init.h>
23 #include <linux/atomic.h>
24 #include <linux/module.h>
25 #include <linux/highmem.h>
26 #include <linux/device.h>
28 #include <linux/delay.h>
29 #include <linux/netdevice.h>
30 #include <linux/inetdevice.h>
31 #include <linux/etherdevice.h>
32 #include <linux/skbuff.h>
33 #include <linux/if_vlan.h>
35 #include <linux/slab.h>
37 #include <net/route.h>
39 #include <net/pkt_sched.h>
41 #include "hyperv_net.h"
43 struct net_device_context {
44 /* point back to our device context */
45 struct hv_device *device_ctx;
46 struct delayed_work dwork;
47 struct work_struct work;
50 #define RING_SIZE_MIN 64
51 static int ring_size = 128;
52 module_param(ring_size, int, S_IRUGO);
53 MODULE_PARM_DESC(ring_size, "Ring buffer size (# of pages)");
55 static void do_set_multicast(struct work_struct *w)
57 struct net_device_context *ndevctx =
58 container_of(w, struct net_device_context, work);
59 struct netvsc_device *nvdev;
60 struct rndis_device *rdev;
62 nvdev = hv_get_drvdata(ndevctx->device_ctx);
63 if (nvdev == NULL || nvdev->ndev == NULL)
66 rdev = nvdev->extension;
70 if (nvdev->ndev->flags & IFF_PROMISC)
71 rndis_filter_set_packet_filter(rdev,
72 NDIS_PACKET_TYPE_PROMISCUOUS);
74 rndis_filter_set_packet_filter(rdev,
75 NDIS_PACKET_TYPE_BROADCAST |
76 NDIS_PACKET_TYPE_ALL_MULTICAST |
77 NDIS_PACKET_TYPE_DIRECTED);
80 static void netvsc_set_multicast_list(struct net_device *net)
82 struct net_device_context *net_device_ctx = netdev_priv(net);
84 schedule_work(&net_device_ctx->work);
87 static int netvsc_open(struct net_device *net)
89 struct net_device_context *net_device_ctx = netdev_priv(net);
90 struct hv_device *device_obj = net_device_ctx->device_ctx;
91 struct netvsc_device *nvdev;
92 struct rndis_device *rdev;
95 netif_carrier_off(net);
97 /* Open up the device */
98 ret = rndis_filter_open(device_obj);
100 netdev_err(net, "unable to open device (ret %d).\n", ret);
104 netif_start_queue(net);
106 nvdev = hv_get_drvdata(device_obj);
107 rdev = nvdev->extension;
108 if (!rdev->link_state)
109 netif_carrier_on(net);
114 static int netvsc_close(struct net_device *net)
116 struct net_device_context *net_device_ctx = netdev_priv(net);
117 struct hv_device *device_obj = net_device_ctx->device_ctx;
120 netif_tx_disable(net);
122 /* Make sure netvsc_set_multicast_list doesn't re-enable filter! */
123 cancel_work_sync(&net_device_ctx->work);
124 ret = rndis_filter_close(device_obj);
126 netdev_err(net, "unable to close device (ret %d).\n", ret);
131 static void netvsc_xmit_completion(void *context)
133 struct hv_netvsc_packet *packet = (struct hv_netvsc_packet *)context;
134 struct sk_buff *skb = (struct sk_buff *)
135 (unsigned long)packet->completion.send.send_completion_tid;
140 dev_kfree_skb_any(skb);
143 static u32 fill_pg_buf(struct page *page, u32 offset, u32 len,
144 struct hv_page_buffer *pb)
148 /* Deal with compund pages by ignoring unused part
151 page += (offset >> PAGE_SHIFT);
152 offset &= ~PAGE_MASK;
157 bytes = PAGE_SIZE - offset;
160 pb[j].pfn = page_to_pfn(page);
161 pb[j].offset = offset;
167 if (offset == PAGE_SIZE && len) {
177 static void init_page_array(void *hdr, u32 len, struct sk_buff *skb,
178 struct hv_page_buffer *pb)
181 char *data = skb->data;
182 int frags = skb_shinfo(skb)->nr_frags;
185 /* The packet is laid out thus:
188 * 3. skb fragment data
191 slots_used += fill_pg_buf(virt_to_page(hdr),
193 len, &pb[slots_used]);
195 slots_used += fill_pg_buf(virt_to_page(data),
196 offset_in_page(data),
197 skb_headlen(skb), &pb[slots_used]);
199 for (i = 0; i < frags; i++) {
200 skb_frag_t *frag = skb_shinfo(skb)->frags + i;
202 slots_used += fill_pg_buf(skb_frag_page(frag),
204 skb_frag_size(frag), &pb[slots_used]);
208 static int count_skb_frag_slots(struct sk_buff *skb)
210 int i, frags = skb_shinfo(skb)->nr_frags;
213 for (i = 0; i < frags; i++) {
214 skb_frag_t *frag = skb_shinfo(skb)->frags + i;
215 unsigned long size = skb_frag_size(frag);
216 unsigned long offset = frag->page_offset;
218 /* Skip unused frames from start of page */
219 offset &= ~PAGE_MASK;
220 pages += PFN_UP(offset + size);
225 static int netvsc_get_slots(struct sk_buff *skb)
227 char *data = skb->data;
228 unsigned int offset = offset_in_page(data);
229 unsigned int len = skb_headlen(skb);
233 slots = DIV_ROUND_UP(offset + len, PAGE_SIZE);
234 frag_slots = count_skb_frag_slots(skb);
235 return slots + frag_slots;
238 static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
240 struct net_device_context *net_device_ctx = netdev_priv(net);
241 struct hv_netvsc_packet *packet;
243 unsigned int num_data_pages;
245 /* We will atmost need two pages to describe the rndis
246 * header. We can only transmit MAX_PAGE_BUFFER_COUNT number
247 * of pages in a single packet.
249 num_data_pages = netvsc_get_slots(skb) + 2;
250 if (num_data_pages > MAX_PAGE_BUFFER_COUNT) {
251 netdev_err(net, "Packet too big: %u\n", skb->len);
253 net->stats.tx_dropped++;
257 /* Allocate a netvsc packet based on # of frags. */
258 packet = kzalloc(sizeof(struct hv_netvsc_packet) +
259 (num_data_pages * sizeof(struct hv_page_buffer)) +
260 sizeof(struct rndis_message) +
261 NDIS_VLAN_PPI_SIZE, GFP_ATOMIC);
263 /* out of memory, drop packet */
264 netdev_err(net, "unable to allocate hv_netvsc_packet\n");
267 net->stats.tx_dropped++;
271 packet->vlan_tci = skb->vlan_tci;
273 packet->extension = (void *)(unsigned long)packet +
274 sizeof(struct hv_netvsc_packet) +
275 (num_data_pages * sizeof(struct hv_page_buffer));
277 /* If the rndis msg goes beyond 1 page, we will add 1 later */
278 packet->page_buf_cnt = num_data_pages - 1;
280 /* Initialize it from the skb */
281 packet->total_data_buflen = skb->len;
283 /* Start filling in the page buffers starting after RNDIS buffer. */
284 init_page_array(NULL, 0, skb, &packet->page_buf[1]);
286 /* Set the completion routine */
287 packet->completion.send.send_completion = netvsc_xmit_completion;
288 packet->completion.send.send_completion_ctx = packet;
289 packet->completion.send.send_completion_tid = (unsigned long)skb;
291 ret = rndis_filter_send(net_device_ctx->device_ctx,
294 net->stats.tx_bytes += skb->len;
295 net->stats.tx_packets++;
298 if (ret != -EAGAIN) {
299 dev_kfree_skb_any(skb);
300 net->stats.tx_dropped++;
304 return (ret == -EAGAIN) ? NETDEV_TX_BUSY : NETDEV_TX_OK;
308 * netvsc_linkstatus_callback - Link up/down notification
310 void netvsc_linkstatus_callback(struct hv_device *device_obj,
313 struct net_device *net;
314 struct net_device_context *ndev_ctx;
315 struct netvsc_device *net_device;
316 struct rndis_device *rdev;
318 net_device = hv_get_drvdata(device_obj);
319 rdev = net_device->extension;
321 rdev->link_state = status != 1;
323 net = net_device->ndev;
325 if (!net || net->reg_state != NETREG_REGISTERED)
328 ndev_ctx = netdev_priv(net);
330 schedule_delayed_work(&ndev_ctx->dwork, 0);
331 schedule_delayed_work(&ndev_ctx->dwork, msecs_to_jiffies(20));
333 schedule_delayed_work(&ndev_ctx->dwork, 0);
338 * netvsc_recv_callback - Callback when we receive a packet from the
339 * "wire" on the specified device.
341 int netvsc_recv_callback(struct hv_device *device_obj,
342 struct hv_netvsc_packet *packet)
344 struct net_device *net;
347 net = ((struct netvsc_device *)hv_get_drvdata(device_obj))->ndev;
348 if (!net || net->reg_state != NETREG_REGISTERED) {
349 packet->status = NVSP_STAT_FAIL;
353 /* Allocate a skb - TODO direct I/O to pages? */
354 skb = netdev_alloc_skb_ip_align(net, packet->total_data_buflen);
355 if (unlikely(!skb)) {
356 ++net->stats.rx_dropped;
357 packet->status = NVSP_STAT_FAIL;
362 * Copy to skb. This copy is needed here since the memory pointed by
363 * hv_netvsc_packet cannot be deallocated
365 memcpy(skb_put(skb, packet->total_data_buflen), packet->data,
366 packet->total_data_buflen);
368 skb->protocol = eth_type_trans(skb, net);
369 skb->ip_summed = CHECKSUM_NONE;
370 if (packet->vlan_tci & VLAN_TAG_PRESENT)
371 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
374 net->stats.rx_packets++;
375 net->stats.rx_bytes += packet->total_data_buflen;
378 * Pass the skb back up. Network stack will deallocate the skb when it
387 static void netvsc_get_drvinfo(struct net_device *net,
388 struct ethtool_drvinfo *info)
390 strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
391 strlcpy(info->fw_version, "N/A", sizeof(info->fw_version));
394 static int netvsc_change_mtu(struct net_device *ndev, int mtu)
396 struct net_device_context *ndevctx = netdev_priv(ndev);
397 struct hv_device *hdev = ndevctx->device_ctx;
398 struct netvsc_device *nvdev = hv_get_drvdata(hdev);
399 struct netvsc_device_info device_info;
400 int limit = ETH_DATA_LEN;
402 if (nvdev == NULL || nvdev->destroy)
405 if (nvdev->nvsp_version >= NVSP_PROTOCOL_VERSION_2)
408 if (mtu < 68 || mtu > limit)
411 nvdev->start_remove = true;
412 cancel_work_sync(&ndevctx->work);
413 netif_tx_disable(ndev);
414 rndis_filter_device_remove(hdev);
418 ndevctx->device_ctx = hdev;
419 hv_set_drvdata(hdev, ndev);
420 device_info.ring_size = ring_size;
421 rndis_filter_device_add(hdev, &device_info);
422 netif_wake_queue(ndev);
428 static int netvsc_set_mac_addr(struct net_device *ndev, void *p)
430 struct net_device_context *ndevctx = netdev_priv(ndev);
431 struct hv_device *hdev = ndevctx->device_ctx;
432 struct sockaddr *addr = p;
433 char save_adr[ETH_ALEN];
434 unsigned char save_aatype;
437 memcpy(save_adr, ndev->dev_addr, ETH_ALEN);
438 save_aatype = ndev->addr_assign_type;
440 err = eth_mac_addr(ndev, p);
444 err = rndis_filter_set_device_mac(hdev, addr->sa_data);
446 /* roll back to saved MAC */
447 memcpy(ndev->dev_addr, save_adr, ETH_ALEN);
448 ndev->addr_assign_type = save_aatype;
455 static const struct ethtool_ops ethtool_ops = {
456 .get_drvinfo = netvsc_get_drvinfo,
457 .get_link = ethtool_op_get_link,
460 static const struct net_device_ops device_ops = {
461 .ndo_open = netvsc_open,
462 .ndo_stop = netvsc_close,
463 .ndo_start_xmit = netvsc_start_xmit,
464 .ndo_set_rx_mode = netvsc_set_multicast_list,
465 .ndo_change_mtu = netvsc_change_mtu,
466 .ndo_validate_addr = eth_validate_addr,
467 .ndo_set_mac_address = netvsc_set_mac_addr,
471 * Send GARP packet to network peers after migrations.
472 * After Quick Migration, the network is not immediately operational in the
473 * current context when receiving RNDIS_STATUS_MEDIA_CONNECT event. So, add
474 * another netif_notify_peers() into a delayed work, otherwise GARP packet
475 * will not be sent after quick migration, and cause network disconnection.
476 * Also, we update the carrier status here.
478 static void netvsc_link_change(struct work_struct *w)
480 struct net_device_context *ndev_ctx;
481 struct net_device *net;
482 struct netvsc_device *net_device;
483 struct rndis_device *rdev;
488 ndev_ctx = container_of(w, struct net_device_context, dwork.work);
489 net_device = hv_get_drvdata(ndev_ctx->device_ctx);
490 rdev = net_device->extension;
491 net = net_device->ndev;
493 if (rdev->link_state) {
494 netif_carrier_off(net);
497 netif_carrier_on(net);
504 netdev_notify_peers(net);
508 static int netvsc_probe(struct hv_device *dev,
509 const struct hv_vmbus_device_id *dev_id)
511 struct net_device *net = NULL;
512 struct net_device_context *net_device_ctx;
513 struct netvsc_device_info device_info;
516 net = alloc_etherdev(sizeof(struct net_device_context));
520 net_device_ctx = netdev_priv(net);
521 net_device_ctx->device_ctx = dev;
522 hv_set_drvdata(dev, net);
523 INIT_DELAYED_WORK(&net_device_ctx->dwork, netvsc_link_change);
524 INIT_WORK(&net_device_ctx->work, do_set_multicast);
526 net->netdev_ops = &device_ops;
528 /* TODO: Add GSO and Checksum offload */
529 net->hw_features = NETIF_F_SG;
530 net->features = NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_SG;
532 SET_ETHTOOL_OPS(net, ðtool_ops);
533 SET_NETDEV_DEV(net, &dev->device);
535 /* Notify the netvsc driver of the new device */
536 device_info.ring_size = ring_size;
537 ret = rndis_filter_device_add(dev, &device_info);
539 netdev_err(net, "unable to add netvsc device (ret %d)\n", ret);
541 hv_set_drvdata(dev, NULL);
544 memcpy(net->dev_addr, device_info.mac_adr, ETH_ALEN);
546 ret = register_netdev(net);
548 pr_err("Unable to register netdev.\n");
549 rndis_filter_device_remove(dev);
556 static int netvsc_remove(struct hv_device *dev)
558 struct net_device *net;
559 struct net_device_context *ndev_ctx;
560 struct netvsc_device *net_device;
562 net_device = hv_get_drvdata(dev);
563 net = net_device->ndev;
566 dev_err(&dev->device, "No net device to remove\n");
570 net_device->start_remove = true;
572 ndev_ctx = netdev_priv(net);
573 cancel_delayed_work_sync(&ndev_ctx->dwork);
574 cancel_work_sync(&ndev_ctx->work);
576 /* Stop outbound asap */
577 netif_tx_disable(net);
579 unregister_netdev(net);
582 * Call to the vsc driver to let it know that the device is being
585 rndis_filter_device_remove(dev);
591 static const struct hv_vmbus_device_id id_table[] = {
597 MODULE_DEVICE_TABLE(vmbus, id_table);
599 /* The one and only one */
600 static struct hv_driver netvsc_drv = {
601 .name = KBUILD_MODNAME,
602 .id_table = id_table,
603 .probe = netvsc_probe,
604 .remove = netvsc_remove,
607 static void __exit netvsc_drv_exit(void)
609 vmbus_driver_unregister(&netvsc_drv);
612 static int __init netvsc_drv_init(void)
614 if (ring_size < RING_SIZE_MIN) {
615 ring_size = RING_SIZE_MIN;
616 pr_info("Increased ring_size to %d (min allowed)\n",
619 return vmbus_driver_register(&netvsc_drv);
622 MODULE_LICENSE("GPL");
623 MODULE_DESCRIPTION("Microsoft Hyper-V network driver");
625 module_init(netvsc_drv_init);
626 module_exit(netvsc_drv_exit);