From 6d3865f9d41f15ddbcecaa6722871fc0db21d7ab Mon Sep 17 00:00:00 2001 From: Jim Baxter Date: Mon, 7 Jul 2014 18:33:18 +0100 Subject: [PATCH] usb: gadget: NCM: Add transmit multi-frame. This adds multi-frame support to the NCM NTB's for the gadget driver. This allows multiple network packets to be put inside a single USB NTB with a maximum size of 16kB. It has a time out of 300ms to ensure that smaller number of packets still maintain a normal latency. Also the .fp_index and .next_fp_index have been changed to .ndp_index and .next_ndp_index to match the latest CDC-NCM specification and help with maintenance. Results transmitting from gadget to host. Before the change: TCP_STREAM Throughput (10^6bits/sec): 22.72 UDP_STREAM Throughput (10^6bits/sec): 25.94 Latency: netperf -H 192.168.1.101 -v2 -l 50 -t TCP_RR -- -r 16384,16384 Trans. RoundTrip Throughput Rate Latency 10^6bits/s per sec usec/Tran Outbound 100.83 9918.116 13.215 After the change: TCP_STREAM Throughput (10^6bits/sec): 124.26 UDP_STREAM Throughput (10^6bits/sec): 227.48 Latency: netperf -H 192.168.1.101 -v2 -l 50 -t TCP_RR -- -r 16384,16384 Trans. RoundTrip Throughput Rate Latency 10^6bits/s per sec usec/Tran Outbound 156.80 6377.730 20.552 Signed-off-by: Jim Baxter Signed-off-by: Felipe Balbi --- drivers/usb/gadget/f_ncm.c | 335 ++++++++++++++++++++++++++--------- drivers/usb/gadget/u_ether.c | 19 +- drivers/usb/gadget/u_ether.h | 2 + 3 files changed, 269 insertions(+), 87 deletions(-) diff --git a/drivers/usb/gadget/f_ncm.c b/drivers/usb/gadget/f_ncm.c index d0ebbac8845f..5452fb663762 100644 --- a/drivers/usb/gadget/f_ncm.c +++ b/drivers/usb/gadget/f_ncm.c @@ -68,6 +68,18 @@ struct f_ncm { * callback and ethernet open/close */ spinlock_t lock; + + struct net_device *netdev; + + /* For multi-frame NDP TX */ + struct sk_buff *skb_tx_data; + struct sk_buff *skb_tx_ndp; + u16 ndp_dgram_count; + bool timer_force_tx; + struct tasklet_struct tx_tasklet; + struct hrtimer task_timer; + + bool timer_stopping; }; static inline struct f_ncm *func_to_ncm(struct usb_function *f) @@ -92,15 +104,20 @@ static inline unsigned ncm_bitrate(struct usb_gadget *g) * If the host can group frames, allow it to do that, 16K is selected, * because it's used by default by the current linux host driver */ -#define NTB_DEFAULT_IN_SIZE USB_CDC_NCM_NTB_MIN_IN_SIZE +#define NTB_DEFAULT_IN_SIZE 16384 #define NTB_OUT_SIZE 16384 -/* - * skbs of size less than that will not be aligned - * to NCM's dwNtbInMaxSize to save bus bandwidth +/* Allocation for storing the NDP, 32 should suffice for a + * 16k packet. This allows a maximum of 32 * 507 Byte packets to + * be transmitted in a single 16kB skb, though when sending full size + * packets this limit will be plenty. + * Smaller packets are not likely to be trying to maximize the + * throughput and will be mstly sending smaller infrequent frames. */ +#define TX_MAX_NUM_DPE 32 -#define MAX_TX_NONFIXED (512 * 3) +/* Delay for the transmit to wait before sending an unfilled NTB frame. */ +#define TX_TIMEOUT_NSECS 300000 #define FORMATS_SUPPORTED (USB_CDC_NCM_NTB16_SUPPORTED | \ USB_CDC_NCM_NTB32_SUPPORTED) @@ -355,14 +372,15 @@ struct ndp_parser_opts { u32 ndp_sign; unsigned nth_size; unsigned ndp_size; + unsigned dpe_size; unsigned ndplen_align; /* sizes in u16 units */ unsigned dgram_item_len; /* index or length */ unsigned block_length; - unsigned fp_index; + unsigned ndp_index; unsigned reserved1; unsigned reserved2; - unsigned next_fp_index; + unsigned next_ndp_index; }; #define INIT_NDP16_OPTS { \ @@ -370,13 +388,14 @@ struct ndp_parser_opts { .ndp_sign = USB_CDC_NCM_NDP16_NOCRC_SIGN, \ .nth_size = sizeof(struct usb_cdc_ncm_nth16), \ .ndp_size = sizeof(struct usb_cdc_ncm_ndp16), \ + .dpe_size = sizeof(struct usb_cdc_ncm_dpe16), \ .ndplen_align = 4, \ .dgram_item_len = 1, \ .block_length = 1, \ - .fp_index = 1, \ + .ndp_index = 1, \ .reserved1 = 0, \ .reserved2 = 0, \ - .next_fp_index = 1, \ + .next_ndp_index = 1, \ } @@ -385,13 +404,14 @@ struct ndp_parser_opts { .ndp_sign = USB_CDC_NCM_NDP32_NOCRC_SIGN, \ .nth_size = sizeof(struct usb_cdc_ncm_nth32), \ .ndp_size = sizeof(struct usb_cdc_ncm_ndp32), \ + .dpe_size = sizeof(struct usb_cdc_ncm_dpe32), \ .ndplen_align = 8, \ .dgram_item_len = 2, \ .block_length = 2, \ - .fp_index = 2, \ + .ndp_index = 2, \ .reserved1 = 1, \ .reserved2 = 2, \ - .next_fp_index = 2, \ + .next_ndp_index = 2, \ } static const struct ndp_parser_opts ndp16_opts = INIT_NDP16_OPTS; @@ -803,6 +823,8 @@ static int ncm_set_alt(struct usb_function *f, unsigned intf, unsigned alt) if (ncm->port.in_ep->driver_data) { DBG(cdev, "reset ncm\n"); + ncm->timer_stopping = true; + ncm->netdev = NULL; gether_disconnect(&ncm->port); ncm_reset_values(ncm); } @@ -839,6 +861,8 @@ static int ncm_set_alt(struct usb_function *f, unsigned intf, unsigned alt) net = gether_connect(&ncm->port); if (IS_ERR(net)) return PTR_ERR(net); + ncm->netdev = net; + ncm->timer_stopping = false; } spin_lock(&ncm->lock); @@ -865,95 +889,232 @@ static int ncm_get_alt(struct usb_function *f, unsigned intf) return ncm->port.in_ep->driver_data ? 1 : 0; } +static struct sk_buff *package_for_tx(struct f_ncm *ncm) +{ + __le16 *ntb_iter; + struct sk_buff *skb2 = NULL; + unsigned ndp_pad; + unsigned ndp_index; + unsigned new_len; + + const struct ndp_parser_opts *opts = ncm->parser_opts; + const int ndp_align = le16_to_cpu(ntb_parameters.wNdpInAlignment); + const int dgram_idx_len = 2 * 2 * opts->dgram_item_len; + + /* Stop the timer */ + hrtimer_try_to_cancel(&ncm->task_timer); + + ndp_pad = ALIGN(ncm->skb_tx_data->len, ndp_align) - + ncm->skb_tx_data->len; + ndp_index = ncm->skb_tx_data->len + ndp_pad; + new_len = ndp_index + dgram_idx_len + ncm->skb_tx_ndp->len; + + /* Set the final BlockLength and wNdpIndex */ + ntb_iter = (void *) ncm->skb_tx_data->data; + /* Increment pointer to BlockLength */ + ntb_iter += 2 + 1 + 1; + put_ncm(&ntb_iter, opts->block_length, new_len); + put_ncm(&ntb_iter, opts->ndp_index, ndp_index); + + /* Set the final NDP wLength */ + new_len = opts->ndp_size + + (ncm->ndp_dgram_count * dgram_idx_len); + ncm->ndp_dgram_count = 0; + /* Increment from start to wLength */ + ntb_iter = (void *) ncm->skb_tx_ndp->data; + ntb_iter += 2; + put_unaligned_le16(new_len, ntb_iter); + + /* Merge the skbs */ + swap(skb2, ncm->skb_tx_data); + if (ncm->skb_tx_data) { + dev_kfree_skb_any(ncm->skb_tx_data); + ncm->skb_tx_data = NULL; + } + + /* Insert NDP alignment. */ + ntb_iter = (void *) skb_put(skb2, ndp_pad); + memset(ntb_iter, 0, ndp_pad); + + /* Copy NTB across. */ + ntb_iter = (void *) skb_put(skb2, ncm->skb_tx_ndp->len); + memcpy(ntb_iter, ncm->skb_tx_ndp->data, ncm->skb_tx_ndp->len); + dev_kfree_skb_any(ncm->skb_tx_ndp); + ncm->skb_tx_ndp = NULL; + + /* Insert zero'd datagram. */ + ntb_iter = (void *) skb_put(skb2, dgram_idx_len); + memset(ntb_iter, 0, dgram_idx_len); + + return skb2; +} + static struct sk_buff *ncm_wrap_ntb(struct gether *port, struct sk_buff *skb) { struct f_ncm *ncm = func_to_ncm(&port->func); - struct sk_buff *skb2; + struct sk_buff *skb2 = NULL; int ncb_len = 0; - __le16 *tmp; - int div; - int rem; - int pad; - int ndp_align; - int ndp_pad; + __le16 *ntb_data; + __le16 *ntb_ndp; + int dgram_pad; + unsigned max_size = ncm->port.fixed_in_len; const struct ndp_parser_opts *opts = ncm->parser_opts; - unsigned crc_len = ncm->is_crc ? sizeof(uint32_t) : 0; - - div = le16_to_cpu(ntb_parameters.wNdpInDivisor); - rem = le16_to_cpu(ntb_parameters.wNdpInPayloadRemainder); - ndp_align = le16_to_cpu(ntb_parameters.wNdpInAlignment); - - ncb_len += opts->nth_size; - ndp_pad = ALIGN(ncb_len, ndp_align) - ncb_len; - ncb_len += ndp_pad; - ncb_len += opts->ndp_size; - ncb_len += 2 * 2 * opts->dgram_item_len; /* Datagram entry */ - ncb_len += 2 * 2 * opts->dgram_item_len; /* Zero datagram entry */ - pad = ALIGN(ncb_len, div) + rem - ncb_len; - ncb_len += pad; + const int ndp_align = le16_to_cpu(ntb_parameters.wNdpInAlignment); + const int div = le16_to_cpu(ntb_parameters.wNdpInDivisor); + const int rem = le16_to_cpu(ntb_parameters.wNdpInPayloadRemainder); + const int dgram_idx_len = 2 * 2 * opts->dgram_item_len; - if (ncb_len + skb->len + crc_len > max_size) { - dev_kfree_skb_any(skb); + if (!skb && !ncm->skb_tx_data) return NULL; - } - skb2 = skb_copy_expand(skb, ncb_len, - max_size - skb->len - ncb_len - crc_len, - GFP_ATOMIC); - dev_kfree_skb_any(skb); - if (!skb2) - return NULL; + if (skb) { + /* Add the CRC if required up front */ + if (ncm->is_crc) { + uint32_t crc; + __le16 *crc_pos; + + crc = ~crc32_le(~0, + skb->data, + skb->len); + crc_pos = (void *) skb_put(skb, sizeof(uint32_t)); + put_unaligned_le32(crc, crc_pos); + } - skb = skb2; + /* If the new skb is too big for the current NCM NTB then + * set the current stored skb to be sent now and clear it + * ready for new data. + * NOTE: Assume maximum align for speed of calculation. + */ + if (ncm->skb_tx_data + && (ncm->ndp_dgram_count >= TX_MAX_NUM_DPE + || (ncm->skb_tx_data->len + + div + rem + skb->len + + ncm->skb_tx_ndp->len + ndp_align + (2 * dgram_idx_len)) + > max_size)) { + skb2 = package_for_tx(ncm); + if (!skb2) + goto err; + } - tmp = (void *) skb_push(skb, ncb_len); - memset(tmp, 0, ncb_len); + if (!ncm->skb_tx_data) { + ncb_len = opts->nth_size; + dgram_pad = ALIGN(ncb_len, div) + rem - ncb_len; + ncb_len += dgram_pad; - put_unaligned_le32(opts->nth_sign, tmp); /* dwSignature */ - tmp += 2; - /* wHeaderLength */ - put_unaligned_le16(opts->nth_size, tmp++); - tmp++; /* skip wSequence */ - put_ncm(&tmp, opts->block_length, skb->len); /* (d)wBlockLength */ - /* (d)wFpIndex */ - /* the first pointer is right after the NTH + align */ - put_ncm(&tmp, opts->fp_index, opts->nth_size + ndp_pad); + /* Create a new skb for the NTH and datagrams. */ + ncm->skb_tx_data = alloc_skb(max_size, GFP_ATOMIC); + if (!ncm->skb_tx_data) + goto err; - tmp = (void *)tmp + ndp_pad; + ntb_data = (void *) skb_put(ncm->skb_tx_data, ncb_len); + memset(ntb_data, 0, ncb_len); + /* dwSignature */ + put_unaligned_le32(opts->nth_sign, ntb_data); + ntb_data += 2; + /* wHeaderLength */ + put_unaligned_le16(opts->nth_size, ntb_data++); + + /* Allocate an skb for storing the NDP, + * TX_MAX_NUM_DPE should easily suffice for a + * 16k packet. + */ + ncm->skb_tx_ndp = alloc_skb((int)(opts->ndp_size + + opts->dpe_size + * TX_MAX_NUM_DPE), + GFP_ATOMIC); + if (!ncm->skb_tx_ndp) + goto err; + ntb_ndp = (void *) skb_put(ncm->skb_tx_ndp, + opts->ndp_size); + memset(ntb_ndp, 0, ncb_len); + /* dwSignature */ + put_unaligned_le32(ncm->ndp_sign, ntb_ndp); + ntb_ndp += 2; - /* NDP */ - put_unaligned_le32(ncm->ndp_sign, tmp); /* dwSignature */ - tmp += 2; - /* wLength */ - put_unaligned_le16(ncb_len - opts->nth_size - pad, tmp++); + /* There is always a zeroed entry */ + ncm->ndp_dgram_count = 1; - tmp += opts->reserved1; - tmp += opts->next_fp_index; /* skip reserved (d)wNextFpIndex */ - tmp += opts->reserved2; + /* Note: we skip opts->next_ndp_index */ + } - if (ncm->is_crc) { - uint32_t crc; + /* Delay the timer. */ + hrtimer_start(&ncm->task_timer, + ktime_set(0, TX_TIMEOUT_NSECS), + HRTIMER_MODE_REL); + + /* Add the datagram position entries */ + ntb_ndp = (void *) skb_put(ncm->skb_tx_ndp, dgram_idx_len); + memset(ntb_ndp, 0, dgram_idx_len); + + ncb_len = ncm->skb_tx_data->len; + dgram_pad = ALIGN(ncb_len, div) + rem - ncb_len; + ncb_len += dgram_pad; + + /* (d)wDatagramIndex */ + put_ncm(&ntb_ndp, opts->dgram_item_len, ncb_len); + /* (d)wDatagramLength */ + put_ncm(&ntb_ndp, opts->dgram_item_len, skb->len); + ncm->ndp_dgram_count++; + + /* Add the new data to the skb */ + ntb_data = (void *) skb_put(ncm->skb_tx_data, dgram_pad); + memset(ntb_data, 0, dgram_pad); + ntb_data = (void *) skb_put(ncm->skb_tx_data, skb->len); + memcpy(ntb_data, skb->data, skb->len); + dev_kfree_skb_any(skb); + skb = NULL; - crc = ~crc32_le(~0, - skb->data + ncb_len, - skb->len - ncb_len); - put_unaligned_le32(crc, skb->data + skb->len); - skb_put(skb, crc_len); + } else if (ncm->skb_tx_data && ncm->timer_force_tx) { + /* If the tx was requested because of a timeout then send */ + skb2 = package_for_tx(ncm); + if (!skb2) + goto err; } - /* (d)wDatagramIndex[0] */ - put_ncm(&tmp, opts->dgram_item_len, ncb_len); - /* (d)wDatagramLength[0] */ - put_ncm(&tmp, opts->dgram_item_len, skb->len - ncb_len); - /* (d)wDatagramIndex[1] and (d)wDatagramLength[1] already zeroed */ + return skb2; + +err: + ncm->netdev->stats.tx_dropped++; + + if (skb) + dev_kfree_skb_any(skb); + if (ncm->skb_tx_data) + dev_kfree_skb_any(ncm->skb_tx_data); + if (ncm->skb_tx_ndp) + dev_kfree_skb_any(ncm->skb_tx_ndp); + + return NULL; +} + +/* + * This transmits the NTB if there are frames waiting. + */ +static void ncm_tx_tasklet(unsigned long data) +{ + struct f_ncm *ncm = (void *)data; - if (skb->len > MAX_TX_NONFIXED) - memset(skb_put(skb, max_size - skb->len), - 0, max_size - skb->len); + if (ncm->timer_stopping) + return; + + /* Only send if data is available. */ + if (ncm->skb_tx_data) { + ncm->timer_force_tx = true; + ncm->netdev->netdev_ops->ndo_start_xmit(NULL, ncm->netdev); + ncm->timer_force_tx = false; + } +} - return skb; +/* + * The transmit should only be run if no skb data has been sent + * for a certain duration. + */ +static enum hrtimer_restart ncm_tx_timeout(struct hrtimer *data) +{ + struct f_ncm *ncm = container_of(data, struct f_ncm, task_timer); + tasklet_schedule(&ncm->tx_tasklet); + return HRTIMER_NORESTART; } static int ncm_unwrap_ntb(struct gether *port, @@ -996,7 +1157,7 @@ static int ncm_unwrap_ntb(struct gether *port, goto err; } - ndp_index = get_ncm(&tmp, opts->fp_index); + ndp_index = get_ncm(&tmp, opts->ndp_index); /* Run through all the NDP's in the NTB */ do { @@ -1033,7 +1194,7 @@ static int ncm_unwrap_ntb(struct gether *port, } tmp += opts->reserved1; /* Check for another NDP (d)wNextNdpIndex */ - ndp_index = get_ncm(&tmp, opts->next_fp_index); + ndp_index = get_ncm(&tmp, opts->next_ndp_index); tmp += opts->reserved2; ndp_len -= opts->ndp_size; @@ -1107,8 +1268,11 @@ static void ncm_disable(struct usb_function *f) DBG(cdev, "ncm deactivated\n"); - if (ncm->port.in_ep->driver_data) + if (ncm->port.in_ep->driver_data) { + ncm->timer_stopping = true; + ncm->netdev = NULL; gether_disconnect(&ncm->port); + } if (ncm->notify->driver_data) { usb_ep_disable(ncm->notify); @@ -1277,6 +1441,10 @@ static int ncm_bind(struct usb_configuration *c, struct usb_function *f) ncm->port.open = ncm_open; ncm->port.close = ncm_close; + tasklet_init(&ncm->tx_tasklet, ncm_tx_tasklet, (unsigned long) ncm); + hrtimer_init(&ncm->task_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + ncm->task_timer.function = ncm_tx_timeout; + DBG(cdev, "CDC Network: %s speed IN/%s OUT/%s NOTIFY/%s\n", gadget_is_dualspeed(c->cdev->gadget) ? "dual" : "full", ncm->port.in_ep->name, ncm->port.out_ep->name, @@ -1390,6 +1558,10 @@ static void ncm_unbind(struct usb_configuration *c, struct usb_function *f) DBG(c->cdev, "ncm unbind\n"); + hrtimer_cancel(&ncm->task_timer); + tasklet_kill(&ncm->tx_tasklet); + + ncm_string_defs[0].id = 0; usb_free_all_descriptors(f); kfree(ncm->notify_req->buf); @@ -1426,6 +1598,7 @@ static struct usb_function *ncm_alloc(struct usb_function_instance *fi) ncm->port.ioport = netdev_priv(opts->net); mutex_unlock(&opts->lock); ncm->port.is_fixed = true; + ncm->port.supports_multi_frame = true; ncm->port.func.name = "cdc_network"; /* descriptors are per-instance copies */ diff --git a/drivers/usb/gadget/u_ether.c b/drivers/usb/gadget/u_ether.c index 3d78a8844e43..6e6f87656e7b 100644 --- a/drivers/usb/gadget/u_ether.c +++ b/drivers/usb/gadget/u_ether.c @@ -483,7 +483,7 @@ static netdev_tx_t eth_start_xmit(struct sk_buff *skb, struct net_device *net) { struct eth_dev *dev = netdev_priv(net); - int length = skb->len; + int length = 0; int retval; struct usb_request *req = NULL; unsigned long flags; @@ -500,13 +500,13 @@ static netdev_tx_t eth_start_xmit(struct sk_buff *skb, } spin_unlock_irqrestore(&dev->lock, flags); - if (!in) { + if (skb && !in) { dev_kfree_skb_any(skb); return NETDEV_TX_OK; } /* apply outgoing CDC or RNDIS filters */ - if (!is_promisc(cdc_filter)) { + if (skb && !is_promisc(cdc_filter)) { u8 *dest = skb->data; if (is_multicast_ether_addr(dest)) { @@ -557,11 +557,17 @@ static netdev_tx_t eth_start_xmit(struct sk_buff *skb, if (dev->port_usb) skb = dev->wrap(dev->port_usb, skb); spin_unlock_irqrestore(&dev->lock, flags); - if (!skb) + if (!skb) { + /* Multi frame CDC protocols may store the frame for + * later which is not a dropped frame. + */ + if (dev->port_usb->supports_multi_frame) + goto multiframe; goto drop; - - length = skb->len; + } } + + length = skb->len; req->buf = skb->data; req->context = skb; req->complete = tx_complete; @@ -604,6 +610,7 @@ static netdev_tx_t eth_start_xmit(struct sk_buff *skb, dev_kfree_skb_any(skb); drop: dev->net->stats.tx_dropped++; +multiframe: spin_lock_irqsave(&dev->req_lock, flags); if (list_empty(&dev->tx_reqs)) netif_start_queue(net); diff --git a/drivers/usb/gadget/u_ether.h b/drivers/usb/gadget/u_ether.h index 0f0290acea7e..334b38947916 100644 --- a/drivers/usb/gadget/u_ether.h +++ b/drivers/usb/gadget/u_ether.h @@ -18,6 +18,7 @@ #include #include #include +#include #include "gadget_chips.h" @@ -74,6 +75,7 @@ struct gether { bool is_fixed; u32 fixed_out_len; u32 fixed_in_len; + bool supports_multi_frame; struct sk_buff *(*wrap)(struct gether *port, struct sk_buff *skb); int (*unwrap)(struct gether *port, -- 2.34.1