mtip32xx: Make SGL container per-command to eliminate high order dma allocation
[firefly-linux-kernel-4.4.55.git] / drivers / block / mtip32xx / mtip32xx.c
index 52b2f2a714708a756b4adcee140e8ae2a91f5006..966b4371a30fa1e136e2ed537c9e95bcf7da1816 100644 (file)
 #include "mtip32xx.h"
 
 #define HW_CMD_SLOT_SZ         (MTIP_MAX_COMMAND_SLOTS * 32)
-#define HW_CMD_TBL_SZ          (AHCI_CMD_TBL_HDR_SZ + (MTIP_MAX_SG * 16))
-#define HW_CMD_TBL_AR_SZ       (HW_CMD_TBL_SZ * MTIP_MAX_COMMAND_SLOTS)
-#define HW_PORT_PRIV_DMA_SZ \
-               (HW_CMD_SLOT_SZ + HW_CMD_TBL_AR_SZ + AHCI_RX_FIS_SZ)
+
+/* DMA region containing RX Fis, Identify, RLE10, and SMART buffers */
+#define AHCI_RX_FIS_SZ          0x100
+#define AHCI_RX_FIS_OFFSET      0x0
+#define AHCI_IDFY_SZ            ATA_SECT_SIZE
+#define AHCI_IDFY_OFFSET        0x400
+#define AHCI_SECTBUF_SZ         ATA_SECT_SIZE
+#define AHCI_SECTBUF_OFFSET     0x800
+#define AHCI_SMARTBUF_SZ        ATA_SECT_SIZE
+#define AHCI_SMARTBUF_OFFSET    0xC00
+/* 0x100 + 0x200 + 0x200 + 0x200 is smaller than 4k but we pad it out */
+#define BLOCK_DMA_ALLOC_SZ      4096
+
+/* DMA region containing command table (should be 8192 bytes) */
+#define AHCI_CMD_SLOT_SZ        sizeof(struct mtip_cmd_hdr)
+#define AHCI_CMD_TBL_SZ         (MTIP_MAX_COMMAND_SLOTS * AHCI_CMD_SLOT_SZ)
+#define AHCI_CMD_TBL_OFFSET     0x0
+
+/* DMA region per command (contains header and SGL) */
+#define AHCI_CMD_TBL_HDR_SZ     0x80
+#define AHCI_CMD_TBL_HDR_OFFSET 0x0
+#define AHCI_CMD_TBL_SGL_SZ     (MTIP_MAX_SG * sizeof(struct mtip_cmd_sg))
+#define AHCI_CMD_TBL_SGL_OFFSET AHCI_CMD_TBL_HDR_SZ
+#define CMD_DMA_ALLOC_SZ        (AHCI_CMD_TBL_SGL_SZ + AHCI_CMD_TBL_HDR_SZ)
+
 
 #define HOST_CAP_NZDMA         (1 << 19)
 #define HOST_HSORG             0xFC
@@ -3312,6 +3333,118 @@ st_out:
        return 0;
 }
 
+/*
+ * DMA region teardown
+ *
+ * @dd Pointer to driver_data structure
+ *
+ * return value
+ *      None
+ */
+static void mtip_dma_free(struct driver_data *dd)
+{
+       int i;
+       struct mtip_port *port = dd->port;
+
+       if (port->block1)
+               dmam_free_coherent(&dd->pdev->dev, BLOCK_DMA_ALLOC_SZ,
+                                       port->block1, port->block1_dma);
+
+       if (port->command_list) {
+               dmam_free_coherent(&dd->pdev->dev, AHCI_CMD_TBL_SZ,
+                               port->command_list, port->command_list_dma);
+       }
+
+       for (i = 0; i < MTIP_MAX_COMMAND_SLOTS; i++) {
+               if (port->commands[i].command)
+                       dmam_free_coherent(&dd->pdev->dev, CMD_DMA_ALLOC_SZ,
+                               port->commands[i].command,
+                               port->commands[i].command_dma);
+       }
+}
+
+/*
+ * DMA region setup
+ *
+ * @dd Pointer to driver_data structure
+ *
+ * return value
+ *      -ENOMEM Not enough free DMA region space to initialize driver
+ */
+static int mtip_dma_alloc(struct driver_data *dd)
+{
+       struct mtip_port *port = dd->port;
+       int i, rv = 0;
+       u32 host_cap_64 = readl(dd->mmio + HOST_CAP) & HOST_CAP_64;
+
+       /* Allocate dma memory for RX Fis, Identify, and Sector Bufffer */
+       port->block1 =
+               dmam_alloc_coherent(&dd->pdev->dev, BLOCK_DMA_ALLOC_SZ,
+                                       &port->block1_dma, GFP_KERNEL);
+       if (!port->block1)
+               return -ENOMEM;
+       memset(port->block1, 0, BLOCK_DMA_ALLOC_SZ);
+
+       /* Allocate dma memory for command list */
+       port->command_list =
+               dmam_alloc_coherent(&dd->pdev->dev, AHCI_CMD_TBL_SZ,
+                                       &port->command_list_dma, GFP_KERNEL);
+       if (!port->command_list) {
+               dmam_free_coherent(&dd->pdev->dev, BLOCK_DMA_ALLOC_SZ,
+                                       port->block1, port->block1_dma);
+               port->block1 = NULL;
+               port->block1_dma = 0;
+               return -ENOMEM;
+       }
+       memset(port->command_list, 0, AHCI_CMD_TBL_SZ);
+
+       /* Setup all pointers into first DMA region */
+       port->rxfis         = port->block1 + AHCI_RX_FIS_OFFSET;
+       port->rxfis_dma     = port->block1_dma + AHCI_RX_FIS_OFFSET;
+       port->identify      = port->block1 + AHCI_IDFY_OFFSET;
+       port->identify_dma  = port->block1_dma + AHCI_IDFY_OFFSET;
+       port->log_buf       = port->block1 + AHCI_SECTBUF_OFFSET;
+       port->log_buf_dma   = port->block1_dma + AHCI_SECTBUF_OFFSET;
+       port->smart_buf     = port->block1 + AHCI_SMARTBUF_OFFSET;
+       port->smart_buf_dma = port->block1_dma + AHCI_SMARTBUF_OFFSET;
+
+       /* Setup per command SGL DMA region */
+
+       /* Point the command headers at the command tables */
+       for (i = 0; i < MTIP_MAX_COMMAND_SLOTS; i++) {
+               port->commands[i].command =
+                       dmam_alloc_coherent(&dd->pdev->dev, CMD_DMA_ALLOC_SZ,
+                               &port->commands[i].command_dma, GFP_KERNEL);
+               if (!port->commands[i].command) {
+                       rv = -ENOMEM;
+                       mtip_dma_free(dd);
+                       return rv;
+               }
+               memset(port->commands[i].command, 0, CMD_DMA_ALLOC_SZ);
+
+               port->commands[i].command_header = port->command_list +
+                                       (sizeof(struct mtip_cmd_hdr) * i);
+               port->commands[i].command_header_dma =
+                                       dd->port->command_list_dma +
+                                       (sizeof(struct mtip_cmd_hdr) * i);
+
+               if (host_cap_64)
+                       port->commands[i].command_header->ctbau =
+                               __force_bit2int cpu_to_le32(
+                               (port->commands[i].command_dma >> 16) >> 16);
+
+               port->commands[i].command_header->ctba =
+                               __force_bit2int cpu_to_le32(
+                               port->commands[i].command_dma & 0xFFFFFFFF);
+
+               sg_init_table(port->commands[i].sg, MTIP_MAX_SG);
+
+               /* Mark command as currently inactive */
+               atomic_set(&dd->port->commands[i].active, 0);
+       }
+       return 0;
+}
+
 /*
  * Called once for each card.
  *
@@ -3370,83 +3503,10 @@ static int mtip_hw_init(struct driver_data *dd)
        dd->port->mmio  = dd->mmio + PORT_OFFSET;
        dd->port->dd    = dd;
 
-       /* Allocate memory for the command list. */
-       dd->port->command_list =
-               dmam_alloc_coherent(&dd->pdev->dev,
-                       HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 4),
-                       &dd->port->command_list_dma,
-                       GFP_KERNEL);
-       if (!dd->port->command_list) {
-               dev_err(&dd->pdev->dev,
-                       "Memory allocation: command list\n");
-               rv = -ENOMEM;
+       /* DMA allocations */
+       rv = mtip_dma_alloc(dd);
+       if (rv < 0)
                goto out1;
-       }
-
-       /* Clear the memory we have allocated. */
-       memset(dd->port->command_list,
-               0,
-               HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 4));
-
-       /* Setup the addresse of the RX FIS. */
-       dd->port->rxfis     = dd->port->command_list + HW_CMD_SLOT_SZ;
-       dd->port->rxfis_dma = dd->port->command_list_dma + HW_CMD_SLOT_SZ;
-
-       /* Setup the address of the command tables. */
-       dd->port->command_table   = dd->port->rxfis + AHCI_RX_FIS_SZ;
-       dd->port->command_tbl_dma = dd->port->rxfis_dma + AHCI_RX_FIS_SZ;
-
-       /* Setup the address of the identify data. */
-       dd->port->identify     = dd->port->command_table +
-                                       HW_CMD_TBL_AR_SZ;
-       dd->port->identify_dma = dd->port->command_tbl_dma +
-                                       HW_CMD_TBL_AR_SZ;
-
-       /* Setup the address of the sector buffer - for some non-ncq cmds */
-       dd->port->sector_buffer = (void *) dd->port->identify + ATA_SECT_SIZE;
-       dd->port->sector_buffer_dma = dd->port->identify_dma + ATA_SECT_SIZE;
-
-       /* Setup the address of the log buf - for read log command */
-       dd->port->log_buf = (void *)dd->port->sector_buffer  + ATA_SECT_SIZE;
-       dd->port->log_buf_dma = dd->port->sector_buffer_dma + ATA_SECT_SIZE;
-
-       /* Setup the address of the smart buf - for smart read data command */
-       dd->port->smart_buf = (void *)dd->port->log_buf  + ATA_SECT_SIZE;
-       dd->port->smart_buf_dma = dd->port->log_buf_dma + ATA_SECT_SIZE;
-
-
-       /* Point the command headers at the command tables. */
-       for (i = 0; i < num_command_slots; i++) {
-               dd->port->commands[i].command_header =
-                                       dd->port->command_list +
-                                       (sizeof(struct mtip_cmd_hdr) * i);
-               dd->port->commands[i].command_header_dma =
-                                       dd->port->command_list_dma +
-                                       (sizeof(struct mtip_cmd_hdr) * i);
-
-               dd->port->commands[i].command =
-                       dd->port->command_table + (HW_CMD_TBL_SZ * i);
-               dd->port->commands[i].command_dma =
-                       dd->port->command_tbl_dma + (HW_CMD_TBL_SZ * i);
-
-               if (readl(dd->mmio + HOST_CAP) & HOST_CAP_64)
-                       dd->port->commands[i].command_header->ctbau =
-                       __force_bit2int cpu_to_le32(
-                       (dd->port->commands[i].command_dma >> 16) >> 16);
-               dd->port->commands[i].command_header->ctba =
-                       __force_bit2int cpu_to_le32(
-                       dd->port->commands[i].command_dma & 0xFFFFFFFF);
-
-               /*
-                * If this is not done, a bug is reported by the stock
-                * FC11 i386. Due to the fact that it has lots of kernel
-                * debugging enabled.
-                */
-               sg_init_table(dd->port->commands[i].sg, MTIP_MAX_SG);
-
-               /* Mark all commands as currently inactive.*/
-               atomic_set(&dd->port->commands[i].active, 0);
-       }
 
        /* Setup the pointers to the extended s_active and CI registers. */
        for (i = 0; i < dd->slot_groups; i++) {
@@ -3594,12 +3654,8 @@ out3:
 
 out2:
        mtip_deinit_port(dd->port);
+       mtip_dma_free(dd);
 
-       /* Free the command/command header memory. */
-       dmam_free_coherent(&dd->pdev->dev,
-                               HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 4),
-                               dd->port->command_list,
-                               dd->port->command_list_dma);
 out1:
        /* Free the memory allocated for the for structure. */
        kfree(dd->port);
@@ -3641,11 +3697,9 @@ static int mtip_hw_exit(struct driver_data *dd)
        irq_set_affinity_hint(dd->pdev->irq, NULL);
        devm_free_irq(&dd->pdev->dev, dd->pdev->irq, dd);
 
-       /* Free the command/command header memory. */
-       dmam_free_coherent(&dd->pdev->dev,
-                       HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 4),
-                       dd->port->command_list,
-                       dd->port->command_list_dma);
+       /* Free dma regions */
+       mtip_dma_free(dd);
+
        /* Free the memory allocated for the for structure. */
        kfree(dd->port);
        dd->port = NULL;