drm/amdgpu: allow unaligned memory access (v2)
author Jack Xiao <Jack.Xiao@amd.com>
Fri, 8 May 2015 06:46:49 +0000 (14:46 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Thu, 4 Jun 2015 01:03:28 +0000 (21:03 -0400)
Set up the CP and SDMA for proper unaligned memory access.
Required for OpenCL 2.x

v2: update commit message

Signed-off-by: Jack Xiao <Jack.Xiao@amd.com>
Reviewed-by: Monk Liu <monk.liu@amd.com>
Reviewed-by: Jammy Zhou <Jammy.Zhou@amd.com>
drivers/gpu/drm/amd/amdgpu/cik_sdma.c
drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c

index ae2bb26fa46e4f1b865d2a1d99fa476983316797..037e3db6954738e225f21f77e04473e7ef459693 100644 (file)
@@ -33,6 +33,8 @@
 #include "bif/bif_4_1_sh_mask.h"
 
 #include "gca/gfx_7_2_d.h"
+#include "gca/gfx_7_2_enum.h"
+#include "gca/gfx_7_2_sh_mask.h"
 
 #include "gmc/gmc_7_1_d.h"
 #include "gmc/gmc_7_1_sh_mask.h"
@@ -837,6 +839,8 @@ static void cik_sdma_ring_emit_vm_flush(struct amdgpu_ring *ring,
 {
        u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(0) |
                          SDMA_POLL_REG_MEM_EXTRA_FUNC(0)); /* always */
+       u32 sh_mem_cfg = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, 
+                                      SH_MEM_ALIGNMENT_MODE_UNALIGNED);
 
        amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
        if (vm_id < 8) {
@@ -857,7 +861,7 @@ static void cik_sdma_ring_emit_vm_flush(struct amdgpu_ring *ring,
 
        amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
        amdgpu_ring_write(ring, mmSH_MEM_CONFIG);
-       amdgpu_ring_write(ring, 0);
+       amdgpu_ring_write(ring, sh_mem_cfg);
 
        amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
        amdgpu_ring_write(ring, mmSH_MEM_APE1_BASE);
index 675b096417f48bf818419f06bd8c92dd1e870418..26df23eaf09e98be8856c50d75f9f63ac6e04884 100644 (file)
@@ -2022,6 +2022,7 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev)
        u32 gb_addr_config;
        u32 mc_shared_chmap, mc_arb_ramcfg;
        u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
+       u32 sh_mem_cfg;
        u32 tmp;
        int i;
 
@@ -2214,11 +2215,14 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev)
 
        /* XXX SH_MEM regs */
        /* where to put LDS, scratch, GPUVM in FSA64 space */
+       sh_mem_cfg = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, 
+                                  SH_MEM_ALIGNMENT_MODE_UNALIGNED);
+
        mutex_lock(&adev->srbm_mutex);
        for (i = 0; i < 16; i++) {
                cik_srbm_select(adev, 0, 0, 0, i);
                /* CP and shaders */
-               WREG32(mmSH_MEM_CONFIG, 0);
+               WREG32(mmSH_MEM_CONFIG, sh_mem_cfg);
                WREG32(mmSH_MEM_APE1_BASE, 1);
                WREG32(mmSH_MEM_APE1_LIMIT, 0);
                WREG32(mmSH_MEM_BASES, 0);
index a8397dd2bce47dce6eef9af925404b87b4bb284f..3762998df3518ceee561663c01068011cbbfdc2a 100644 (file)
@@ -2050,10 +2050,14 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
                if (i == 0) {
                        tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
                        tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
+                       tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE, 
+                                           SH_MEM_ALIGNMENT_MODE_UNALIGNED);
                        WREG32(mmSH_MEM_CONFIG, tmp);
                } else {
                        tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
                        tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
+                       tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE, 
+                                           SH_MEM_ALIGNMENT_MODE_UNALIGNED);
                        WREG32(mmSH_MEM_CONFIG, tmp);
                }
 
index a83029d548c127b2acf6bff9a7569aebd9d6f36f..389509aeddf89bcaa2dbb48611fa684cbff8f59d 100644 (file)
@@ -36,6 +36,7 @@
 #include "gmc/gmc_8_1_sh_mask.h"
 
 #include "gca/gfx_8_0_d.h"
+#include "gca/gfx_8_0_enum.h"
 #include "gca/gfx_8_0_sh_mask.h"
 
 #include "bif/bif_5_0_d.h"
@@ -900,6 +901,8 @@ static void sdma_v2_4_ring_emit_vm_flush(struct amdgpu_ring *ring,
                                         unsigned vm_id, uint64_t pd_addr)
 {
        u32 srbm_gfx_cntl = 0;
+       u32 sh_mem_cfg = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, 
+                                      SH_MEM_ALIGNMENT_MODE_UNALIGNED);
 
        amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
                          SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
@@ -925,7 +928,7 @@ static void sdma_v2_4_ring_emit_vm_flush(struct amdgpu_ring *ring,
        amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
                          SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
        amdgpu_ring_write(ring, mmSH_MEM_CONFIG);
-       amdgpu_ring_write(ring, 0);
+       amdgpu_ring_write(ring, sh_mem_cfg);
 
        amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
                          SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
index dd547c7f6cbce187bd083754d46b102217ad8557..d3eda315e71964d06687d57e83a9dd0033bfc4d5 100644 (file)
@@ -36,6 +36,7 @@
 #include "gmc/gmc_8_1_sh_mask.h"
 
 #include "gca/gfx_8_0_d.h"
+#include "gca/gfx_8_0_enum.h"
 #include "gca/gfx_8_0_sh_mask.h"
 
 #include "bif/bif_5_0_d.h"
@@ -963,6 +964,8 @@ static void sdma_v3_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
                                         unsigned vm_id, uint64_t pd_addr)
 {
        u32 srbm_gfx_cntl = 0;
+       u32 sh_mem_cfg = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, 
+                                      SH_MEM_ALIGNMENT_MODE_UNALIGNED);
 
        amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
                          SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
@@ -988,7 +991,7 @@ static void sdma_v3_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
        amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
                          SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
        amdgpu_ring_write(ring, mmSH_MEM_CONFIG);
-       amdgpu_ring_write(ring, 0);
+       amdgpu_ring_write(ring, sh_mem_cfg);
 
        amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
                          SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));