drm/radeon: add IB and fence dispatch functions for CIK gfx (v7)
author Alex Deucher <alexander.deucher@amd.com>
Thu, 5 Jul 2012 15:45:40 +0000 (11:45 -0400)
committer Alex Deucher <alexander.deucher@amd.com>
Tue, 25 Jun 2013 21:50:29 +0000 (17:50 -0400)
For gfx ring only.  Compute is still todo.

v2: add documentation
v3: update to latest reset changes, integrate emit update patch.
v4: fix count on wait_reg_mem for HDP flush
v5: use old hdp flush method for fence
v6: set valid bit for IB
v7: cleanup for release

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/radeon/cik.c
drivers/gpu/drm/radeon/cikd.h

index 5712526a446866a63ad10943eaf9ec7b2083c5d1..0b9c3c95a6be2198308f8c9eae4b2fb2940f53fb 100644 (file)
@@ -1492,6 +1492,140 @@ static void cik_gpu_init(struct radeon_device *rdev)
        udelay(50);
 }
 
+/*
+ * GPU scratch register helper functions.
+ */
+/**
+ * cik_scratch_init - setup driver info for CP scratch regs
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Set up the number and offset of the CP scratch registers.
+ * NOTE: use of CP scratch registers is a legacy interface and
+ * is not used by default on newer asics (r6xx+).  On newer asics,
+ * memory buffers are used for fences rather than scratch regs.
+ */
+static void cik_scratch_init(struct radeon_device *rdev)
+{
+       int i;
+
+       rdev->scratch.num_reg = 7;
+       rdev->scratch.reg_base = SCRATCH_REG0;
+       for (i = 0; i < rdev->scratch.num_reg; i++) {
+               rdev->scratch.free[i] = true;
+               rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
+       }
+}
+
+/**
+ * cik_fence_ring_emit - emit a fence on the gfx ring
+ *
+ * @rdev: radeon_device pointer
+ * @fence: radeon fence object
+ *
+ * Emits a fence sequence number on the gfx ring and flushes
+ * GPU caches.
+ */
+void cik_fence_ring_emit(struct radeon_device *rdev,
+                        struct radeon_fence *fence)
+{
+       struct radeon_ring *ring = &rdev->ring[fence->ring];
+       u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
+
+       /* EVENT_WRITE_EOP - flush caches, send int */
+       radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
+       radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
+                                EOP_TC_ACTION_EN |
+                                EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
+                                EVENT_INDEX(5)));
+       radeon_ring_write(ring, addr & 0xfffffffc);
+       radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
+       radeon_ring_write(ring, fence->seq);
+       radeon_ring_write(ring, 0);
+       /* HDP flush */
+       /* We should be using the new WAIT_REG_MEM special op packet here
+        * but it causes the CP to hang
+        */
+       radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+       radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+                                WRITE_DATA_DST_SEL(0)));
+       radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
+       radeon_ring_write(ring, 0);
+       radeon_ring_write(ring, 0);
+}
+
+void cik_semaphore_ring_emit(struct radeon_device *rdev,
+                            struct radeon_ring *ring,
+                            struct radeon_semaphore *semaphore,
+                            bool emit_wait)
+{
+       uint64_t addr = semaphore->gpu_addr;
+       unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
+
+       radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
+       radeon_ring_write(ring, addr & 0xffffffff);
+       radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
+}
+
+/*
+ * IB stuff
+ */
+/**
+ * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
+ *
+ * @rdev: radeon_device pointer
+ * @ib: radeon indirect buffer object
+ *
+ * Emits a DE (drawing engine) or CE (constant engine) IB
+ * on the gfx ring.  IBs are usually generated by userspace
+ * acceleration drivers and submitted to the kernel for
+ * scheduling on the ring.  This function schedules the IB
+ * on the gfx ring for execution by the GPU.
+ */
+void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
+{
+       struct radeon_ring *ring = &rdev->ring[ib->ring];
+       u32 header, control = INDIRECT_BUFFER_VALID;
+
+       if (ib->is_const_ib) {
+               /* set switch buffer packet before const IB */
+               radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+               radeon_ring_write(ring, 0);
+
+               header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
+       } else {
+               u32 next_rptr;
+               if (ring->rptr_save_reg) {
+                       next_rptr = ring->wptr + 3 + 4;
+                       radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
+                       radeon_ring_write(ring, ((ring->rptr_save_reg -
+                                                 PACKET3_SET_UCONFIG_REG_START) >> 2));
+                       radeon_ring_write(ring, next_rptr);
+               } else if (rdev->wb.enabled) {
+                       next_rptr = ring->wptr + 5 + 4;
+                       radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+                       radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
+                       radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
+                       radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
+                       radeon_ring_write(ring, next_rptr);
+               }
+
+               header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
+       }
+
+       control |= ib->length_dw |
+               (ib->vm ? (ib->vm->id << 24) : 0);
+
+       radeon_ring_write(ring, header);
+       radeon_ring_write(ring,
+#ifdef __BIG_ENDIAN
+                         (2 << 0) |
+#endif
+                         (ib->gpu_addr & 0xFFFFFFFC));
+       radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
+       radeon_ring_write(ring, control);
+}
+
 /*
  * CP.
  * On CIK, gfx and compute now have independant command processors.
index 0d1a29849a7e67e6800ac93c89f6afad90531abe..783cf600580390cb1aa970ff6c3929eb9f60335f 100644 (file)
 
 #define HDP_REG_COHERENCY_FLUSH_CNTL                   0x54A0
 
+#define GPU_HDP_FLUSH_REQ                              0x54DC
+#define GPU_HDP_FLUSH_DONE                             0x54E0
+#define                CP0                                     (1 << 0)
+#define                CP1                                     (1 << 1)
+#define                CP2                                     (1 << 2)
+#define                CP3                                     (1 << 3)
+#define                CP4                                     (1 << 4)
+#define                CP5                                     (1 << 5)
+#define                CP6                                     (1 << 6)
+#define                CP7                                     (1 << 7)
+#define                CP8                                     (1 << 8)
+#define                CP9                                     (1 << 9)
+#define                SDMA0                                   (1 << 10)
+#define                SDMA1                                   (1 << 11)
+
 #define        GRBM_CNTL                                       0x8000
 #define                GRBM_READ_TIMEOUT(x)                            ((x) << 0)
 
 #       define RASTER_CONFIG_RB_MAP_2                   2
 #       define RASTER_CONFIG_RB_MAP_3                   3
 
+#define VGT_EVENT_INITIATOR                             0x28a90
+#       define SAMPLE_STREAMOUTSTATS1                   (1 << 0)
+#       define SAMPLE_STREAMOUTSTATS2                   (2 << 0)
+#       define SAMPLE_STREAMOUTSTATS3                   (3 << 0)
+#       define CACHE_FLUSH_TS                           (4 << 0)
+#       define CACHE_FLUSH                              (6 << 0)
+#       define CS_PARTIAL_FLUSH                         (7 << 0)
+#       define VGT_STREAMOUT_RESET                      (10 << 0)
+#       define END_OF_PIPE_INCR_DE                      (11 << 0)
+#       define END_OF_PIPE_IB_END                       (12 << 0)
+#       define RST_PIX_CNT                              (13 << 0)
+#       define VS_PARTIAL_FLUSH                         (15 << 0)
+#       define PS_PARTIAL_FLUSH                         (16 << 0)
+#       define CACHE_FLUSH_AND_INV_TS_EVENT             (20 << 0)
+#       define ZPASS_DONE                               (21 << 0)
+#       define CACHE_FLUSH_AND_INV_EVENT                (22 << 0)
+#       define PERFCOUNTER_START                        (23 << 0)
+#       define PERFCOUNTER_STOP                         (24 << 0)
+#       define PIPELINESTAT_START                       (25 << 0)
+#       define PIPELINESTAT_STOP                        (26 << 0)
+#       define PERFCOUNTER_SAMPLE                       (27 << 0)
+#       define SAMPLE_PIPELINESTAT                      (30 << 0)
+#       define SO_VGT_STREAMOUT_FLUSH                   (31 << 0)
+#       define SAMPLE_STREAMOUTSTATS                    (32 << 0)
+#       define RESET_VTX_CNT                            (33 << 0)
+#       define VGT_FLUSH                                (36 << 0)
+#       define BOTTOM_OF_PIPE_TS                        (40 << 0)
+#       define DB_CACHE_FLUSH_AND_INV                   (42 << 0)
+#       define FLUSH_AND_INV_DB_DATA_TS                 (43 << 0)
+#       define FLUSH_AND_INV_DB_META                    (44 << 0)
+#       define FLUSH_AND_INV_CB_DATA_TS                 (45 << 0)
+#       define FLUSH_AND_INV_CB_META                    (46 << 0)
+#       define CS_DONE                                  (47 << 0)
+#       define PS_DONE                                  (48 << 0)
+#       define FLUSH_AND_INV_CB_PIXEL_DATA              (49 << 0)
+#       define THREAD_TRACE_START                       (51 << 0)
+#       define THREAD_TRACE_STOP                        (52 << 0)
+#       define THREAD_TRACE_FLUSH                       (54 << 0)
+#       define THREAD_TRACE_FINISH                      (55 << 0)
+#       define PIXEL_PIPE_STAT_CONTROL                  (56 << 0)
+#       define PIXEL_PIPE_STAT_DUMP                     (57 << 0)
+#       define PIXEL_PIPE_STAT_RESET                    (58 << 0)
+
 #define        SCRATCH_REG0                                    0x30100
 #define        SCRATCH_REG1                                    0x30104
 #define        SCRATCH_REG2                                    0x30108
 
 #define        CP_SEM_INCOMPLETE_TIMER_CNTL                    0x301C8
 
+#define        CP_WAIT_REG_MEM_TIMEOUT                         0x301D0
+
 #define GRBM_GFX_INDEX                                 0x30800
 #define                INSTANCE_INDEX(x)                       ((x) << 0)
 #define                SH_INDEX(x)                             ((x) << 8)
 #define        PACKET3_DRAW_INDEX_OFFSET_2                     0x35
 #define        PACKET3_DRAW_PREAMBLE                           0x36
 #define        PACKET3_WRITE_DATA                              0x37
+#define                WRITE_DATA_DST_SEL(x)                   ((x) << 8)
+                /* 0 - register
+                * 1 - memory (sync - via GRBM)
+                * 2 - gl2
+                * 3 - gds
+                * 4 - reserved
+                * 5 - memory (async - direct)
+                */
+#define                WR_ONE_ADDR                             (1 << 16)
+#define                WR_CONFIRM                              (1 << 20)
+#define                WRITE_DATA_CACHE_POLICY(x)              ((x) << 25)
+                /* 0 - LRU
+                * 1 - Stream
+                */
+#define                WRITE_DATA_ENGINE_SEL(x)                ((x) << 30)
+                /* 0 - me
+                * 1 - pfp
+                * 2 - ce
+                */
 #define        PACKET3_DRAW_INDEX_INDIRECT_MULTI               0x38
 #define        PACKET3_MEM_SEMAPHORE                           0x39
+#              define PACKET3_SEM_USE_MAILBOX       (0x1 << 16)
+#              define PACKET3_SEM_SEL_SIGNAL_TYPE   (0x1 << 20) /* 0 = increment, 1 = write 1 */
+#              define PACKET3_SEM_CLIENT_CODE(x)   ((x) << 24) /* 0 = CP, 1 = CB, 2 = DB */
+#              define PACKET3_SEM_SEL_SIGNAL       (0x6 << 29)
+#              define PACKET3_SEM_SEL_WAIT         (0x7 << 29)
 #define        PACKET3_COPY_DW                                 0x3B
 #define        PACKET3_WAIT_REG_MEM                            0x3C
+#define                WAIT_REG_MEM_FUNCTION(x)                ((x) << 0)
+                /* 0 - always
+                * 1 - <
+                * 2 - <=
+                * 3 - ==
+                * 4 - !=
+                * 5 - >=
+                * 6 - >
+                */
+#define                WAIT_REG_MEM_MEM_SPACE(x)               ((x) << 4)
+                /* 0 - reg
+                * 1 - mem
+                */
+#define                WAIT_REG_MEM_OPERATION(x)               ((x) << 6)
+                /* 0 - wait_reg_mem
+                * 1 - wr_wait_wr_reg
+                */
+#define                WAIT_REG_MEM_ENGINE(x)                  ((x) << 8)
+                /* 0 - me
+                * 1 - pfp
+                */
 #define        PACKET3_INDIRECT_BUFFER                         0x3F
+#define                INDIRECT_BUFFER_TCL2_VOLATILE           (1 << 22)
+#define                INDIRECT_BUFFER_VALID                   (1 << 23)
+#define                INDIRECT_BUFFER_CACHE_POLICY(x)         ((x) << 28)
+                /* 0 - LRU
+                * 1 - Stream
+                * 2 - Bypass
+                */
 #define        PACKET3_COPY_DATA                               0x40
 #define        PACKET3_PFP_SYNC_ME                             0x42
 #define        PACKET3_SURFACE_SYNC                            0x43
 #define                EOP_TC_WB_ACTION_EN                     (1 << 15) /* L2 */
 #define                EOP_TCL1_ACTION_EN                      (1 << 16)
 #define                EOP_TC_ACTION_EN                        (1 << 17) /* L2 */
-#define                CACHE_POLICY(x)                         ((x) << 25)
+#define                EOP_CACHE_POLICY(x)                     ((x) << 25)
                 /* 0 - LRU
                 * 1 - Stream
                 * 2 - Bypass
                 */
-#define                TCL2_VOLATILE                           (1 << 27)
+#define                EOP_TCL2_VOLATILE                       (1 << 27)
 #define                DATA_SEL(x)                             ((x) << 29)
                 /* 0 - discard
                 * 1 - send low 32bit data
 #define        PACKET3_SET_SH_REG_OFFSET                       0x77
 #define        PACKET3_SET_QUEUE_REG                           0x78
 #define        PACKET3_SET_UCONFIG_REG                         0x79
+#define                PACKET3_SET_UCONFIG_REG_START                   0x00030000
+#define                PACKET3_SET_UCONFIG_REG_END                     0x00031000
 #define        PACKET3_SCRATCH_RAM_WRITE                       0x7D
 #define        PACKET3_SCRATCH_RAM_READ                        0x7E
 #define        PACKET3_LOAD_CONST_RAM                          0x80
 #define        PACKET3_INCREMENT_DE_COUNTER                    0x85
 #define        PACKET3_WAIT_ON_CE_COUNTER                      0x86
 #define        PACKET3_WAIT_ON_DE_COUNTER_DIFF                 0x88
-
+#define        PACKET3_SWITCH_BUFFER                           0x8B
 
 #endif