drm/radeon/kms: Don't try to process irq when we are unloading
[firefly-linux-kernel-4.4.55.git] / drivers / gpu / drm / radeon / r100.c
1 /*
2  * Copyright 2008 Advanced Micro Devices, Inc.
3  * Copyright 2008 Red Hat Inc.
4  * Copyright 2009 Jerome Glisse.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors: Dave Airlie
25  *          Alex Deucher
26  *          Jerome Glisse
27  */
28 #include <linux/seq_file.h>
29 #include "drmP.h"
30 #include "drm.h"
31 #include "radeon_drm.h"
32 #include "radeon_reg.h"
33 #include "radeon.h"
34 #include "r100d.h"
35
36 #include <linux/firmware.h>
37 #include <linux/platform_device.h>
38
39 #include "r100_reg_safe.h"
40 #include "rn50_reg_safe.h"
41
42 /* Firmware Names */
43 #define FIRMWARE_R100           "radeon/R100_cp.bin"
44 #define FIRMWARE_R200           "radeon/R200_cp.bin"
45 #define FIRMWARE_R300           "radeon/R300_cp.bin"
46 #define FIRMWARE_R420           "radeon/R420_cp.bin"
47 #define FIRMWARE_RS690          "radeon/RS690_cp.bin"
48 #define FIRMWARE_RS600          "radeon/RS600_cp.bin"
49 #define FIRMWARE_R520           "radeon/R520_cp.bin"
50
51 MODULE_FIRMWARE(FIRMWARE_R100);
52 MODULE_FIRMWARE(FIRMWARE_R200);
53 MODULE_FIRMWARE(FIRMWARE_R300);
54 MODULE_FIRMWARE(FIRMWARE_R420);
55 MODULE_FIRMWARE(FIRMWARE_RS690);
56 MODULE_FIRMWARE(FIRMWARE_RS600);
57 MODULE_FIRMWARE(FIRMWARE_R520);
58
59 #include "r100_track.h"
60
/* This file gathers functions specific to:
 * r100,rv100,rs100,rv200,rs200,r200,rv250,rs300,rv280
 *
 * Some of these functions might also be used by newer ASICs.
 */
66 int r200_init(struct radeon_device *rdev);
67 void r100_hdp_reset(struct radeon_device *rdev);
68 void r100_gpu_init(struct radeon_device *rdev);
69 int r100_gui_wait_for_idle(struct radeon_device *rdev);
70 int r100_mc_wait_for_idle(struct radeon_device *rdev);
71 void r100_gpu_wait_for_vsync(struct radeon_device *rdev);
72 void r100_gpu_wait_for_vsync2(struct radeon_device *rdev);
73 int r100_debugfs_mc_info_init(struct radeon_device *rdev);
74
75
76 /*
77  * PCI GART
78  */
/**
 * r100_pci_gart_tlb_flush() - flush the PCI GART translation cache
 * @rdev:	radeon device
 *
 * Currently a no-op; see the TODO in the body.
 */
void r100_pci_gart_tlb_flush(struct radeon_device *rdev)
{
	/*
	 * TODO: find out whether something can be done here.
	 *
	 * The hardware seems to cache only a single entry.  That entry
	 * should be discarded, otherwise the first GPU GART read hitting
	 * it could end up at a wrong address.
	 */
}
86
87 int r100_pci_gart_enable(struct radeon_device *rdev)
88 {
89         uint32_t tmp;
90         int r;
91
92         /* Initialize common gart structure */
93         r = radeon_gart_init(rdev);
94         if (r) {
95                 return r;
96         }
97         if (rdev->gart.table.ram.ptr == NULL) {
98                 rdev->gart.table_size = rdev->gart.num_gpu_pages * 4;
99                 r = radeon_gart_table_ram_alloc(rdev);
100                 if (r) {
101                         return r;
102                 }
103         }
104         /* discard memory request outside of configured range */
105         tmp = RREG32(RADEON_AIC_CNTL) | RADEON_DIS_OUT_OF_PCI_GART_ACCESS;
106         WREG32(RADEON_AIC_CNTL, tmp);
107         /* set address range for PCI address translate */
108         WREG32(RADEON_AIC_LO_ADDR, rdev->mc.gtt_location);
109         tmp = rdev->mc.gtt_location + rdev->mc.gtt_size - 1;
110         WREG32(RADEON_AIC_HI_ADDR, tmp);
111         /* Enable bus mastering */
112         tmp = RREG32(RADEON_BUS_CNTL) & ~RADEON_BUS_MASTER_DIS;
113         WREG32(RADEON_BUS_CNTL, tmp);
114         /* set PCI GART page-table base address */
115         WREG32(RADEON_AIC_PT_BASE, rdev->gart.table_addr);
116         tmp = RREG32(RADEON_AIC_CNTL) | RADEON_PCIGART_TRANSLATE_EN;
117         WREG32(RADEON_AIC_CNTL, tmp);
118         r100_pci_gart_tlb_flush(rdev);
119         rdev->gart.ready = true;
120         return 0;
121 }
122
123 void r100_pci_gart_disable(struct radeon_device *rdev)
124 {
125         uint32_t tmp;
126
127         /* discard memory request outside of configured range */
128         tmp = RREG32(RADEON_AIC_CNTL) | RADEON_DIS_OUT_OF_PCI_GART_ACCESS;
129         WREG32(RADEON_AIC_CNTL, tmp & ~RADEON_PCIGART_TRANSLATE_EN);
130         WREG32(RADEON_AIC_LO_ADDR, 0);
131         WREG32(RADEON_AIC_HI_ADDR, 0);
132 }
133
134 int r100_pci_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr)
135 {
136         if (i < 0 || i > rdev->gart.num_gpu_pages) {
137                 return -EINVAL;
138         }
139         rdev->gart.table.ram.ptr[i] = cpu_to_le32(lower_32_bits(addr));
140         return 0;
141 }
142
143 int r100_gart_enable(struct radeon_device *rdev)
144 {
145         if (rdev->flags & RADEON_IS_AGP) {
146                 r100_pci_gart_disable(rdev);
147                 return 0;
148         }
149         return r100_pci_gart_enable(rdev);
150 }
151
152
153 /*
154  * MC
155  */
156 void r100_mc_disable_clients(struct radeon_device *rdev)
157 {
158         uint32_t ov0_scale_cntl, crtc_ext_cntl, crtc_gen_cntl, crtc2_gen_cntl;
159
160         /* FIXME: is this function correct for rs100,rs200,rs300 ? */
161         if (r100_gui_wait_for_idle(rdev)) {
162                 printk(KERN_WARNING "Failed to wait GUI idle while "
163                        "programming pipes. Bad things might happen.\n");
164         }
165
166         /* stop display and memory access */
167         ov0_scale_cntl = RREG32(RADEON_OV0_SCALE_CNTL);
168         WREG32(RADEON_OV0_SCALE_CNTL, ov0_scale_cntl & ~RADEON_SCALER_ENABLE);
169         crtc_ext_cntl = RREG32(RADEON_CRTC_EXT_CNTL);
170         WREG32(RADEON_CRTC_EXT_CNTL, crtc_ext_cntl | RADEON_CRTC_DISPLAY_DIS);
171         crtc_gen_cntl = RREG32(RADEON_CRTC_GEN_CNTL);
172
173         r100_gpu_wait_for_vsync(rdev);
174
175         WREG32(RADEON_CRTC_GEN_CNTL,
176                (crtc_gen_cntl & ~(RADEON_CRTC_CUR_EN | RADEON_CRTC_ICON_EN)) |
177                RADEON_CRTC_DISP_REQ_EN_B | RADEON_CRTC_EXT_DISP_EN);
178
179         if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
180                 crtc2_gen_cntl = RREG32(RADEON_CRTC2_GEN_CNTL);
181
182                 r100_gpu_wait_for_vsync2(rdev);
183                 WREG32(RADEON_CRTC2_GEN_CNTL,
184                        (crtc2_gen_cntl &
185                         ~(RADEON_CRTC2_CUR_EN | RADEON_CRTC2_ICON_EN)) |
186                        RADEON_CRTC2_DISP_REQ_EN_B);
187         }
188
189         udelay(500);
190 }
191
192 void r100_mc_setup(struct radeon_device *rdev)
193 {
194         uint32_t tmp;
195         int r;
196
197         r = r100_debugfs_mc_info_init(rdev);
198         if (r) {
199                 DRM_ERROR("Failed to register debugfs file for R100 MC !\n");
200         }
201         /* Write VRAM size in case we are limiting it */
202         WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size);
203         /* Novell bug 204882 for RN50/M6/M7 with 8/16/32MB VRAM,
204          * if the aperture is 64MB but we have 32MB VRAM
205          * we report only 32MB VRAM but we have to set MC_FB_LOCATION
206          * to 64MB, otherwise the gpu accidentially dies */
207         tmp = rdev->mc.vram_location + rdev->mc.mc_vram_size - 1;
208         tmp = REG_SET(RADEON_MC_FB_TOP, tmp >> 16);
209         tmp |= REG_SET(RADEON_MC_FB_START, rdev->mc.vram_location >> 16);
210         WREG32(RADEON_MC_FB_LOCATION, tmp);
211
212         /* Enable bus mastering */
213         tmp = RREG32(RADEON_BUS_CNTL) & ~RADEON_BUS_MASTER_DIS;
214         WREG32(RADEON_BUS_CNTL, tmp);
215
216         if (rdev->flags & RADEON_IS_AGP) {
217                 tmp = rdev->mc.gtt_location + rdev->mc.gtt_size - 1;
218                 tmp = REG_SET(RADEON_MC_AGP_TOP, tmp >> 16);
219                 tmp |= REG_SET(RADEON_MC_AGP_START, rdev->mc.gtt_location >> 16);
220                 WREG32(RADEON_MC_AGP_LOCATION, tmp);
221                 WREG32(RADEON_AGP_BASE, rdev->mc.agp_base);
222         } else {
223                 WREG32(RADEON_MC_AGP_LOCATION, 0x0FFFFFFF);
224                 WREG32(RADEON_AGP_BASE, 0);
225         }
226
227         tmp = RREG32(RADEON_HOST_PATH_CNTL) & RADEON_HDP_APER_CNTL;
228         tmp |= (7 << 28);
229         WREG32(RADEON_HOST_PATH_CNTL, tmp | RADEON_HDP_SOFT_RESET | RADEON_HDP_READ_BUFFER_INVALIDATE);
230         (void)RREG32(RADEON_HOST_PATH_CNTL);
231         WREG32(RADEON_HOST_PATH_CNTL, tmp);
232         (void)RREG32(RADEON_HOST_PATH_CNTL);
233 }
234
235 int r100_mc_init(struct radeon_device *rdev)
236 {
237         int r;
238
239         if (r100_debugfs_rbbm_init(rdev)) {
240                 DRM_ERROR("Failed to register debugfs file for RBBM !\n");
241         }
242
243         r100_gpu_init(rdev);
244         /* Disable gart which also disable out of gart access */
245         r100_pci_gart_disable(rdev);
246
247         /* Setup GPU memory space */
248         rdev->mc.gtt_location = 0xFFFFFFFFUL;
249         if (rdev->flags & RADEON_IS_AGP) {
250                 r = radeon_agp_init(rdev);
251                 if (r) {
252                         printk(KERN_WARNING "[drm] Disabling AGP\n");
253                         rdev->flags &= ~RADEON_IS_AGP;
254                         rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024;
255                 } else {
256                         rdev->mc.gtt_location = rdev->mc.agp_base;
257                 }
258         }
259         r = radeon_mc_setup(rdev);
260         if (r) {
261                 return r;
262         }
263
264         r100_mc_disable_clients(rdev);
265         if (r100_mc_wait_for_idle(rdev)) {
266                 printk(KERN_WARNING "Failed to wait MC idle while "
267                        "programming pipes. Bad things might happen.\n");
268         }
269
270         r100_mc_setup(rdev);
271         return 0;
272 }
273
/**
 * r100_mc_fini() - tear down the memory controller GART state
 * @rdev:	radeon device
 *
 * Disables PCI GART translation first, then frees the RAM page table
 * and the common GART structure.
 */
void r100_mc_fini(struct radeon_device *rdev)
{
	/* Stop translation before the table it uses goes away */
	r100_pci_gart_disable(rdev);

	radeon_gart_table_ram_free(rdev);
	radeon_gart_fini(rdev);
}
280
281
282 /*
283  * Interrupts
284  */
285 int r100_irq_set(struct radeon_device *rdev)
286 {
287         uint32_t tmp = 0;
288
289         if (rdev->irq.sw_int) {
290                 tmp |= RADEON_SW_INT_ENABLE;
291         }
292         if (rdev->irq.crtc_vblank_int[0]) {
293                 tmp |= RADEON_CRTC_VBLANK_MASK;
294         }
295         if (rdev->irq.crtc_vblank_int[1]) {
296                 tmp |= RADEON_CRTC2_VBLANK_MASK;
297         }
298         WREG32(RADEON_GEN_INT_CNTL, tmp);
299         return 0;
300 }
301
302 static inline uint32_t r100_irq_ack(struct radeon_device *rdev)
303 {
304         uint32_t irqs = RREG32(RADEON_GEN_INT_STATUS);
305         uint32_t irq_mask = RADEON_SW_INT_TEST | RADEON_CRTC_VBLANK_STAT |
306                 RADEON_CRTC2_VBLANK_STAT;
307
308         if (irqs) {
309                 WREG32(RADEON_GEN_INT_STATUS, irqs);
310         }
311         return irqs & irq_mask;
312 }
313
314 int r100_irq_process(struct radeon_device *rdev)
315 {
316         uint32_t status;
317
318         status = r100_irq_ack(rdev);
319         if (!status) {
320                 return IRQ_NONE;
321         }
322         if (rdev->shutdown) {
323                 return IRQ_NONE;
324         }
325         while (status) {
326                 /* SW interrupt */
327                 if (status & RADEON_SW_INT_TEST) {
328                         radeon_fence_process(rdev);
329                 }
330                 /* Vertical blank interrupts */
331                 if (status & RADEON_CRTC_VBLANK_STAT) {
332                         drm_handle_vblank(rdev->ddev, 0);
333                 }
334                 if (status & RADEON_CRTC2_VBLANK_STAT) {
335                         drm_handle_vblank(rdev->ddev, 1);
336                 }
337                 status = r100_irq_ack(rdev);
338         }
339         return IRQ_HANDLED;
340 }
341
342 u32 r100_get_vblank_counter(struct radeon_device *rdev, int crtc)
343 {
344         if (crtc == 0)
345                 return RREG32(RADEON_CRTC_CRNT_FRAME);
346         else
347                 return RREG32(RADEON_CRTC2_CRNT_FRAME);
348 }
349
350
351 /*
352  * Fence emission
353  */
354 void r100_fence_ring_emit(struct radeon_device *rdev,
355                           struct radeon_fence *fence)
356 {
357         /* Who ever call radeon_fence_emit should call ring_lock and ask
358          * for enough space (today caller are ib schedule and buffer move) */
359         /* Wait until IDLE & CLEAN */
360         radeon_ring_write(rdev, PACKET0(0x1720, 0));
361         radeon_ring_write(rdev, (1 << 16) | (1 << 17));
362         /* Emit fence sequence & fire IRQ */
363         radeon_ring_write(rdev, PACKET0(rdev->fence_drv.scratch_reg, 0));
364         radeon_ring_write(rdev, fence->seq);
365         radeon_ring_write(rdev, PACKET0(RADEON_GEN_INT_STATUS, 0));
366         radeon_ring_write(rdev, RADEON_SW_INT_FIRE);
367 }
368
369
370 /*
371  * Writeback
372  */
373 int r100_wb_init(struct radeon_device *rdev)
374 {
375         int r;
376
377         if (rdev->wb.wb_obj == NULL) {
378                 r = radeon_object_create(rdev, NULL, 4096,
379                                          true,
380                                          RADEON_GEM_DOMAIN_GTT,
381                                          false, &rdev->wb.wb_obj);
382                 if (r) {
383                         DRM_ERROR("radeon: failed to create WB buffer (%d).\n", r);
384                         return r;
385                 }
386                 r = radeon_object_pin(rdev->wb.wb_obj,
387                                       RADEON_GEM_DOMAIN_GTT,
388                                       &rdev->wb.gpu_addr);
389                 if (r) {
390                         DRM_ERROR("radeon: failed to pin WB buffer (%d).\n", r);
391                         return r;
392                 }
393                 r = radeon_object_kmap(rdev->wb.wb_obj, (void **)&rdev->wb.wb);
394                 if (r) {
395                         DRM_ERROR("radeon: failed to map WB buffer (%d).\n", r);
396                         return r;
397                 }
398         }
399         WREG32(RADEON_SCRATCH_ADDR, rdev->wb.gpu_addr);
400         WREG32(RADEON_CP_RB_RPTR_ADDR, rdev->wb.gpu_addr + 1024);
401         WREG32(RADEON_SCRATCH_UMSK, 0xff);
402         return 0;
403 }
404
405 void r100_wb_fini(struct radeon_device *rdev)
406 {
407         if (rdev->wb.wb_obj) {
408                 radeon_object_kunmap(rdev->wb.wb_obj);
409                 radeon_object_unpin(rdev->wb.wb_obj);
410                 radeon_object_unref(&rdev->wb.wb_obj);
411                 rdev->wb.wb = NULL;
412                 rdev->wb.wb_obj = NULL;
413         }
414 }
415
416 int r100_copy_blit(struct radeon_device *rdev,
417                    uint64_t src_offset,
418                    uint64_t dst_offset,
419                    unsigned num_pages,
420                    struct radeon_fence *fence)
421 {
422         uint32_t cur_pages;
423         uint32_t stride_bytes = PAGE_SIZE;
424         uint32_t pitch;
425         uint32_t stride_pixels;
426         unsigned ndw;
427         int num_loops;
428         int r = 0;
429
430         /* radeon limited to 16k stride */
431         stride_bytes &= 0x3fff;
432         /* radeon pitch is /64 */
433         pitch = stride_bytes / 64;
434         stride_pixels = stride_bytes / 4;
435         num_loops = DIV_ROUND_UP(num_pages, 8191);
436
437         /* Ask for enough room for blit + flush + fence */
438         ndw = 64 + (10 * num_loops);
439         r = radeon_ring_lock(rdev, ndw);
440         if (r) {
441                 DRM_ERROR("radeon: moving bo (%d) asking for %u dw.\n", r, ndw);
442                 return -EINVAL;
443         }
444         while (num_pages > 0) {
445                 cur_pages = num_pages;
446                 if (cur_pages > 8191) {
447                         cur_pages = 8191;
448                 }
449                 num_pages -= cur_pages;
450
451                 /* pages are in Y direction - height
452                    page width in X direction - width */
453                 radeon_ring_write(rdev, PACKET3(PACKET3_BITBLT_MULTI, 8));
454                 radeon_ring_write(rdev,
455                                   RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
456                                   RADEON_GMC_DST_PITCH_OFFSET_CNTL |
457                                   RADEON_GMC_SRC_CLIPPING |
458                                   RADEON_GMC_DST_CLIPPING |
459                                   RADEON_GMC_BRUSH_NONE |
460                                   (RADEON_COLOR_FORMAT_ARGB8888 << 8) |
461                                   RADEON_GMC_SRC_DATATYPE_COLOR |
462                                   RADEON_ROP3_S |
463                                   RADEON_DP_SRC_SOURCE_MEMORY |
464                                   RADEON_GMC_CLR_CMP_CNTL_DIS |
465                                   RADEON_GMC_WR_MSK_DIS);
466                 radeon_ring_write(rdev, (pitch << 22) | (src_offset >> 10));
467                 radeon_ring_write(rdev, (pitch << 22) | (dst_offset >> 10));
468                 radeon_ring_write(rdev, (0x1fff) | (0x1fff << 16));
469                 radeon_ring_write(rdev, 0);
470                 radeon_ring_write(rdev, (0x1fff) | (0x1fff << 16));
471                 radeon_ring_write(rdev, num_pages);
472                 radeon_ring_write(rdev, num_pages);
473                 radeon_ring_write(rdev, cur_pages | (stride_pixels << 16));
474         }
475         radeon_ring_write(rdev, PACKET0(RADEON_DSTCACHE_CTLSTAT, 0));
476         radeon_ring_write(rdev, RADEON_RB2D_DC_FLUSH_ALL);
477         radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
478         radeon_ring_write(rdev,
479                           RADEON_WAIT_2D_IDLECLEAN |
480                           RADEON_WAIT_HOST_IDLECLEAN |
481                           RADEON_WAIT_DMA_GUI_IDLE);
482         if (fence) {
483                 r = radeon_fence_emit(rdev, fence);
484         }
485         radeon_ring_unlock_commit(rdev);
486         return r;
487 }
488
489
490 /*
491  * CP
492  */
493 static int r100_cp_wait_for_idle(struct radeon_device *rdev)
494 {
495         unsigned i;
496         u32 tmp;
497
498         for (i = 0; i < rdev->usec_timeout; i++) {
499                 tmp = RREG32(R_000E40_RBBM_STATUS);
500                 if (!G_000E40_CP_CMDSTRM_BUSY(tmp)) {
501                         return 0;
502                 }
503                 udelay(1);
504         }
505         return -1;
506 }
507
508 void r100_ring_start(struct radeon_device *rdev)
509 {
510         int r;
511
512         r = radeon_ring_lock(rdev, 2);
513         if (r) {
514                 return;
515         }
516         radeon_ring_write(rdev, PACKET0(RADEON_ISYNC_CNTL, 0));
517         radeon_ring_write(rdev,
518                           RADEON_ISYNC_ANY2D_IDLE3D |
519                           RADEON_ISYNC_ANY3D_IDLE2D |
520                           RADEON_ISYNC_WAIT_IDLEGUI |
521                           RADEON_ISYNC_CPSCRATCH_IDLEGUI);
522         radeon_ring_unlock_commit(rdev);
523 }
524
525
526 /* Load the microcode for the CP */
527 static int r100_cp_init_microcode(struct radeon_device *rdev)
528 {
529         struct platform_device *pdev;
530         const char *fw_name = NULL;
531         int err;
532
533         DRM_DEBUG("\n");
534
535         pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
536         err = IS_ERR(pdev);
537         if (err) {
538                 printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
539                 return -EINVAL;
540         }
541         if ((rdev->family == CHIP_R100) || (rdev->family == CHIP_RV100) ||
542             (rdev->family == CHIP_RV200) || (rdev->family == CHIP_RS100) ||
543             (rdev->family == CHIP_RS200)) {
544                 DRM_INFO("Loading R100 Microcode\n");
545                 fw_name = FIRMWARE_R100;
546         } else if ((rdev->family == CHIP_R200) ||
547                    (rdev->family == CHIP_RV250) ||
548                    (rdev->family == CHIP_RV280) ||
549                    (rdev->family == CHIP_RS300)) {
550                 DRM_INFO("Loading R200 Microcode\n");
551                 fw_name = FIRMWARE_R200;
552         } else if ((rdev->family == CHIP_R300) ||
553                    (rdev->family == CHIP_R350) ||
554                    (rdev->family == CHIP_RV350) ||
555                    (rdev->family == CHIP_RV380) ||
556                    (rdev->family == CHIP_RS400) ||
557                    (rdev->family == CHIP_RS480)) {
558                 DRM_INFO("Loading R300 Microcode\n");
559                 fw_name = FIRMWARE_R300;
560         } else if ((rdev->family == CHIP_R420) ||
561                    (rdev->family == CHIP_R423) ||
562                    (rdev->family == CHIP_RV410)) {
563                 DRM_INFO("Loading R400 Microcode\n");
564                 fw_name = FIRMWARE_R420;
565         } else if ((rdev->family == CHIP_RS690) ||
566                    (rdev->family == CHIP_RS740)) {
567                 DRM_INFO("Loading RS690/RS740 Microcode\n");
568                 fw_name = FIRMWARE_RS690;
569         } else if (rdev->family == CHIP_RS600) {
570                 DRM_INFO("Loading RS600 Microcode\n");
571                 fw_name = FIRMWARE_RS600;
572         } else if ((rdev->family == CHIP_RV515) ||
573                    (rdev->family == CHIP_R520) ||
574                    (rdev->family == CHIP_RV530) ||
575                    (rdev->family == CHIP_R580) ||
576                    (rdev->family == CHIP_RV560) ||
577                    (rdev->family == CHIP_RV570)) {
578                 DRM_INFO("Loading R500 Microcode\n");
579                 fw_name = FIRMWARE_R520;
580         }
581
582         err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
583         platform_device_unregister(pdev);
584         if (err) {
585                 printk(KERN_ERR "radeon_cp: Failed to load firmware \"%s\"\n",
586                        fw_name);
587         } else if (rdev->me_fw->size % 8) {
588                 printk(KERN_ERR
589                        "radeon_cp: Bogus length %zu in firmware \"%s\"\n",
590                        rdev->me_fw->size, fw_name);
591                 err = -EINVAL;
592                 release_firmware(rdev->me_fw);
593                 rdev->me_fw = NULL;
594         }
595         return err;
596 }
597 static void r100_cp_load_microcode(struct radeon_device *rdev)
598 {
599         const __be32 *fw_data;
600         int i, size;
601
602         if (r100_gui_wait_for_idle(rdev)) {
603                 printk(KERN_WARNING "Failed to wait GUI idle while "
604                        "programming pipes. Bad things might happen.\n");
605         }
606
607         if (rdev->me_fw) {
608                 size = rdev->me_fw->size / 4;
609                 fw_data = (const __be32 *)&rdev->me_fw->data[0];
610                 WREG32(RADEON_CP_ME_RAM_ADDR, 0);
611                 for (i = 0; i < size; i += 2) {
612                         WREG32(RADEON_CP_ME_RAM_DATAH,
613                                be32_to_cpup(&fw_data[i]));
614                         WREG32(RADEON_CP_ME_RAM_DATAL,
615                                be32_to_cpup(&fw_data[i + 1]));
616                 }
617         }
618 }
619
620 int r100_cp_init(struct radeon_device *rdev, unsigned ring_size)
621 {
622         unsigned rb_bufsz;
623         unsigned rb_blksz;
624         unsigned max_fetch;
625         unsigned pre_write_timer;
626         unsigned pre_write_limit;
627         unsigned indirect2_start;
628         unsigned indirect1_start;
629         uint32_t tmp;
630         int r;
631
632         if (r100_debugfs_cp_init(rdev)) {
633                 DRM_ERROR("Failed to register debugfs file for CP !\n");
634         }
635         /* Reset CP */
636         tmp = RREG32(RADEON_CP_CSQ_STAT);
637         if ((tmp & (1 << 31))) {
638                 DRM_INFO("radeon: cp busy (0x%08X) resetting\n", tmp);
639                 WREG32(RADEON_CP_CSQ_MODE, 0);
640                 WREG32(RADEON_CP_CSQ_CNTL, 0);
641                 WREG32(RADEON_RBBM_SOFT_RESET, RADEON_SOFT_RESET_CP);
642                 tmp = RREG32(RADEON_RBBM_SOFT_RESET);
643                 mdelay(2);
644                 WREG32(RADEON_RBBM_SOFT_RESET, 0);
645                 tmp = RREG32(RADEON_RBBM_SOFT_RESET);
646                 mdelay(2);
647                 tmp = RREG32(RADEON_CP_CSQ_STAT);
648                 if ((tmp & (1 << 31))) {
649                         DRM_INFO("radeon: cp reset failed (0x%08X)\n", tmp);
650                 }
651         } else {
652                 DRM_INFO("radeon: cp idle (0x%08X)\n", tmp);
653         }
654
655         if (!rdev->me_fw) {
656                 r = r100_cp_init_microcode(rdev);
657                 if (r) {
658                         DRM_ERROR("Failed to load firmware!\n");
659                         return r;
660                 }
661         }
662
663         /* Align ring size */
664         rb_bufsz = drm_order(ring_size / 8);
665         ring_size = (1 << (rb_bufsz + 1)) * 4;
666         r100_cp_load_microcode(rdev);
667         r = radeon_ring_init(rdev, ring_size);
668         if (r) {
669                 return r;
670         }
671         /* Each time the cp read 1024 bytes (16 dword/quadword) update
672          * the rptr copy in system ram */
673         rb_blksz = 9;
674         /* cp will read 128bytes at a time (4 dwords) */
675         max_fetch = 1;
676         rdev->cp.align_mask = 16 - 1;
677         /* Write to CP_RB_WPTR will be delayed for pre_write_timer clocks */
678         pre_write_timer = 64;
679         /* Force CP_RB_WPTR write if written more than one time before the
680          * delay expire
681          */
682         pre_write_limit = 0;
683         /* Setup the cp cache like this (cache size is 96 dwords) :
684          *      RING            0  to 15
685          *      INDIRECT1       16 to 79
686          *      INDIRECT2       80 to 95
687          * So ring cache size is 16dwords (> (2 * max_fetch = 2 * 4dwords))
688          *    indirect1 cache size is 64dwords (> (2 * max_fetch = 2 * 4dwords))
689          *    indirect2 cache size is 16dwords (> (2 * max_fetch = 2 * 4dwords))
690          * Idea being that most of the gpu cmd will be through indirect1 buffer
691          * so it gets the bigger cache.
692          */
693         indirect2_start = 80;
694         indirect1_start = 16;
695         /* cp setup */
696         WREG32(0x718, pre_write_timer | (pre_write_limit << 28));
697         WREG32(RADEON_CP_RB_CNTL,
698 #ifdef __BIG_ENDIAN
699                RADEON_BUF_SWAP_32BIT |
700 #endif
701                REG_SET(RADEON_RB_BUFSZ, rb_bufsz) |
702                REG_SET(RADEON_RB_BLKSZ, rb_blksz) |
703                REG_SET(RADEON_MAX_FETCH, max_fetch) |
704                RADEON_RB_NO_UPDATE);
705         /* Set ring address */
706         DRM_INFO("radeon: ring at 0x%016lX\n", (unsigned long)rdev->cp.gpu_addr);
707         WREG32(RADEON_CP_RB_BASE, rdev->cp.gpu_addr);
708         /* Force read & write ptr to 0 */
709         tmp = RREG32(RADEON_CP_RB_CNTL);
710         WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_RPTR_WR_ENA);
711         WREG32(RADEON_CP_RB_RPTR_WR, 0);
712         WREG32(RADEON_CP_RB_WPTR, 0);
713         WREG32(RADEON_CP_RB_CNTL, tmp);
714         udelay(10);
715         rdev->cp.rptr = RREG32(RADEON_CP_RB_RPTR);
716         rdev->cp.wptr = RREG32(RADEON_CP_RB_WPTR);
717         /* Set cp mode to bus mastering & enable cp*/
718         WREG32(RADEON_CP_CSQ_MODE,
719                REG_SET(RADEON_INDIRECT2_START, indirect2_start) |
720                REG_SET(RADEON_INDIRECT1_START, indirect1_start));
721         WREG32(0x718, 0);
722         WREG32(0x744, 0x00004D4D);
723         WREG32(RADEON_CP_CSQ_CNTL, RADEON_CSQ_PRIBM_INDBM);
724         radeon_ring_start(rdev);
725         r = radeon_ring_test(rdev);
726         if (r) {
727                 DRM_ERROR("radeon: cp isn't working (%d).\n", r);
728                 return r;
729         }
730         rdev->cp.ready = true;
731         return 0;
732 }
733
/**
 * r100_cp_fini() - shut down the command processor
 * @rdev:	radeon device
 *
 * Waits for the CP to go idle (a timeout is only logged), disables the
 * CP and releases the ring.
 */
void r100_cp_fini(struct radeon_device *rdev)
{
	if (r100_cp_wait_for_idle(rdev))
		DRM_ERROR("Wait for CP idle timeout, shutting down CP.\n");

	/* Stop the CP before its ring is torn down */
	r100_cp_disable(rdev);
	radeon_ring_fini(rdev);
	DRM_INFO("radeon: cp finalized\n");
}
744
745 void r100_cp_disable(struct radeon_device *rdev)
746 {
747         /* Disable ring */
748         rdev->cp.ready = false;
749         WREG32(RADEON_CP_CSQ_MODE, 0);
750         WREG32(RADEON_CP_CSQ_CNTL, 0);
751         if (r100_gui_wait_for_idle(rdev)) {
752                 printk(KERN_WARNING "Failed to wait GUI idle while "
753                        "programming pipes. Bad things might happen.\n");
754         }
755 }
756
757 int r100_cp_reset(struct radeon_device *rdev)
758 {
759         uint32_t tmp;
760         bool reinit_cp;
761         int i;
762
763         reinit_cp = rdev->cp.ready;
764         rdev->cp.ready = false;
765         WREG32(RADEON_CP_CSQ_MODE, 0);
766         WREG32(RADEON_CP_CSQ_CNTL, 0);
767         WREG32(RADEON_RBBM_SOFT_RESET, RADEON_SOFT_RESET_CP);
768         (void)RREG32(RADEON_RBBM_SOFT_RESET);
769         udelay(200);
770         WREG32(RADEON_RBBM_SOFT_RESET, 0);
771         /* Wait to prevent race in RBBM_STATUS */
772         mdelay(1);
773         for (i = 0; i < rdev->usec_timeout; i++) {
774                 tmp = RREG32(RADEON_RBBM_STATUS);
775                 if (!(tmp & (1 << 16))) {
776                         DRM_INFO("CP reset succeed (RBBM_STATUS=0x%08X)\n",
777                                  tmp);
778                         if (reinit_cp) {
779                                 return r100_cp_init(rdev, rdev->cp.ring_size);
780                         }
781                         return 0;
782                 }
783                 DRM_UDELAY(1);
784         }
785         tmp = RREG32(RADEON_RBBM_STATUS);
786         DRM_ERROR("Failed to reset CP (RBBM_STATUS=0x%08X)!\n", tmp);
787         return -1;
788 }
789
790 void r100_cp_commit(struct radeon_device *rdev)
791 {
792         WREG32(RADEON_CP_RB_WPTR, rdev->cp.wptr);
793         (void)RREG32(RADEON_CP_RB_WPTR);
794 }
795
796
797 /*
798  * CS functions
799  */
800 int r100_cs_parse_packet0(struct radeon_cs_parser *p,
801                           struct radeon_cs_packet *pkt,
802                           const unsigned *auth, unsigned n,
803                           radeon_packet0_check_t check)
804 {
805         unsigned reg;
806         unsigned i, j, m;
807         unsigned idx;
808         int r;
809
810         idx = pkt->idx + 1;
811         reg = pkt->reg;
812         /* Check that register fall into register range
813          * determined by the number of entry (n) in the
814          * safe register bitmap.
815          */
816         if (pkt->one_reg_wr) {
817                 if ((reg >> 7) > n) {
818                         return -EINVAL;
819                 }
820         } else {
821                 if (((reg + (pkt->count << 2)) >> 7) > n) {
822                         return -EINVAL;
823                 }
824         }
825         for (i = 0; i <= pkt->count; i++, idx++) {
826                 j = (reg >> 7);
827                 m = 1 << ((reg >> 2) & 31);
828                 if (auth[j] & m) {
829                         r = check(p, pkt, idx, reg);
830                         if (r) {
831                                 return r;
832                         }
833                 }
834                 if (pkt->one_reg_wr) {
835                         if (!(auth[j] & m)) {
836                                 break;
837                         }
838                 } else {
839                         reg += 4;
840                 }
841         }
842         return 0;
843 }
844
845 void r100_cs_dump_packet(struct radeon_cs_parser *p,
846                          struct radeon_cs_packet *pkt)
847 {
848         struct radeon_cs_chunk *ib_chunk;
849         volatile uint32_t *ib;
850         unsigned i;
851         unsigned idx;
852
853         ib = p->ib->ptr;
854         ib_chunk = &p->chunks[p->chunk_ib_idx];
855         idx = pkt->idx;
856         for (i = 0; i <= (pkt->count + 1); i++, idx++) {
857                 DRM_INFO("ib[%d]=0x%08X\n", idx, ib[idx]);
858         }
859 }
860
861 /**
862  * r100_cs_packet_parse() - parse cp packet and point ib index to next packet
863  * @parser:     parser structure holding parsing context.
864  * @pkt:        where to store packet informations
865  *
866  * Assume that chunk_ib_index is properly set. Will return -EINVAL
867  * if packet is bigger than remaining ib size. or if packets is unknown.
868  **/
869 int r100_cs_packet_parse(struct radeon_cs_parser *p,
870                          struct radeon_cs_packet *pkt,
871                          unsigned idx)
872 {
873         struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
874         uint32_t header;
875
876         if (idx >= ib_chunk->length_dw) {
877                 DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
878                           idx, ib_chunk->length_dw);
879                 return -EINVAL;
880         }
881         header = ib_chunk->kdata[idx];
882         pkt->idx = idx;
883         pkt->type = CP_PACKET_GET_TYPE(header);
884         pkt->count = CP_PACKET_GET_COUNT(header);
885         switch (pkt->type) {
886         case PACKET_TYPE0:
887                 pkt->reg = CP_PACKET0_GET_REG(header);
888                 pkt->one_reg_wr = CP_PACKET0_GET_ONE_REG_WR(header);
889                 break;
890         case PACKET_TYPE3:
891                 pkt->opcode = CP_PACKET3_GET_OPCODE(header);
892                 break;
893         case PACKET_TYPE2:
894                 pkt->count = -1;
895                 break;
896         default:
897                 DRM_ERROR("Unknown packet type %d at %d !\n", pkt->type, idx);
898                 return -EINVAL;
899         }
900         if ((pkt->count + 1 + pkt->idx) >= ib_chunk->length_dw) {
901                 DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n",
902                           pkt->idx, pkt->type, pkt->count, ib_chunk->length_dw);
903                 return -EINVAL;
904         }
905         return 0;
906 }
907
908 /**
909  * r100_cs_packet_next_vline() - parse userspace VLINE packet
910  * @parser:             parser structure holding parsing context.
911  *
912  * Userspace sends a special sequence for VLINE waits.
913  * PACKET0 - VLINE_START_END + value
914  * PACKET0 - WAIT_UNTIL +_value
915  * RELOC (P3) - crtc_id in reloc.
916  *
917  * This function parses this and relocates the VLINE START END
918  * and WAIT UNTIL packets to the correct crtc.
919  * It also detects a switched off crtc and nulls out the
920  * wait in that case.
921  */
922 int r100_cs_packet_parse_vline(struct radeon_cs_parser *p)
923 {
924         struct radeon_cs_chunk *ib_chunk;
925         struct drm_mode_object *obj;
926         struct drm_crtc *crtc;
927         struct radeon_crtc *radeon_crtc;
928         struct radeon_cs_packet p3reloc, waitreloc;
929         int crtc_id;
930         int r;
931         uint32_t header, h_idx, reg;
932
933         ib_chunk = &p->chunks[p->chunk_ib_idx];
934
935         /* parse the wait until */
936         r = r100_cs_packet_parse(p, &waitreloc, p->idx);
937         if (r)
938                 return r;
939
940         /* check its a wait until and only 1 count */
941         if (waitreloc.reg != RADEON_WAIT_UNTIL ||
942             waitreloc.count != 0) {
943                 DRM_ERROR("vline wait had illegal wait until segment\n");
944                 r = -EINVAL;
945                 return r;
946         }
947
948         if (ib_chunk->kdata[waitreloc.idx + 1] != RADEON_WAIT_CRTC_VLINE) {
949                 DRM_ERROR("vline wait had illegal wait until\n");
950                 r = -EINVAL;
951                 return r;
952         }
953
954         /* jump over the NOP */
955         r = r100_cs_packet_parse(p, &p3reloc, p->idx);
956         if (r)
957                 return r;
958
959         h_idx = p->idx - 2;
960         p->idx += waitreloc.count;
961         p->idx += p3reloc.count;
962
963         header = ib_chunk->kdata[h_idx];
964         crtc_id = ib_chunk->kdata[h_idx + 5];
965         reg = ib_chunk->kdata[h_idx] >> 2;
966         mutex_lock(&p->rdev->ddev->mode_config.mutex);
967         obj = drm_mode_object_find(p->rdev->ddev, crtc_id, DRM_MODE_OBJECT_CRTC);
968         if (!obj) {
969                 DRM_ERROR("cannot find crtc %d\n", crtc_id);
970                 r = -EINVAL;
971                 goto out;
972         }
973         crtc = obj_to_crtc(obj);
974         radeon_crtc = to_radeon_crtc(crtc);
975         crtc_id = radeon_crtc->crtc_id;
976
977         if (!crtc->enabled) {
978                 /* if the CRTC isn't enabled - we need to nop out the wait until */
979                 ib_chunk->kdata[h_idx + 2] = PACKET2(0);
980                 ib_chunk->kdata[h_idx + 3] = PACKET2(0);
981         } else if (crtc_id == 1) {
982                 switch (reg) {
983                 case AVIVO_D1MODE_VLINE_START_END:
984                         header &= R300_CP_PACKET0_REG_MASK;
985                         header |= AVIVO_D2MODE_VLINE_START_END >> 2;
986                         break;
987                 case RADEON_CRTC_GUI_TRIG_VLINE:
988                         header &= R300_CP_PACKET0_REG_MASK;
989                         header |= RADEON_CRTC2_GUI_TRIG_VLINE >> 2;
990                         break;
991                 default:
992                         DRM_ERROR("unknown crtc reloc\n");
993                         r = -EINVAL;
994                         goto out;
995                 }
996                 ib_chunk->kdata[h_idx] = header;
997                 ib_chunk->kdata[h_idx + 3] |= RADEON_ENG_DISPLAY_SELECT_CRTC1;
998         }
999 out:
1000         mutex_unlock(&p->rdev->ddev->mode_config.mutex);
1001         return r;
1002 }
1003
1004 /**
1005  * r100_cs_packet_next_reloc() - parse next packet which should be reloc packet3
1006  * @parser:             parser structure holding parsing context.
1007  * @data:               pointer to relocation data
1008  * @offset_start:       starting offset
1009  * @offset_mask:        offset mask (to align start offset on)
1010  * @reloc:              reloc informations
1011  *
1012  * Check next packet is relocation packet3, do bo validation and compute
1013  * GPU offset using the provided start.
1014  **/
1015 int r100_cs_packet_next_reloc(struct radeon_cs_parser *p,
1016                               struct radeon_cs_reloc **cs_reloc)
1017 {
1018         struct radeon_cs_chunk *ib_chunk;
1019         struct radeon_cs_chunk *relocs_chunk;
1020         struct radeon_cs_packet p3reloc;
1021         unsigned idx;
1022         int r;
1023
1024         if (p->chunk_relocs_idx == -1) {
1025                 DRM_ERROR("No relocation chunk !\n");
1026                 return -EINVAL;
1027         }
1028         *cs_reloc = NULL;
1029         ib_chunk = &p->chunks[p->chunk_ib_idx];
1030         relocs_chunk = &p->chunks[p->chunk_relocs_idx];
1031         r = r100_cs_packet_parse(p, &p3reloc, p->idx);
1032         if (r) {
1033                 return r;
1034         }
1035         p->idx += p3reloc.count + 2;
1036         if (p3reloc.type != PACKET_TYPE3 || p3reloc.opcode != PACKET3_NOP) {
1037                 DRM_ERROR("No packet3 for relocation for packet at %d.\n",
1038                           p3reloc.idx);
1039                 r100_cs_dump_packet(p, &p3reloc);
1040                 return -EINVAL;
1041         }
1042         idx = ib_chunk->kdata[p3reloc.idx + 1];
1043         if (idx >= relocs_chunk->length_dw) {
1044                 DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
1045                           idx, relocs_chunk->length_dw);
1046                 r100_cs_dump_packet(p, &p3reloc);
1047                 return -EINVAL;
1048         }
1049         /* FIXME: we assume reloc size is 4 dwords */
1050         *cs_reloc = p->relocs_ptr[(idx / 4)];
1051         return 0;
1052 }
1053
1054 static int r100_get_vtx_size(uint32_t vtx_fmt)
1055 {
1056         int vtx_size;
1057         vtx_size = 2;
1058         /* ordered according to bits in spec */
1059         if (vtx_fmt & RADEON_SE_VTX_FMT_W0)
1060                 vtx_size++;
1061         if (vtx_fmt & RADEON_SE_VTX_FMT_FPCOLOR)
1062                 vtx_size += 3;
1063         if (vtx_fmt & RADEON_SE_VTX_FMT_FPALPHA)
1064                 vtx_size++;
1065         if (vtx_fmt & RADEON_SE_VTX_FMT_PKCOLOR)
1066                 vtx_size++;
1067         if (vtx_fmt & RADEON_SE_VTX_FMT_FPSPEC)
1068                 vtx_size += 3;
1069         if (vtx_fmt & RADEON_SE_VTX_FMT_FPFOG)
1070                 vtx_size++;
1071         if (vtx_fmt & RADEON_SE_VTX_FMT_PKSPEC)
1072                 vtx_size++;
1073         if (vtx_fmt & RADEON_SE_VTX_FMT_ST0)
1074                 vtx_size += 2;
1075         if (vtx_fmt & RADEON_SE_VTX_FMT_ST1)
1076                 vtx_size += 2;
1077         if (vtx_fmt & RADEON_SE_VTX_FMT_Q1)
1078                 vtx_size++;
1079         if (vtx_fmt & RADEON_SE_VTX_FMT_ST2)
1080                 vtx_size += 2;
1081         if (vtx_fmt & RADEON_SE_VTX_FMT_Q2)
1082                 vtx_size++;
1083         if (vtx_fmt & RADEON_SE_VTX_FMT_ST3)
1084                 vtx_size += 2;
1085         if (vtx_fmt & RADEON_SE_VTX_FMT_Q3)
1086                 vtx_size++;
1087         if (vtx_fmt & RADEON_SE_VTX_FMT_Q0)
1088                 vtx_size++;
1089         /* blend weight */
1090         if (vtx_fmt & (0x7 << 15))
1091                 vtx_size += (vtx_fmt >> 15) & 0x7;
1092         if (vtx_fmt & RADEON_SE_VTX_FMT_N0)
1093                 vtx_size += 3;
1094         if (vtx_fmt & RADEON_SE_VTX_FMT_XY1)
1095                 vtx_size += 2;
1096         if (vtx_fmt & RADEON_SE_VTX_FMT_Z1)
1097                 vtx_size++;
1098         if (vtx_fmt & RADEON_SE_VTX_FMT_W1)
1099                 vtx_size++;
1100         if (vtx_fmt & RADEON_SE_VTX_FMT_N1)
1101                 vtx_size++;
1102         if (vtx_fmt & RADEON_SE_VTX_FMT_Z)
1103                 vtx_size++;
1104         return vtx_size;
1105 }
1106
1107 static int r100_packet0_check(struct radeon_cs_parser *p,
1108                               struct radeon_cs_packet *pkt,
1109                               unsigned idx, unsigned reg)
1110 {
1111         struct radeon_cs_chunk *ib_chunk;
1112         struct radeon_cs_reloc *reloc;
1113         struct r100_cs_track *track;
1114         volatile uint32_t *ib;
1115         uint32_t tmp;
1116         int r;
1117         int i, face;
1118         u32 tile_flags = 0;
1119
1120         ib = p->ib->ptr;
1121         ib_chunk = &p->chunks[p->chunk_ib_idx];
1122         track = (struct r100_cs_track *)p->track;
1123
1124         switch (reg) {
1125         case RADEON_CRTC_GUI_TRIG_VLINE:
1126                 r = r100_cs_packet_parse_vline(p);
1127                 if (r) {
1128                         DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1129                                   idx, reg);
1130                         r100_cs_dump_packet(p, pkt);
1131                         return r;
1132                 }
1133                 break;
1134                 /* FIXME: only allow PACKET3 blit? easier to check for out of
1135                  * range access */
1136         case RADEON_DST_PITCH_OFFSET:
1137         case RADEON_SRC_PITCH_OFFSET:
1138                 r = r100_reloc_pitch_offset(p, pkt, idx, reg);
1139                 if (r)
1140                         return r;
1141                 break;
1142         case RADEON_RB3D_DEPTHOFFSET:
1143                 r = r100_cs_packet_next_reloc(p, &reloc);
1144                 if (r) {
1145                         DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1146                                   idx, reg);
1147                         r100_cs_dump_packet(p, pkt);
1148                         return r;
1149                 }
1150                 track->zb.robj = reloc->robj;
1151                 track->zb.offset = ib_chunk->kdata[idx];
1152                 ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
1153                 break;
1154         case RADEON_RB3D_COLOROFFSET:
1155                 r = r100_cs_packet_next_reloc(p, &reloc);
1156                 if (r) {
1157                         DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1158                                   idx, reg);
1159                         r100_cs_dump_packet(p, pkt);
1160                         return r;
1161                 }
1162                 track->cb[0].robj = reloc->robj;
1163                 track->cb[0].offset = ib_chunk->kdata[idx];
1164                 ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
1165                 break;
1166         case RADEON_PP_TXOFFSET_0:
1167         case RADEON_PP_TXOFFSET_1:
1168         case RADEON_PP_TXOFFSET_2:
1169                 i = (reg - RADEON_PP_TXOFFSET_0) / 24;
1170                 r = r100_cs_packet_next_reloc(p, &reloc);
1171                 if (r) {
1172                         DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1173                                   idx, reg);
1174                         r100_cs_dump_packet(p, pkt);
1175                         return r;
1176                 }
1177                 ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
1178                 track->textures[i].robj = reloc->robj;
1179                 break;
1180         case RADEON_PP_CUBIC_OFFSET_T0_0:
1181         case RADEON_PP_CUBIC_OFFSET_T0_1:
1182         case RADEON_PP_CUBIC_OFFSET_T0_2:
1183         case RADEON_PP_CUBIC_OFFSET_T0_3:
1184         case RADEON_PP_CUBIC_OFFSET_T0_4:
1185                 i = (reg - RADEON_PP_CUBIC_OFFSET_T0_0) / 4;
1186                 r = r100_cs_packet_next_reloc(p, &reloc);
1187                 if (r) {
1188                         DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1189                                   idx, reg);
1190                         r100_cs_dump_packet(p, pkt);
1191                         return r;
1192                 }
1193                 track->textures[0].cube_info[i].offset = ib_chunk->kdata[idx];
1194                 ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
1195                 track->textures[0].cube_info[i].robj = reloc->robj;
1196                 break;
1197         case RADEON_PP_CUBIC_OFFSET_T1_0:
1198         case RADEON_PP_CUBIC_OFFSET_T1_1:
1199         case RADEON_PP_CUBIC_OFFSET_T1_2:
1200         case RADEON_PP_CUBIC_OFFSET_T1_3:
1201         case RADEON_PP_CUBIC_OFFSET_T1_4:
1202                 i = (reg - RADEON_PP_CUBIC_OFFSET_T1_0) / 4;
1203                 r = r100_cs_packet_next_reloc(p, &reloc);
1204                 if (r) {
1205                         DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1206                                   idx, reg);
1207                         r100_cs_dump_packet(p, pkt);
1208                         return r;
1209                 }
1210                 track->textures[1].cube_info[i].offset = ib_chunk->kdata[idx];
1211                 ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
1212                 track->textures[1].cube_info[i].robj = reloc->robj;
1213                 break;
1214         case RADEON_PP_CUBIC_OFFSET_T2_0:
1215         case RADEON_PP_CUBIC_OFFSET_T2_1:
1216         case RADEON_PP_CUBIC_OFFSET_T2_2:
1217         case RADEON_PP_CUBIC_OFFSET_T2_3:
1218         case RADEON_PP_CUBIC_OFFSET_T2_4:
1219                 i = (reg - RADEON_PP_CUBIC_OFFSET_T2_0) / 4;
1220                 r = r100_cs_packet_next_reloc(p, &reloc);
1221                 if (r) {
1222                         DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1223                                   idx, reg);
1224                         r100_cs_dump_packet(p, pkt);
1225                         return r;
1226                 }
1227                 track->textures[2].cube_info[i].offset = ib_chunk->kdata[idx];
1228                 ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
1229                 track->textures[2].cube_info[i].robj = reloc->robj;
1230                 break;
1231         case RADEON_RE_WIDTH_HEIGHT:
1232                 track->maxy = ((ib_chunk->kdata[idx] >> 16) & 0x7FF);
1233                 break;
1234         case RADEON_RB3D_COLORPITCH:
1235                 r = r100_cs_packet_next_reloc(p, &reloc);
1236                 if (r) {
1237                         DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1238                                   idx, reg);
1239                         r100_cs_dump_packet(p, pkt);
1240                         return r;
1241                 }
1242
1243                 if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
1244                         tile_flags |= RADEON_COLOR_TILE_ENABLE;
1245                 if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
1246                         tile_flags |= RADEON_COLOR_MICROTILE_ENABLE;
1247
1248                 tmp = ib_chunk->kdata[idx] & ~(0x7 << 16);
1249                 tmp |= tile_flags;
1250                 ib[idx] = tmp;
1251
1252                 track->cb[0].pitch = ib_chunk->kdata[idx] & RADEON_COLORPITCH_MASK;
1253                 break;
1254         case RADEON_RB3D_DEPTHPITCH:
1255                 track->zb.pitch = ib_chunk->kdata[idx] & RADEON_DEPTHPITCH_MASK;
1256                 break;
1257         case RADEON_RB3D_CNTL:
1258                 switch ((ib_chunk->kdata[idx] >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f) {
1259                 case 7:
1260                 case 8:
1261                 case 9:
1262                 case 11:
1263                 case 12:
1264                         track->cb[0].cpp = 1;
1265                         break;
1266                 case 3:
1267                 case 4:
1268                 case 15:
1269                         track->cb[0].cpp = 2;
1270                         break;
1271                 case 6:
1272                         track->cb[0].cpp = 4;
1273                         break;
1274                 default:
1275                         DRM_ERROR("Invalid color buffer format (%d) !\n",
1276                                   ((ib_chunk->kdata[idx] >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f));
1277                         return -EINVAL;
1278                 }
1279                 track->z_enabled = !!(ib_chunk->kdata[idx] & RADEON_Z_ENABLE);
1280                 break;
1281         case RADEON_RB3D_ZSTENCILCNTL:
1282                 switch (ib_chunk->kdata[idx] & 0xf) {
1283                 case 0:
1284                         track->zb.cpp = 2;
1285                         break;
1286                 case 2:
1287                 case 3:
1288                 case 4:
1289                 case 5:
1290                 case 9:
1291                 case 11:
1292                         track->zb.cpp = 4;
1293                         break;
1294                 default:
1295                         break;
1296                 }
1297                 break;
1298         case RADEON_RB3D_ZPASS_ADDR:
1299                 r = r100_cs_packet_next_reloc(p, &reloc);
1300                 if (r) {
1301                         DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1302                                   idx, reg);
1303                         r100_cs_dump_packet(p, pkt);
1304                         return r;
1305                 }
1306                 ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
1307                 break;
1308         case RADEON_PP_CNTL:
1309                 {
1310                         uint32_t temp = ib_chunk->kdata[idx] >> 4;
1311                         for (i = 0; i < track->num_texture; i++)
1312                                 track->textures[i].enabled = !!(temp & (1 << i));
1313                 }
1314                 break;
1315         case RADEON_SE_VF_CNTL:
1316                 track->vap_vf_cntl = ib_chunk->kdata[idx];
1317                 break;
1318         case RADEON_SE_VTX_FMT:
1319                 track->vtx_size = r100_get_vtx_size(ib_chunk->kdata[idx]);
1320                 break;
1321         case RADEON_PP_TEX_SIZE_0:
1322         case RADEON_PP_TEX_SIZE_1:
1323         case RADEON_PP_TEX_SIZE_2:
1324                 i = (reg - RADEON_PP_TEX_SIZE_0) / 8;
1325                 track->textures[i].width = (ib_chunk->kdata[idx] & RADEON_TEX_USIZE_MASK) + 1;
1326                 track->textures[i].height = ((ib_chunk->kdata[idx] & RADEON_TEX_VSIZE_MASK) >> RADEON_TEX_VSIZE_SHIFT) + 1;
1327                 break;
1328         case RADEON_PP_TEX_PITCH_0:
1329         case RADEON_PP_TEX_PITCH_1:
1330         case RADEON_PP_TEX_PITCH_2:
1331                 i = (reg - RADEON_PP_TEX_PITCH_0) / 8;
1332                 track->textures[i].pitch = ib_chunk->kdata[idx] + 32;
1333                 break;
1334         case RADEON_PP_TXFILTER_0:
1335         case RADEON_PP_TXFILTER_1:
1336         case RADEON_PP_TXFILTER_2:
1337                 i = (reg - RADEON_PP_TXFILTER_0) / 24;
1338                 track->textures[i].num_levels = ((ib_chunk->kdata[idx] & RADEON_MAX_MIP_LEVEL_MASK)
1339                                                  >> RADEON_MAX_MIP_LEVEL_SHIFT);
1340                 tmp = (ib_chunk->kdata[idx] >> 23) & 0x7;
1341                 if (tmp == 2 || tmp == 6)
1342                         track->textures[i].roundup_w = false;
1343                 tmp = (ib_chunk->kdata[idx] >> 27) & 0x7;
1344                 if (tmp == 2 || tmp == 6)
1345                         track->textures[i].roundup_h = false;
1346                 break;
1347         case RADEON_PP_TXFORMAT_0:
1348         case RADEON_PP_TXFORMAT_1:
1349         case RADEON_PP_TXFORMAT_2:
1350                 i = (reg - RADEON_PP_TXFORMAT_0) / 24;
1351                 if (ib_chunk->kdata[idx] & RADEON_TXFORMAT_NON_POWER2) {
1352                         track->textures[i].use_pitch = 1;
1353                 } else {
1354                         track->textures[i].use_pitch = 0;
1355                         track->textures[i].width = 1 << ((ib_chunk->kdata[idx] >> RADEON_TXFORMAT_WIDTH_SHIFT) & RADEON_TXFORMAT_WIDTH_MASK);
1356                         track->textures[i].height = 1 << ((ib_chunk->kdata[idx] >> RADEON_TXFORMAT_HEIGHT_SHIFT) & RADEON_TXFORMAT_HEIGHT_MASK);
1357                 }
1358                 if (ib_chunk->kdata[idx] & RADEON_TXFORMAT_CUBIC_MAP_ENABLE)
1359                         track->textures[i].tex_coord_type = 2;
1360                 switch ((ib_chunk->kdata[idx] & RADEON_TXFORMAT_FORMAT_MASK)) {
1361                 case RADEON_TXFORMAT_I8:
1362                 case RADEON_TXFORMAT_RGB332:
1363                 case RADEON_TXFORMAT_Y8:
1364                         track->textures[i].cpp = 1;
1365                         break;
1366                 case RADEON_TXFORMAT_AI88:
1367                 case RADEON_TXFORMAT_ARGB1555:
1368                 case RADEON_TXFORMAT_RGB565:
1369                 case RADEON_TXFORMAT_ARGB4444:
1370                 case RADEON_TXFORMAT_VYUY422:
1371                 case RADEON_TXFORMAT_YVYU422:
1372                 case RADEON_TXFORMAT_DXT1:
1373                 case RADEON_TXFORMAT_SHADOW16:
1374                 case RADEON_TXFORMAT_LDUDV655:
1375                 case RADEON_TXFORMAT_DUDV88:
1376                         track->textures[i].cpp = 2;
1377                         break;
1378                 case RADEON_TXFORMAT_ARGB8888:
1379                 case RADEON_TXFORMAT_RGBA8888:
1380                 case RADEON_TXFORMAT_DXT23:
1381                 case RADEON_TXFORMAT_DXT45:
1382                 case RADEON_TXFORMAT_SHADOW32:
1383                 case RADEON_TXFORMAT_LDUDUV8888:
1384                         track->textures[i].cpp = 4;
1385                         break;
1386                 }
1387                 track->textures[i].cube_info[4].width = 1 << ((ib_chunk->kdata[idx] >> 16) & 0xf);
1388                 track->textures[i].cube_info[4].height = 1 << ((ib_chunk->kdata[idx] >> 20) & 0xf);
1389                 break;
1390         case RADEON_PP_CUBIC_FACES_0:
1391         case RADEON_PP_CUBIC_FACES_1:
1392         case RADEON_PP_CUBIC_FACES_2:
1393                 tmp = ib_chunk->kdata[idx];
1394                 i = (reg - RADEON_PP_CUBIC_FACES_0) / 4;
1395                 for (face = 0; face < 4; face++) {
1396                         track->textures[i].cube_info[face].width = 1 << ((tmp >> (face * 8)) & 0xf);
1397                         track->textures[i].cube_info[face].height = 1 << ((tmp >> ((face * 8) + 4)) & 0xf);
1398                 }
1399                 break;
1400         default:
1401                 printk(KERN_ERR "Forbidden register 0x%04X in cs at %d\n",
1402                        reg, idx);
1403                 return -EINVAL;
1404         }
1405         return 0;
1406 }
1407
1408 int r100_cs_track_check_pkt3_indx_buffer(struct radeon_cs_parser *p,
1409                                          struct radeon_cs_packet *pkt,
1410                                          struct radeon_object *robj)
1411 {
1412         struct radeon_cs_chunk *ib_chunk;
1413         unsigned idx;
1414
1415         ib_chunk = &p->chunks[p->chunk_ib_idx];
1416         idx = pkt->idx + 1;
1417         if ((ib_chunk->kdata[idx+2] + 1) > radeon_object_size(robj)) {
1418                 DRM_ERROR("[drm] Buffer too small for PACKET3 INDX_BUFFER "
1419                           "(need %u have %lu) !\n",
1420                           ib_chunk->kdata[idx+2] + 1,
1421                           radeon_object_size(robj));
1422                 return -EINVAL;
1423         }
1424         return 0;
1425 }
1426
1427 static int r100_packet3_check(struct radeon_cs_parser *p,
1428                               struct radeon_cs_packet *pkt)
1429 {
1430         struct radeon_cs_chunk *ib_chunk;
1431         struct radeon_cs_reloc *reloc;
1432         struct r100_cs_track *track;
1433         unsigned idx;
1434         unsigned i, c;
1435         volatile uint32_t *ib;
1436         int r;
1437
1438         ib = p->ib->ptr;
1439         ib_chunk = &p->chunks[p->chunk_ib_idx];
1440         idx = pkt->idx + 1;
1441         track = (struct r100_cs_track *)p->track;
1442         switch (pkt->opcode) {
1443         case PACKET3_3D_LOAD_VBPNTR:
1444                 c = ib_chunk->kdata[idx++];
1445                 track->num_arrays = c;
1446                 for (i = 0; i < (c - 1); i += 2, idx += 3) {
1447                         r = r100_cs_packet_next_reloc(p, &reloc);
1448                         if (r) {
1449                                 DRM_ERROR("No reloc for packet3 %d\n",
1450                                           pkt->opcode);
1451                                 r100_cs_dump_packet(p, pkt);
1452                                 return r;
1453                         }
1454                         ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
1455                         track->arrays[i + 0].robj = reloc->robj;
1456                         track->arrays[i + 0].esize = ib_chunk->kdata[idx] >> 8;
1457                         track->arrays[i + 0].esize &= 0x7F;
1458                         r = r100_cs_packet_next_reloc(p, &reloc);
1459                         if (r) {
1460                                 DRM_ERROR("No reloc for packet3 %d\n",
1461                                           pkt->opcode);
1462                                 r100_cs_dump_packet(p, pkt);
1463                                 return r;
1464                         }
1465                         ib[idx+2] = ib_chunk->kdata[idx+2] + ((u32)reloc->lobj.gpu_offset);
1466                         track->arrays[i + 1].robj = reloc->robj;
1467                         track->arrays[i + 1].esize = ib_chunk->kdata[idx] >> 24;
1468                         track->arrays[i + 1].esize &= 0x7F;
1469                 }
1470                 if (c & 1) {
1471                         r = r100_cs_packet_next_reloc(p, &reloc);
1472                         if (r) {
1473                                 DRM_ERROR("No reloc for packet3 %d\n",
1474                                           pkt->opcode);
1475                                 r100_cs_dump_packet(p, pkt);
1476                                 return r;
1477                         }
1478                         ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
1479                         track->arrays[i + 0].robj = reloc->robj;
1480                         track->arrays[i + 0].esize = ib_chunk->kdata[idx] >> 8;
1481                         track->arrays[i + 0].esize &= 0x7F;
1482                 }
1483                 break;
1484         case PACKET3_INDX_BUFFER:
1485                 r = r100_cs_packet_next_reloc(p, &reloc);
1486                 if (r) {
1487                         DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode);
1488                         r100_cs_dump_packet(p, pkt);
1489                         return r;
1490                 }
1491                 ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
1492                 r = r100_cs_track_check_pkt3_indx_buffer(p, pkt, reloc->robj);
1493                 if (r) {
1494                         return r;
1495                 }
1496                 break;
1497         case 0x23:
1498                 /* 3D_RNDR_GEN_INDX_PRIM on r100/r200 */
1499                 r = r100_cs_packet_next_reloc(p, &reloc);
1500                 if (r) {
1501                         DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode);
1502                         r100_cs_dump_packet(p, pkt);
1503                         return r;
1504                 }
1505                 ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
1506                 track->num_arrays = 1;
1507                 track->vtx_size = r100_get_vtx_size(ib_chunk->kdata[idx+2]);
1508
1509                 track->arrays[0].robj = reloc->robj;
1510                 track->arrays[0].esize = track->vtx_size;
1511
1512                 track->max_indx = ib_chunk->kdata[idx+1];
1513
1514                 track->vap_vf_cntl = ib_chunk->kdata[idx+3];
1515                 track->immd_dwords = pkt->count - 1;
1516                 r = r100_cs_track_check(p->rdev, track);
1517                 if (r)
1518                         return r;
1519                 break;
1520         case PACKET3_3D_DRAW_IMMD:
1521                 if (((ib_chunk->kdata[idx+1] >> 4) & 0x3) != 3) {
1522                         DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n");
1523                         return -EINVAL;
1524                 }
1525                 track->vap_vf_cntl = ib_chunk->kdata[idx+1];
1526                 track->immd_dwords = pkt->count - 1;
1527                 r = r100_cs_track_check(p->rdev, track);
1528                 if (r)
1529                         return r;
1530                 break;
1531                 /* triggers drawing using in-packet vertex data */
1532         case PACKET3_3D_DRAW_IMMD_2:
1533                 if (((ib_chunk->kdata[idx] >> 4) & 0x3) != 3) {
1534                         DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n");
1535                         return -EINVAL;
1536                 }
1537                 track->vap_vf_cntl = ib_chunk->kdata[idx];
1538                 track->immd_dwords = pkt->count;
1539                 r = r100_cs_track_check(p->rdev, track);
1540                 if (r)
1541                         return r;
1542                 break;
1543                 /* triggers drawing using in-packet vertex data */
1544         case PACKET3_3D_DRAW_VBUF_2:
1545                 track->vap_vf_cntl = ib_chunk->kdata[idx];
1546                 r = r100_cs_track_check(p->rdev, track);
1547                 if (r)
1548                         return r;
1549                 break;
1550                 /* triggers drawing of vertex buffers setup elsewhere */
1551         case PACKET3_3D_DRAW_INDX_2:
1552                 track->vap_vf_cntl = ib_chunk->kdata[idx];
1553                 r = r100_cs_track_check(p->rdev, track);
1554                 if (r)
1555                         return r;
1556                 break;
1557                 /* triggers drawing using indices to vertex buffer */
1558         case PACKET3_3D_DRAW_VBUF:
1559                 track->vap_vf_cntl = ib_chunk->kdata[idx + 1];
1560                 r = r100_cs_track_check(p->rdev, track);
1561                 if (r)
1562                         return r;
1563                 break;
1564                 /* triggers drawing of vertex buffers setup elsewhere */
1565         case PACKET3_3D_DRAW_INDX:
1566                 track->vap_vf_cntl = ib_chunk->kdata[idx + 1];
1567                 r = r100_cs_track_check(p->rdev, track);
1568                 if (r)
1569                         return r;
1570                 break;
1571                 /* triggers drawing using indices to vertex buffer */
1572         case PACKET3_NOP:
1573                 break;
1574         default:
1575                 DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode);
1576                 return -EINVAL;
1577         }
1578         return 0;
1579 }
1580
1581 int r100_cs_parse(struct radeon_cs_parser *p)
1582 {
1583         struct radeon_cs_packet pkt;
1584         struct r100_cs_track track;
1585         int r;
1586
1587         r100_cs_track_clear(p->rdev, &track);
1588         p->track = &track;
1589         do {
1590                 r = r100_cs_packet_parse(p, &pkt, p->idx);
1591                 if (r) {
1592                         return r;
1593                 }
1594                 p->idx += pkt.count + 2;
1595                 switch (pkt.type) {
1596                         case PACKET_TYPE0:
1597                                 if (p->rdev->family >= CHIP_R200)
1598                                         r = r100_cs_parse_packet0(p, &pkt,
1599                                                                   p->rdev->config.r100.reg_safe_bm,
1600                                                                   p->rdev->config.r100.reg_safe_bm_size,
1601                                                                   &r200_packet0_check);
1602                                 else
1603                                         r = r100_cs_parse_packet0(p, &pkt,
1604                                                                   p->rdev->config.r100.reg_safe_bm,
1605                                                                   p->rdev->config.r100.reg_safe_bm_size,
1606                                                                   &r100_packet0_check);
1607                                 break;
1608                         case PACKET_TYPE2:
1609                                 break;
1610                         case PACKET_TYPE3:
1611                                 r = r100_packet3_check(p, &pkt);
1612                                 break;
1613                         default:
1614                                 DRM_ERROR("Unknown packet type %d !\n",
1615                                           pkt.type);
1616                                 return -EINVAL;
1617                 }
1618                 if (r) {
1619                         return r;
1620                 }
1621         } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
1622         return 0;
1623 }
1624
1625
1626 /*
1627  * Global GPU functions
1628  */
1629 void r100_errata(struct radeon_device *rdev)
1630 {
1631         rdev->pll_errata = 0;
1632
1633         if (rdev->family == CHIP_RV200 || rdev->family == CHIP_RS200) {
1634                 rdev->pll_errata |= CHIP_ERRATA_PLL_DUMMYREADS;
1635         }
1636
1637         if (rdev->family == CHIP_RV100 ||
1638             rdev->family == CHIP_RS100 ||
1639             rdev->family == CHIP_RS200) {
1640                 rdev->pll_errata |= CHIP_ERRATA_PLL_DELAY;
1641         }
1642 }
1643
1644 /* Wait for vertical sync on primary CRTC */
1645 void r100_gpu_wait_for_vsync(struct radeon_device *rdev)
1646 {
1647         uint32_t crtc_gen_cntl, tmp;
1648         int i;
1649
1650         crtc_gen_cntl = RREG32(RADEON_CRTC_GEN_CNTL);
1651         if ((crtc_gen_cntl & RADEON_CRTC_DISP_REQ_EN_B) ||
1652             !(crtc_gen_cntl & RADEON_CRTC_EN)) {
1653                 return;
1654         }
1655         /* Clear the CRTC_VBLANK_SAVE bit */
1656         WREG32(RADEON_CRTC_STATUS, RADEON_CRTC_VBLANK_SAVE_CLEAR);
1657         for (i = 0; i < rdev->usec_timeout; i++) {
1658                 tmp = RREG32(RADEON_CRTC_STATUS);
1659                 if (tmp & RADEON_CRTC_VBLANK_SAVE) {
1660                         return;
1661                 }
1662                 DRM_UDELAY(1);
1663         }
1664 }
1665
1666 /* Wait for vertical sync on secondary CRTC */
1667 void r100_gpu_wait_for_vsync2(struct radeon_device *rdev)
1668 {
1669         uint32_t crtc2_gen_cntl, tmp;
1670         int i;
1671
1672         crtc2_gen_cntl = RREG32(RADEON_CRTC2_GEN_CNTL);
1673         if ((crtc2_gen_cntl & RADEON_CRTC2_DISP_REQ_EN_B) ||
1674             !(crtc2_gen_cntl & RADEON_CRTC2_EN))
1675                 return;
1676
1677         /* Clear the CRTC_VBLANK_SAVE bit */
1678         WREG32(RADEON_CRTC2_STATUS, RADEON_CRTC2_VBLANK_SAVE_CLEAR);
1679         for (i = 0; i < rdev->usec_timeout; i++) {
1680                 tmp = RREG32(RADEON_CRTC2_STATUS);
1681                 if (tmp & RADEON_CRTC2_VBLANK_SAVE) {
1682                         return;
1683                 }
1684                 DRM_UDELAY(1);
1685         }
1686 }
1687
1688 int r100_rbbm_fifo_wait_for_entry(struct radeon_device *rdev, unsigned n)
1689 {
1690         unsigned i;
1691         uint32_t tmp;
1692
1693         for (i = 0; i < rdev->usec_timeout; i++) {
1694                 tmp = RREG32(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK;
1695                 if (tmp >= n) {
1696                         return 0;
1697                 }
1698                 DRM_UDELAY(1);
1699         }
1700         return -1;
1701 }
1702
1703 int r100_gui_wait_for_idle(struct radeon_device *rdev)
1704 {
1705         unsigned i;
1706         uint32_t tmp;
1707
1708         if (r100_rbbm_fifo_wait_for_entry(rdev, 64)) {
1709                 printk(KERN_WARNING "radeon: wait for empty RBBM fifo failed !"
1710                        " Bad things might happen.\n");
1711         }
1712         for (i = 0; i < rdev->usec_timeout; i++) {
1713                 tmp = RREG32(RADEON_RBBM_STATUS);
1714                 if (!(tmp & (1 << 31))) {
1715                         return 0;
1716                 }
1717                 DRM_UDELAY(1);
1718         }
1719         return -1;
1720 }
1721
1722 int r100_mc_wait_for_idle(struct radeon_device *rdev)
1723 {
1724         unsigned i;
1725         uint32_t tmp;
1726
1727         for (i = 0; i < rdev->usec_timeout; i++) {
1728                 /* read MC_STATUS */
1729                 tmp = RREG32(0x0150);
1730                 if (tmp & (1 << 2)) {
1731                         return 0;
1732                 }
1733                 DRM_UDELAY(1);
1734         }
1735         return -1;
1736 }
1737
/*
 * GPU init hook for r100: at present it only resets the HDP block.
 */
void r100_gpu_init(struct radeon_device *rdev)
{
	/* TODO: anythings to do here ? pipes ? */
	r100_hdp_reset(rdev);
}
1743
1744 void r100_hdp_reset(struct radeon_device *rdev)
1745 {
1746         uint32_t tmp;
1747
1748         tmp = RREG32(RADEON_HOST_PATH_CNTL) & RADEON_HDP_APER_CNTL;
1749         tmp |= (7 << 28);
1750         WREG32(RADEON_HOST_PATH_CNTL, tmp | RADEON_HDP_SOFT_RESET | RADEON_HDP_READ_BUFFER_INVALIDATE);
1751         (void)RREG32(RADEON_HOST_PATH_CNTL);
1752         udelay(200);
1753         WREG32(RADEON_RBBM_SOFT_RESET, 0);
1754         WREG32(RADEON_HOST_PATH_CNTL, tmp);
1755         (void)RREG32(RADEON_HOST_PATH_CNTL);
1756 }
1757
1758 int r100_rb2d_reset(struct radeon_device *rdev)
1759 {
1760         uint32_t tmp;
1761         int i;
1762
1763         WREG32(RADEON_RBBM_SOFT_RESET, RADEON_SOFT_RESET_E2);
1764         (void)RREG32(RADEON_RBBM_SOFT_RESET);
1765         udelay(200);
1766         WREG32(RADEON_RBBM_SOFT_RESET, 0);
1767         /* Wait to prevent race in RBBM_STATUS */
1768         mdelay(1);
1769         for (i = 0; i < rdev->usec_timeout; i++) {
1770                 tmp = RREG32(RADEON_RBBM_STATUS);
1771                 if (!(tmp & (1 << 26))) {
1772                         DRM_INFO("RB2D reset succeed (RBBM_STATUS=0x%08X)\n",
1773                                  tmp);
1774                         return 0;
1775                 }
1776                 DRM_UDELAY(1);
1777         }
1778         tmp = RREG32(RADEON_RBBM_STATUS);
1779         DRM_ERROR("Failed to reset RB2D (RBBM_STATUS=0x%08X)!\n", tmp);
1780         return -1;
1781 }
1782
1783 int r100_gpu_reset(struct radeon_device *rdev)
1784 {
1785         uint32_t status;
1786
1787         /* reset order likely matter */
1788         status = RREG32(RADEON_RBBM_STATUS);
1789         /* reset HDP */
1790         r100_hdp_reset(rdev);
1791         /* reset rb2d */
1792         if (status & ((1 << 17) | (1 << 18) | (1 << 27))) {
1793                 r100_rb2d_reset(rdev);
1794         }
1795         /* TODO: reset 3D engine */
1796         /* reset CP */
1797         status = RREG32(RADEON_RBBM_STATUS);
1798         if (status & (1 << 16)) {
1799                 r100_cp_reset(rdev);
1800         }
1801         /* Check if GPU is idle */
1802         status = RREG32(RADEON_RBBM_STATUS);
1803         if (status & (1 << 31)) {
1804                 DRM_ERROR("Failed to reset GPU (RBBM_STATUS=0x%08X)\n", status);
1805                 return -1;
1806         }
1807         DRM_INFO("GPU reset succeed (RBBM_STATUS=0x%08X)\n", status);
1808         return 0;
1809 }
1810
1811
1812 /*
1813  * VRAM info
1814  */
1815 static void r100_vram_get_type(struct radeon_device *rdev)
1816 {
1817         uint32_t tmp;
1818
1819         rdev->mc.vram_is_ddr = false;
1820         if (rdev->flags & RADEON_IS_IGP)
1821                 rdev->mc.vram_is_ddr = true;
1822         else if (RREG32(RADEON_MEM_SDRAM_MODE_REG) & RADEON_MEM_CFG_TYPE_DDR)
1823                 rdev->mc.vram_is_ddr = true;
1824         if ((rdev->family == CHIP_RV100) ||
1825             (rdev->family == CHIP_RS100) ||
1826             (rdev->family == CHIP_RS200)) {
1827                 tmp = RREG32(RADEON_MEM_CNTL);
1828                 if (tmp & RV100_HALF_MODE) {
1829                         rdev->mc.vram_width = 32;
1830                 } else {
1831                         rdev->mc.vram_width = 64;
1832                 }
1833                 if (rdev->flags & RADEON_SINGLE_CRTC) {
1834                         rdev->mc.vram_width /= 4;
1835                         rdev->mc.vram_is_ddr = true;
1836                 }
1837         } else if (rdev->family <= CHIP_RV280) {
1838                 tmp = RREG32(RADEON_MEM_CNTL);
1839                 if (tmp & RADEON_MEM_NUM_CHANNELS_MASK) {
1840                         rdev->mc.vram_width = 128;
1841                 } else {
1842                         rdev->mc.vram_width = 64;
1843                 }
1844         } else {
1845                 /* newer IGPs */
1846                 rdev->mc.vram_width = 128;
1847         }
1848 }
1849
1850 static u32 r100_get_accessible_vram(struct radeon_device *rdev)
1851 {
1852         u32 aper_size;
1853         u8 byte;
1854
1855         aper_size = RREG32(RADEON_CONFIG_APER_SIZE);
1856
1857         /* Set HDP_APER_CNTL only on cards that are known not to be broken,
1858          * that is has the 2nd generation multifunction PCI interface
1859          */
1860         if (rdev->family == CHIP_RV280 ||
1861             rdev->family >= CHIP_RV350) {
1862                 WREG32_P(RADEON_HOST_PATH_CNTL, RADEON_HDP_APER_CNTL,
1863                        ~RADEON_HDP_APER_CNTL);
1864                 DRM_INFO("Generation 2 PCI interface, using max accessible memory\n");
1865                 return aper_size * 2;
1866         }
1867
1868         /* Older cards have all sorts of funny issues to deal with. First
1869          * check if it's a multifunction card by reading the PCI config
1870          * header type... Limit those to one aperture size
1871          */
1872         pci_read_config_byte(rdev->pdev, 0xe, &byte);
1873         if (byte & 0x80) {
1874                 DRM_INFO("Generation 1 PCI interface in multifunction mode\n");
1875                 DRM_INFO("Limiting VRAM to one aperture\n");
1876                 return aper_size;
1877         }
1878
1879         /* Single function older card. We read HDP_APER_CNTL to see how the BIOS
1880          * have set it up. We don't write this as it's broken on some ASICs but
1881          * we expect the BIOS to have done the right thing (might be too optimistic...)
1882          */
1883         if (RREG32(RADEON_HOST_PATH_CNTL) & RADEON_HDP_APER_CNTL)
1884                 return aper_size * 2;
1885         return aper_size;
1886 }
1887
1888 void r100_vram_init_sizes(struct radeon_device *rdev)
1889 {
1890         u64 config_aper_size;
1891         u32 accessible;
1892
1893         config_aper_size = RREG32(RADEON_CONFIG_APER_SIZE);
1894
1895         if (rdev->flags & RADEON_IS_IGP) {
1896                 uint32_t tom;
1897                 /* read NB_TOM to get the amount of ram stolen for the GPU */
1898                 tom = RREG32(RADEON_NB_TOM);
1899                 rdev->mc.real_vram_size = (((tom >> 16) - (tom & 0xffff) + 1) << 16);
1900                 /* for IGPs we need to keep VRAM where it was put by the BIOS */
1901                 rdev->mc.vram_location = (tom & 0xffff) << 16;
1902                 WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size);
1903                 rdev->mc.mc_vram_size = rdev->mc.real_vram_size;
1904         } else {
1905                 rdev->mc.real_vram_size = RREG32(RADEON_CONFIG_MEMSIZE);
1906                 /* Some production boards of m6 will report 0
1907                  * if it's 8 MB
1908                  */
1909                 if (rdev->mc.real_vram_size == 0) {
1910                         rdev->mc.real_vram_size = 8192 * 1024;
1911                         WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size);
1912                 }
1913                 /* let driver place VRAM */
1914                 rdev->mc.vram_location = 0xFFFFFFFFUL;
1915                  /* Fix for RN50, M6, M7 with 8/16/32(??) MBs of VRAM - 
1916                   * Novell bug 204882 + along with lots of ubuntu ones */
1917                 if (config_aper_size > rdev->mc.real_vram_size)
1918                         rdev->mc.mc_vram_size = config_aper_size;
1919                 else
1920                         rdev->mc.mc_vram_size = rdev->mc.real_vram_size;
1921         }
1922
1923         /* work out accessible VRAM */
1924         accessible = r100_get_accessible_vram(rdev);
1925
1926         rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
1927         rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
1928
1929         if (accessible > rdev->mc.aper_size)
1930                 accessible = rdev->mc.aper_size;
1931
1932         if (rdev->mc.mc_vram_size > rdev->mc.aper_size)
1933                 rdev->mc.mc_vram_size = rdev->mc.aper_size;
1934
1935         if (rdev->mc.real_vram_size > rdev->mc.aper_size)
1936                 rdev->mc.real_vram_size = rdev->mc.aper_size;
1937 }
1938
/*
 * Gather VRAM information: memory type and bus width first, then the
 * size and aperture fields.
 */
void r100_vram_info(struct radeon_device *rdev)
{
	r100_vram_get_type(rdev);
	r100_vram_init_sizes(rdev);
}
1945
1946
1947 /*
1948  * Indirect registers accessor
1949  */
1950 void r100_pll_errata_after_index(struct radeon_device *rdev)
1951 {
1952         if (!(rdev->pll_errata & CHIP_ERRATA_PLL_DUMMYREADS)) {
1953                 return;
1954         }
1955         (void)RREG32(RADEON_CLOCK_CNTL_DATA);
1956         (void)RREG32(RADEON_CRTC_GEN_CNTL);
1957 }
1958
1959 static void r100_pll_errata_after_data(struct radeon_device *rdev)
1960 {
1961         /* This workarounds is necessary on RV100, RS100 and RS200 chips
1962          * or the chip could hang on a subsequent access
1963          */
1964         if (rdev->pll_errata & CHIP_ERRATA_PLL_DELAY) {
1965                 udelay(5000);
1966         }
1967
1968         /* This function is required to workaround a hardware bug in some (all?)
1969          * revisions of the R300.  This workaround should be called after every
1970          * CLOCK_CNTL_INDEX register access.  If not, register reads afterward
1971          * may not be correct.
1972          */
1973         if (rdev->pll_errata & CHIP_ERRATA_R300_CG) {
1974                 uint32_t save, tmp;
1975
1976                 save = RREG32(RADEON_CLOCK_CNTL_INDEX);
1977                 tmp = save & ~(0x3f | RADEON_PLL_WR_EN);
1978                 WREG32(RADEON_CLOCK_CNTL_INDEX, tmp);
1979                 tmp = RREG32(RADEON_CLOCK_CNTL_DATA);
1980                 WREG32(RADEON_CLOCK_CNTL_INDEX, save);
1981         }
1982 }
1983
1984 uint32_t r100_pll_rreg(struct radeon_device *rdev, uint32_t reg)
1985 {
1986         uint32_t data;
1987
1988         WREG8(RADEON_CLOCK_CNTL_INDEX, reg & 0x3f);
1989         r100_pll_errata_after_index(rdev);
1990         data = RREG32(RADEON_CLOCK_CNTL_DATA);
1991         r100_pll_errata_after_data(rdev);
1992         return data;
1993 }
1994
1995 void r100_pll_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
1996 {
1997         WREG8(RADEON_CLOCK_CNTL_INDEX, ((reg & 0x3f) | RADEON_PLL_WR_EN));
1998         r100_pll_errata_after_index(rdev);
1999         WREG32(RADEON_CLOCK_CNTL_DATA, v);
2000         r100_pll_errata_after_data(rdev);
2001 }
2002
2003 int r100_init(struct radeon_device *rdev)
2004 {
2005         if (ASIC_IS_RN50(rdev)) {
2006                 rdev->config.r100.reg_safe_bm = rn50_reg_safe_bm;
2007                 rdev->config.r100.reg_safe_bm_size = ARRAY_SIZE(rn50_reg_safe_bm);
2008         } else if (rdev->family < CHIP_R200) {
2009                 rdev->config.r100.reg_safe_bm = r100_reg_safe_bm;
2010                 rdev->config.r100.reg_safe_bm_size = ARRAY_SIZE(r100_reg_safe_bm);
2011         } else {
2012                 return r200_init(rdev);
2013         }
2014         return 0;
2015 }
2016
2017 /*
2018  * Debugfs info
2019  */
2020 #if defined(CONFIG_DEBUG_FS)
2021 static int r100_debugfs_rbbm_info(struct seq_file *m, void *data)
2022 {
2023         struct drm_info_node *node = (struct drm_info_node *) m->private;
2024         struct drm_device *dev = node->minor->dev;
2025         struct radeon_device *rdev = dev->dev_private;
2026         uint32_t reg, value;
2027         unsigned i;
2028
2029         seq_printf(m, "RBBM_STATUS 0x%08x\n", RREG32(RADEON_RBBM_STATUS));
2030         seq_printf(m, "RBBM_CMDFIFO_STAT 0x%08x\n", RREG32(0xE7C));
2031         seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
2032         for (i = 0; i < 64; i++) {
2033                 WREG32(RADEON_RBBM_CMDFIFO_ADDR, i | 0x100);
2034                 reg = (RREG32(RADEON_RBBM_CMDFIFO_DATA) - 1) >> 2;
2035                 WREG32(RADEON_RBBM_CMDFIFO_ADDR, i);
2036                 value = RREG32(RADEON_RBBM_CMDFIFO_DATA);
2037                 seq_printf(m, "[0x%03X] 0x%04X=0x%08X\n", i, reg, value);
2038         }
2039         return 0;
2040 }
2041
2042 static int r100_debugfs_cp_ring_info(struct seq_file *m, void *data)
2043 {
2044         struct drm_info_node *node = (struct drm_info_node *) m->private;
2045         struct drm_device *dev = node->minor->dev;
2046         struct radeon_device *rdev = dev->dev_private;
2047         uint32_t rdp, wdp;
2048         unsigned count, i, j;
2049
2050         radeon_ring_free_size(rdev);
2051         rdp = RREG32(RADEON_CP_RB_RPTR);
2052         wdp = RREG32(RADEON_CP_RB_WPTR);
2053         count = (rdp + rdev->cp.ring_size - wdp) & rdev->cp.ptr_mask;
2054         seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
2055         seq_printf(m, "CP_RB_WPTR 0x%08x\n", wdp);
2056         seq_printf(m, "CP_RB_RPTR 0x%08x\n", rdp);
2057         seq_printf(m, "%u free dwords in ring\n", rdev->cp.ring_free_dw);
2058         seq_printf(m, "%u dwords in ring\n", count);
2059         for (j = 0; j <= count; j++) {
2060                 i = (rdp + j) & rdev->cp.ptr_mask;
2061                 seq_printf(m, "r[%04d]=0x%08x\n", i, rdev->cp.ring[i]);
2062         }
2063         return 0;
2064 }
2065
2066
2067 static int r100_debugfs_cp_csq_fifo(struct seq_file *m, void *data)
2068 {
2069         struct drm_info_node *node = (struct drm_info_node *) m->private;
2070         struct drm_device *dev = node->minor->dev;
2071         struct radeon_device *rdev = dev->dev_private;
2072         uint32_t csq_stat, csq2_stat, tmp;
2073         unsigned r_rptr, r_wptr, ib1_rptr, ib1_wptr, ib2_rptr, ib2_wptr;
2074         unsigned i;
2075
2076         seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
2077         seq_printf(m, "CP_CSQ_MODE 0x%08x\n", RREG32(RADEON_CP_CSQ_MODE));
2078         csq_stat = RREG32(RADEON_CP_CSQ_STAT);
2079         csq2_stat = RREG32(RADEON_CP_CSQ2_STAT);
2080         r_rptr = (csq_stat >> 0) & 0x3ff;
2081         r_wptr = (csq_stat >> 10) & 0x3ff;
2082         ib1_rptr = (csq_stat >> 20) & 0x3ff;
2083         ib1_wptr = (csq2_stat >> 0) & 0x3ff;
2084         ib2_rptr = (csq2_stat >> 10) & 0x3ff;
2085         ib2_wptr = (csq2_stat >> 20) & 0x3ff;
2086         seq_printf(m, "CP_CSQ_STAT 0x%08x\n", csq_stat);
2087         seq_printf(m, "CP_CSQ2_STAT 0x%08x\n", csq2_stat);
2088         seq_printf(m, "Ring rptr %u\n", r_rptr);
2089         seq_printf(m, "Ring wptr %u\n", r_wptr);
2090         seq_printf(m, "Indirect1 rptr %u\n", ib1_rptr);
2091         seq_printf(m, "Indirect1 wptr %u\n", ib1_wptr);
2092         seq_printf(m, "Indirect2 rptr %u\n", ib2_rptr);
2093         seq_printf(m, "Indirect2 wptr %u\n", ib2_wptr);
2094         /* FIXME: 0, 128, 640 depends on fifo setup see cp_init_kms
2095          * 128 = indirect1_start * 8 & 640 = indirect2_start * 8 */
2096         seq_printf(m, "Ring fifo:\n");
2097         for (i = 0; i < 256; i++) {
2098                 WREG32(RADEON_CP_CSQ_ADDR, i << 2);
2099                 tmp = RREG32(RADEON_CP_CSQ_DATA);
2100                 seq_printf(m, "rfifo[%04d]=0x%08X\n", i, tmp);
2101         }
2102         seq_printf(m, "Indirect1 fifo:\n");
2103         for (i = 256; i <= 512; i++) {
2104                 WREG32(RADEON_CP_CSQ_ADDR, i << 2);
2105                 tmp = RREG32(RADEON_CP_CSQ_DATA);
2106                 seq_printf(m, "ib1fifo[%04d]=0x%08X\n", i, tmp);
2107         }
2108         seq_printf(m, "Indirect2 fifo:\n");
2109         for (i = 640; i < ib1_wptr; i++) {
2110                 WREG32(RADEON_CP_CSQ_ADDR, i << 2);
2111                 tmp = RREG32(RADEON_CP_CSQ_DATA);
2112                 seq_printf(m, "ib2fifo[%04d]=0x%08X\n", i, tmp);
2113         }
2114         return 0;
2115 }
2116
2117 static int r100_debugfs_mc_info(struct seq_file *m, void *data)
2118 {
2119         struct drm_info_node *node = (struct drm_info_node *) m->private;
2120         struct drm_device *dev = node->minor->dev;
2121         struct radeon_device *rdev = dev->dev_private;
2122         uint32_t tmp;
2123
2124         tmp = RREG32(RADEON_CONFIG_MEMSIZE);
2125         seq_printf(m, "CONFIG_MEMSIZE 0x%08x\n", tmp);
2126         tmp = RREG32(RADEON_MC_FB_LOCATION);
2127         seq_printf(m, "MC_FB_LOCATION 0x%08x\n", tmp);
2128         tmp = RREG32(RADEON_BUS_CNTL);
2129         seq_printf(m, "BUS_CNTL 0x%08x\n", tmp);
2130         tmp = RREG32(RADEON_MC_AGP_LOCATION);
2131         seq_printf(m, "MC_AGP_LOCATION 0x%08x\n", tmp);
2132         tmp = RREG32(RADEON_AGP_BASE);
2133         seq_printf(m, "AGP_BASE 0x%08x\n", tmp);
2134         tmp = RREG32(RADEON_HOST_PATH_CNTL);
2135         seq_printf(m, "HOST_PATH_CNTL 0x%08x\n", tmp);
2136         tmp = RREG32(0x01D0);
2137         seq_printf(m, "AIC_CTRL 0x%08x\n", tmp);
2138         tmp = RREG32(RADEON_AIC_LO_ADDR);
2139         seq_printf(m, "AIC_LO_ADDR 0x%08x\n", tmp);
2140         tmp = RREG32(RADEON_AIC_HI_ADDR);
2141         seq_printf(m, "AIC_HI_ADDR 0x%08x\n", tmp);
2142         tmp = RREG32(0x01E4);
2143         seq_printf(m, "AIC_TLB_ADDR 0x%08x\n", tmp);
2144         return 0;
2145 }
2146
2147 static struct drm_info_list r100_debugfs_rbbm_list[] = {
2148         {"r100_rbbm_info", r100_debugfs_rbbm_info, 0, NULL},
2149 };
2150
2151 static struct drm_info_list r100_debugfs_cp_list[] = {
2152         {"r100_cp_ring_info", r100_debugfs_cp_ring_info, 0, NULL},
2153         {"r100_cp_csq_fifo", r100_debugfs_cp_csq_fifo, 0, NULL},
2154 };
2155
2156 static struct drm_info_list r100_debugfs_mc_info_list[] = {
2157         {"r100_mc_info", r100_debugfs_mc_info, 0, NULL},
2158 };
2159 #endif
2160
/*
 * Register the RBBM debugfs file table.
 *
 * Returns the result of radeon_debugfs_add_files(), or 0 when
 * CONFIG_DEBUG_FS is not enabled.
 */
int r100_debugfs_rbbm_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
	/* count derived from the table so the two cannot drift apart */
	return radeon_debugfs_add_files(rdev, r100_debugfs_rbbm_list,
					ARRAY_SIZE(r100_debugfs_rbbm_list));
#else
	return 0;
#endif
}
2169
/*
 * Register the CP debugfs file table.
 *
 * Returns the result of radeon_debugfs_add_files(), or 0 when
 * CONFIG_DEBUG_FS is not enabled.
 */
int r100_debugfs_cp_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
	/* count derived from the table so the two cannot drift apart */
	return radeon_debugfs_add_files(rdev, r100_debugfs_cp_list,
					ARRAY_SIZE(r100_debugfs_cp_list));
#else
	return 0;
#endif
}
2178
/*
 * Register the memory-controller debugfs file table.
 *
 * Returns the result of radeon_debugfs_add_files(), or 0 when
 * CONFIG_DEBUG_FS is not enabled.
 */
int r100_debugfs_mc_info_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
	/* count derived from the table so the two cannot drift apart */
	return radeon_debugfs_add_files(rdev, r100_debugfs_mc_info_list,
					ARRAY_SIZE(r100_debugfs_mc_info_list));
#else
	return 0;
#endif
}
2187
2188 int r100_set_surface_reg(struct radeon_device *rdev, int reg,
2189                          uint32_t tiling_flags, uint32_t pitch,
2190                          uint32_t offset, uint32_t obj_size)
2191 {
2192         int surf_index = reg * 16;
2193         int flags = 0;
2194
2195         /* r100/r200 divide by 16 */
2196         if (rdev->family < CHIP_R300)
2197                 flags = pitch / 16;
2198         else
2199                 flags = pitch / 8;
2200
2201         if (rdev->family <= CHIP_RS200) {
2202                 if ((tiling_flags & (RADEON_TILING_MACRO|RADEON_TILING_MICRO))
2203                                  == (RADEON_TILING_MACRO|RADEON_TILING_MICRO))
2204                         flags |= RADEON_SURF_TILE_COLOR_BOTH;
2205                 if (tiling_flags & RADEON_TILING_MACRO)
2206                         flags |= RADEON_SURF_TILE_COLOR_MACRO;
2207         } else if (rdev->family <= CHIP_RV280) {
2208                 if (tiling_flags & (RADEON_TILING_MACRO))
2209                         flags |= R200_SURF_TILE_COLOR_MACRO;
2210                 if (tiling_flags & RADEON_TILING_MICRO)
2211                         flags |= R200_SURF_TILE_COLOR_MICRO;
2212         } else {
2213                 if (tiling_flags & RADEON_TILING_MACRO)
2214                         flags |= R300_SURF_TILE_MACRO;
2215                 if (tiling_flags & RADEON_TILING_MICRO)
2216                         flags |= R300_SURF_TILE_MICRO;
2217         }
2218
2219         DRM_DEBUG("writing surface %d %d %x %x\n", reg, flags, offset, offset+obj_size-1);
2220         WREG32(RADEON_SURFACE0_INFO + surf_index, flags);
2221         WREG32(RADEON_SURFACE0_LOWER_BOUND + surf_index, offset);
2222         WREG32(RADEON_SURFACE0_UPPER_BOUND + surf_index, offset + obj_size - 1);
2223         return 0;
2224 }
2225
2226 void r100_clear_surface_reg(struct radeon_device *rdev, int reg)
2227 {
2228         int surf_index = reg * 16;
2229         WREG32(RADEON_SURFACE0_INFO + surf_index, 0);
2230 }
2231
2232 void r100_bandwidth_update(struct radeon_device *rdev)
2233 {
2234         fixed20_12 trcd_ff, trp_ff, tras_ff, trbs_ff, tcas_ff;
2235         fixed20_12 sclk_ff, mclk_ff, sclk_eff_ff, sclk_delay_ff;
2236         fixed20_12 peak_disp_bw, mem_bw, pix_clk, pix_clk2, temp_ff, crit_point_ff;
2237         uint32_t temp, data, mem_trcd, mem_trp, mem_tras;
2238         fixed20_12 memtcas_ff[8] = {
2239                 fixed_init(1),
2240                 fixed_init(2),
2241                 fixed_init(3),
2242                 fixed_init(0),
2243                 fixed_init_half(1),
2244                 fixed_init_half(2),
2245                 fixed_init(0),
2246         };
2247         fixed20_12 memtcas_rs480_ff[8] = {
2248                 fixed_init(0),
2249                 fixed_init(1),
2250                 fixed_init(2),
2251                 fixed_init(3),
2252                 fixed_init(0),
2253                 fixed_init_half(1),
2254                 fixed_init_half(2),
2255                 fixed_init_half(3),
2256         };
2257         fixed20_12 memtcas2_ff[8] = {
2258                 fixed_init(0),
2259                 fixed_init(1),
2260                 fixed_init(2),
2261                 fixed_init(3),
2262                 fixed_init(4),
2263                 fixed_init(5),
2264                 fixed_init(6),
2265                 fixed_init(7),
2266         };
2267         fixed20_12 memtrbs[8] = {
2268                 fixed_init(1),
2269                 fixed_init_half(1),
2270                 fixed_init(2),
2271                 fixed_init_half(2),
2272                 fixed_init(3),
2273                 fixed_init_half(3),
2274                 fixed_init(4),
2275                 fixed_init_half(4)
2276         };
2277         fixed20_12 memtrbs_r4xx[8] = {
2278                 fixed_init(4),
2279                 fixed_init(5),
2280                 fixed_init(6),
2281                 fixed_init(7),
2282                 fixed_init(8),
2283                 fixed_init(9),
2284                 fixed_init(10),
2285                 fixed_init(11)
2286         };
2287         fixed20_12 min_mem_eff;
2288         fixed20_12 mc_latency_sclk, mc_latency_mclk, k1;
2289         fixed20_12 cur_latency_mclk, cur_latency_sclk;
2290         fixed20_12 disp_latency, disp_latency_overhead, disp_drain_rate,
2291                 disp_drain_rate2, read_return_rate;
2292         fixed20_12 time_disp1_drop_priority;
2293         int c;
2294         int cur_size = 16;       /* in octawords */
2295         int critical_point = 0, critical_point2;
2296 /*      uint32_t read_return_rate, time_disp1_drop_priority; */
2297         int stop_req, max_stop_req;
2298         struct drm_display_mode *mode1 = NULL;
2299         struct drm_display_mode *mode2 = NULL;
2300         uint32_t pixel_bytes1 = 0;
2301         uint32_t pixel_bytes2 = 0;
2302
2303         if (rdev->mode_info.crtcs[0]->base.enabled) {
2304                 mode1 = &rdev->mode_info.crtcs[0]->base.mode;
2305                 pixel_bytes1 = rdev->mode_info.crtcs[0]->base.fb->bits_per_pixel / 8;
2306         }
2307         if (rdev->mode_info.crtcs[1]->base.enabled) {
2308                 mode2 = &rdev->mode_info.crtcs[1]->base.mode;
2309                 pixel_bytes2 = rdev->mode_info.crtcs[1]->base.fb->bits_per_pixel / 8;
2310         }
2311
2312         min_mem_eff.full = rfixed_const_8(0);
2313         /* get modes */
2314         if ((rdev->disp_priority == 2) && ASIC_IS_R300(rdev)) {
2315                 uint32_t mc_init_misc_lat_timer = RREG32(R300_MC_INIT_MISC_LAT_TIMER);
2316                 mc_init_misc_lat_timer &= ~(R300_MC_DISP1R_INIT_LAT_MASK << R300_MC_DISP1R_INIT_LAT_SHIFT);
2317                 mc_init_misc_lat_timer &= ~(R300_MC_DISP0R_INIT_LAT_MASK << R300_MC_DISP0R_INIT_LAT_SHIFT);
2318                 /* check crtc enables */
2319                 if (mode2)
2320                         mc_init_misc_lat_timer |= (1 << R300_MC_DISP1R_INIT_LAT_SHIFT);
2321                 if (mode1)
2322                         mc_init_misc_lat_timer |= (1 << R300_MC_DISP0R_INIT_LAT_SHIFT);
2323                 WREG32(R300_MC_INIT_MISC_LAT_TIMER, mc_init_misc_lat_timer);
2324         }
2325
2326         /*
2327          * determine is there is enough bw for current mode
2328          */
2329         mclk_ff.full = rfixed_const(rdev->clock.default_mclk);
2330         temp_ff.full = rfixed_const(100);
2331         mclk_ff.full = rfixed_div(mclk_ff, temp_ff);
2332         sclk_ff.full = rfixed_const(rdev->clock.default_sclk);
2333         sclk_ff.full = rfixed_div(sclk_ff, temp_ff);
2334
2335         temp = (rdev->mc.vram_width / 8) * (rdev->mc.vram_is_ddr ? 2 : 1);
2336         temp_ff.full = rfixed_const(temp);
2337         mem_bw.full = rfixed_mul(mclk_ff, temp_ff);
2338
2339         pix_clk.full = 0;
2340         pix_clk2.full = 0;
2341         peak_disp_bw.full = 0;
2342         if (mode1) {
2343                 temp_ff.full = rfixed_const(1000);
2344                 pix_clk.full = rfixed_const(mode1->clock); /* convert to fixed point */
2345                 pix_clk.full = rfixed_div(pix_clk, temp_ff);
2346                 temp_ff.full = rfixed_const(pixel_bytes1);
2347                 peak_disp_bw.full += rfixed_mul(pix_clk, temp_ff);
2348         }
2349         if (mode2) {
2350                 temp_ff.full = rfixed_const(1000);
2351                 pix_clk2.full = rfixed_const(mode2->clock); /* convert to fixed point */
2352                 pix_clk2.full = rfixed_div(pix_clk2, temp_ff);
2353                 temp_ff.full = rfixed_const(pixel_bytes2);
2354                 peak_disp_bw.full += rfixed_mul(pix_clk2, temp_ff);
2355         }
2356
2357         mem_bw.full = rfixed_mul(mem_bw, min_mem_eff);
2358         if (peak_disp_bw.full >= mem_bw.full) {
2359                 DRM_ERROR("You may not have enough display bandwidth for current mode\n"
2360                           "If you have flickering problem, try to lower resolution, refresh rate, or color depth\n");
2361         }
2362
2363         /*  Get values from the EXT_MEM_CNTL register...converting its contents. */
2364         temp = RREG32(RADEON_MEM_TIMING_CNTL);
2365         if ((rdev->family == CHIP_RV100) || (rdev->flags & RADEON_IS_IGP)) { /* RV100, M6, IGPs */
2366                 mem_trcd = ((temp >> 2) & 0x3) + 1;
2367                 mem_trp  = ((temp & 0x3)) + 1;
2368                 mem_tras = ((temp & 0x70) >> 4) + 1;
2369         } else if (rdev->family == CHIP_R300 ||
2370                    rdev->family == CHIP_R350) { /* r300, r350 */
2371                 mem_trcd = (temp & 0x7) + 1;
2372                 mem_trp = ((temp >> 8) & 0x7) + 1;
2373                 mem_tras = ((temp >> 11) & 0xf) + 4;
2374         } else if (rdev->family == CHIP_RV350 ||
2375                    rdev->family <= CHIP_RV380) {
2376                 /* rv3x0 */
2377                 mem_trcd = (temp & 0x7) + 3;
2378                 mem_trp = ((temp >> 8) & 0x7) + 3;
2379                 mem_tras = ((temp >> 11) & 0xf) + 6;
2380         } else if (rdev->family == CHIP_R420 ||
2381                    rdev->family == CHIP_R423 ||
2382                    rdev->family == CHIP_RV410) {
2383                 /* r4xx */
2384                 mem_trcd = (temp & 0xf) + 3;
2385                 if (mem_trcd > 15)
2386                         mem_trcd = 15;
2387                 mem_trp = ((temp >> 8) & 0xf) + 3;
2388                 if (mem_trp > 15)
2389                         mem_trp = 15;
2390                 mem_tras = ((temp >> 12) & 0x1f) + 6;
2391                 if (mem_tras > 31)
2392                         mem_tras = 31;
2393         } else { /* RV200, R200 */
2394                 mem_trcd = (temp & 0x7) + 1;
2395                 mem_trp = ((temp >> 8) & 0x7) + 1;
2396                 mem_tras = ((temp >> 12) & 0xf) + 4;
2397         }
2398         /* convert to FF */
2399         trcd_ff.full = rfixed_const(mem_trcd);
2400         trp_ff.full = rfixed_const(mem_trp);
2401         tras_ff.full = rfixed_const(mem_tras);
2402
2403         /* Get values from the MEM_SDRAM_MODE_REG register...converting its */
2404         temp = RREG32(RADEON_MEM_SDRAM_MODE_REG);
2405         data = (temp & (7 << 20)) >> 20;
2406         if ((rdev->family == CHIP_RV100) || rdev->flags & RADEON_IS_IGP) {
2407                 if (rdev->family == CHIP_RS480) /* don't think rs400 */
2408                         tcas_ff = memtcas_rs480_ff[data];
2409                 else
2410                         tcas_ff = memtcas_ff[data];
2411         } else
2412                 tcas_ff = memtcas2_ff[data];
2413
2414         if (rdev->family == CHIP_RS400 ||
2415             rdev->family == CHIP_RS480) {
2416                 /* extra cas latency stored in bits 23-25 0-4 clocks */
2417                 data = (temp >> 23) & 0x7;
2418                 if (data < 5)
2419                         tcas_ff.full += rfixed_const(data);
2420         }
2421
2422         if (ASIC_IS_R300(rdev) && !(rdev->flags & RADEON_IS_IGP)) {
2423                 /* on the R300, Tcas is included in Trbs.
2424                  */
2425                 temp = RREG32(RADEON_MEM_CNTL);
2426                 data = (R300_MEM_NUM_CHANNELS_MASK & temp);
2427                 if (data == 1) {
2428                         if (R300_MEM_USE_CD_CH_ONLY & temp) {
2429                                 temp = RREG32(R300_MC_IND_INDEX);
2430                                 temp &= ~R300_MC_IND_ADDR_MASK;
2431                                 temp |= R300_MC_READ_CNTL_CD_mcind;
2432                                 WREG32(R300_MC_IND_INDEX, temp);
2433                                 temp = RREG32(R300_MC_IND_DATA);
2434                                 data = (R300_MEM_RBS_POSITION_C_MASK & temp);
2435                         } else {
2436                                 temp = RREG32(R300_MC_READ_CNTL_AB);
2437                                 data = (R300_MEM_RBS_POSITION_A_MASK & temp);
2438                         }
2439                 } else {
2440                         temp = RREG32(R300_MC_READ_CNTL_AB);
2441                         data = (R300_MEM_RBS_POSITION_A_MASK & temp);
2442                 }
2443                 if (rdev->family == CHIP_RV410 ||
2444                     rdev->family == CHIP_R420 ||
2445                     rdev->family == CHIP_R423)
2446                         trbs_ff = memtrbs_r4xx[data];
2447                 else
2448                         trbs_ff = memtrbs[data];
2449                 tcas_ff.full += trbs_ff.full;
2450         }
2451
2452         sclk_eff_ff.full = sclk_ff.full;
2453
2454         if (rdev->flags & RADEON_IS_AGP) {
2455                 fixed20_12 agpmode_ff;
2456                 agpmode_ff.full = rfixed_const(radeon_agpmode);
2457                 temp_ff.full = rfixed_const_666(16);
2458                 sclk_eff_ff.full -= rfixed_mul(agpmode_ff, temp_ff);
2459         }
2460         /* TODO PCIE lanes may affect this - agpmode == 16?? */
2461
2462         if (ASIC_IS_R300(rdev)) {
2463                 sclk_delay_ff.full = rfixed_const(250);
2464         } else {
2465                 if ((rdev->family == CHIP_RV100) ||
2466                     rdev->flags & RADEON_IS_IGP) {
2467                         if (rdev->mc.vram_is_ddr)
2468                                 sclk_delay_ff.full = rfixed_const(41);
2469                         else
2470                                 sclk_delay_ff.full = rfixed_const(33);
2471                 } else {
2472                         if (rdev->mc.vram_width == 128)
2473                                 sclk_delay_ff.full = rfixed_const(57);
2474                         else
2475                                 sclk_delay_ff.full = rfixed_const(41);
2476                 }
2477         }
2478
2479         mc_latency_sclk.full = rfixed_div(sclk_delay_ff, sclk_eff_ff);
2480
2481         if (rdev->mc.vram_is_ddr) {
2482                 if (rdev->mc.vram_width == 32) {
2483                         k1.full = rfixed_const(40);
2484                         c  = 3;
2485                 } else {
2486                         k1.full = rfixed_const(20);
2487                         c  = 1;
2488                 }
2489         } else {
2490                 k1.full = rfixed_const(40);
2491                 c  = 3;
2492         }
2493
2494         temp_ff.full = rfixed_const(2);
2495         mc_latency_mclk.full = rfixed_mul(trcd_ff, temp_ff);
2496         temp_ff.full = rfixed_const(c);
2497         mc_latency_mclk.full += rfixed_mul(tcas_ff, temp_ff);
2498         temp_ff.full = rfixed_const(4);
2499         mc_latency_mclk.full += rfixed_mul(tras_ff, temp_ff);
2500         mc_latency_mclk.full += rfixed_mul(trp_ff, temp_ff);
2501         mc_latency_mclk.full += k1.full;
2502
2503         mc_latency_mclk.full = rfixed_div(mc_latency_mclk, mclk_ff);
2504         mc_latency_mclk.full += rfixed_div(temp_ff, sclk_eff_ff);
2505
2506         /*
2507           HW cursor time assuming worst case of full size colour cursor.
2508         */
2509         temp_ff.full = rfixed_const((2 * (cur_size - (rdev->mc.vram_is_ddr + 1))));
2510         temp_ff.full += trcd_ff.full;
2511         if (temp_ff.full < tras_ff.full)
2512                 temp_ff.full = tras_ff.full;
2513         cur_latency_mclk.full = rfixed_div(temp_ff, mclk_ff);
2514
2515         temp_ff.full = rfixed_const(cur_size);
2516         cur_latency_sclk.full = rfixed_div(temp_ff, sclk_eff_ff);
2517         /*
2518           Find the total latency for the display data.
2519         */
2520         disp_latency_overhead.full = rfixed_const(80);
2521         disp_latency_overhead.full = rfixed_div(disp_latency_overhead, sclk_ff);
2522         mc_latency_mclk.full += disp_latency_overhead.full + cur_latency_mclk.full;
2523         mc_latency_sclk.full += disp_latency_overhead.full + cur_latency_sclk.full;
2524
2525         if (mc_latency_mclk.full > mc_latency_sclk.full)
2526                 disp_latency.full = mc_latency_mclk.full;
2527         else
2528                 disp_latency.full = mc_latency_sclk.full;
2529
2530         /* setup Max GRPH_STOP_REQ default value */
2531         if (ASIC_IS_RV100(rdev))
2532                 max_stop_req = 0x5c;
2533         else
2534                 max_stop_req = 0x7c;
2535
2536         if (mode1) {
2537                 /*  CRTC1
2538                     Set GRPH_BUFFER_CNTL register using h/w defined optimal values.
2539                     GRPH_STOP_REQ <= MIN[ 0x7C, (CRTC_H_DISP + 1) * (bit depth) / 0x10 ]
2540                 */
2541                 stop_req = mode1->hdisplay * pixel_bytes1 / 16;
2542
2543                 if (stop_req > max_stop_req)
2544                         stop_req = max_stop_req;
2545
2546                 /*
2547                   Find the drain rate of the display buffer.
2548                 */
2549                 temp_ff.full = rfixed_const((16/pixel_bytes1));
2550                 disp_drain_rate.full = rfixed_div(pix_clk, temp_ff);
2551
2552                 /*
2553                   Find the critical point of the display buffer.
2554                 */
2555                 crit_point_ff.full = rfixed_mul(disp_drain_rate, disp_latency);
2556                 crit_point_ff.full += rfixed_const_half(0);
2557
2558                 critical_point = rfixed_trunc(crit_point_ff);
2559
2560                 if (rdev->disp_priority == 2) {
2561                         critical_point = 0;
2562                 }
2563
2564                 /*
2565                   The critical point should never be above max_stop_req-4.  Setting
2566                   GRPH_CRITICAL_CNTL = 0 will thus force high priority all the time.
2567                 */
2568                 if (max_stop_req - critical_point < 4)
2569                         critical_point = 0;
2570
2571                 if (critical_point == 0 && mode2 && rdev->family == CHIP_R300) {
2572                         /* some R300 cards have problem with this set to 0, when CRTC2 is enabled.*/
2573                         critical_point = 0x10;
2574                 }
2575
2576                 temp = RREG32(RADEON_GRPH_BUFFER_CNTL);
2577                 temp &= ~(RADEON_GRPH_STOP_REQ_MASK);
2578                 temp |= (stop_req << RADEON_GRPH_STOP_REQ_SHIFT);
2579                 temp &= ~(RADEON_GRPH_START_REQ_MASK);
2580                 if ((rdev->family == CHIP_R350) &&
2581                     (stop_req > 0x15)) {
2582                         stop_req -= 0x10;
2583                 }
2584                 temp |= (stop_req << RADEON_GRPH_START_REQ_SHIFT);
2585                 temp |= RADEON_GRPH_BUFFER_SIZE;
2586                 temp &= ~(RADEON_GRPH_CRITICAL_CNTL   |
2587                           RADEON_GRPH_CRITICAL_AT_SOF |
2588                           RADEON_GRPH_STOP_CNTL);
2589                 /*
2590                   Write the result into the register.
2591                 */
2592                 WREG32(RADEON_GRPH_BUFFER_CNTL, ((temp & ~RADEON_GRPH_CRITICAL_POINT_MASK) |
2593                                                        (critical_point << RADEON_GRPH_CRITICAL_POINT_SHIFT)));
2594
2595 #if 0
2596                 if ((rdev->family == CHIP_RS400) ||
2597                     (rdev->family == CHIP_RS480)) {
2598                         /* attempt to program RS400 disp regs correctly ??? */
2599                         temp = RREG32(RS400_DISP1_REG_CNTL);
2600                         temp &= ~(RS400_DISP1_START_REQ_LEVEL_MASK |
2601                                   RS400_DISP1_STOP_REQ_LEVEL_MASK);
2602                         WREG32(RS400_DISP1_REQ_CNTL1, (temp |
2603                                                        (critical_point << RS400_DISP1_START_REQ_LEVEL_SHIFT) |
2604                                                        (critical_point << RS400_DISP1_STOP_REQ_LEVEL_SHIFT)));
2605                         temp = RREG32(RS400_DMIF_MEM_CNTL1);
2606                         temp &= ~(RS400_DISP1_CRITICAL_POINT_START_MASK |
2607                                   RS400_DISP1_CRITICAL_POINT_STOP_MASK);
2608                         WREG32(RS400_DMIF_MEM_CNTL1, (temp |
2609                                                       (critical_point << RS400_DISP1_CRITICAL_POINT_START_SHIFT) |
2610                                                       (critical_point << RS400_DISP1_CRITICAL_POINT_STOP_SHIFT)));
2611                 }
2612 #endif
2613
2614                 DRM_DEBUG("GRPH_BUFFER_CNTL from to %x\n",
2615                           /*      (unsigned int)info->SavedReg->grph_buffer_cntl, */
2616                           (unsigned int)RREG32(RADEON_GRPH_BUFFER_CNTL));
2617         }
2618
2619         if (mode2) {
2620                 u32 grph2_cntl;
2621                 stop_req = mode2->hdisplay * pixel_bytes2 / 16;
2622
2623                 if (stop_req > max_stop_req)
2624                         stop_req = max_stop_req;
2625
2626                 /*
2627                   Find the drain rate of the display buffer.
2628                 */
2629                 temp_ff.full = rfixed_const((16/pixel_bytes2));
2630                 disp_drain_rate2.full = rfixed_div(pix_clk2, temp_ff);
2631
2632                 grph2_cntl = RREG32(RADEON_GRPH2_BUFFER_CNTL);
2633                 grph2_cntl &= ~(RADEON_GRPH_STOP_REQ_MASK);
2634                 grph2_cntl |= (stop_req << RADEON_GRPH_STOP_REQ_SHIFT);
2635                 grph2_cntl &= ~(RADEON_GRPH_START_REQ_MASK);
2636                 if ((rdev->family == CHIP_R350) &&
2637                     (stop_req > 0x15)) {
2638                         stop_req -= 0x10;
2639                 }
2640                 grph2_cntl |= (stop_req << RADEON_GRPH_START_REQ_SHIFT);
2641                 grph2_cntl |= RADEON_GRPH_BUFFER_SIZE;
2642                 grph2_cntl &= ~(RADEON_GRPH_CRITICAL_CNTL   |
2643                           RADEON_GRPH_CRITICAL_AT_SOF |
2644                           RADEON_GRPH_STOP_CNTL);
2645
2646                 if ((rdev->family == CHIP_RS100) ||
2647                     (rdev->family == CHIP_RS200))
2648                         critical_point2 = 0;
2649                 else {
2650                         temp = (rdev->mc.vram_width * rdev->mc.vram_is_ddr + 1)/128;
2651                         temp_ff.full = rfixed_const(temp);
2652                         temp_ff.full = rfixed_mul(mclk_ff, temp_ff);
2653                         if (sclk_ff.full < temp_ff.full)
2654                                 temp_ff.full = sclk_ff.full;
2655
2656                         read_return_rate.full = temp_ff.full;
2657
2658                         if (mode1) {
2659                                 temp_ff.full = read_return_rate.full - disp_drain_rate.full;
2660                                 time_disp1_drop_priority.full = rfixed_div(crit_point_ff, temp_ff);
2661                         } else {
2662                                 time_disp1_drop_priority.full = 0;
2663                         }
2664                         crit_point_ff.full = disp_latency.full + time_disp1_drop_priority.full + disp_latency.full;
2665                         crit_point_ff.full = rfixed_mul(crit_point_ff, disp_drain_rate2);
2666                         crit_point_ff.full += rfixed_const_half(0);
2667
2668                         critical_point2 = rfixed_trunc(crit_point_ff);
2669
2670                         if (rdev->disp_priority == 2) {
2671                                 critical_point2 = 0;
2672                         }
2673
2674                         if (max_stop_req - critical_point2 < 4)
2675                                 critical_point2 = 0;
2676
2677                 }
2678
2679                 if (critical_point2 == 0 && rdev->family == CHIP_R300) {
2680                         /* some R300 cards have problem with this set to 0 */
2681                         critical_point2 = 0x10;
2682                 }
2683
2684                 WREG32(RADEON_GRPH2_BUFFER_CNTL, ((grph2_cntl & ~RADEON_GRPH_CRITICAL_POINT_MASK) |
2685                                                   (critical_point2 << RADEON_GRPH_CRITICAL_POINT_SHIFT)));
2686
2687                 if ((rdev->family == CHIP_RS400) ||
2688                     (rdev->family == CHIP_RS480)) {
2689 #if 0
2690                         /* attempt to program RS400 disp2 regs correctly ??? */
2691                         temp = RREG32(RS400_DISP2_REQ_CNTL1);
2692                         temp &= ~(RS400_DISP2_START_REQ_LEVEL_MASK |
2693                                   RS400_DISP2_STOP_REQ_LEVEL_MASK);
2694                         WREG32(RS400_DISP2_REQ_CNTL1, (temp |
2695                                                        (critical_point2 << RS400_DISP1_START_REQ_LEVEL_SHIFT) |
2696                                                        (critical_point2 << RS400_DISP1_STOP_REQ_LEVEL_SHIFT)));
2697                         temp = RREG32(RS400_DISP2_REQ_CNTL2);
2698                         temp &= ~(RS400_DISP2_CRITICAL_POINT_START_MASK |
2699                                   RS400_DISP2_CRITICAL_POINT_STOP_MASK);
2700                         WREG32(RS400_DISP2_REQ_CNTL2, (temp |
2701                                                        (critical_point2 << RS400_DISP2_CRITICAL_POINT_START_SHIFT) |
2702                                                        (critical_point2 << RS400_DISP2_CRITICAL_POINT_STOP_SHIFT)));
2703 #endif
2704                         WREG32(RS400_DISP2_REQ_CNTL1, 0x105DC1CC);
2705                         WREG32(RS400_DISP2_REQ_CNTL2, 0x2749D000);
2706                         WREG32(RS400_DMIF_MEM_CNTL1,  0x29CA71DC);
2707                         WREG32(RS400_DISP1_REQ_CNTL1, 0x28FBC3AC);
2708                 }
2709
2710                 DRM_DEBUG("GRPH2_BUFFER_CNTL from to %x\n",
2711                           (unsigned int)RREG32(RADEON_GRPH2_BUFFER_CNTL));
2712         }
2713 }
2714
2715 static inline void r100_cs_track_texture_print(struct r100_cs_track_texture *t)
2716 {
2717         DRM_ERROR("pitch                      %d\n", t->pitch);
2718         DRM_ERROR("width                      %d\n", t->width);
2719         DRM_ERROR("height                     %d\n", t->height);
2720         DRM_ERROR("num levels                 %d\n", t->num_levels);
2721         DRM_ERROR("depth                      %d\n", t->txdepth);
2722         DRM_ERROR("bpp                        %d\n", t->cpp);
2723         DRM_ERROR("coordinate type            %d\n", t->tex_coord_type);
2724         DRM_ERROR("width round to power of 2  %d\n", t->roundup_w);
2725         DRM_ERROR("height round to power of 2 %d\n", t->roundup_h);
2726 }
2727
2728 static int r100_cs_track_cube(struct radeon_device *rdev,
2729                               struct r100_cs_track *track, unsigned idx)
2730 {
2731         unsigned face, w, h;
2732         struct radeon_object *cube_robj;
2733         unsigned long size;
2734
2735         for (face = 0; face < 5; face++) {
2736                 cube_robj = track->textures[idx].cube_info[face].robj;
2737                 w = track->textures[idx].cube_info[face].width;
2738                 h = track->textures[idx].cube_info[face].height;
2739
2740                 size = w * h;
2741                 size *= track->textures[idx].cpp;
2742
2743                 size += track->textures[idx].cube_info[face].offset;
2744
2745                 if (size > radeon_object_size(cube_robj)) {
2746                         DRM_ERROR("Cube texture offset greater than object size %lu %lu\n",
2747                                   size, radeon_object_size(cube_robj));
2748                         r100_cs_track_texture_print(&track->textures[idx]);
2749                         return -1;
2750                 }
2751         }
2752         return 0;
2753 }
2754
2755 static int r100_cs_track_texture_check(struct radeon_device *rdev,
2756                                        struct r100_cs_track *track)
2757 {
2758         struct radeon_object *robj;
2759         unsigned long size;
2760         unsigned u, i, w, h;
2761         int ret;
2762
2763         for (u = 0; u < track->num_texture; u++) {
2764                 if (!track->textures[u].enabled)
2765                         continue;
2766                 robj = track->textures[u].robj;
2767                 if (robj == NULL) {
2768                         DRM_ERROR("No texture bound to unit %u\n", u);
2769                         return -EINVAL;
2770                 }
2771                 size = 0;
2772                 for (i = 0; i <= track->textures[u].num_levels; i++) {
2773                         if (track->textures[u].use_pitch) {
2774                                 if (rdev->family < CHIP_R300)
2775                                         w = (track->textures[u].pitch / track->textures[u].cpp) / (1 << i);
2776                                 else
2777                                         w = track->textures[u].pitch / (1 << i);
2778                         } else {
2779                                 w = track->textures[u].width / (1 << i);
2780                                 if (rdev->family >= CHIP_RV515)
2781                                         w |= track->textures[u].width_11;
2782                                 if (track->textures[u].roundup_w)
2783                                         w = roundup_pow_of_two(w);
2784                         }
2785                         h = track->textures[u].height / (1 << i);
2786                         if (rdev->family >= CHIP_RV515)
2787                                 h |= track->textures[u].height_11;
2788                         if (track->textures[u].roundup_h)
2789                                 h = roundup_pow_of_two(h);
2790                         size += w * h;
2791                 }
2792                 size *= track->textures[u].cpp;
2793                 switch (track->textures[u].tex_coord_type) {
2794                 case 0:
2795                         break;
2796                 case 1:
2797                         size *= (1 << track->textures[u].txdepth);
2798                         break;
2799                 case 2:
2800                         if (track->separate_cube) {
2801                                 ret = r100_cs_track_cube(rdev, track, u);
2802                                 if (ret)
2803                                         return ret;
2804                         } else
2805                                 size *= 6;
2806                         break;
2807                 default:
2808                         DRM_ERROR("Invalid texture coordinate type %u for unit "
2809                                   "%u\n", track->textures[u].tex_coord_type, u);
2810                         return -EINVAL;
2811                 }
2812                 if (size > radeon_object_size(robj)) {
2813                         DRM_ERROR("Texture of unit %u needs %lu bytes but is "
2814                                   "%lu\n", u, size, radeon_object_size(robj));
2815                         r100_cs_track_texture_print(&track->textures[u]);
2816                         return -EINVAL;
2817                 }
2818         }
2819         return 0;
2820 }
2821
2822 int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track)
2823 {
2824         unsigned i;
2825         unsigned long size;
2826         unsigned prim_walk;
2827         unsigned nverts;
2828
2829         for (i = 0; i < track->num_cb; i++) {
2830                 if (track->cb[i].robj == NULL) {
2831                         DRM_ERROR("[drm] No buffer for color buffer %d !\n", i);
2832                         return -EINVAL;
2833                 }
2834                 size = track->cb[i].pitch * track->cb[i].cpp * track->maxy;
2835                 size += track->cb[i].offset;
2836                 if (size > radeon_object_size(track->cb[i].robj)) {
2837                         DRM_ERROR("[drm] Buffer too small for color buffer %d "
2838                                   "(need %lu have %lu) !\n", i, size,
2839                                   radeon_object_size(track->cb[i].robj));
2840                         DRM_ERROR("[drm] color buffer %d (%u %u %u %u)\n",
2841                                   i, track->cb[i].pitch, track->cb[i].cpp,
2842                                   track->cb[i].offset, track->maxy);
2843                         return -EINVAL;
2844                 }
2845         }
2846         if (track->z_enabled) {
2847                 if (track->zb.robj == NULL) {
2848                         DRM_ERROR("[drm] No buffer for z buffer !\n");
2849                         return -EINVAL;
2850                 }
2851                 size = track->zb.pitch * track->zb.cpp * track->maxy;
2852                 size += track->zb.offset;
2853                 if (size > radeon_object_size(track->zb.robj)) {
2854                         DRM_ERROR("[drm] Buffer too small for z buffer "
2855                                   "(need %lu have %lu) !\n", size,
2856                                   radeon_object_size(track->zb.robj));
2857                         DRM_ERROR("[drm] zbuffer (%u %u %u %u)\n",
2858                                   track->zb.pitch, track->zb.cpp,
2859                                   track->zb.offset, track->maxy);
2860                         return -EINVAL;
2861                 }
2862         }
2863         prim_walk = (track->vap_vf_cntl >> 4) & 0x3;
2864         nverts = (track->vap_vf_cntl >> 16) & 0xFFFF;
2865         switch (prim_walk) {
2866         case 1:
2867                 for (i = 0; i < track->num_arrays; i++) {
2868                         size = track->arrays[i].esize * track->max_indx * 4;
2869                         if (track->arrays[i].robj == NULL) {
2870                                 DRM_ERROR("(PW %u) Vertex array %u no buffer "
2871                                           "bound\n", prim_walk, i);
2872                                 return -EINVAL;
2873                         }
2874                         if (size > radeon_object_size(track->arrays[i].robj)) {
2875                                 DRM_ERROR("(PW %u) Vertex array %u need %lu dwords "
2876                                            "have %lu dwords\n", prim_walk, i,
2877                                            size >> 2,
2878                                            radeon_object_size(track->arrays[i].robj) >> 2);
2879                                 DRM_ERROR("Max indices %u\n", track->max_indx);
2880                                 return -EINVAL;
2881                         }
2882                 }
2883                 break;
2884         case 2:
2885                 for (i = 0; i < track->num_arrays; i++) {
2886                         size = track->arrays[i].esize * (nverts - 1) * 4;
2887                         if (track->arrays[i].robj == NULL) {
2888                                 DRM_ERROR("(PW %u) Vertex array %u no buffer "
2889                                           "bound\n", prim_walk, i);
2890                                 return -EINVAL;
2891                         }
2892                         if (size > radeon_object_size(track->arrays[i].robj)) {
2893                                 DRM_ERROR("(PW %u) Vertex array %u need %lu dwords "
2894                                            "have %lu dwords\n", prim_walk, i, size >> 2,
2895                                            radeon_object_size(track->arrays[i].robj) >> 2);
2896                                 return -EINVAL;
2897                         }
2898                 }
2899                 break;
2900         case 3:
2901                 size = track->vtx_size * nverts;
2902                 if (size != track->immd_dwords) {
2903                         DRM_ERROR("IMMD draw %u dwors but needs %lu dwords\n",
2904                                   track->immd_dwords, size);
2905                         DRM_ERROR("VAP_VF_CNTL.NUM_VERTICES %u, VTX_SIZE %u\n",
2906                                   nverts, track->vtx_size);
2907                         return -EINVAL;
2908                 }
2909                 break;
2910         default:
2911                 DRM_ERROR("[drm] Invalid primitive walk %d for VAP_VF_CNTL\n",
2912                           prim_walk);
2913                 return -EINVAL;
2914         }
2915         return r100_cs_track_texture_check(rdev, track);
2916 }
2917
2918 void r100_cs_track_clear(struct radeon_device *rdev, struct r100_cs_track *track)
2919 {
2920         unsigned i, face;
2921
2922         if (rdev->family < CHIP_R300) {
2923                 track->num_cb = 1;
2924                 if (rdev->family <= CHIP_RS200)
2925                         track->num_texture = 3;
2926                 else
2927                         track->num_texture = 6;
2928                 track->maxy = 2048;
2929                 track->separate_cube = 1;
2930         } else {
2931                 track->num_cb = 4;
2932                 track->num_texture = 16;
2933                 track->maxy = 4096;
2934                 track->separate_cube = 0;
2935         }
2936
2937         for (i = 0; i < track->num_cb; i++) {
2938                 track->cb[i].robj = NULL;
2939                 track->cb[i].pitch = 8192;
2940                 track->cb[i].cpp = 16;
2941                 track->cb[i].offset = 0;
2942         }
2943         track->z_enabled = true;
2944         track->zb.robj = NULL;
2945         track->zb.pitch = 8192;
2946         track->zb.cpp = 4;
2947         track->zb.offset = 0;
2948         track->vtx_size = 0x7F;
2949         track->immd_dwords = 0xFFFFFFFFUL;
2950         track->num_arrays = 11;
2951         track->max_indx = 0x00FFFFFFUL;
2952         for (i = 0; i < track->num_arrays; i++) {
2953                 track->arrays[i].robj = NULL;
2954                 track->arrays[i].esize = 0x7F;
2955         }
2956         for (i = 0; i < track->num_texture; i++) {
2957                 track->textures[i].pitch = 16536;
2958                 track->textures[i].width = 16536;
2959                 track->textures[i].height = 16536;
2960                 track->textures[i].width_11 = 1 << 11;
2961                 track->textures[i].height_11 = 1 << 11;
2962                 track->textures[i].num_levels = 12;
2963                 if (rdev->family <= CHIP_RS200) {
2964                         track->textures[i].tex_coord_type = 0;
2965                         track->textures[i].txdepth = 0;
2966                 } else {
2967                         track->textures[i].txdepth = 16;
2968                         track->textures[i].tex_coord_type = 1;
2969                 }
2970                 track->textures[i].cpp = 64;
2971                 track->textures[i].robj = NULL;
2972                 /* CS IB emission code makes sure texture unit are disabled */
2973                 track->textures[i].enabled = false;
2974                 track->textures[i].roundup_w = true;
2975                 track->textures[i].roundup_h = true;
2976                 if (track->separate_cube)
2977                         for (face = 0; face < 5; face++) {
2978                                 track->textures[i].cube_info[face].robj = NULL;
2979                                 track->textures[i].cube_info[face].width = 16536;
2980                                 track->textures[i].cube_info[face].height = 16536;
2981                                 track->textures[i].cube_info[face].offset = 0;
2982                         }
2983         }
2984 }
2985
2986 int r100_ring_test(struct radeon_device *rdev)
2987 {
2988         uint32_t scratch;
2989         uint32_t tmp = 0;
2990         unsigned i;
2991         int r;
2992
2993         r = radeon_scratch_get(rdev, &scratch);
2994         if (r) {
2995                 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
2996                 return r;
2997         }
2998         WREG32(scratch, 0xCAFEDEAD);
2999         r = radeon_ring_lock(rdev, 2);
3000         if (r) {
3001                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3002                 radeon_scratch_free(rdev, scratch);
3003                 return r;
3004         }
3005         radeon_ring_write(rdev, PACKET0(scratch, 0));
3006         radeon_ring_write(rdev, 0xDEADBEEF);
3007         radeon_ring_unlock_commit(rdev);
3008         for (i = 0; i < rdev->usec_timeout; i++) {
3009                 tmp = RREG32(scratch);
3010                 if (tmp == 0xDEADBEEF) {
3011                         break;
3012                 }
3013                 DRM_UDELAY(1);
3014         }
3015         if (i < rdev->usec_timeout) {
3016                 DRM_INFO("ring test succeeded in %d usecs\n", i);
3017         } else {
3018                 DRM_ERROR("radeon: ring test failed (sracth(0x%04X)=0x%08X)\n",
3019                           scratch, tmp);
3020                 r = -EINVAL;
3021         }
3022         radeon_scratch_free(rdev, scratch);
3023         return r;
3024 }
3025
3026 void r100_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3027 {
3028         radeon_ring_write(rdev, PACKET0(RADEON_CP_IB_BASE, 1));
3029         radeon_ring_write(rdev, ib->gpu_addr);
3030         radeon_ring_write(rdev, ib->length_dw);
3031 }
3032
3033 int r100_ib_test(struct radeon_device *rdev)
3034 {
3035         struct radeon_ib *ib;
3036         uint32_t scratch;
3037         uint32_t tmp = 0;
3038         unsigned i;
3039         int r;
3040
3041         r = radeon_scratch_get(rdev, &scratch);
3042         if (r) {
3043                 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3044                 return r;
3045         }
3046         WREG32(scratch, 0xCAFEDEAD);
3047         r = radeon_ib_get(rdev, &ib);
3048         if (r) {
3049                 return r;
3050         }
3051         ib->ptr[0] = PACKET0(scratch, 0);
3052         ib->ptr[1] = 0xDEADBEEF;
3053         ib->ptr[2] = PACKET2(0);
3054         ib->ptr[3] = PACKET2(0);
3055         ib->ptr[4] = PACKET2(0);
3056         ib->ptr[5] = PACKET2(0);
3057         ib->ptr[6] = PACKET2(0);
3058         ib->ptr[7] = PACKET2(0);
3059         ib->length_dw = 8;
3060         r = radeon_ib_schedule(rdev, ib);
3061         if (r) {
3062                 radeon_scratch_free(rdev, scratch);
3063                 radeon_ib_free(rdev, &ib);
3064                 return r;
3065         }
3066         r = radeon_fence_wait(ib->fence, false);
3067         if (r) {
3068                 return r;
3069         }
3070         for (i = 0; i < rdev->usec_timeout; i++) {
3071                 tmp = RREG32(scratch);
3072                 if (tmp == 0xDEADBEEF) {
3073                         break;
3074                 }
3075                 DRM_UDELAY(1);
3076         }
3077         if (i < rdev->usec_timeout) {
3078                 DRM_INFO("ib test succeeded in %u usecs\n", i);
3079         } else {
3080                 DRM_ERROR("radeon: ib test failed (sracth(0x%04X)=0x%08X)\n",
3081                           scratch, tmp);
3082                 r = -EINVAL;
3083         }
3084         radeon_scratch_free(rdev, scratch);
3085         radeon_ib_free(rdev, &ib);
3086         return r;
3087 }