0938c21e792d0c89911542cfd9a2ebacecfa5d33
[firefly-linux-kernel-4.4.55.git] / drivers / gpu / drm / radeon / si.c
1 /*
2  * Copyright 2011 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/platform_device.h>
26 #include <linux/slab.h>
27 #include <linux/module.h>
28 #include "drmP.h"
29 #include "radeon.h"
30 #include "radeon_asic.h"
31 #include "radeon_drm.h"
32 #include "sid.h"
33 #include "atom.h"
34
/*
 * Expected firmware image sizes.  si_init_microcode() multiplies each value
 * by 4 and compares the result against the byte size of the loaded image.
 */
#define SI_PFP_UCODE_SIZE 2144
#define SI_PM4_UCODE_SIZE 2144
#define SI_CE_UCODE_SIZE 2144
#define SI_RLC_UCODE_SIZE 2048
#define SI_MC_UCODE_SIZE 7769

/*
 * Firmware files this module may request; the names match the
 * "radeon/<CHIP>_<block>.bin" pattern built in si_init_microcode().
 */
MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
MODULE_FIRMWARE("radeon/TAHITI_me.bin");
MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
MODULE_FIRMWARE("radeon/VERDE_me.bin");
MODULE_FIRMWARE("radeon/VERDE_ce.bin");
MODULE_FIRMWARE("radeon/VERDE_mc.bin");
MODULE_FIRMWARE("radeon/VERDE_rlc.bin");

/* Evergreen helpers; presumably defined in the evergreen code (not visible here). */
extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
60
61 /* get temperature in millidegrees */
62 int si_get_temp(struct radeon_device *rdev)
63 {
64         u32 temp;
65         int actual_temp = 0;
66
67         temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
68                 CTF_TEMP_SHIFT;
69
70         if (temp & 0x200)
71                 actual_temp = 255;
72         else
73                 actual_temp = temp & 0x1ff;
74
75         actual_temp = (actual_temp * 1000);
76
77         return actual_temp;
78 }
79
80 static int si_init_microcode(struct radeon_device *rdev)
81 {
82         struct platform_device *pdev;
83         const char *chip_name;
84         const char *rlc_chip_name;
85         size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
86         char fw_name[30];
87         int err;
88
89         DRM_DEBUG("\n");
90
91         pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
92         err = IS_ERR(pdev);
93         if (err) {
94                 printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
95                 return -EINVAL;
96         }
97
98         switch (rdev->family) {
99         case CHIP_TAHITI:
100                 chip_name = "TAHITI";
101                 rlc_chip_name = "TAHITI";
102                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
103                 me_req_size = SI_PM4_UCODE_SIZE * 4;
104                 ce_req_size = SI_CE_UCODE_SIZE * 4;
105                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
106                 mc_req_size = SI_MC_UCODE_SIZE * 4;
107                 break;
108         case CHIP_PITCAIRN:
109                 chip_name = "PITCAIRN";
110                 rlc_chip_name = "PITCAIRN";
111                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
112                 me_req_size = SI_PM4_UCODE_SIZE * 4;
113                 ce_req_size = SI_CE_UCODE_SIZE * 4;
114                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
115                 mc_req_size = SI_MC_UCODE_SIZE * 4;
116                 break;
117         case CHIP_VERDE:
118                 chip_name = "VERDE";
119                 rlc_chip_name = "VERDE";
120                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
121                 me_req_size = SI_PM4_UCODE_SIZE * 4;
122                 ce_req_size = SI_CE_UCODE_SIZE * 4;
123                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
124                 mc_req_size = SI_MC_UCODE_SIZE * 4;
125                 break;
126         default: BUG();
127         }
128
129         DRM_INFO("Loading %s Microcode\n", chip_name);
130
131         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
132         err = request_firmware(&rdev->pfp_fw, fw_name, &pdev->dev);
133         if (err)
134                 goto out;
135         if (rdev->pfp_fw->size != pfp_req_size) {
136                 printk(KERN_ERR
137                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
138                        rdev->pfp_fw->size, fw_name);
139                 err = -EINVAL;
140                 goto out;
141         }
142
143         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
144         err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
145         if (err)
146                 goto out;
147         if (rdev->me_fw->size != me_req_size) {
148                 printk(KERN_ERR
149                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
150                        rdev->me_fw->size, fw_name);
151                 err = -EINVAL;
152         }
153
154         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
155         err = request_firmware(&rdev->ce_fw, fw_name, &pdev->dev);
156         if (err)
157                 goto out;
158         if (rdev->ce_fw->size != ce_req_size) {
159                 printk(KERN_ERR
160                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
161                        rdev->ce_fw->size, fw_name);
162                 err = -EINVAL;
163         }
164
165         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", rlc_chip_name);
166         err = request_firmware(&rdev->rlc_fw, fw_name, &pdev->dev);
167         if (err)
168                 goto out;
169         if (rdev->rlc_fw->size != rlc_req_size) {
170                 printk(KERN_ERR
171                        "si_rlc: Bogus length %zu in firmware \"%s\"\n",
172                        rdev->rlc_fw->size, fw_name);
173                 err = -EINVAL;
174         }
175
176         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
177         err = request_firmware(&rdev->mc_fw, fw_name, &pdev->dev);
178         if (err)
179                 goto out;
180         if (rdev->mc_fw->size != mc_req_size) {
181                 printk(KERN_ERR
182                        "si_mc: Bogus length %zu in firmware \"%s\"\n",
183                        rdev->mc_fw->size, fw_name);
184                 err = -EINVAL;
185         }
186
187 out:
188         platform_device_unregister(pdev);
189
190         if (err) {
191                 if (err != -EINVAL)
192                         printk(KERN_ERR
193                                "si_cp: Failed to load firmware \"%s\"\n",
194                                fw_name);
195                 release_firmware(rdev->pfp_fw);
196                 rdev->pfp_fw = NULL;
197                 release_firmware(rdev->me_fw);
198                 rdev->me_fw = NULL;
199                 release_firmware(rdev->ce_fw);
200                 rdev->ce_fw = NULL;
201                 release_firmware(rdev->rlc_fw);
202                 rdev->rlc_fw = NULL;
203                 release_firmware(rdev->mc_fw);
204                 rdev->mc_fw = NULL;
205         }
206         return err;
207 }
208
209 /* watermark setup */
210 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
211                                    struct radeon_crtc *radeon_crtc,
212                                    struct drm_display_mode *mode,
213                                    struct drm_display_mode *other_mode)
214 {
215         u32 tmp;
216         /*
217          * Line Buffer Setup
218          * There are 3 line buffers, each one shared by 2 display controllers.
219          * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
220          * the display controllers.  The paritioning is done via one of four
221          * preset allocations specified in bits 21:20:
222          *  0 - half lb
223          *  2 - whole lb, other crtc must be disabled
224          */
225         /* this can get tricky if we have two large displays on a paired group
226          * of crtcs.  Ideally for multiple large displays we'd assign them to
227          * non-linked crtcs for maximum line buffer allocation.
228          */
229         if (radeon_crtc->base.enabled && mode) {
230                 if (other_mode)
231                         tmp = 0; /* 1/2 */
232                 else
233                         tmp = 2; /* whole */
234         } else
235                 tmp = 0;
236
237         WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
238                DC_LB_MEMORY_CONFIG(tmp));
239
240         if (radeon_crtc->base.enabled && mode) {
241                 switch (tmp) {
242                 case 0:
243                 default:
244                         return 4096 * 2;
245                 case 2:
246                         return 8192 * 2;
247                 }
248         }
249
250         /* controller not enabled, so no lb used */
251         return 0;
252 }
253
254 static u32 dce6_get_number_of_dram_channels(struct radeon_device *rdev)
255 {
256         u32 tmp = RREG32(MC_SHARED_CHMAP);
257
258         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
259         case 0:
260         default:
261                 return 1;
262         case 1:
263                 return 2;
264         case 2:
265                 return 4;
266         case 3:
267                 return 8;
268         case 4:
269                 return 3;
270         case 5:
271                 return 6;
272         case 6:
273                 return 10;
274         case 7:
275                 return 12;
276         case 8:
277                 return 16;
278         }
279 }
280
/*
 * Inputs to the DCE6 bandwidth and watermark calculations for one crtc.
 * Filled in by dce6_program_watermarks() and consumed by the dce6_*
 * bandwidth/latency helpers.
 */
struct dce6_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs; used as a divisor by some helpers */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
296
297 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
298 {
299         /* Calculate raw DRAM Bandwidth */
300         fixed20_12 dram_efficiency; /* 0.7 */
301         fixed20_12 yclk, dram_channels, bandwidth;
302         fixed20_12 a;
303
304         a.full = dfixed_const(1000);
305         yclk.full = dfixed_const(wm->yclk);
306         yclk.full = dfixed_div(yclk, a);
307         dram_channels.full = dfixed_const(wm->dram_channels * 4);
308         a.full = dfixed_const(10);
309         dram_efficiency.full = dfixed_const(7);
310         dram_efficiency.full = dfixed_div(dram_efficiency, a);
311         bandwidth.full = dfixed_mul(dram_channels, yclk);
312         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
313
314         return dfixed_trunc(bandwidth);
315 }
316
317 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
318 {
319         /* Calculate DRAM Bandwidth and the part allocated to display. */
320         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
321         fixed20_12 yclk, dram_channels, bandwidth;
322         fixed20_12 a;
323
324         a.full = dfixed_const(1000);
325         yclk.full = dfixed_const(wm->yclk);
326         yclk.full = dfixed_div(yclk, a);
327         dram_channels.full = dfixed_const(wm->dram_channels * 4);
328         a.full = dfixed_const(10);
329         disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
330         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
331         bandwidth.full = dfixed_mul(dram_channels, yclk);
332         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
333
334         return dfixed_trunc(bandwidth);
335 }
336
337 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
338 {
339         /* Calculate the display Data return Bandwidth */
340         fixed20_12 return_efficiency; /* 0.8 */
341         fixed20_12 sclk, bandwidth;
342         fixed20_12 a;
343
344         a.full = dfixed_const(1000);
345         sclk.full = dfixed_const(wm->sclk);
346         sclk.full = dfixed_div(sclk, a);
347         a.full = dfixed_const(10);
348         return_efficiency.full = dfixed_const(8);
349         return_efficiency.full = dfixed_div(return_efficiency, a);
350         a.full = dfixed_const(32);
351         bandwidth.full = dfixed_mul(a, sclk);
352         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
353
354         return dfixed_trunc(bandwidth);
355 }
356
357 static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
358 {
359         return 32;
360 }
361
362 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
363 {
364         /* Calculate the DMIF Request Bandwidth */
365         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
366         fixed20_12 disp_clk, sclk, bandwidth;
367         fixed20_12 a, b1, b2;
368         u32 min_bandwidth;
369
370         a.full = dfixed_const(1000);
371         disp_clk.full = dfixed_const(wm->disp_clk);
372         disp_clk.full = dfixed_div(disp_clk, a);
373         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
374         b1.full = dfixed_mul(a, disp_clk);
375
376         a.full = dfixed_const(1000);
377         sclk.full = dfixed_const(wm->sclk);
378         sclk.full = dfixed_div(sclk, a);
379         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
380         b2.full = dfixed_mul(a, sclk);
381
382         a.full = dfixed_const(10);
383         disp_clk_request_efficiency.full = dfixed_const(8);
384         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
385
386         min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
387
388         a.full = dfixed_const(min_bandwidth);
389         bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
390
391         return dfixed_trunc(bandwidth);
392 }
393
394 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
395 {
396         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
397         u32 dram_bandwidth = dce6_dram_bandwidth(wm);
398         u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
399         u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
400
401         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
402 }
403
404 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
405 {
406         /* Calculate the display mode Average Bandwidth
407          * DisplayMode should contain the source and destination dimensions,
408          * timing, etc.
409          */
410         fixed20_12 bpp;
411         fixed20_12 line_time;
412         fixed20_12 src_width;
413         fixed20_12 bandwidth;
414         fixed20_12 a;
415
416         a.full = dfixed_const(1000);
417         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
418         line_time.full = dfixed_div(line_time, a);
419         bpp.full = dfixed_const(wm->bytes_per_pixel);
420         src_width.full = dfixed_const(wm->src_width);
421         bandwidth.full = dfixed_mul(src_width, bpp);
422         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
423         bandwidth.full = dfixed_div(bandwidth, line_time);
424
425         return dfixed_trunc(bandwidth);
426 }
427
428 static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
429 {
430         /* First calcualte the latency in ns */
431         u32 mc_latency = 2000; /* 2000 ns. */
432         u32 available_bandwidth = dce6_available_bandwidth(wm);
433         u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
434         u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
435         u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
436         u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
437                 (wm->num_heads * cursor_line_pair_return_time);
438         u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
439         u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
440         u32 tmp, dmif_size = 12288;
441         fixed20_12 a, b, c;
442
443         if (wm->num_heads == 0)
444                 return 0;
445
446         a.full = dfixed_const(2);
447         b.full = dfixed_const(1);
448         if ((wm->vsc.full > a.full) ||
449             ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
450             (wm->vtaps >= 5) ||
451             ((wm->vsc.full >= a.full) && wm->interlaced))
452                 max_src_lines_per_dst_line = 4;
453         else
454                 max_src_lines_per_dst_line = 2;
455
456         a.full = dfixed_const(available_bandwidth);
457         b.full = dfixed_const(wm->num_heads);
458         a.full = dfixed_div(a, b);
459
460         b.full = dfixed_const(mc_latency + 512);
461         c.full = dfixed_const(wm->disp_clk);
462         b.full = dfixed_div(b, c);
463
464         c.full = dfixed_const(dmif_size);
465         b.full = dfixed_div(c, b);
466
467         tmp = min(dfixed_trunc(a), dfixed_trunc(b));
468
469         b.full = dfixed_const(1000);
470         c.full = dfixed_const(wm->disp_clk);
471         b.full = dfixed_div(c, b);
472         c.full = dfixed_const(wm->bytes_per_pixel);
473         b.full = dfixed_mul(b, c);
474
475         lb_fill_bw = min(tmp, dfixed_trunc(b));
476
477         a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
478         b.full = dfixed_const(1000);
479         c.full = dfixed_const(lb_fill_bw);
480         b.full = dfixed_div(c, b);
481         a.full = dfixed_div(a, b);
482         line_fill_time = dfixed_trunc(a);
483
484         if (line_fill_time < wm->active_time)
485                 return latency;
486         else
487                 return latency + (line_fill_time - wm->active_time);
488
489 }
490
491 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
492 {
493         if (dce6_average_bandwidth(wm) <=
494             (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
495                 return true;
496         else
497                 return false;
498 };
499
500 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
501 {
502         if (dce6_average_bandwidth(wm) <=
503             (dce6_available_bandwidth(wm) / wm->num_heads))
504                 return true;
505         else
506                 return false;
507 };
508
509 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
510 {
511         u32 lb_partitions = wm->lb_size / wm->src_width;
512         u32 line_time = wm->active_time + wm->blank_time;
513         u32 latency_tolerant_lines;
514         u32 latency_hiding;
515         fixed20_12 a;
516
517         a.full = dfixed_const(1);
518         if (wm->vsc.full > a.full)
519                 latency_tolerant_lines = 1;
520         else {
521                 if (lb_partitions <= (wm->vtaps + 1))
522                         latency_tolerant_lines = 1;
523                 else
524                         latency_tolerant_lines = 2;
525         }
526
527         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
528
529         if (dce6_latency_watermark(wm) <= latency_hiding)
530                 return true;
531         else
532                 return false;
533 }
534
535 static void dce6_program_watermarks(struct radeon_device *rdev,
536                                          struct radeon_crtc *radeon_crtc,
537                                          u32 lb_size, u32 num_heads)
538 {
539         struct drm_display_mode *mode = &radeon_crtc->base.mode;
540         struct dce6_wm_params wm;
541         u32 pixel_period;
542         u32 line_time = 0;
543         u32 latency_watermark_a = 0, latency_watermark_b = 0;
544         u32 priority_a_mark = 0, priority_b_mark = 0;
545         u32 priority_a_cnt = PRIORITY_OFF;
546         u32 priority_b_cnt = PRIORITY_OFF;
547         u32 tmp, arb_control3;
548         fixed20_12 a, b, c;
549
550         if (radeon_crtc->base.enabled && num_heads && mode) {
551                 pixel_period = 1000000 / (u32)mode->clock;
552                 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
553                 priority_a_cnt = 0;
554                 priority_b_cnt = 0;
555
556                 wm.yclk = rdev->pm.current_mclk * 10;
557                 wm.sclk = rdev->pm.current_sclk * 10;
558                 wm.disp_clk = mode->clock;
559                 wm.src_width = mode->crtc_hdisplay;
560                 wm.active_time = mode->crtc_hdisplay * pixel_period;
561                 wm.blank_time = line_time - wm.active_time;
562                 wm.interlaced = false;
563                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
564                         wm.interlaced = true;
565                 wm.vsc = radeon_crtc->vsc;
566                 wm.vtaps = 1;
567                 if (radeon_crtc->rmx_type != RMX_OFF)
568                         wm.vtaps = 2;
569                 wm.bytes_per_pixel = 4; /* XXX: get this from fb config */
570                 wm.lb_size = lb_size;
571                 wm.dram_channels = dce6_get_number_of_dram_channels(rdev);
572                 wm.num_heads = num_heads;
573
574                 /* set for high clocks */
575                 latency_watermark_a = min(dce6_latency_watermark(&wm), (u32)65535);
576                 /* set for low clocks */
577                 /* wm.yclk = low clk; wm.sclk = low clk */
578                 latency_watermark_b = min(dce6_latency_watermark(&wm), (u32)65535);
579
580                 /* possibly force display priority to high */
581                 /* should really do this at mode validation time... */
582                 if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm) ||
583                     !dce6_average_bandwidth_vs_available_bandwidth(&wm) ||
584                     !dce6_check_latency_hiding(&wm) ||
585                     (rdev->disp_priority == 2)) {
586                         DRM_DEBUG_KMS("force priority to high\n");
587                         priority_a_cnt |= PRIORITY_ALWAYS_ON;
588                         priority_b_cnt |= PRIORITY_ALWAYS_ON;
589                 }
590
591                 a.full = dfixed_const(1000);
592                 b.full = dfixed_const(mode->clock);
593                 b.full = dfixed_div(b, a);
594                 c.full = dfixed_const(latency_watermark_a);
595                 c.full = dfixed_mul(c, b);
596                 c.full = dfixed_mul(c, radeon_crtc->hsc);
597                 c.full = dfixed_div(c, a);
598                 a.full = dfixed_const(16);
599                 c.full = dfixed_div(c, a);
600                 priority_a_mark = dfixed_trunc(c);
601                 priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;
602
603                 a.full = dfixed_const(1000);
604                 b.full = dfixed_const(mode->clock);
605                 b.full = dfixed_div(b, a);
606                 c.full = dfixed_const(latency_watermark_b);
607                 c.full = dfixed_mul(c, b);
608                 c.full = dfixed_mul(c, radeon_crtc->hsc);
609                 c.full = dfixed_div(c, a);
610                 a.full = dfixed_const(16);
611                 c.full = dfixed_div(c, a);
612                 priority_b_mark = dfixed_trunc(c);
613                 priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
614         }
615
616         /* select wm A */
617         arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
618         tmp = arb_control3;
619         tmp &= ~LATENCY_WATERMARK_MASK(3);
620         tmp |= LATENCY_WATERMARK_MASK(1);
621         WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
622         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
623                (LATENCY_LOW_WATERMARK(latency_watermark_a) |
624                 LATENCY_HIGH_WATERMARK(line_time)));
625         /* select wm B */
626         tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
627         tmp &= ~LATENCY_WATERMARK_MASK(3);
628         tmp |= LATENCY_WATERMARK_MASK(2);
629         WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
630         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
631                (LATENCY_LOW_WATERMARK(latency_watermark_b) |
632                 LATENCY_HIGH_WATERMARK(line_time)));
633         /* restore original selection */
634         WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);
635
636         /* write the priority marks */
637         WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
638         WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);
639
640 }
641
642 void dce6_bandwidth_update(struct radeon_device *rdev)
643 {
644         struct drm_display_mode *mode0 = NULL;
645         struct drm_display_mode *mode1 = NULL;
646         u32 num_heads = 0, lb_size;
647         int i;
648
649         radeon_update_display_priority(rdev);
650
651         for (i = 0; i < rdev->num_crtc; i++) {
652                 if (rdev->mode_info.crtcs[i]->base.enabled)
653                         num_heads++;
654         }
655         for (i = 0; i < rdev->num_crtc; i += 2) {
656                 mode0 = &rdev->mode_info.crtcs[i]->base.mode;
657                 mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
658                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
659                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
660                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
661                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
662         }
663 }
664
665 /*
666  * Core functions
667  */
668 static u32 si_get_tile_pipe_to_backend_map(struct radeon_device *rdev,
669                                            u32 num_tile_pipes,
670                                            u32 num_backends_per_asic,
671                                            u32 *backend_disable_mask_per_asic,
672                                            u32 num_shader_engines)
673 {
674         u32 backend_map = 0;
675         u32 enabled_backends_mask = 0;
676         u32 enabled_backends_count = 0;
677         u32 num_backends_per_se;
678         u32 cur_pipe;
679         u32 swizzle_pipe[SI_MAX_PIPES];
680         u32 cur_backend = 0;
681         u32 i;
682         bool force_no_swizzle;
683
684         /* force legal values */
685         if (num_tile_pipes < 1)
686                 num_tile_pipes = 1;
687         if (num_tile_pipes > rdev->config.si.max_tile_pipes)
688                 num_tile_pipes = rdev->config.si.max_tile_pipes;
689         if (num_shader_engines < 1)
690                 num_shader_engines = 1;
691         if (num_shader_engines > rdev->config.si.max_shader_engines)
692                 num_shader_engines = rdev->config.si.max_shader_engines;
693         if (num_backends_per_asic < num_shader_engines)
694                 num_backends_per_asic = num_shader_engines;
695         if (num_backends_per_asic > (rdev->config.si.max_backends_per_se * num_shader_engines))
696                 num_backends_per_asic = rdev->config.si.max_backends_per_se * num_shader_engines;
697
698         /* make sure we have the same number of backends per se */
699         num_backends_per_asic = ALIGN(num_backends_per_asic, num_shader_engines);
700         /* set up the number of backends per se */
701         num_backends_per_se = num_backends_per_asic / num_shader_engines;
702         if (num_backends_per_se > rdev->config.si.max_backends_per_se) {
703                 num_backends_per_se = rdev->config.si.max_backends_per_se;
704                 num_backends_per_asic = num_backends_per_se * num_shader_engines;
705         }
706
707         /* create enable mask and count for enabled backends */
708         for (i = 0; i < SI_MAX_BACKENDS; ++i) {
709                 if (((*backend_disable_mask_per_asic >> i) & 1) == 0) {
710                         enabled_backends_mask |= (1 << i);
711                         ++enabled_backends_count;
712                 }
713                 if (enabled_backends_count == num_backends_per_asic)
714                         break;
715         }
716
717         /* force the backends mask to match the current number of backends */
718         if (enabled_backends_count != num_backends_per_asic) {
719                 u32 this_backend_enabled;
720                 u32 shader_engine;
721                 u32 backend_per_se;
722
723                 enabled_backends_mask = 0;
724                 enabled_backends_count = 0;
725                 *backend_disable_mask_per_asic = SI_MAX_BACKENDS_MASK;
726                 for (i = 0; i < SI_MAX_BACKENDS; ++i) {
727                         /* calc the current se */
728                         shader_engine = i / rdev->config.si.max_backends_per_se;
729                         /* calc the backend per se */
730                         backend_per_se = i % rdev->config.si.max_backends_per_se;
731                         /* default to not enabled */
732                         this_backend_enabled = 0;
733                         if ((shader_engine < num_shader_engines) &&
734                             (backend_per_se < num_backends_per_se))
735                                 this_backend_enabled = 1;
736                         if (this_backend_enabled) {
737                                 enabled_backends_mask |= (1 << i);
738                                 *backend_disable_mask_per_asic &= ~(1 << i);
739                                 ++enabled_backends_count;
740                         }
741                 }
742         }
743
744
745         memset((uint8_t *)&swizzle_pipe[0], 0, sizeof(u32) * SI_MAX_PIPES);
746         switch (rdev->family) {
747         case CHIP_TAHITI:
748         case CHIP_PITCAIRN:
749         case CHIP_VERDE:
750                 force_no_swizzle = true;
751                 break;
752         default:
753                 force_no_swizzle = false;
754                 break;
755         }
756         if (force_no_swizzle) {
757                 bool last_backend_enabled = false;
758
759                 force_no_swizzle = false;
760                 for (i = 0; i < SI_MAX_BACKENDS; ++i) {
761                         if (((enabled_backends_mask >> i) & 1) == 1) {
762                                 if (last_backend_enabled)
763                                         force_no_swizzle = true;
764                                 last_backend_enabled = true;
765                         } else
766                                 last_backend_enabled = false;
767                 }
768         }
769
770         switch (num_tile_pipes) {
771         case 1:
772         case 3:
773         case 5:
774         case 7:
775                 DRM_ERROR("odd number of pipes!\n");
776                 break;
777         case 2:
778                 swizzle_pipe[0] = 0;
779                 swizzle_pipe[1] = 1;
780                 break;
781         case 4:
782                 if (force_no_swizzle) {
783                         swizzle_pipe[0] = 0;
784                         swizzle_pipe[1] = 1;
785                         swizzle_pipe[2] = 2;
786                         swizzle_pipe[3] = 3;
787                 } else {
788                         swizzle_pipe[0] = 0;
789                         swizzle_pipe[1] = 2;
790                         swizzle_pipe[2] = 1;
791                         swizzle_pipe[3] = 3;
792                 }
793                 break;
794         case 6:
795                 if (force_no_swizzle) {
796                         swizzle_pipe[0] = 0;
797                         swizzle_pipe[1] = 1;
798                         swizzle_pipe[2] = 2;
799                         swizzle_pipe[3] = 3;
800                         swizzle_pipe[4] = 4;
801                         swizzle_pipe[5] = 5;
802                 } else {
803                         swizzle_pipe[0] = 0;
804                         swizzle_pipe[1] = 2;
805                         swizzle_pipe[2] = 4;
806                         swizzle_pipe[3] = 1;
807                         swizzle_pipe[4] = 3;
808                         swizzle_pipe[5] = 5;
809                 }
810                 break;
811         case 8:
812                 if (force_no_swizzle) {
813                         swizzle_pipe[0] = 0;
814                         swizzle_pipe[1] = 1;
815                         swizzle_pipe[2] = 2;
816                         swizzle_pipe[3] = 3;
817                         swizzle_pipe[4] = 4;
818                         swizzle_pipe[5] = 5;
819                         swizzle_pipe[6] = 6;
820                         swizzle_pipe[7] = 7;
821                 } else {
822                         swizzle_pipe[0] = 0;
823                         swizzle_pipe[1] = 2;
824                         swizzle_pipe[2] = 4;
825                         swizzle_pipe[3] = 6;
826                         swizzle_pipe[4] = 1;
827                         swizzle_pipe[5] = 3;
828                         swizzle_pipe[6] = 5;
829                         swizzle_pipe[7] = 7;
830                 }
831                 break;
832         }
833
834         for (cur_pipe = 0; cur_pipe < num_tile_pipes; ++cur_pipe) {
835                 while (((1 << cur_backend) & enabled_backends_mask) == 0)
836                         cur_backend = (cur_backend + 1) % SI_MAX_BACKENDS;
837
838                 backend_map |= (((cur_backend & 0xf) << (swizzle_pipe[cur_pipe] * 4)));
839
840                 cur_backend = (cur_backend + 1) % SI_MAX_BACKENDS;
841         }
842
843         return backend_map;
844 }
845
846 static u32 si_get_disable_mask_per_asic(struct radeon_device *rdev,
847                                         u32 disable_mask_per_se,
848                                         u32 max_disable_mask_per_se,
849                                         u32 num_shader_engines)
850 {
851         u32 disable_field_width_per_se = r600_count_pipe_bits(disable_mask_per_se);
852         u32 disable_mask_per_asic = disable_mask_per_se & max_disable_mask_per_se;
853
854         if (num_shader_engines == 1)
855                 return disable_mask_per_asic;
856         else if (num_shader_engines == 2)
857                 return disable_mask_per_asic | (disable_mask_per_asic << disable_field_width_per_se);
858         else
859                 return 0xffffffff;
860 }
861
862 static void si_tiling_mode_table_init(struct radeon_device *rdev)
863 {
864         const u32 num_tile_mode_states = 32;
865         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
866
867         switch (rdev->config.si.mem_row_size_in_kb) {
868         case 1:
869                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
870                 break;
871         case 2:
872         default:
873                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
874                 break;
875         case 4:
876                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
877                 break;
878         }
879
880         if ((rdev->family == CHIP_TAHITI) ||
881             (rdev->family == CHIP_PITCAIRN)) {
882                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
883                         switch (reg_offset) {
884                         case 0:  /* non-AA compressed depth or any compressed stencil */
885                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
886                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
887                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
888                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
889                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
890                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
891                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
892                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
893                                 break;
894                         case 1:  /* 2xAA/4xAA compressed depth only */
895                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
896                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
897                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
898                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
899                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
900                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
901                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
902                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
903                                 break;
904                         case 2:  /* 8xAA compressed depth only */
905                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
906                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
907                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
908                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
909                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
910                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
911                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
912                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
913                                 break;
914                         case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
915                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
916                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
917                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
918                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
919                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
920                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
921                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
922                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
923                                 break;
924                         case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
925                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
926                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
927                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
928                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
929                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
930                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
931                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
932                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
933                                 break;
934                         case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
935                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
936                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
937                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
938                                                  TILE_SPLIT(split_equal_to_row_size) |
939                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
940                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
941                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
942                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
943                                 break;
944                         case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
945                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
946                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
947                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
948                                                  TILE_SPLIT(split_equal_to_row_size) |
949                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
950                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
951                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
952                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
953                                 break;
954                         case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
955                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
956                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
957                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
958                                                  TILE_SPLIT(split_equal_to_row_size) |
959                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
960                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
961                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
962                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
963                                 break;
964                         case 8:  /* 1D and 1D Array Surfaces */
965                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
966                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
967                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
968                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
969                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
970                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
971                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
972                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
973                                 break;
974                         case 9:  /* Displayable maps. */
975                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
976                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
977                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
978                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
979                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
980                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
981                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
982                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
983                                 break;
984                         case 10:  /* Display 8bpp. */
985                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
986                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
987                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
988                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
989                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
990                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
991                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
992                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
993                                 break;
994                         case 11:  /* Display 16bpp. */
995                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
996                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
997                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
998                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
999                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1000                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1001                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1002                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1003                                 break;
1004                         case 12:  /* Display 32bpp. */
1005                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1006                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1007                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1008                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1009                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1010                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1011                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1012                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1013                                 break;
1014                         case 13:  /* Thin. */
1015                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1016                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1017                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1018                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1019                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1020                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1021                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1022                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1023                                 break;
1024                         case 14:  /* Thin 8 bpp. */
1025                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1026                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1027                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1028                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1029                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1030                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1031                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1032                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1033                                 break;
1034                         case 15:  /* Thin 16 bpp. */
1035                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1036                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1037                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1038                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1039                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1040                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1041                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1042                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1043                                 break;
1044                         case 16:  /* Thin 32 bpp. */
1045                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1046                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1047                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1048                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1049                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1050                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1051                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1052                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1053                                 break;
1054                         case 17:  /* Thin 64 bpp. */
1055                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1056                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1057                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1058                                                  TILE_SPLIT(split_equal_to_row_size) |
1059                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1060                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1061                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1062                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1063                                 break;
1064                         case 21:  /* 8 bpp PRT. */
1065                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1066                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1067                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1068                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1069                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1070                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1071                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1072                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1073                                 break;
1074                         case 22:  /* 16 bpp PRT */
1075                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1076                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1077                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1078                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1079                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1080                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1081                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1082                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
1083                                 break;
1084                         case 23:  /* 32 bpp PRT */
1085                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1086                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1087                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1088                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1089                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1090                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1091                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1092                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1093                                 break;
1094                         case 24:  /* 64 bpp PRT */
1095                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1096                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1097                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1098                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1099                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1100                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1101                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1102                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1103                                 break;
1104                         case 25:  /* 128 bpp PRT */
1105                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1106                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1107                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1108                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
1109                                                  NUM_BANKS(ADDR_SURF_8_BANK) |
1110                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1111                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1112                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1113                                 break;
1114                         default:
1115                                 gb_tile_moden = 0;
1116                                 break;
1117                         }
1118                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1119                 }
1120         } else if (rdev->family == CHIP_VERDE) {
1121                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1122                         switch (reg_offset) {
1123                         case 0:  /* non-AA compressed depth or any compressed stencil */
1124                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1125                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1126                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1127                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1128                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1129                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1130                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1131                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
1132                                 break;
1133                         case 1:  /* 2xAA/4xAA compressed depth only */
1134                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1135                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1136                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1137                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1138                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1139                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1140                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1141                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
1142                                 break;
1143                         case 2:  /* 8xAA compressed depth only */
1144                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1145                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1146                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1147                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1148                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1149                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1150                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1151                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
1152                                 break;
1153                         case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
1154                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1155                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1156                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1157                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1158                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1159                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1160                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1161                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
1162                                 break;
1163                         case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
1164                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1165                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1166                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1167                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1168                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1169                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1170                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1171                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1172                                 break;
1173                         case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
1174                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1175                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1176                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1177                                                  TILE_SPLIT(split_equal_to_row_size) |
1178                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1179                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1180                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1181                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1182                                 break;
1183                         case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
1184                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1185                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1186                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1187                                                  TILE_SPLIT(split_equal_to_row_size) |
1188                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1189                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1190                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1191                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1192                                 break;
1193                         case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
1194                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1195                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1196                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1197                                                  TILE_SPLIT(split_equal_to_row_size) |
1198                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1199                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1200                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1201                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
1202                                 break;
1203                         case 8:  /* 1D and 1D Array Surfaces */
1204                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1205                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1206                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1207                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1208                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1209                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1210                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1211                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1212                                 break;
1213                         case 9:  /* Displayable maps. */
1214                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1215                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1216                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1217                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1218                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1219                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1220                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1221                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1222                                 break;
1223                         case 10:  /* Display 8bpp. */
1224                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1225                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1226                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1227                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1228                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1229                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1230                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1231                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
1232                                 break;
1233                         case 11:  /* Display 16bpp. */
1234                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1235                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1236                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1237                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1238                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1239                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1240                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1241                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1242                                 break;
1243                         case 12:  /* Display 32bpp. */
1244                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1245                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1246                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1247                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1248                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1249                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1250                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1251                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1252                                 break;
1253                         case 13:  /* Thin. */
1254                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1255                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1256                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1257                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1258                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1259                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1260                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1261                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1262                                 break;
1263                         case 14:  /* Thin 8 bpp. */
1264                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1265                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1266                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1267                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1268                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1269                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1270                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1271                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1272                                 break;
1273                         case 15:  /* Thin 16 bpp. */
1274                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1275                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1276                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1277                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1278                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1279                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1280                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1281                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1282                                 break;
1283                         case 16:  /* Thin 32 bpp. */
1284                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1285                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1286                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1287                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1288                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1289                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1290                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1291                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1292                                 break;
1293                         case 17:  /* Thin 64 bpp. */
1294                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1295                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1296                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1297                                                  TILE_SPLIT(split_equal_to_row_size) |
1298                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1299                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1300                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1301                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1302                                 break;
1303                         case 21:  /* 8 bpp PRT. */
1304                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1305                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1306                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1307                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1308                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1309                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1310                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1311                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1312                                 break;
1313                         case 22:  /* 16 bpp PRT */
1314                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1315                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1316                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1317                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1318                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1319                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1320                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1321                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
1322                                 break;
1323                         case 23:  /* 32 bpp PRT */
1324                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1325                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1326                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1327                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1328                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1329                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1330                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1331                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1332                                 break;
1333                         case 24:  /* 64 bpp PRT */
1334                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1335                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1336                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1337                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1338                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1339                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1340                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1341                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1342                                 break;
1343                         case 25:  /* 128 bpp PRT */
1344                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1345                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1346                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1347                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
1348                                                  NUM_BANKS(ADDR_SURF_8_BANK) |
1349                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1350                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1351                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1352                                 break;
1353                         default:
1354                                 gb_tile_moden = 0;
1355                                 break;
1356                         }
1357                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1358                 }
1359         } else
1360                 DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
1361 }
1362
1363 static void si_gpu_init(struct radeon_device *rdev)
1364 {
1365         u32 cc_rb_backend_disable = 0;
1366         u32 cc_gc_shader_array_config;
1367         u32 gb_addr_config = 0;
1368         u32 mc_shared_chmap, mc_arb_ramcfg;
1369         u32 gb_backend_map;
1370         u32 cgts_tcc_disable;
1371         u32 sx_debug_1;
1372         u32 gc_user_shader_array_config;
1373         u32 gc_user_rb_backend_disable;
1374         u32 cgts_user_tcc_disable;
1375         u32 hdp_host_path_cntl;
1376         u32 tmp;
1377         int i, j;
1378
1379         switch (rdev->family) {
1380         case CHIP_TAHITI:
1381                 rdev->config.si.max_shader_engines = 2;
1382                 rdev->config.si.max_pipes_per_simd = 4;
1383                 rdev->config.si.max_tile_pipes = 12;
1384                 rdev->config.si.max_simds_per_se = 8;
1385                 rdev->config.si.max_backends_per_se = 4;
1386                 rdev->config.si.max_texture_channel_caches = 12;
1387                 rdev->config.si.max_gprs = 256;
1388                 rdev->config.si.max_gs_threads = 32;
1389                 rdev->config.si.max_hw_contexts = 8;
1390
1391                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
1392                 rdev->config.si.sc_prim_fifo_size_backend = 0x100;
1393                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
1394                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
1395                 break;
1396         case CHIP_PITCAIRN:
1397                 rdev->config.si.max_shader_engines = 2;
1398                 rdev->config.si.max_pipes_per_simd = 4;
1399                 rdev->config.si.max_tile_pipes = 8;
1400                 rdev->config.si.max_simds_per_se = 5;
1401                 rdev->config.si.max_backends_per_se = 4;
1402                 rdev->config.si.max_texture_channel_caches = 8;
1403                 rdev->config.si.max_gprs = 256;
1404                 rdev->config.si.max_gs_threads = 32;
1405                 rdev->config.si.max_hw_contexts = 8;
1406
1407                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
1408                 rdev->config.si.sc_prim_fifo_size_backend = 0x100;
1409                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
1410                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
1411                 break;
1412         case CHIP_VERDE:
1413         default:
1414                 rdev->config.si.max_shader_engines = 1;
1415                 rdev->config.si.max_pipes_per_simd = 4;
1416                 rdev->config.si.max_tile_pipes = 4;
1417                 rdev->config.si.max_simds_per_se = 2;
1418                 rdev->config.si.max_backends_per_se = 4;
1419                 rdev->config.si.max_texture_channel_caches = 4;
1420                 rdev->config.si.max_gprs = 256;
1421                 rdev->config.si.max_gs_threads = 32;
1422                 rdev->config.si.max_hw_contexts = 8;
1423
1424                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
1425                 rdev->config.si.sc_prim_fifo_size_backend = 0x40;
1426                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
1427                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
1428                 break;
1429         }
1430
1431         /* Initialize HDP */
1432         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
1433                 WREG32((0x2c14 + j), 0x00000000);
1434                 WREG32((0x2c18 + j), 0x00000000);
1435                 WREG32((0x2c1c + j), 0x00000000);
1436                 WREG32((0x2c20 + j), 0x00000000);
1437                 WREG32((0x2c24 + j), 0x00000000);
1438         }
1439
1440         WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
1441
1442         evergreen_fix_pci_max_read_req_size(rdev);
1443
1444         WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
1445
1446         mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
1447         mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
1448
1449         cc_rb_backend_disable = RREG32(CC_RB_BACKEND_DISABLE);
1450         cc_gc_shader_array_config = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
1451         cgts_tcc_disable = 0xffff0000;
1452         for (i = 0; i < rdev->config.si.max_texture_channel_caches; i++)
1453                 cgts_tcc_disable &= ~(1 << (16 + i));
1454         gc_user_rb_backend_disable = RREG32(GC_USER_RB_BACKEND_DISABLE);
1455         gc_user_shader_array_config = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
1456         cgts_user_tcc_disable = RREG32(CGTS_USER_TCC_DISABLE);
1457
1458         rdev->config.si.num_shader_engines = rdev->config.si.max_shader_engines;
1459         rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
1460         tmp = ((~gc_user_rb_backend_disable) & BACKEND_DISABLE_MASK) >> BACKEND_DISABLE_SHIFT;
1461         rdev->config.si.num_backends_per_se = r600_count_pipe_bits(tmp);
1462         tmp = (gc_user_rb_backend_disable & BACKEND_DISABLE_MASK) >> BACKEND_DISABLE_SHIFT;
1463         rdev->config.si.backend_disable_mask_per_asic =
1464                 si_get_disable_mask_per_asic(rdev, tmp, SI_MAX_BACKENDS_PER_SE_MASK,
1465                                              rdev->config.si.num_shader_engines);
1466         rdev->config.si.backend_map =
1467                 si_get_tile_pipe_to_backend_map(rdev, rdev->config.si.num_tile_pipes,
1468                                                 rdev->config.si.num_backends_per_se *
1469                                                 rdev->config.si.num_shader_engines,
1470                                                 &rdev->config.si.backend_disable_mask_per_asic,
1471                                                 rdev->config.si.num_shader_engines);
1472         tmp = ((~cgts_user_tcc_disable) & TCC_DISABLE_MASK) >> TCC_DISABLE_SHIFT;
1473         rdev->config.si.num_texture_channel_caches = r600_count_pipe_bits(tmp);
1474         rdev->config.si.mem_max_burst_length_bytes = 256;
1475         tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
1476         rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1477         if (rdev->config.si.mem_row_size_in_kb > 4)
1478                 rdev->config.si.mem_row_size_in_kb = 4;
1479         /* XXX use MC settings? */
1480         rdev->config.si.shader_engine_tile_size = 32;
1481         rdev->config.si.num_gpus = 1;
1482         rdev->config.si.multi_gpu_tile_size = 64;
1483
1484         gb_addr_config = 0;
1485         switch (rdev->config.si.num_tile_pipes) {
1486         case 1:
1487                 gb_addr_config |= NUM_PIPES(0);
1488                 break;
1489         case 2:
1490                 gb_addr_config |= NUM_PIPES(1);
1491                 break;
1492         case 4:
1493                 gb_addr_config |= NUM_PIPES(2);
1494                 break;
1495         case 8:
1496         default:
1497                 gb_addr_config |= NUM_PIPES(3);
1498                 break;
1499         }
1500
1501         tmp = (rdev->config.si.mem_max_burst_length_bytes / 256) - 1;
1502         gb_addr_config |= PIPE_INTERLEAVE_SIZE(tmp);
1503         gb_addr_config |= NUM_SHADER_ENGINES(rdev->config.si.num_shader_engines - 1);
1504         tmp = (rdev->config.si.shader_engine_tile_size / 16) - 1;
1505         gb_addr_config |= SHADER_ENGINE_TILE_SIZE(tmp);
1506         switch (rdev->config.si.num_gpus) {
1507         case 1:
1508         default:
1509                 gb_addr_config |= NUM_GPUS(0);
1510                 break;
1511         case 2:
1512                 gb_addr_config |= NUM_GPUS(1);
1513                 break;
1514         case 4:
1515                 gb_addr_config |= NUM_GPUS(2);
1516                 break;
1517         }
1518         switch (rdev->config.si.multi_gpu_tile_size) {
1519         case 16:
1520                 gb_addr_config |= MULTI_GPU_TILE_SIZE(0);
1521                 break;
1522         case 32:
1523         default:
1524                 gb_addr_config |= MULTI_GPU_TILE_SIZE(1);
1525                 break;
1526         case 64:
1527                 gb_addr_config |= MULTI_GPU_TILE_SIZE(2);
1528                 break;
1529         case 128:
1530                 gb_addr_config |= MULTI_GPU_TILE_SIZE(3);
1531                 break;
1532         }
1533         switch (rdev->config.si.mem_row_size_in_kb) {
1534         case 1:
1535         default:
1536                 gb_addr_config |= ROW_SIZE(0);
1537                 break;
1538         case 2:
1539                 gb_addr_config |= ROW_SIZE(1);
1540                 break;
1541         case 4:
1542                 gb_addr_config |= ROW_SIZE(2);
1543                 break;
1544         }
1545
1546         tmp = (gb_addr_config & NUM_PIPES_MASK) >> NUM_PIPES_SHIFT;
1547         rdev->config.si.num_tile_pipes = (1 << tmp);
1548         tmp = (gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT;
1549         rdev->config.si.mem_max_burst_length_bytes = (tmp + 1) * 256;
1550         tmp = (gb_addr_config & NUM_SHADER_ENGINES_MASK) >> NUM_SHADER_ENGINES_SHIFT;
1551         rdev->config.si.num_shader_engines = tmp + 1;
1552         tmp = (gb_addr_config & NUM_GPUS_MASK) >> NUM_GPUS_SHIFT;
1553         rdev->config.si.num_gpus = tmp + 1;
1554         tmp = (gb_addr_config & MULTI_GPU_TILE_SIZE_MASK) >> MULTI_GPU_TILE_SIZE_SHIFT;
1555         rdev->config.si.multi_gpu_tile_size = 1 << tmp;
1556         tmp = (gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT;
1557         rdev->config.si.mem_row_size_in_kb = 1 << tmp;
1558
1559         gb_backend_map =
1560                 si_get_tile_pipe_to_backend_map(rdev, rdev->config.si.num_tile_pipes,
1561                                                 rdev->config.si.num_backends_per_se *
1562                                                 rdev->config.si.num_shader_engines,
1563                                                 &rdev->config.si.backend_disable_mask_per_asic,
1564                                                 rdev->config.si.num_shader_engines);
1565
1566         /* setup tiling info dword.  gb_addr_config is not adequate since it does
1567          * not have bank info, so create a custom tiling dword.
1568          * bits 3:0   num_pipes
1569          * bits 7:4   num_banks
1570          * bits 11:8  group_size
1571          * bits 15:12 row_size
1572          */
1573         rdev->config.si.tile_config = 0;
1574         switch (rdev->config.si.num_tile_pipes) {
1575         case 1:
1576                 rdev->config.si.tile_config |= (0 << 0);
1577                 break;
1578         case 2:
1579                 rdev->config.si.tile_config |= (1 << 0);
1580                 break;
1581         case 4:
1582                 rdev->config.si.tile_config |= (2 << 0);
1583                 break;
1584         case 8:
1585         default:
1586                 /* XXX what about 12? */
1587                 rdev->config.si.tile_config |= (3 << 0);
1588                 break;
1589         }
1590         rdev->config.si.tile_config |=
1591                 ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
1592         rdev->config.si.tile_config |=
1593                 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
1594         rdev->config.si.tile_config |=
1595                 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
1596
1597         rdev->config.si.backend_map = gb_backend_map;
1598         WREG32(GB_ADDR_CONFIG, gb_addr_config);
1599         WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
1600         WREG32(HDP_ADDR_CONFIG, gb_addr_config);
1601
1602         /* primary versions */
1603         WREG32(CC_RB_BACKEND_DISABLE, cc_rb_backend_disable);
1604         WREG32(CC_SYS_RB_BACKEND_DISABLE, cc_rb_backend_disable);
1605         WREG32(CC_GC_SHADER_ARRAY_CONFIG, cc_gc_shader_array_config);
1606
1607         WREG32(CGTS_TCC_DISABLE, cgts_tcc_disable);
1608
1609         /* user versions */
1610         WREG32(GC_USER_RB_BACKEND_DISABLE, cc_rb_backend_disable);
1611         WREG32(GC_USER_SYS_RB_BACKEND_DISABLE, cc_rb_backend_disable);
1612         WREG32(GC_USER_SHADER_ARRAY_CONFIG, cc_gc_shader_array_config);
1613
1614         WREG32(CGTS_USER_TCC_DISABLE, cgts_tcc_disable);
1615
1616         si_tiling_mode_table_init(rdev);
1617
1618         /* set HW defaults for 3D engine */
1619         WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
1620                                      ROQ_IB2_START(0x2b)));
1621         WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
1622
1623         sx_debug_1 = RREG32(SX_DEBUG_1);
1624         WREG32(SX_DEBUG_1, sx_debug_1);
1625
1626         WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
1627
1628         WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
1629                                  SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
1630                                  SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
1631                                  SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));
1632
1633         WREG32(VGT_NUM_INSTANCES, 1);
1634
1635         WREG32(CP_PERFMON_CNTL, 0);
1636
1637         WREG32(SQ_CONFIG, 0);
1638
1639         WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
1640                                           FORCE_EOV_MAX_REZ_CNT(255)));
1641
1642         WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
1643                AUTO_INVLD_EN(ES_AND_GS_AUTO));
1644
1645         WREG32(VGT_GS_VERTEX_REUSE, 16);
1646         WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
1647
1648         WREG32(CB_PERFCOUNTER0_SELECT0, 0);
1649         WREG32(CB_PERFCOUNTER0_SELECT1, 0);
1650         WREG32(CB_PERFCOUNTER1_SELECT0, 0);
1651         WREG32(CB_PERFCOUNTER1_SELECT1, 0);
1652         WREG32(CB_PERFCOUNTER2_SELECT0, 0);
1653         WREG32(CB_PERFCOUNTER2_SELECT1, 0);
1654         WREG32(CB_PERFCOUNTER3_SELECT0, 0);
1655         WREG32(CB_PERFCOUNTER3_SELECT1, 0);
1656
1657         tmp = RREG32(HDP_MISC_CNTL);
1658         tmp |= HDP_FLUSH_INVALIDATE_CACHE;
1659         WREG32(HDP_MISC_CNTL, tmp);
1660
1661         hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
1662         WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
1663
1664         WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
1665
1666         udelay(50);
1667 }
1668
1669 bool si_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
1670 {
1671         u32 srbm_status;
1672         u32 grbm_status, grbm_status2;
1673         u32 grbm_status_se0, grbm_status_se1;
1674         struct r100_gpu_lockup *lockup = &rdev->config.si.lockup;
1675         int r;
1676
1677         srbm_status = RREG32(SRBM_STATUS);
1678         grbm_status = RREG32(GRBM_STATUS);
1679         grbm_status2 = RREG32(GRBM_STATUS2);
1680         grbm_status_se0 = RREG32(GRBM_STATUS_SE0);
1681         grbm_status_se1 = RREG32(GRBM_STATUS_SE1);
1682         if (!(grbm_status & GUI_ACTIVE)) {
1683                 r100_gpu_lockup_update(lockup, ring);
1684                 return false;
1685         }
1686         /* force CP activities */
1687         r = radeon_ring_lock(rdev, ring, 2);
1688         if (!r) {
1689                 /* PACKET2 NOP */
1690                 radeon_ring_write(ring, 0x80000000);
1691                 radeon_ring_write(ring, 0x80000000);
1692                 radeon_ring_unlock_commit(rdev, ring);
1693         }
1694         /* XXX deal with CP0,1,2 */
1695         ring->rptr = RREG32(ring->rptr_reg);
1696         return r100_gpu_cp_is_lockup(rdev, lockup, ring);
1697 }
1698
1699 static int si_gpu_soft_reset(struct radeon_device *rdev)
1700 {
1701         struct evergreen_mc_save save;
1702         u32 grbm_reset = 0;
1703
1704         if (!(RREG32(GRBM_STATUS) & GUI_ACTIVE))
1705                 return 0;
1706
1707         dev_info(rdev->dev, "GPU softreset \n");
1708         dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
1709                 RREG32(GRBM_STATUS));
1710         dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
1711                 RREG32(GRBM_STATUS2));
1712         dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
1713                 RREG32(GRBM_STATUS_SE0));
1714         dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
1715                 RREG32(GRBM_STATUS_SE1));
1716         dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
1717                 RREG32(SRBM_STATUS));
1718         evergreen_mc_stop(rdev, &save);
1719         if (radeon_mc_wait_for_idle(rdev)) {
1720                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
1721         }
1722         /* Disable CP parsing/prefetching */
1723         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
1724
1725         /* reset all the gfx blocks */
1726         grbm_reset = (SOFT_RESET_CP |
1727                       SOFT_RESET_CB |
1728                       SOFT_RESET_DB |
1729                       SOFT_RESET_GDS |
1730                       SOFT_RESET_PA |
1731                       SOFT_RESET_SC |
1732                       SOFT_RESET_SPI |
1733                       SOFT_RESET_SX |
1734                       SOFT_RESET_TC |
1735                       SOFT_RESET_TA |
1736                       SOFT_RESET_VGT |
1737                       SOFT_RESET_IA);
1738
1739         dev_info(rdev->dev, "  GRBM_SOFT_RESET=0x%08X\n", grbm_reset);
1740         WREG32(GRBM_SOFT_RESET, grbm_reset);
1741         (void)RREG32(GRBM_SOFT_RESET);
1742         udelay(50);
1743         WREG32(GRBM_SOFT_RESET, 0);
1744         (void)RREG32(GRBM_SOFT_RESET);
1745         /* Wait a little for things to settle down */
1746         udelay(50);
1747         dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
1748                 RREG32(GRBM_STATUS));
1749         dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
1750                 RREG32(GRBM_STATUS2));
1751         dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
1752                 RREG32(GRBM_STATUS_SE0));
1753         dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
1754                 RREG32(GRBM_STATUS_SE1));
1755         dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
1756                 RREG32(SRBM_STATUS));
1757         evergreen_mc_resume(rdev, &save);
1758         return 0;
1759 }
1760
/*
 * ASIC reset entry point for SI parts.
 *
 * Simply runs the GPU soft reset and passes its status back to the caller.
 */
int si_asic_reset(struct radeon_device *rdev)
{
	int status = si_gpu_soft_reset(rdev);

	return status;
}
1765
1766 /* MC */
1767 static void si_mc_program(struct radeon_device *rdev)
1768 {
1769         struct evergreen_mc_save save;
1770         u32 tmp;
1771         int i, j;
1772
1773         /* Initialize HDP */
1774         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
1775                 WREG32((0x2c14 + j), 0x00000000);
1776                 WREG32((0x2c18 + j), 0x00000000);
1777                 WREG32((0x2c1c + j), 0x00000000);
1778                 WREG32((0x2c20 + j), 0x00000000);
1779                 WREG32((0x2c24 + j), 0x00000000);
1780         }
1781         WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
1782
1783         evergreen_mc_stop(rdev, &save);
1784         if (radeon_mc_wait_for_idle(rdev)) {
1785                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
1786         }
1787         /* Lockout access through VGA aperture*/
1788         WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
1789         /* Update configuration */
1790         WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
1791                rdev->mc.vram_start >> 12);
1792         WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
1793                rdev->mc.vram_end >> 12);
1794         WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
1795                rdev->vram_scratch.gpu_addr >> 12);
1796         tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
1797         tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
1798         WREG32(MC_VM_FB_LOCATION, tmp);
1799         /* XXX double check these! */
1800         WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
1801         WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
1802         WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
1803         WREG32(MC_VM_AGP_BASE, 0);
1804         WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
1805         WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
1806         if (radeon_mc_wait_for_idle(rdev)) {
1807                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
1808         }
1809         evergreen_mc_resume(rdev, &save);
1810         /* we need to own VRAM, so turn off the VGA renderer here
1811          * to stop it overwriting our objects */
1812         rv515_vga_render_disable(rdev);
1813 }
1814
1815 /* SI MC address space is 40 bits */
1816 static void si_vram_location(struct radeon_device *rdev,
1817                              struct radeon_mc *mc, u64 base)
1818 {
1819         mc->vram_start = base;
1820         if (mc->mc_vram_size > (0xFFFFFFFFFFULL - base + 1)) {
1821                 dev_warn(rdev->dev, "limiting VRAM to PCI aperture size\n");
1822                 mc->real_vram_size = mc->aper_size;
1823                 mc->mc_vram_size = mc->aper_size;
1824         }
1825         mc->vram_end = mc->vram_start + mc->mc_vram_size - 1;
1826         dev_info(rdev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n",
1827                         mc->mc_vram_size >> 20, mc->vram_start,
1828                         mc->vram_end, mc->real_vram_size >> 20);
1829 }
1830
1831 static void si_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc)
1832 {
1833         u64 size_af, size_bf;
1834
1835         size_af = ((0xFFFFFFFFFFULL - mc->vram_end) + mc->gtt_base_align) & ~mc->gtt_base_align;
1836         size_bf = mc->vram_start & ~mc->gtt_base_align;
1837         if (size_bf > size_af) {
1838                 if (mc->gtt_size > size_bf) {
1839                         dev_warn(rdev->dev, "limiting GTT\n");
1840                         mc->gtt_size = size_bf;
1841                 }
1842                 mc->gtt_start = (mc->vram_start & ~mc->gtt_base_align) - mc->gtt_size;
1843         } else {
1844                 if (mc->gtt_size > size_af) {
1845                         dev_warn(rdev->dev, "limiting GTT\n");
1846                         mc->gtt_size = size_af;
1847                 }
1848                 mc->gtt_start = (mc->vram_end + 1 + mc->gtt_base_align) & ~mc->gtt_base_align;
1849         }
1850         mc->gtt_end = mc->gtt_start + mc->gtt_size - 1;
1851         dev_info(rdev->dev, "GTT: %lluM 0x%016llX - 0x%016llX\n",
1852                         mc->gtt_size >> 20, mc->gtt_start, mc->gtt_end);
1853 }
1854
1855 static void si_vram_gtt_location(struct radeon_device *rdev,
1856                                  struct radeon_mc *mc)
1857 {
1858         if (mc->mc_vram_size > 0xFFC0000000ULL) {
1859                 /* leave room for at least 1024M GTT */
1860                 dev_warn(rdev->dev, "limiting VRAM\n");
1861                 mc->real_vram_size = 0xFFC0000000ULL;
1862                 mc->mc_vram_size = 0xFFC0000000ULL;
1863         }
1864         si_vram_location(rdev, &rdev->mc, 0);
1865         rdev->mc.gtt_base_align = 0;
1866         si_gtt_location(rdev, mc);
1867 }
1868
1869 static int si_mc_init(struct radeon_device *rdev)
1870 {
1871         u32 tmp;
1872         int chansize, numchan;
1873
1874         /* Get VRAM informations */
1875         rdev->mc.vram_is_ddr = true;
1876         tmp = RREG32(MC_ARB_RAMCFG);
1877         if (tmp & CHANSIZE_OVERRIDE) {
1878                 chansize = 16;
1879         } else if (tmp & CHANSIZE_MASK) {
1880                 chansize = 64;
1881         } else {
1882                 chansize = 32;
1883         }
1884         tmp = RREG32(MC_SHARED_CHMAP);
1885         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
1886         case 0:
1887         default:
1888                 numchan = 1;
1889                 break;
1890         case 1:
1891                 numchan = 2;
1892                 break;
1893         case 2:
1894                 numchan = 4;
1895                 break;
1896         case 3:
1897                 numchan = 8;
1898                 break;
1899         case 4:
1900                 numchan = 3;
1901                 break;
1902         case 5:
1903                 numchan = 6;
1904                 break;
1905         case 6:
1906                 numchan = 10;
1907                 break;
1908         case 7:
1909                 numchan = 12;
1910                 break;
1911         case 8:
1912                 numchan = 16;
1913                 break;
1914         }
1915         rdev->mc.vram_width = numchan * chansize;
1916         /* Could aper size report 0 ? */
1917         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
1918         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
1919         /* size in MB on si */
1920         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
1921         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
1922         rdev->mc.visible_vram_size = rdev->mc.aper_size;
1923         si_vram_gtt_location(rdev, &rdev->mc);
1924         radeon_update_bandwidth_info(rdev);
1925
1926         return 0;
1927 }
1928
1929 /*
1930  * GART
1931  */
1932 void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
1933 {
1934         /* flush hdp cache */
1935         WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
1936
1937         /* bits 0-15 are the VM contexts0-15 */
1938         WREG32(VM_INVALIDATE_REQUEST, 1);
1939 }
1940
1941 int si_pcie_gart_enable(struct radeon_device *rdev)
1942 {
1943         int r, i;
1944
1945         if (rdev->gart.robj == NULL) {
1946                 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
1947                 return -EINVAL;
1948         }
1949         r = radeon_gart_table_vram_pin(rdev);
1950         if (r)
1951                 return r;
1952         radeon_gart_restore(rdev);
1953         /* Setup TLB control */
1954         WREG32(MC_VM_MX_L1_TLB_CNTL,
1955                (0xA << 7) |
1956                ENABLE_L1_TLB |
1957                SYSTEM_ACCESS_MODE_NOT_IN_SYS |
1958                ENABLE_ADVANCED_DRIVER_MODEL |
1959                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
1960         /* Setup L2 cache */
1961         WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
1962                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
1963                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
1964                EFFECTIVE_L2_QUEUE_SIZE(7) |
1965                CONTEXT1_IDENTITY_ACCESS_MODE(1));
1966         WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
1967         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
1968                L2_CACHE_BIGK_FRAGMENT_SIZE(0));
1969         /* setup context0 */
1970         WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
1971         WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
1972         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
1973         WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
1974                         (u32)(rdev->dummy_page.addr >> 12));
1975         WREG32(VM_CONTEXT0_CNTL2, 0);
1976         WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
1977                                   RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
1978
1979         WREG32(0x15D4, 0);
1980         WREG32(0x15D8, 0);
1981         WREG32(0x15DC, 0);
1982
1983         /* empty context1-15 */
1984         /* FIXME start with 1G, once using 2 level pt switch to full
1985          * vm size space
1986          */
1987         /* set vm size, must be a multiple of 4 */
1988         WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
1989         WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, (1 << 30) / RADEON_GPU_PAGE_SIZE);
1990         for (i = 1; i < 16; i++) {
1991                 if (i < 8)
1992                         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
1993                                rdev->gart.table_addr >> 12);
1994                 else
1995                         WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
1996                                rdev->gart.table_addr >> 12);
1997         }
1998
1999         /* enable context1-15 */
2000         WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
2001                (u32)(rdev->dummy_page.addr >> 12));
2002         WREG32(VM_CONTEXT1_CNTL2, 0);
2003         WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
2004                                 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT);
2005
2006         si_pcie_gart_tlb_flush(rdev);
2007         DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
2008                  (unsigned)(rdev->mc.gtt_size >> 20),
2009                  (unsigned long long)rdev->gart.table_addr);
2010         rdev->gart.ready = true;
2011         return 0;
2012 }
2013
2014 void si_pcie_gart_disable(struct radeon_device *rdev)
2015 {
2016         /* Disable all tables */
2017         WREG32(VM_CONTEXT0_CNTL, 0);
2018         WREG32(VM_CONTEXT1_CNTL, 0);
2019         /* Setup TLB control */
2020         WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
2021                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
2022         /* Setup L2 cache */
2023         WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
2024                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
2025                EFFECTIVE_L2_QUEUE_SIZE(7) |
2026                CONTEXT1_IDENTITY_ACCESS_MODE(1));
2027         WREG32(VM_L2_CNTL2, 0);
2028         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
2029                L2_CACHE_BIGK_FRAGMENT_SIZE(0));
2030         radeon_gart_table_vram_unpin(rdev);
2031 }
2032
/*
 * Tear the PCIE GART down completely.
 */
void si_pcie_gart_fini(struct radeon_device *rdev)
{
	/* stop the hardware from using the table first ... */
	si_pcie_gart_disable(rdev);

	/* ... then release the VRAM table and the common GART state */
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
2039
2040 /* vm parser */
2041 static bool si_vm_reg_valid(u32 reg)
2042 {
2043         /* context regs are fine */
2044         if (reg >= 0x28000)
2045                 return true;
2046
2047         /* check config regs */
2048         switch (reg) {
2049         case GRBM_GFX_INDEX:
2050         case VGT_VTX_VECT_EJECT_REG:
2051         case VGT_CACHE_INVALIDATION:
2052         case VGT_ESGS_RING_SIZE:
2053         case VGT_GSVS_RING_SIZE:
2054         case VGT_GS_VERTEX_REUSE:
2055         case VGT_PRIMITIVE_TYPE:
2056         case VGT_INDEX_TYPE:
2057         case VGT_NUM_INDICES:
2058         case VGT_NUM_INSTANCES:
2059         case VGT_TF_RING_SIZE:
2060         case VGT_HS_OFFCHIP_PARAM:
2061         case VGT_TF_MEMORY_BASE:
2062         case PA_CL_ENHANCE:
2063         case PA_SU_LINE_STIPPLE_VALUE:
2064         case PA_SC_LINE_STIPPLE_STATE:
2065         case PA_SC_ENHANCE:
2066         case SQC_CACHES:
2067         case SPI_STATIC_THREAD_MGMT_1:
2068         case SPI_STATIC_THREAD_MGMT_2:
2069         case SPI_STATIC_THREAD_MGMT_3:
2070         case SPI_PS_MAX_WAVE_ID:
2071         case SPI_CONFIG_CNTL:
2072         case SPI_CONFIG_CNTL_1:
2073         case TA_CNTL_AUX:
2074                 return true;
2075         default:
2076                 DRM_ERROR("Invalid register 0x%x in CS\n", reg);
2077                 return false;
2078         }
2079 }
2080
2081 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
2082                                   u32 *ib, struct radeon_cs_packet *pkt)
2083 {
2084         switch (pkt->opcode) {
2085         case PACKET3_NOP:
2086         case PACKET3_SET_BASE:
2087         case PACKET3_SET_CE_DE_COUNTERS:
2088         case PACKET3_LOAD_CONST_RAM:
2089         case PACKET3_WRITE_CONST_RAM:
2090         case PACKET3_WRITE_CONST_RAM_OFFSET:
2091         case PACKET3_DUMP_CONST_RAM:
2092         case PACKET3_INCREMENT_CE_COUNTER:
2093         case PACKET3_WAIT_ON_DE_COUNTER:
2094         case PACKET3_CE_WRITE:
2095                 break;
2096         default:
2097                 DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
2098                 return -EINVAL;
2099         }
2100         return 0;
2101 }
2102
2103 static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
2104                                    u32 *ib, struct radeon_cs_packet *pkt)
2105 {
2106         u32 idx = pkt->idx + 1;
2107         u32 idx_value = ib[idx];
2108         u32 start_reg, end_reg, reg, i;
2109
2110         switch (pkt->opcode) {
2111         case PACKET3_NOP:
2112         case PACKET3_SET_BASE:
2113         case PACKET3_CLEAR_STATE:
2114         case PACKET3_INDEX_BUFFER_SIZE:
2115         case PACKET3_DISPATCH_DIRECT:
2116         case PACKET3_DISPATCH_INDIRECT:
2117         case PACKET3_ALLOC_GDS:
2118         case PACKET3_WRITE_GDS_RAM:
2119         case PACKET3_ATOMIC_GDS:
2120         case PACKET3_ATOMIC:
2121         case PACKET3_OCCLUSION_QUERY:
2122         case PACKET3_SET_PREDICATION:
2123         case PACKET3_COND_EXEC:
2124         case PACKET3_PRED_EXEC:
2125         case PACKET3_DRAW_INDIRECT:
2126         case PACKET3_DRAW_INDEX_INDIRECT:
2127         case PACKET3_INDEX_BASE:
2128         case PACKET3_DRAW_INDEX_2:
2129         case PACKET3_CONTEXT_CONTROL:
2130         case PACKET3_INDEX_TYPE:
2131         case PACKET3_DRAW_INDIRECT_MULTI:
2132         case PACKET3_DRAW_INDEX_AUTO:
2133         case PACKET3_DRAW_INDEX_IMMD:
2134         case PACKET3_NUM_INSTANCES:
2135         case PACKET3_DRAW_INDEX_MULTI_AUTO:
2136         case PACKET3_STRMOUT_BUFFER_UPDATE:
2137         case PACKET3_DRAW_INDEX_OFFSET_2:
2138         case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
2139         case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
2140         case PACKET3_MPEG_INDEX:
2141         case PACKET3_WAIT_REG_MEM:
2142         case PACKET3_MEM_WRITE:
2143         case PACKET3_PFP_SYNC_ME:
2144         case PACKET3_SURFACE_SYNC:
2145         case PACKET3_EVENT_WRITE:
2146         case PACKET3_EVENT_WRITE_EOP:
2147         case PACKET3_EVENT_WRITE_EOS:
2148         case PACKET3_SET_CONTEXT_REG:
2149         case PACKET3_SET_CONTEXT_REG_INDIRECT:
2150         case PACKET3_SET_SH_REG:
2151         case PACKET3_SET_SH_REG_OFFSET:
2152         case PACKET3_INCREMENT_DE_COUNTER:
2153         case PACKET3_WAIT_ON_CE_COUNTER:
2154         case PACKET3_WAIT_ON_AVAIL_BUFFER:
2155         case PACKET3_ME_WRITE:
2156                 break;
2157         case PACKET3_COPY_DATA:
2158                 if ((idx_value & 0xf00) == 0) {
2159                         reg = ib[idx + 3] * 4;
2160                         if (!si_vm_reg_valid(reg))
2161                                 return -EINVAL;
2162                 }
2163                 break;
2164         case PACKET3_WRITE_DATA:
2165                 if ((idx_value & 0xf00) == 0) {
2166                         start_reg = ib[idx + 1] * 4;
2167                         if (idx_value & 0x10000) {
2168                                 if (!si_vm_reg_valid(start_reg))
2169                                         return -EINVAL;
2170                         } else {
2171                                 for (i = 0; i < (pkt->count - 2); i++) {
2172                                         reg = start_reg + (4 * i);
2173                                         if (!si_vm_reg_valid(reg))
2174                                                 return -EINVAL;
2175                                 }
2176                         }
2177                 }
2178                 break;
2179         case PACKET3_COND_WRITE:
2180                 if (idx_value & 0x100) {
2181                         reg = ib[idx + 5] * 4;
2182                         if (!si_vm_reg_valid(reg))
2183                                 return -EINVAL;
2184                 }
2185                 break;
2186         case PACKET3_COPY_DW:
2187                 if (idx_value & 0x2) {
2188                         reg = ib[idx + 3] * 4;
2189                         if (!si_vm_reg_valid(reg))
2190                                 return -EINVAL;
2191                 }
2192                 break;
2193         case PACKET3_SET_CONFIG_REG:
2194                 start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
2195                 end_reg = 4 * pkt->count + start_reg - 4;
2196                 if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
2197                     (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
2198                     (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
2199                         DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
2200                         return -EINVAL;
2201                 }
2202                 for (i = 0; i < pkt->count; i++) {
2203                         reg = start_reg + (4 * i);
2204                         if (!si_vm_reg_valid(reg))
2205                                 return -EINVAL;
2206                 }
2207                 break;
2208         default:
2209                 DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
2210                 return -EINVAL;
2211         }
2212         return 0;
2213 }
2214
2215 static int si_vm_packet3_compute_check(struct radeon_device *rdev,
2216                                        u32 *ib, struct radeon_cs_packet *pkt)
2217 {
2218         u32 idx = pkt->idx + 1;
2219         u32 idx_value = ib[idx];
2220         u32 start_reg, reg, i;
2221
2222         switch (pkt->opcode) {
2223         case PACKET3_NOP:
2224         case PACKET3_SET_BASE:
2225         case PACKET3_CLEAR_STATE:
2226         case PACKET3_DISPATCH_DIRECT:
2227         case PACKET3_DISPATCH_INDIRECT:
2228         case PACKET3_ALLOC_GDS:
2229         case PACKET3_WRITE_GDS_RAM:
2230         case PACKET3_ATOMIC_GDS:
2231         case PACKET3_ATOMIC:
2232         case PACKET3_OCCLUSION_QUERY:
2233         case PACKET3_SET_PREDICATION:
2234         case PACKET3_COND_EXEC:
2235         case PACKET3_PRED_EXEC:
2236         case PACKET3_CONTEXT_CONTROL:
2237         case PACKET3_STRMOUT_BUFFER_UPDATE:
2238         case PACKET3_WAIT_REG_MEM:
2239         case PACKET3_MEM_WRITE:
2240         case PACKET3_PFP_SYNC_ME:
2241         case PACKET3_SURFACE_SYNC:
2242         case PACKET3_EVENT_WRITE:
2243         case PACKET3_EVENT_WRITE_EOP:
2244         case PACKET3_EVENT_WRITE_EOS:
2245         case PACKET3_SET_CONTEXT_REG:
2246         case PACKET3_SET_CONTEXT_REG_INDIRECT:
2247         case PACKET3_SET_SH_REG:
2248         case PACKET3_SET_SH_REG_OFFSET:
2249         case PACKET3_INCREMENT_DE_COUNTER:
2250         case PACKET3_WAIT_ON_CE_COUNTER:
2251         case PACKET3_WAIT_ON_AVAIL_BUFFER:
2252         case PACKET3_ME_WRITE:
2253                 break;
2254         case PACKET3_COPY_DATA:
2255                 if ((idx_value & 0xf00) == 0) {
2256                         reg = ib[idx + 3] * 4;
2257                         if (!si_vm_reg_valid(reg))
2258                                 return -EINVAL;
2259                 }
2260                 break;
2261         case PACKET3_WRITE_DATA:
2262                 if ((idx_value & 0xf00) == 0) {
2263                         start_reg = ib[idx + 1] * 4;
2264                         if (idx_value & 0x10000) {
2265                                 if (!si_vm_reg_valid(start_reg))
2266                                         return -EINVAL;
2267                         } else {
2268                                 for (i = 0; i < (pkt->count - 2); i++) {
2269                                         reg = start_reg + (4 * i);
2270                                         if (!si_vm_reg_valid(reg))
2271                                                 return -EINVAL;
2272                                 }
2273                         }
2274                 }
2275                 break;
2276         case PACKET3_COND_WRITE:
2277                 if (idx_value & 0x100) {
2278                         reg = ib[idx + 5] * 4;
2279                         if (!si_vm_reg_valid(reg))
2280                                 return -EINVAL;
2281                 }
2282                 break;
2283         case PACKET3_COPY_DW:
2284                 if (idx_value & 0x2) {
2285                         reg = ib[idx + 3] * 4;
2286                         if (!si_vm_reg_valid(reg))
2287                                 return -EINVAL;
2288                 }
2289                 break;
2290         default:
2291                 DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
2292                 return -EINVAL;
2293         }
2294         return 0;
2295 }
2296
2297 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
2298 {
2299         int ret = 0;
2300         u32 idx = 0;
2301         struct radeon_cs_packet pkt;
2302
2303         do {
2304                 pkt.idx = idx;
2305                 pkt.type = CP_PACKET_GET_TYPE(ib->ptr[idx]);
2306                 pkt.count = CP_PACKET_GET_COUNT(ib->ptr[idx]);
2307                 pkt.one_reg_wr = 0;
2308                 switch (pkt.type) {
2309                 case PACKET_TYPE0:
2310                         dev_err(rdev->dev, "Packet0 not allowed!\n");
2311                         ret = -EINVAL;
2312                         break;
2313                 case PACKET_TYPE2:
2314                         idx += 1;
2315                         break;
2316                 case PACKET_TYPE3:
2317                         pkt.opcode = CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
2318                         if (ib->is_const_ib)
2319                                 ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
2320                         else {
2321                                 switch (ib->fence->ring) {
2322                                 case RADEON_RING_TYPE_GFX_INDEX:
2323                                         ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
2324                                         break;
2325                                 case CAYMAN_RING_TYPE_CP1_INDEX:
2326                                 case CAYMAN_RING_TYPE_CP2_INDEX:
2327                                         ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
2328                                         break;
2329                                 default:
2330                                         dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->fence->ring);
2331                                         ret = -EINVAL;
2332                                         break;
2333                                 }
2334                         }
2335                         idx += pkt.count + 2;
2336                         break;
2337                 default:
2338                         dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
2339                         ret = -EINVAL;
2340                         break;
2341                 }
2342                 if (ret)
2343                         break;
2344         } while (idx < ib->length_dw);
2345
2346         return ret;
2347 }
2348
2349 /*
2350  * vm
2351  */
2352 int si_vm_init(struct radeon_device *rdev)
2353 {
2354         /* number of VMs */
2355         rdev->vm_manager.nvm = 16;
2356         /* base offset of vram pages */
2357         rdev->vm_manager.vram_base_offset = 0;
2358
2359         return 0;
2360 }
2361
/*
 * VM manager teardown hook for SI.
 */
void si_vm_fini(struct radeon_device *rdev)
{
	/* si_vm_init() allocates nothing, so there is nothing to release */
}
2365
2366 int si_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm, int id)
2367 {
2368         if (id < 8)
2369                 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (id << 2), vm->pt_gpu_addr >> 12);
2370         else
2371                 WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((id - 8) << 2),
2372                        vm->pt_gpu_addr >> 12);
2373         /* flush hdp cache */
2374         WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
2375         /* bits 0-15 are the VM contexts0-15 */
2376         WREG32(VM_INVALIDATE_REQUEST, 1 << id);
2377         return 0;
2378 }
2379
2380 void si_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm)
2381 {
2382         if (vm->id < 8)
2383                 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2), 0);
2384         else
2385                 WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2), 0);
2386         /* flush hdp cache */
2387         WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
2388         /* bits 0-15 are the VM contexts0-15 */
2389         WREG32(VM_INVALIDATE_REQUEST, 1 << vm->id);
2390 }
2391
2392 void si_vm_tlb_flush(struct radeon_device *rdev, struct radeon_vm *vm)
2393 {
2394         if (vm->id == -1)
2395                 return;
2396
2397         /* flush hdp cache */
2398         WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
2399         /* bits 0-15 are the VM contexts0-15 */
2400         WREG32(VM_INVALIDATE_REQUEST, 1 << vm->id);
2401 }
2402