drm/radeon/cik: enable/disable vce cg when encoding v2
[firefly-linux-kernel-4.4.55.git] / drivers / gpu / drm / radeon / cik.c
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include "drmP.h"
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "cikd.h"
31 #include "atom.h"
32 #include "cik_blit_shaders.h"
33 #include "radeon_ucode.h"
34 #include "clearstate_ci.h"
35
36 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
37 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
38 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
39 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
40 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
41 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
42 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
43 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
44 MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
45 MODULE_FIRMWARE("radeon/HAWAII_me.bin");
46 MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
47 MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
48 MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
49 MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
50 MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
51 MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
52 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
53 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
54 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
55 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
56 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
57 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
58 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
59 MODULE_FIRMWARE("radeon/KABINI_me.bin");
60 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
61 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
62 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
63 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
64
65 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
66 extern void r600_ih_ring_fini(struct radeon_device *rdev);
67 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
68 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
69 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
70 extern void sumo_rlc_fini(struct radeon_device *rdev);
71 extern int sumo_rlc_init(struct radeon_device *rdev);
72 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
73 extern void si_rlc_reset(struct radeon_device *rdev);
74 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
75 extern int cik_sdma_resume(struct radeon_device *rdev);
76 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
77 extern void cik_sdma_fini(struct radeon_device *rdev);
78 extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
79 static void cik_rlc_stop(struct radeon_device *rdev);
80 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
81 static void cik_program_aspm(struct radeon_device *rdev);
82 static void cik_init_pg(struct radeon_device *rdev);
83 static void cik_init_cg(struct radeon_device *rdev);
84 static void cik_fini_pg(struct radeon_device *rdev);
85 static void cik_fini_cg(struct radeon_device *rdev);
86 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
87                                           bool enable);
88
89 /* get temperature in millidegrees */
90 int ci_get_temp(struct radeon_device *rdev)
91 {
92         u32 temp;
93         int actual_temp = 0;
94
95         temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
96                 CTF_TEMP_SHIFT;
97
98         if (temp & 0x200)
99                 actual_temp = 255;
100         else
101                 actual_temp = temp & 0x1ff;
102
103         actual_temp = actual_temp * 1000;
104
105         return actual_temp;
106 }
107
108 /* get temperature in millidegrees */
109 int kv_get_temp(struct radeon_device *rdev)
110 {
111         u32 temp;
112         int actual_temp = 0;
113
114         temp = RREG32_SMC(0xC0300E0C);
115
116         if (temp)
117                 actual_temp = (temp / 8) - 49;
118         else
119                 actual_temp = 0;
120
121         actual_temp = actual_temp * 1000;
122
123         return actual_temp;
124 }
125
126 /*
127  * Indirect registers accessor
128  */
129 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
130 {
131         unsigned long flags;
132         u32 r;
133
134         spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
135         WREG32(PCIE_INDEX, reg);
136         (void)RREG32(PCIE_INDEX);
137         r = RREG32(PCIE_DATA);
138         spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
139         return r;
140 }
141
142 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
143 {
144         unsigned long flags;
145
146         spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
147         WREG32(PCIE_INDEX, reg);
148         (void)RREG32(PCIE_INDEX);
149         WREG32(PCIE_DATA, v);
150         (void)RREG32(PCIE_DATA);
151         spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
152 }
153
/*
 * RLC save/restore register list for Spectre (GFX7, Kaveri-class) parts.
 * This table is handed to the RLC microcode and is not interpreted by
 * driver code in this file.
 *
 * Entries appear to come in pairs:
 *   ((GRBM_GFX_INDEX-style SE/SH select) << 16) | (register byte offset >> 2),
 *   followed by one data slot initialised to 0.
 * The bare small words below (0x3, 0x5) appear to delimit list sections;
 * the final section carries register selectors only, with no data slots.
 *
 * NOTE(review): the exact layout is defined by the RLC ucode interface --
 * confirm against the CIK RLC documentation before editing any entry.
 */
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,	/* section delimiter -- see header comment */
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac  >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,	/* section delimiter -- entries below have no data slots */
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
600
/*
 * RLC save/restore register list for Kalindi (GFX7, Kabini-class) parts.
 * Same apparent format as the Spectre list above: pairs of
 *   ((GRBM_GFX_INDEX-style SE/SH select) << 16) | (register byte offset >> 2)
 * followed by one data slot initialised to 0, with bare 0x3 / 0x5 words
 * appearing to delimit sections (the final section has no data slots).
 * Kalindi has fewer shader engines, hence fewer per-SE banked entries.
 *
 * NOTE(review): the exact layout is defined by the RLC ucode interface --
 * confirm against the CIK RLC documentation before editing any entry.
 */
static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,	/* section delimiter -- see header comment */
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,	/* section delimiter -- entries below have no data slots */
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
925
/* Bonaire SPM golden settings; entries appear to be {offset, mask, value}
 * triples (3-word stride — confirm against radeon_program_register_sequence).
 * Applied last by cik_init_golden_registers().
 */
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

/* Bonaire golden settings shared with the other sequences; applied third. */
static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

/* Bonaire per-ASIC golden register settings; applied second. */
static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};

/* Bonaire MGCG/CGCG (clock gating) init sequence; applied first. */
static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1069
/* Spectre (Kaveri) SPM golden settings; entries appear to be
 * {offset, mask, value} triples (confirm against
 * radeon_program_register_sequence). Applied last by
 * cik_init_golden_registers().
 */
static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

/* Spectre common golden settings; applied third. */
static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

/* Spectre per-ASIC golden register settings; applied second. */
static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28355, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};

/* Spectre MGCG/CGCG (clock gating) init sequence; applied first. */
static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1202
/* Kalindi (Kabini) SPM golden settings; entries appear to be
 * {offset, mask, value} triples (confirm against
 * radeon_program_register_sequence). Applied last by
 * cik_init_golden_registers().
 */
static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

/* Kalindi common golden settings; applied third. */
static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

/* Kalindi per-ASIC golden register settings; applied second. */
static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

/* Kalindi MGCG/CGCG (clock gating) init sequence; applied first. */
static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1308
/* Hawaii SPM golden settings; entries appear to be {offset, mask, value}
 * triples (confirm against radeon_program_register_sequence). Applied last
 * by cik_init_golden_registers().
 */
static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

/* Hawaii common golden settings; applied third. */
static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};

/* Hawaii per-ASIC golden register settings; applied second. */
static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};

/* Hawaii MGCG/CGCG (clock gating) init sequence; applied first. */
static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1473
1474 static void cik_init_golden_registers(struct radeon_device *rdev)
1475 {
1476         switch (rdev->family) {
1477         case CHIP_BONAIRE:
1478                 radeon_program_register_sequence(rdev,
1479                                                  bonaire_mgcg_cgcg_init,
1480                                                  (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1481                 radeon_program_register_sequence(rdev,
1482                                                  bonaire_golden_registers,
1483                                                  (const u32)ARRAY_SIZE(bonaire_golden_registers));
1484                 radeon_program_register_sequence(rdev,
1485                                                  bonaire_golden_common_registers,
1486                                                  (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1487                 radeon_program_register_sequence(rdev,
1488                                                  bonaire_golden_spm_registers,
1489                                                  (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1490                 break;
1491         case CHIP_KABINI:
1492                 radeon_program_register_sequence(rdev,
1493                                                  kalindi_mgcg_cgcg_init,
1494                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1495                 radeon_program_register_sequence(rdev,
1496                                                  kalindi_golden_registers,
1497                                                  (const u32)ARRAY_SIZE(kalindi_golden_registers));
1498                 radeon_program_register_sequence(rdev,
1499                                                  kalindi_golden_common_registers,
1500                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1501                 radeon_program_register_sequence(rdev,
1502                                                  kalindi_golden_spm_registers,
1503                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1504                 break;
1505         case CHIP_KAVERI:
1506                 radeon_program_register_sequence(rdev,
1507                                                  spectre_mgcg_cgcg_init,
1508                                                  (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1509                 radeon_program_register_sequence(rdev,
1510                                                  spectre_golden_registers,
1511                                                  (const u32)ARRAY_SIZE(spectre_golden_registers));
1512                 radeon_program_register_sequence(rdev,
1513                                                  spectre_golden_common_registers,
1514                                                  (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1515                 radeon_program_register_sequence(rdev,
1516                                                  spectre_golden_spm_registers,
1517                                                  (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1518                 break;
1519         case CHIP_HAWAII:
1520                 radeon_program_register_sequence(rdev,
1521                                                  hawaii_mgcg_cgcg_init,
1522                                                  (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1523                 radeon_program_register_sequence(rdev,
1524                                                  hawaii_golden_registers,
1525                                                  (const u32)ARRAY_SIZE(hawaii_golden_registers));
1526                 radeon_program_register_sequence(rdev,
1527                                                  hawaii_golden_common_registers,
1528                                                  (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1529                 radeon_program_register_sequence(rdev,
1530                                                  hawaii_golden_spm_registers,
1531                                                  (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1532                 break;
1533         default:
1534                 break;
1535         }
1536 }
1537
1538 /**
1539  * cik_get_xclk - get the xclk
1540  *
1541  * @rdev: radeon_device pointer
1542  *
1543  * Returns the reference clock used by the gfx engine
1544  * (CIK).
1545  */
1546 u32 cik_get_xclk(struct radeon_device *rdev)
1547 {
1548         u32 reference_clock = rdev->clock.spll.reference_freq;
1549
1550         if (rdev->flags & RADEON_IS_IGP) {
1551                 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1552                         return reference_clock / 2;
1553         } else {
1554                 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1555                         return reference_clock / 4;
1556         }
1557         return reference_clock;
1558 }
1559
1560 /**
1561  * cik_mm_rdoorbell - read a doorbell dword
1562  *
1563  * @rdev: radeon_device pointer
1564  * @index: doorbell index
1565  *
1566  * Returns the value in the doorbell aperture at the
1567  * requested doorbell index (CIK).
1568  */
1569 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1570 {
1571         if (index < rdev->doorbell.num_doorbells) {
1572                 return readl(rdev->doorbell.ptr + index);
1573         } else {
1574                 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1575                 return 0;
1576         }
1577 }
1578
1579 /**
1580  * cik_mm_wdoorbell - write a doorbell dword
1581  *
1582  * @rdev: radeon_device pointer
1583  * @index: doorbell index
1584  * @v: value to write
1585  *
1586  * Writes @v to the doorbell aperture at the
1587  * requested doorbell index (CIK).
1588  */
1589 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1590 {
1591         if (index < rdev->doorbell.num_doorbells) {
1592                 writel(v, rdev->doorbell.ptr + index);
1593         } else {
1594                 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1595         }
1596 }
1597
#define BONAIRE_IO_MC_REGS_SIZE 36

/* Bonaire MC {address, data} pairs written while loading the MC ucode
 * (consumed by ci_mc_load_microcode).
 */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
1639
#define HAWAII_IO_MC_REGS_SIZE 22

/* Hawaii MC {address, data} pairs written while loading the MC ucode
 * (consumed by ci_mc_load_microcode).
 */
static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};
1667
1668
1669 /**
1670  * cik_srbm_select - select specific register instances
1671  *
1672  * @rdev: radeon_device pointer
1673  * @me: selected ME (micro engine)
1674  * @pipe: pipe
1675  * @queue: queue
1676  * @vmid: VMID
1677  *
1678  * Switches the currently active registers instances.  Some
1679  * registers are instanced per VMID, others are instanced per
1680  * me/pipe/queue combination.
1681  */
1682 static void cik_srbm_select(struct radeon_device *rdev,
1683                             u32 me, u32 pipe, u32 queue, u32 vmid)
1684 {
1685         u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1686                              MEID(me & 0x3) |
1687                              VMID(vmid & 0xf) |
1688                              QUEUEID(queue & 0x7));
1689         WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1690 }
1691
1692 /* ucode loading */
1693 /**
1694  * ci_mc_load_microcode - load MC ucode into the hw
1695  *
1696  * @rdev: radeon_device pointer
1697  *
1698  * Load the GDDR MC ucode into the hw (CIK).
1699  * Returns 0 on success, error on failure.
1700  */
1701 int ci_mc_load_microcode(struct radeon_device *rdev)
1702 {
1703         const __be32 *fw_data;
1704         u32 running, blackout = 0;
1705         u32 *io_mc_regs;
1706         int i, ucode_size, regs_size;
1707
1708         if (!rdev->mc_fw)
1709                 return -EINVAL;
1710
1711         switch (rdev->family) {
1712         case CHIP_BONAIRE:
1713                 io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1714                 ucode_size = CIK_MC_UCODE_SIZE;
1715                 regs_size = BONAIRE_IO_MC_REGS_SIZE;
1716                 break;
1717         case CHIP_HAWAII:
1718                 io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1719                 ucode_size = HAWAII_MC_UCODE_SIZE;
1720                 regs_size = HAWAII_IO_MC_REGS_SIZE;
1721                 break;
1722         default:
1723                 return -EINVAL;
1724         }
1725
1726         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1727
1728         if (running == 0) {
1729                 if (running) {
1730                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1731                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1732                 }
1733
1734                 /* reset the engine and set to writable */
1735                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1736                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1737
1738                 /* load mc io regs */
1739                 for (i = 0; i < regs_size; i++) {
1740                         WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1741                         WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1742                 }
1743                 /* load the MC ucode */
1744                 fw_data = (const __be32 *)rdev->mc_fw->data;
1745                 for (i = 0; i < ucode_size; i++)
1746                         WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1747
1748                 /* put the engine back into the active state */
1749                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1750                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1751                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1752
1753                 /* wait for training to complete */
1754                 for (i = 0; i < rdev->usec_timeout; i++) {
1755                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1756                                 break;
1757                         udelay(1);
1758                 }
1759                 for (i = 0; i < rdev->usec_timeout; i++) {
1760                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1761                                 break;
1762                         udelay(1);
1763                 }
1764
1765                 if (running)
1766                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1767         }
1768
1769         return 0;
1770 }
1771
1772 /**
1773  * cik_init_microcode - load ucode images from disk
1774  *
1775  * @rdev: radeon_device pointer
1776  *
1777  * Use the firmware interface to load the ucode images into
1778  * the driver (not loaded into hw).
1779  * Returns 0 on success, error on failure.
1780  */
1781 static int cik_init_microcode(struct radeon_device *rdev)
1782 {
1783         const char *chip_name;
1784         size_t pfp_req_size, me_req_size, ce_req_size,
1785                 mec_req_size, rlc_req_size, mc_req_size = 0,
1786                 sdma_req_size, smc_req_size = 0;
1787         char fw_name[30];
1788         int err;
1789
1790         DRM_DEBUG("\n");
1791
1792         switch (rdev->family) {
1793         case CHIP_BONAIRE:
1794                 chip_name = "BONAIRE";
1795                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1796                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1797                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1798                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1799                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1800                 mc_req_size = CIK_MC_UCODE_SIZE * 4;
1801                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1802                 smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1803                 break;
1804         case CHIP_HAWAII:
1805                 chip_name = "HAWAII";
1806                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1807                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1808                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1809                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1810                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1811                 mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
1812                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1813                 smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
1814                 break;
1815         case CHIP_KAVERI:
1816                 chip_name = "KAVERI";
1817                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1818                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1819                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1820                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1821                 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1822                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1823                 break;
1824         case CHIP_KABINI:
1825                 chip_name = "KABINI";
1826                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1827                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1828                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1829                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1830                 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1831                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1832                 break;
1833         default: BUG();
1834         }
1835
1836         DRM_INFO("Loading %s Microcode\n", chip_name);
1837
1838         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1839         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1840         if (err)
1841                 goto out;
1842         if (rdev->pfp_fw->size != pfp_req_size) {
1843                 printk(KERN_ERR
1844                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1845                        rdev->pfp_fw->size, fw_name);
1846                 err = -EINVAL;
1847                 goto out;
1848         }
1849
1850         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1851         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1852         if (err)
1853                 goto out;
1854         if (rdev->me_fw->size != me_req_size) {
1855                 printk(KERN_ERR
1856                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1857                        rdev->me_fw->size, fw_name);
1858                 err = -EINVAL;
1859         }
1860
1861         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1862         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1863         if (err)
1864                 goto out;
1865         if (rdev->ce_fw->size != ce_req_size) {
1866                 printk(KERN_ERR
1867                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1868                        rdev->ce_fw->size, fw_name);
1869                 err = -EINVAL;
1870         }
1871
1872         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
1873         err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
1874         if (err)
1875                 goto out;
1876         if (rdev->mec_fw->size != mec_req_size) {
1877                 printk(KERN_ERR
1878                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1879                        rdev->mec_fw->size, fw_name);
1880                 err = -EINVAL;
1881         }
1882
1883         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1884         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1885         if (err)
1886                 goto out;
1887         if (rdev->rlc_fw->size != rlc_req_size) {
1888                 printk(KERN_ERR
1889                        "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
1890                        rdev->rlc_fw->size, fw_name);
1891                 err = -EINVAL;
1892         }
1893
1894         snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
1895         err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
1896         if (err)
1897                 goto out;
1898         if (rdev->sdma_fw->size != sdma_req_size) {
1899                 printk(KERN_ERR
1900                        "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
1901                        rdev->sdma_fw->size, fw_name);
1902                 err = -EINVAL;
1903         }
1904
1905         /* No SMC, MC ucode on APUs */
1906         if (!(rdev->flags & RADEON_IS_IGP)) {
1907                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1908                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1909                 if (err)
1910                         goto out;
1911                 if (rdev->mc_fw->size != mc_req_size) {
1912                         printk(KERN_ERR
1913                                "cik_mc: Bogus length %zu in firmware \"%s\"\n",
1914                                rdev->mc_fw->size, fw_name);
1915                         err = -EINVAL;
1916                 }
1917
1918                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1919                 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1920                 if (err) {
1921                         printk(KERN_ERR
1922                                "smc: error loading firmware \"%s\"\n",
1923                                fw_name);
1924                         release_firmware(rdev->smc_fw);
1925                         rdev->smc_fw = NULL;
1926                         err = 0;
1927                 } else if (rdev->smc_fw->size != smc_req_size) {
1928                         printk(KERN_ERR
1929                                "cik_smc: Bogus length %zu in firmware \"%s\"\n",
1930                                rdev->smc_fw->size, fw_name);
1931                         err = -EINVAL;
1932                 }
1933         }
1934
1935 out:
1936         if (err) {
1937                 if (err != -EINVAL)
1938                         printk(KERN_ERR
1939                                "cik_cp: Failed to load firmware \"%s\"\n",
1940                                fw_name);
1941                 release_firmware(rdev->pfp_fw);
1942                 rdev->pfp_fw = NULL;
1943                 release_firmware(rdev->me_fw);
1944                 rdev->me_fw = NULL;
1945                 release_firmware(rdev->ce_fw);
1946                 rdev->ce_fw = NULL;
1947                 release_firmware(rdev->rlc_fw);
1948                 rdev->rlc_fw = NULL;
1949                 release_firmware(rdev->mc_fw);
1950                 rdev->mc_fw = NULL;
1951                 release_firmware(rdev->smc_fw);
1952                 rdev->smc_fw = NULL;
1953         }
1954         return err;
1955 }
1956
1957 /*
1958  * Core functions
1959  */
1960 /**
1961  * cik_tiling_mode_table_init - init the hw tiling table
1962  *
1963  * @rdev: radeon_device pointer
1964  *
1965  * Starting with SI, the tiling setup is done globally in a
1966  * set of 32 tiling modes.  Rather than selecting each set of
1967  * parameters per surface as on older asics, we just select
1968  * which index in the tiling table we want to use, and the
1969  * surface uses those parameters (CIK).
1970  */
1971 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
1972 {
1973         const u32 num_tile_mode_states = 32;
1974         const u32 num_secondary_tile_mode_states = 16;
1975         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
1976         u32 num_pipe_configs;
1977         u32 num_rbs = rdev->config.cik.max_backends_per_se *
1978                 rdev->config.cik.max_shader_engines;
1979
1980         switch (rdev->config.cik.mem_row_size_in_kb) {
1981         case 1:
1982                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
1983                 break;
1984         case 2:
1985         default:
1986                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
1987                 break;
1988         case 4:
1989                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
1990                 break;
1991         }
1992
1993         num_pipe_configs = rdev->config.cik.max_tile_pipes;
1994         if (num_pipe_configs > 8)
1995                 num_pipe_configs = 16;
1996
1997         if (num_pipe_configs == 16) {
1998                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1999                         switch (reg_offset) {
2000                         case 0:
2001                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2002                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2003                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2004                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2005                                 break;
2006                         case 1:
2007                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2008                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2009                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2010                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2011                                 break;
2012                         case 2:
2013                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2014                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2015                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2016                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2017                                 break;
2018                         case 3:
2019                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2020                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2021                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2022                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2023                                 break;
2024                         case 4:
2025                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2026                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2027                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2028                                                  TILE_SPLIT(split_equal_to_row_size));
2029                                 break;
2030                         case 5:
2031                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2032                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2033                                 break;
2034                         case 6:
2035                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2036                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2037                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2038                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2039                                 break;
2040                         case 7:
2041                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2042                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2043                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2044                                                  TILE_SPLIT(split_equal_to_row_size));
2045                                 break;
2046                         case 8:
2047                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2048                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2049                                 break;
2050                         case 9:
2051                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2052                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2053                                 break;
2054                         case 10:
2055                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2056                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2057                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2058                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2059                                 break;
2060                         case 11:
2061                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2062                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2063                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2064                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2065                                 break;
2066                         case 12:
2067                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2068                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2069                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2070                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2071                                 break;
2072                         case 13:
2073                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2074                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2075                                 break;
2076                         case 14:
2077                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2078                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2079                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2080                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2081                                 break;
2082                         case 16:
2083                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2084                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2085                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2086                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2087                                 break;
2088                         case 17:
2089                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2090                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2091                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2092                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2093                                 break;
2094                         case 27:
2095                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2096                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2097                                 break;
2098                         case 28:
2099                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2100                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2101                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2102                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2103                                 break;
2104                         case 29:
2105                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2106                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2107                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2108                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2109                                 break;
2110                         case 30:
2111                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2112                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2113                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2114                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2115                                 break;
2116                         default:
2117                                 gb_tile_moden = 0;
2118                                 break;
2119                         }
2120                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2121                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2122                 }
2123                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2124                         switch (reg_offset) {
2125                         case 0:
2126                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2127                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2128                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2129                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2130                                 break;
2131                         case 1:
2132                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2133                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2134                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2135                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2136                                 break;
2137                         case 2:
2138                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2139                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2140                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2141                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2142                                 break;
2143                         case 3:
2144                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2145                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2146                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2147                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2148                                 break;
2149                         case 4:
2150                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2151                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2152                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2153                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2154                                 break;
2155                         case 5:
2156                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2157                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2158                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2159                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2160                                 break;
2161                         case 6:
2162                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2163                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2164                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2165                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2166                                 break;
2167                         case 8:
2168                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2169                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2170                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2171                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2172                                 break;
2173                         case 9:
2174                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2175                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2176                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2177                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2178                                 break;
2179                         case 10:
2180                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2181                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2182                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2183                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2184                                 break;
2185                         case 11:
2186                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2187                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2188                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2189                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2190                                 break;
2191                         case 12:
2192                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2193                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2194                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2195                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2196                                 break;
2197                         case 13:
2198                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2199                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2200                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2201                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2202                                 break;
2203                         case 14:
2204                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2205                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2206                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2207                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2208                                 break;
2209                         default:
2210                                 gb_tile_moden = 0;
2211                                 break;
2212                         }
2213                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2214                 }
2215         } else if (num_pipe_configs == 8) {
                     /*
                      * 8 pipe configuration: program one GB_TILE_MODEn entry per
                      * tiling index.  Each entry packs the array mode, the new
                      * micro tile mode, the pipe config (P8_32x32_16x16, or the
                      * narrower P8_32x32_8x16 for the PRT thin modes) and a
                      * tile/sample split.  Any index without an explicit case
                      * (e.g. 15, 18-26) is programmed to 0 via the default.
                      */
2216                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2217                         switch (reg_offset) {
2218                         case 0:
2219                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2220                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2221                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2222                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2223                                 break;
2224                         case 1:
2225                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2226                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2227                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2228                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2229                                 break;
2230                         case 2:
2231                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2232                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2233                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2234                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2235                                 break;
2236                         case 3:
2237                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2238                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2239                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2240                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2241                                 break;
2242                         case 4:
2243                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2244                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2245                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2246                                                  TILE_SPLIT(split_equal_to_row_size));
2247                                 break;
2248                         case 5:
2249                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2250                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2251                                 break;
2252                         case 6:
2253                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2254                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2255                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2256                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2257                                 break;
2258                         case 7:
2259                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2260                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2261                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2262                                                  TILE_SPLIT(split_equal_to_row_size));
2263                                 break;
2264                         case 8:
2265                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2266                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2267                                 break;
2268                         case 9:
2269                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2270                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2271                                 break;
2272                         case 10:
2273                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2274                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2275                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2276                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2277                                 break;
2278                         case 11:
2279                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2280                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2281                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2282                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2283                                 break;
2284                         case 12:
2285                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2286                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2287                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2288                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2289                                 break;
2290                         case 13:
2291                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2292                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2293                                 break;
2294                         case 14:
2295                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2296                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2297                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2298                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2299                                 break;
2300                         case 16:
2301                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2302                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2303                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2304                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2305                                 break;
2306                         case 17:
2307                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2308                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2309                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2310                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2311                                 break;
2312                         case 27:
2313                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2314                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2315                                 break;
2316                         case 28:
2317                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2318                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2319                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2320                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2321                                 break;
2322                         case 29:
2323                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2324                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2325                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2326                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2327                                 break;
2328                         case 30:
2329                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2330                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2331                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2332                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2333                                 break;
2334                         default:
2335                                 gb_tile_moden = 0;
2336                                 break;
2337                         }
                             /* keep a CPU copy of exactly what was written to the register */
2338                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2339                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2340                 }
                     /*
                      * Macrotile (secondary) table: bank width/height, macro tile
                      * aspect and bank count per index; index 7 and indices >= 15
                      * have no case and are written as 0.
                      */
2341                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2342                         switch (reg_offset) {
2343                         case 0:
2344                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2345                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2346                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2347                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2348                                 break;
2349                         case 1:
2350                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2351                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2352                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2353                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2354                                 break;
2355                         case 2:
2356                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2357                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2358                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2359                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2360                                 break;
2361                         case 3:
2362                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2363                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2364                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2365                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2366                                 break;
2367                         case 4:
2368                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2369                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2370                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2371                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2372                                 break;
2373                         case 5:
2374                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2375                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2376                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2377                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2378                                 break;
2379                         case 6:
2380                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2381                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2382                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2383                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2384                                 break;
2385                         case 8:
2386                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2387                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2388                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2389                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2390                                 break;
2391                         case 9:
2392                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2393                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2394                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2395                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2396                                 break;
2397                         case 10:
2398                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2399                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2400                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2401                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2402                                 break;
2403                         case 11:
2404                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2405                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2406                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2407                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2408                                 break;
2409                         case 12:
2410                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2411                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2412                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2413                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2414                                 break;
2415                         case 13:
2416                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2417                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2418                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2419                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2420                                 break;
2421                         case 14:
2422                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2423                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2424                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2425                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2426                                 break;
2427                         default:
2428                                 gb_tile_moden = 0;
2429                                 break;
2430                         }
                             /* cache the programmed macrotile value alongside the MMIO write */
2431                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2432                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2433                 }
2434         } else if (num_pipe_configs == 4) {
                     /*
                      * 4 pipe configuration: the pipe config used in the tile
                      * mode table depends on the number of render backends.
                      */
2435                 if (num_rbs == 4) {
                             /*
                              * 4 pipes with 4 RBs: mostly ADDR_SURF_P4_16x16,
                              * with P4_8x16 for the PRT thin modes.  Indices
                              * without a case (e.g. 15, 18-26) default to 0.
                              */
2436                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2437                                 switch (reg_offset) {
2438                                 case 0:
2439                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2440                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2441                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2442                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2443                                         break;
2444                                 case 1:
2445                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2446                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2447                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2448                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2449                                         break;
2450                                 case 2:
2451                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2452                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2453                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2454                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2455                                         break;
2456                                 case 3:
2457                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2458                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2459                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2460                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2461                                         break;
2462                                 case 4:
2463                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2464                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2465                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2466                                                          TILE_SPLIT(split_equal_to_row_size));
2467                                         break;
2468                                 case 5:
2469                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2470                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2471                                         break;
2472                                 case 6:
2473                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2474                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2475                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2476                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2477                                         break;
2478                                 case 7:
2479                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2480                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2481                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2482                                                          TILE_SPLIT(split_equal_to_row_size));
2483                                         break;
2484                                 case 8:
2485                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2486                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16));
2487                                         break;
2488                                 case 9:
2489                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2490                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2491                                         break;
2492                                 case 10:
2493                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2494                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2495                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2496                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2497                                         break;
2498                                 case 11:
2499                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2500                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2501                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2502                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2503                                         break;
2504                                 case 12:
2505                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2506                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2507                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2508                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2509                                         break;
2510                                 case 13:
2511                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2512                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2513                                         break;
2514                                 case 14:
2515                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2516                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2517                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2518                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2519                                         break;
2520                                 case 16:
2521                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2522                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2523                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2524                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2525                                         break;
2526                                 case 17:
2527                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2528                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2529                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2530                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2531                                         break;
2532                                 case 27:
2533                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2534                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2535                                         break;
2536                                 case 28:
                                             /*
                                              * NOTE(review): this table uses ARRAY_PRT_2D_TILED_THIN1
                                              * for index 28 while the 8-pipe table uses
                                              * ARRAY_2D_TILED_THIN1 for the same index — confirm the
                                              * difference is intentional.
                                              */
2537                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2538                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2539                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2540                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2541                                         break;
2542                                 case 29:
2543                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2544                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2545                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2546                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2547                                         break;
2548                                 case 30:
2549                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2550                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2551                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2552                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2553                                         break;
2554                                 default:
2555                                         gb_tile_moden = 0;
2556                                         break;
2557                                 }
                                     /* keep a CPU copy of exactly what was written to the register */
2558                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2559                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2560                         }
2561                 } else if (num_rbs < 4) {
                             /*
                              * 4 pipes with fewer than 4 RBs: same table layout
                              * as the 4-RB case above, but with the narrower
                              * ADDR_SURF_P4_8x16 pipe config throughout.
                              */
2562                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2563                                 switch (reg_offset) {
2564                                 case 0:
2565                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2566                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2567                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2568                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2569                                         break;
2570                                 case 1:
2571                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2572                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2573                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2574                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2575                                         break;
2576                                 case 2:
2577                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2578                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2579                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2580                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2581                                         break;
2582                                 case 3:
2583                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2584                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2585                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2586                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2587                                         break;
2588                                 case 4:
2589                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2590                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2591                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2592                                                          TILE_SPLIT(split_equal_to_row_size));
2593                                         break;
2594                                 case 5:
2595                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2596                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2597                                         break;
2598                                 case 6:
2599                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2600                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2601                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2602                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2603                                         break;
2604                                 case 7:
2605                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2606                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2607                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2608                                                          TILE_SPLIT(split_equal_to_row_size));
2609                                         break;
2610                                 case 8:
2611                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2612                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16));
2613                                         break;
2614                                 case 9:
2615                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2616                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2617                                         break;
2618                                 case 10:
2619                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2620                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2621                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2622                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2623                                         break;
2624                                 case 11:
2625                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2626                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2627                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2628                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2629                                         break;
2630                                 case 12:
2631                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2632                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2633                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2634                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2635                                         break;
2636                                 case 13:
2637                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2638                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2639                                         break;
2640                                 case 14:
2641                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2642                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2643                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2644                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2645                                         break;
2646                                 case 16:
2647                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2648                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2649                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2650                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2651                                         break;
2652                                 case 17:
2653                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2654                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2655                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2656                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2657                                         break;
2658                                 case 27:
2659                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2660                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2661                                         break;
2662                                 case 28:
2663                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2664                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2665                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2666                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2667                                         break;
2668                                 case 29:
2669                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2670                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2671                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2672                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2673                                         break;
2674                                 case 30:
2675                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2676                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2677                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2678                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2679                                         break;
2680                                 default:
2681                                         gb_tile_moden = 0;
2682                                         break;
2683                                 }
2684                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2685                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2686                         }
2687                 }
2688                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2689                         switch (reg_offset) {
2690                         case 0:
2691                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2692                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2693                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2694                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2695                                 break;
2696                         case 1:
2697                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2698                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2699                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2700                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2701                                 break;
2702                         case 2:
2703                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2704                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2705                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2706                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2707                                 break;
2708                         case 3:
2709                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2710                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2711                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2712                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2713                                 break;
2714                         case 4:
2715                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2716                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2717                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2718                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2719                                 break;
2720                         case 5:
2721                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2722                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2723                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2724                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2725                                 break;
2726                         case 6:
2727                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2728                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2729                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2730                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2731                                 break;
2732                         case 8:
2733                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2734                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2735                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2736                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2737                                 break;
2738                         case 9:
2739                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2740                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2741                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2742                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2743                                 break;
2744                         case 10:
2745                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2746                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2747                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2748                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2749                                 break;
2750                         case 11:
2751                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2752                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2753                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2754                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2755                                 break;
2756                         case 12:
2757                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2758                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2759                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2760                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2761                                 break;
2762                         case 13:
2763                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2764                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2765                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2766                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2767                                 break;
2768                         case 14:
2769                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2770                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2771                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2772                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2773                                 break;
2774                         default:
2775                                 gb_tile_moden = 0;
2776                                 break;
2777                         }
2778                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2779                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2780                 }
2781         } else if (num_pipe_configs == 2) {
2782                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2783                         switch (reg_offset) {
2784                         case 0:
2785                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2786                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2787                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2788                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2789                                 break;
2790                         case 1:
2791                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2792                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2793                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2794                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2795                                 break;
2796                         case 2:
2797                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2798                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2799                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2800                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2801                                 break;
2802                         case 3:
2803                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2804                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2805                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2806                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2807                                 break;
2808                         case 4:
2809                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2810                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2811                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2812                                                  TILE_SPLIT(split_equal_to_row_size));
2813                                 break;
2814                         case 5:
2815                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2816                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2817                                 break;
2818                         case 6:
2819                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2820                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2821                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2822                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2823                                 break;
2824                         case 7:
2825                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2826                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2827                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2828                                                  TILE_SPLIT(split_equal_to_row_size));
2829                                 break;
2830                         case 8:
2831                                 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
2832                                 break;
2833                         case 9:
2834                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2835                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2836                                 break;
2837                         case 10:
2838                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2839                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2840                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2841                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2842                                 break;
2843                         case 11:
2844                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2845                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2846                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2847                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2848                                 break;
2849                         case 12:
2850                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2851                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2852                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2853                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2854                                 break;
2855                         case 13:
2856                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2857                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2858                                 break;
2859                         case 14:
2860                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2861                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2862                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2863                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2864                                 break;
2865                         case 16:
2866                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2867                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2868                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2869                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2870                                 break;
2871                         case 17:
2872                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2873                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2874                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2875                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2876                                 break;
2877                         case 27:
2878                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2879                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2880                                 break;
2881                         case 28:
2882                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2883                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2884                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2885                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2886                                 break;
2887                         case 29:
2888                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2889                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2890                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2891                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2892                                 break;
2893                         case 30:
2894                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2895                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2896                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2897                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2898                                 break;
2899                         default:
2900                                 gb_tile_moden = 0;
2901                                 break;
2902                         }
2903                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2904                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2905                 }
2906                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2907                         switch (reg_offset) {
2908                         case 0:
2909                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2910                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2911                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2912                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2913                                 break;
2914                         case 1:
2915                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2916                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2917                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2918                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2919                                 break;
2920                         case 2:
2921                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2922                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2923                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2924                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2925                                 break;
2926                         case 3:
2927                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2928                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2929                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2930                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2931                                 break;
2932                         case 4:
2933                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2934                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2935                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2936                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2937                                 break;
2938                         case 5:
2939                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2940                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2941                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2942                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2943                                 break;
2944                         case 6:
2945                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2946                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2947                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2948                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2949                                 break;
2950                         case 8:
2951                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2952                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2953                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2954                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2955                                 break;
2956                         case 9:
2957                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2958                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2959                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2960                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2961                                 break;
2962                         case 10:
2963                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2964                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2965                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2966                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2967                                 break;
2968                         case 11:
2969                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2970                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2971                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2972                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2973                                 break;
2974                         case 12:
2975                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2976                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2977                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2978                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2979                                 break;
2980                         case 13:
2981                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2982                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2983                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2984                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2985                                 break;
2986                         case 14:
2987                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2988                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2989                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2990                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2991                                 break;
2992                         default:
2993                                 gb_tile_moden = 0;
2994                                 break;
2995                         }
2996                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2997                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2998                 }
2999         } else
3000                 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3001 }
3002
3003 /**
3004  * cik_select_se_sh - select which SE, SH to address
3005  *
3006  * @rdev: radeon_device pointer
3007  * @se_num: shader engine to address
3008  * @sh_num: sh block to address
3009  *
3010  * Select which SE, SH combinations to address. Certain
3011  * registers are instanced per SE or SH.  0xffffffff means
3012  * broadcast to all SEs or SHs (CIK).
3013  */
3014 static void cik_select_se_sh(struct radeon_device *rdev,
3015                              u32 se_num, u32 sh_num)
3016 {
3017         u32 data = INSTANCE_BROADCAST_WRITES;
3018
3019         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3020                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3021         else if (se_num == 0xffffffff)
3022                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3023         else if (sh_num == 0xffffffff)
3024                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3025         else
3026                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3027         WREG32(GRBM_GFX_INDEX, data);
3028 }
3029
3030 /**
3031  * cik_create_bitmask - create a bitmask
3032  *
3033  * @bit_width: length of the mask
3034  *
3035  * create a variable length bit mask (CIK).
3036  * Returns the bitmask.
3037  */
3038 static u32 cik_create_bitmask(u32 bit_width)
3039 {
3040         u32 i, mask = 0;
3041
3042         for (i = 0; i < bit_width; i++) {
3043                 mask <<= 1;
3044                 mask |= 1;
3045         }
3046         return mask;
3047 }
3048
3049 /**
3050  * cik_select_se_sh - select which SE, SH to address
3051  *
3052  * @rdev: radeon_device pointer
3053  * @max_rb_num: max RBs (render backends) for the asic
3054  * @se_num: number of SEs (shader engines) for the asic
3055  * @sh_per_se: number of SH blocks per SE for the asic
3056  *
3057  * Calculates the bitmask of disabled RBs (CIK).
3058  * Returns the disabled RB bitmask.
3059  */
3060 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3061                               u32 max_rb_num_per_se,
3062                               u32 sh_per_se)
3063 {
3064         u32 data, mask;
3065
3066         data = RREG32(CC_RB_BACKEND_DISABLE);
3067         if (data & 1)
3068                 data &= BACKEND_DISABLE_MASK;
3069         else
3070                 data = 0;
3071         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3072
3073         data >>= BACKEND_DISABLE_SHIFT;
3074
3075         mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3076
3077         return data & mask;
3078 }
3079
3080 /**
3081  * cik_setup_rb - setup the RBs on the asic
3082  *
3083  * @rdev: radeon_device pointer
3084  * @se_num: number of SEs (shader engines) for the asic
3085  * @sh_per_se: number of SH blocks per SE for the asic
3086  * @max_rb_num: max RBs (render backends) for the asic
3087  *
3088  * Configures per-SE/SH RB registers (CIK).
3089  */
3090 static void cik_setup_rb(struct radeon_device *rdev,
3091                          u32 se_num, u32 sh_per_se,
3092                          u32 max_rb_num_per_se)
3093 {
3094         int i, j;
3095         u32 data, mask;
3096         u32 disabled_rbs = 0;
3097         u32 enabled_rbs = 0;
3098
3099         for (i = 0; i < se_num; i++) {
3100                 for (j = 0; j < sh_per_se; j++) {
3101                         cik_select_se_sh(rdev, i, j);
3102                         data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3103                         if (rdev->family == CHIP_HAWAII)
3104                                 disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3105                         else
3106                                 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3107                 }
3108         }
3109         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3110
3111         mask = 1;
3112         for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3113                 if (!(disabled_rbs & mask))
3114                         enabled_rbs |= mask;
3115                 mask <<= 1;
3116         }
3117
3118         rdev->config.cik.backend_enable_mask = enabled_rbs;
3119
3120         for (i = 0; i < se_num; i++) {
3121                 cik_select_se_sh(rdev, i, 0xffffffff);
3122                 data = 0;
3123                 for (j = 0; j < sh_per_se; j++) {
3124                         switch (enabled_rbs & 3) {
3125                         case 0:
3126                                 if (j == 0)
3127                                         data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3128                                 else
3129                                         data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3130                                 break;
3131                         case 1:
3132                                 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3133                                 break;
3134                         case 2:
3135                                 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3136                                 break;
3137                         case 3:
3138                         default:
3139                                 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3140                                 break;
3141                         }
3142                         enabled_rbs >>= 2;
3143                 }
3144                 WREG32(PA_SC_RASTER_CONFIG, data);
3145         }
3146         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3147 }
3148
3149 /**
3150  * cik_gpu_init - setup the 3D engine
3151  *
3152  * @rdev: radeon_device pointer
3153  *
3154  * Configures the 3D engine and tiling configuration
3155  * registers so that the 3D engine is usable.
3156  */
static void cik_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/* Per-ASIC gfx topology (shader engines, tile pipes, CUs, RBs) and
	 * scan-converter FIFO sizes; gb_addr_config is reset to the golden
	 * value for the family. */
	switch (rdev->family) {
	case CHIP_BONAIRE:
		rdev->config.cik.max_shader_engines = 2;
		rdev->config.cik.max_tile_pipes = 4;
		rdev->config.cik.max_cu_per_sh = 7;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 2;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 32;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_HAWAII:
		rdev->config.cik.max_shader_engines = 4;
		rdev->config.cik.max_tile_pipes = 16;
		rdev->config.cik.max_cu_per_sh = 11;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 4;
		rdev->config.cik.max_texture_channel_caches = 16;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 32;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_KAVERI:
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 4;
		/* Kaveri SKUs differ in CU count and render-backend count;
		 * distinguish them by PCI device id. */
		if ((rdev->pdev->device == 0x1304) ||
		    (rdev->pdev->device == 0x1305) ||
		    (rdev->pdev->device == 0x130C) ||
		    (rdev->pdev->device == 0x130F) ||
		    (rdev->pdev->device == 0x1310) ||
		    (rdev->pdev->device == 0x1311) ||
		    (rdev->pdev->device == 0x131C)) {
			rdev->config.cik.max_cu_per_sh = 8;
			rdev->config.cik.max_backends_per_se = 2;
		} else if ((rdev->pdev->device == 0x1309) ||
			   (rdev->pdev->device == 0x130A) ||
			   (rdev->pdev->device == 0x130D) ||
			   (rdev->pdev->device == 0x1313) ||
			   (rdev->pdev->device == 0x131D)) {
			rdev->config.cik.max_cu_per_sh = 6;
			rdev->config.cik.max_backends_per_se = 2;
		} else if ((rdev->pdev->device == 0x1306) ||
			   (rdev->pdev->device == 0x1307) ||
			   (rdev->pdev->device == 0x130B) ||
			   (rdev->pdev->device == 0x130E) ||
			   (rdev->pdev->device == 0x1315) ||
			   (rdev->pdev->device == 0x131B)) {
			rdev->config.cik.max_cu_per_sh = 4;
			rdev->config.cik.max_backends_per_se = 1;
		} else {
			rdev->config.cik.max_cu_per_sh = 3;
			rdev->config.cik.max_backends_per_se = 1;
		}
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_KABINI:
	default:
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 2;
		rdev->config.cik.max_cu_per_sh = 2;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 2;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
	rdev->config.cik.mem_max_burst_length_bytes = 256;
	/* derive DRAM row size (in KB) from the column count in MC_ARB_RAMCFG,
	 * capped at 4 KB */
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.cik.mem_row_size_in_kb > 4)
		rdev->config.cik.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.cik.shader_engine_tile_size = 32;
	rdev->config.cik.num_gpus = 1;
	rdev->config.cik.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.cik.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.cik.tile_config = 0;
	switch (rdev->config.cik.num_tile_pipes) {
	case 1:
		rdev->config.cik.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.cik.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.cik.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.cik.tile_config |= (3 << 0);
		break;
	}
	rdev->config.cik.tile_config |=
		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
	rdev->config.cik.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.cik.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* mirror the address config to the other blocks that address memory
	 * independently (HDP, display DMIF, the two SDMA engines, UVD) */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);

	cik_tiling_mode_table_init(rdev);

	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
		     rdev->config.cik.max_sh_per_se,
		     rdev->config.cik.max_backends_per_se);

	/* set HW defaults for 3D engine */
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	WREG32(SX_DEBUG_1, 0x20);

	WREG32(TA_CNTL_AUX, 0x00010000);

	tmp = RREG32(SPI_CONFIG_CNTL);
	tmp |= 0x03000000;
	WREG32(SPI_CONFIG_CNTL, tmp);

	WREG32(SQ_CONFIG, 1);

	WREG32(DB_DEBUG, 0);

	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
	tmp |= 0x00000400;
	WREG32(DB_DEBUG2, tmp);

	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
	tmp |= 0x00020200;
	WREG32(DB_DEBUG3, tmp);

	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
	tmp |= 0x00018208;
	WREG32(CB_HW_CONTROL, tmp);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	/* scan converter FIFO sizes from the per-family config above */
	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	/* read-modify-write with no change: latches the HDP host path config */
	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);

	/* let the register writes settle */
	udelay(50);
}
3412
3413 /*
3414  * GPU scratch registers helpers function.
3415  */
3416 /**
3417  * cik_scratch_init - setup driver info for CP scratch regs
3418  *
3419  * @rdev: radeon_device pointer
3420  *
3421  * Set up the number and offset of the CP scratch registers.
 * NOTE: use of CP scratch registers is a legacy interface and
3423  * is not used by default on newer asics (r6xx+).  On newer asics,
3424  * memory buffers are used for fences rather than scratch regs.
3425  */
3426 static void cik_scratch_init(struct radeon_device *rdev)
3427 {
3428         int i;
3429
3430         rdev->scratch.num_reg = 7;
3431         rdev->scratch.reg_base = SCRATCH_REG0;
3432         for (i = 0; i < rdev->scratch.num_reg; i++) {
3433                 rdev->scratch.free[i] = true;
3434                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3435         }
3436 }
3437
3438 /**
3439  * cik_ring_test - basic gfx ring test
3440  *
3441  * @rdev: radeon_device pointer
3442  * @ring: radeon_ring structure holding ring information
3443  *
3444  * Allocate a scratch register and write to it using the gfx ring (CIK).
3445  * Provides a basic gfx ring test to verify that the ring is working.
3446  * Used by cik_cp_gfx_resume();
3447  * Returns 0 on success, error on failure.
3448  */
int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* seed the scratch reg with a sentinel, then have the CP overwrite it */
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ring_lock(rdev, ring, 3);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	/* SET_UCONFIG_REG: CP writes 0xDEADBEEF into the scratch register */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
	radeon_ring_write(ring, 0xDEADBEEF);
	radeon_ring_unlock_commit(rdev, ring);

	/* poll until the CP has executed the write, or we hit the timeout */
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
	} else {
		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	return r;
}
3489
3490 /**
3491  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3492  *
3493  * @rdev: radeon_device pointer
3494  * @ridx: radeon ring index
3495  *
3496  * Emits an hdp flush on the cp.
3497  */
static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
				       int ridx)
{
	struct radeon_ring *ring = &rdev->ring[ridx];
	u32 ref_and_mask;

	/* Pick the GPU_HDP_FLUSH_REQ/DONE bit for this ring.  The gfx ring
	 * uses CP0; the compute rings (listed first so they share the
	 * default case) select a bit from the micro engine (me) and pipe.
	 * An unexpected me value means there is no bit to wait on - bail. */
	switch (ring->idx) {
	case CAYMAN_RING_TYPE_CP1_INDEX:
	case CAYMAN_RING_TYPE_CP2_INDEX:
	default:
		switch (ring->me) {
		case 0:
			ref_and_mask = CP2 << ring->pipe;
			break;
		case 1:
			ref_and_mask = CP6 << ring->pipe;
			break;
		default:
			return;
		}
		break;
	case RADEON_RING_TYPE_GFX_INDEX:
		ref_and_mask = CP0;
		break;
	}

	/* WAIT_REG_MEM in write/wait/write mode: sets the request bit in
	 * GPU_HDP_FLUSH_REQ, then polls GPU_HDP_FLUSH_DONE until the same
	 * bit is set, i.e. until the HDP flush has completed */
	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
	radeon_ring_write(ring, ref_and_mask);
	radeon_ring_write(ring, ref_and_mask);
	radeon_ring_write(ring, 0x20); /* poll interval */
}
3534
3535 /**
3536  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3537  *
3538  * @rdev: radeon_device pointer
3539  * @fence: radeon fence object
3540  *
 * Emits a fence sequence number on the gfx ring and flushes
3542  * GPU caches.
3543  */
void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
			     struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	/* GPU address of this ring's fence slot */
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc);
	/* DATA_SEL(1): write the 32-bit seq value; INT_SEL(2): raise an
	 * interrupt after the write lands */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
	/* HDP flush */
	cik_hdp_flush_cp_ring_emit(rdev, fence->ring);
}
3563
3564 /**
3565  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3566  *
3567  * @rdev: radeon_device pointer
3568  * @fence: radeon fence object
3569  *
 * Emits a fence sequence number on the compute ring and flushes
3571  * GPU caches.
3572  */
void cik_fence_compute_ring_emit(struct radeon_device *rdev,
				 struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	/* GPU address of this ring's fence slot */
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* RELEASE_MEM - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	/* DATA_SEL(1): write the 32-bit seq value; INT_SEL(2): raise an
	 * interrupt after the write lands */
	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, upper_32_bits(addr));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
	/* HDP flush */
	cik_hdp_flush_cp_ring_emit(rdev, fence->ring);
}
3593
3594 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3595                              struct radeon_ring *ring,
3596                              struct radeon_semaphore *semaphore,
3597                              bool emit_wait)
3598 {
3599         uint64_t addr = semaphore->gpu_addr;
3600         unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3601
3602         radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3603         radeon_ring_write(ring, addr & 0xffffffff);
3604         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3605
3606         return true;
3607 }
3608
3609 /**
3610  * cik_copy_cpdma - copy pages using the CP DMA engine
3611  *
3612  * @rdev: radeon_device pointer
3613  * @src_offset: src GPU address
3614  * @dst_offset: dst GPU address
3615  * @num_gpu_pages: number of GPU pages to xfer
3616  * @fence: radeon fence object
3617  *
 * Copy GPU pages using the CP DMA engine (CIK+).
3619  * Used by the radeon ttm implementation to move pages if
3620  * registered as the asic copy callback.
3621  */
3622 int cik_copy_cpdma(struct radeon_device *rdev,
3623                    uint64_t src_offset, uint64_t dst_offset,
3624                    unsigned num_gpu_pages,
3625                    struct radeon_fence **fence)
3626 {
3627         struct radeon_semaphore *sem = NULL;
3628         int ring_index = rdev->asic->copy.blit_ring_index;
3629         struct radeon_ring *ring = &rdev->ring[ring_index];
3630         u32 size_in_bytes, cur_size_in_bytes, control;
3631         int i, num_loops;
3632         int r = 0;
3633
3634         r = radeon_semaphore_create(rdev, &sem);
3635         if (r) {
3636                 DRM_ERROR("radeon: moving bo (%d).\n", r);
3637                 return r;
3638         }
3639
3640         size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3641         num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3642         r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3643         if (r) {
3644                 DRM_ERROR("radeon: moving bo (%d).\n", r);
3645                 radeon_semaphore_free(rdev, &sem, NULL);
3646                 return r;
3647         }
3648
3649         radeon_semaphore_sync_to(sem, *fence);
3650         radeon_semaphore_sync_rings(rdev, sem, ring->idx);
3651
3652         for (i = 0; i < num_loops; i++) {
3653                 cur_size_in_bytes = size_in_bytes;
3654                 if (cur_size_in_bytes > 0x1fffff)
3655                         cur_size_in_bytes = 0x1fffff;
3656                 size_in_bytes -= cur_size_in_bytes;
3657                 control = 0;
3658                 if (size_in_bytes == 0)
3659                         control |= PACKET3_DMA_DATA_CP_SYNC;
3660                 radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3661                 radeon_ring_write(ring, control);
3662                 radeon_ring_write(ring, lower_32_bits(src_offset));
3663                 radeon_ring_write(ring, upper_32_bits(src_offset));
3664                 radeon_ring_write(ring, lower_32_bits(dst_offset));
3665                 radeon_ring_write(ring, upper_32_bits(dst_offset));
3666                 radeon_ring_write(ring, cur_size_in_bytes);
3667                 src_offset += cur_size_in_bytes;
3668                 dst_offset += cur_size_in_bytes;
3669         }
3670
3671         r = radeon_fence_emit(rdev, fence, ring->idx);
3672         if (r) {
3673                 radeon_ring_unlock_undo(rdev, ring);
3674                 return r;
3675         }
3676
3677         radeon_ring_unlock_commit(rdev, ring);
3678         radeon_semaphore_free(rdev, &sem, *fence);
3679
3680         return r;
3681 }
3682
3683 /*
3684  * IB stuff
3685  */
3686 /**
3687  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3688  *
3689  * @rdev: radeon_device pointer
3690  * @ib: radeon indirect buffer object
3691  *
3692  * Emits an DE (drawing engine) or CE (constant engine) IB
3693  * on the gfx ring.  IBs are usually generated by userspace
3694  * acceleration drivers and submitted to the kernel for
 * scheduling on the ring.  This function schedules the IB
3696  * on the gfx ring for execution by the GPU.
3697  */
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 header, control = INDIRECT_BUFFER_VALID;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		/* record where rptr will point once this IB has been consumed
		 * (current wptr + preamble dwords + the 4-dword IB packet),
		 * either in the save register or the writeback slot */
		if (ring->rptr_save_reg) {
			next_rptr = ring->wptr + 3 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_UCONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			next_rptr = ring->wptr + 5 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	/* IB length in dwords plus the VM id it executes under (0 = kernel) */
	control |= ib->length_dw |
		(ib->vm ? (ib->vm->id << 24) : 0);

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, control);
}
3741
3742 /**
3743  * cik_ib_test - basic gfx ring IB test
3744  *
3745  * @rdev: radeon_device pointer
3746  * @ring: radeon_ring structure holding ring information
3747  *
3748  * Allocate an IB and execute it on the gfx ring (CIK).
3749  * Provides a basic gfx ring test to verify that IBs are working.
3750  * Returns 0 on success, error on failure.
3751  */
int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	struct radeon_ib ib;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* seed the scratch reg with a sentinel the IB should overwrite */
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
	if (r) {
		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	/* 3-dword IB: SET_UCONFIG_REG writing 0xDEADBEEF to the scratch reg */
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;
	r = radeon_ib_schedule(rdev, &ib, NULL);
	if (r) {
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
		return r;
	}
	/* wait for the IB's fence, then poll the scratch reg for the value */
	r = radeon_fence_wait(ib.fence, false);
	if (r) {
		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
		return r;
	}
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
	} else {
		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	radeon_ib_free(rdev, &ib);
	return r;
}
3807
3808 /*
3809  * CP.
 * On CIK, gfx and compute now have independent command processors.
3811  *
3812  * GFX
3813  * Gfx consists of a single ring and can process both gfx jobs and
3814  * compute jobs.  The gfx CP consists of three microengines (ME):
3815  * PFP - Pre-Fetch Parser
3816  * ME - Micro Engine
3817  * CE - Constant Engine
3818  * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
3820  * used by the DE so that they can be loaded into cache in parallel
3821  * while the DE is processing state update packets.
3822  *
3823  * Compute
3824  * The compute CP consists of two microengines (ME):
3825  * MEC1 - Compute MicroEngine 1
3826  * MEC2 - Compute MicroEngine 2
3827  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3828  * The queues are exposed to userspace and are programmed directly
3829  * by the compute runtime.
3830  */
3831 /**
3832  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3833  *
3834  * @rdev: radeon_device pointer
3835  * @enable: enable or disable the MEs
3836  *
3837  * Halts or unhalts the gfx MEs.
3838  */
3839 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3840 {
3841         if (enable)
3842                 WREG32(CP_ME_CNTL, 0);
3843         else {
3844                 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3845                         radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3846                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3847                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3848         }
3849         udelay(50);
3850 }
3851
3852 /**
3853  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3854  *
3855  * @rdev: radeon_device pointer
3856  *
3857  * Loads the gfx PFP, ME, and CE ucode.
3858  * Returns 0 for success, -EINVAL if the ucode is not available.
3859  */
static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i;

	/* all three gfx ucode images must have been fetched already */
	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
		return -EINVAL;

	/* halt the MEs before rewriting their ucode */
	cik_cp_gfx_enable(rdev, false);

	/* PFP - stream the big-endian image through the ADDR/DATA pair,
	 * then reset the write address to 0 */
	fw_data = (const __be32 *)rdev->pfp_fw->data;
	WREG32(CP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_PFP_UCODE_ADDR, 0);

	/* CE */
	fw_data = (const __be32 *)rdev->ce_fw->data;
	WREG32(CP_CE_UCODE_ADDR, 0);
	for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_CE_UCODE_ADDR, 0);

	/* ME */
	fw_data = (const __be32 *)rdev->me_fw->data;
	WREG32(CP_ME_RAM_WADDR, 0);
	for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_ME_RAM_WADDR, 0);

	/* zero all address pointers so the engines start from instruction 0 */
	WREG32(CP_PFP_UCODE_ADDR, 0);
	WREG32(CP_CE_UCODE_ADDR, 0);
	WREG32(CP_ME_RAM_WADDR, 0);
	WREG32(CP_ME_RAM_RADDR, 0);
	return 0;
}
3897
3898 /**
3899  * cik_cp_gfx_start - start the gfx ring
3900  *
3901  * @rdev: radeon_device pointer
3902  *
3903  * Enables the ring and loads the clear state context and other
3904  * packets required to init the ring.
3905  * Returns 0 for success, error for failure.
3906  */
static int cik_cp_gfx_start(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r, i;

	/* init the CP */
	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
	WREG32(CP_ENDIAN_SWAP, 0);
	WREG32(CP_DEVICE_ID, 1);

	cik_cp_gfx_enable(rdev, true);

	/* room for the clear-state dump plus 17 dwords of fixed packets */
	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* init the CE partitions.  CE only used for gfx on CIK */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	radeon_ring_write(ring, 0xc000);
	radeon_ring_write(ring, 0xc000);

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	radeon_ring_write(ring, 0x80000000);
	radeon_ring_write(ring, 0x80000000);

	/* stream the canned CIK default register state into the preamble */
	for (i = 0; i < cik_default_size; i++)
		radeon_ring_write(ring, cik_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

	radeon_ring_unlock_commit(rdev, ring);

	return 0;
}
3958
3959 /**
3960  * cik_cp_gfx_fini - stop the gfx ring
3961  *
3962  * @rdev: radeon_device pointer
3963  *
3964  * Stop the gfx ring and tear down the driver ring
3965  * info.
3966  */
3967 static void cik_cp_gfx_fini(struct radeon_device *rdev)
3968 {
3969         cik_cp_gfx_enable(rdev, false);
3970         radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3971 }
3972
3973 /**
3974  * cik_cp_gfx_resume - setup the gfx ring buffer registers
3975  *
3976  * @rdev: radeon_device pointer
3977  *
3978  * Program the location and size of the gfx ring buffer
3979  * and test it to make sure it's working.
3980  * Returns 0 for success, error for failure.
3981  */
static int cik_cp_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr;
	int r;

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	if (rdev->family != CHIP_HAWAII)
		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(CP_RB_VMID, 0);

	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers.
	 * RB_RPTR_WR_ENA is set temporarily so the reset wptr takes effect;
	 * it is cleared again by the plain CP_RB0_CNTL write below. */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	/* scratch register shadowing is no longer supported */
	WREG32(SCRATCH_UMSK, 0);

	/* without writeback the CP must not push rptr updates to memory */
	if (!rdev->wb.enabled)
		tmp |= RB_NO_UPDATE;

	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	/* ring base is programmed in units of 256 bytes */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(CP_RB0_BASE, rb_addr);
	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

	ring->rptr = RREG32(CP_RB0_RPTR);

	/* start the ring */
	cik_cp_gfx_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		return r;
	}

	/* gfx ring is usable again, so buffer moves may use all of VRAM */
	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}
4050
4051 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4052                      struct radeon_ring *ring)
4053 {
4054         u32 rptr;
4055
4056         if (rdev->wb.enabled)
4057                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4058         else
4059                 rptr = RREG32(CP_RB0_RPTR);
4060
4061         return rptr;
4062 }
4063
4064 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4065                      struct radeon_ring *ring)
4066 {
4067         u32 wptr;
4068
4069         wptr = RREG32(CP_RB0_WPTR);
4070
4071         return wptr;
4072 }
4073
/* Publish a new gfx ring write pointer to the CP. */
void cik_gfx_set_wptr(struct radeon_device *rdev,
		      struct radeon_ring *ring)
{
	WREG32(CP_RB0_WPTR, ring->wptr);
	/* read back to flush the posted register write */
	(void)RREG32(CP_RB0_WPTR);
}
4080
4081 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4082                          struct radeon_ring *ring)
4083 {
4084         u32 rptr;
4085
4086         if (rdev->wb.enabled) {
4087                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4088         } else {
4089                 mutex_lock(&rdev->srbm_mutex);
4090                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4091                 rptr = RREG32(CP_HQD_PQ_RPTR);
4092                 cik_srbm_select(rdev, 0, 0, 0, 0);
4093                 mutex_unlock(&rdev->srbm_mutex);
4094         }
4095
4096         return rptr;
4097 }
4098
4099 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4100                          struct radeon_ring *ring)
4101 {
4102         u32 wptr;
4103
4104         if (rdev->wb.enabled) {
4105                 /* XXX check if swapping is necessary on BE */
4106                 wptr = rdev->wb.wb[ring->wptr_offs/4];
4107         } else {
4108                 mutex_lock(&rdev->srbm_mutex);
4109                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4110                 wptr = RREG32(CP_HQD_PQ_WPTR);
4111                 cik_srbm_select(rdev, 0, 0, 0, 0);
4112                 mutex_unlock(&rdev->srbm_mutex);
4113         }
4114
4115         return wptr;
4116 }
4117
/* Publish a new compute ring write pointer: update the writeback shadow,
 * then ring the queue's doorbell so the CP picks it up. */
void cik_compute_set_wptr(struct radeon_device *rdev,
			  struct radeon_ring *ring)
{
	/* XXX check if swapping is necessary on BE */
	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
	WDOORBELL32(ring->doorbell_index, ring->wptr);
}
4125
4126 /**
4127  * cik_cp_compute_enable - enable/disable the compute CP MEs
4128  *
4129  * @rdev: radeon_device pointer
4130  * @enable: enable or disable the MEs
4131  *
4132  * Halts or unhalts the compute MEs.
4133  */
4134 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4135 {
4136         if (enable)
4137                 WREG32(CP_MEC_CNTL, 0);
4138         else
4139                 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4140         udelay(50);
4141 }
4142
4143 /**
4144  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4145  *
4146  * @rdev: radeon_device pointer
4147  *
4148  * Loads the compute MEC1&2 ucode.
4149  * Returns 0 for success, -EINVAL if the ucode is not available.
4150  */
static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i;

	if (!rdev->mec_fw)
		return -EINVAL;

	/* MECs must be halted while their ucode is being replaced */
	cik_cp_compute_enable(rdev, false);

	/* MEC1 -- firmware words are stored big-endian, swap as we go */
	fw_data = (const __be32 *)rdev->mec_fw->data;
	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
		WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);

	if (rdev->family == CHIP_KAVERI) {
		/* MEC2: only KAVERI has a second MEC; it is loaded from
		 * the same firmware image as MEC1 */
		fw_data = (const __be32 *)rdev->mec_fw->data;
		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
			WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
	}

	return 0;
}
4179
4180 /**
4181  * cik_cp_compute_start - start the compute queues
4182  *
4183  * @rdev: radeon_device pointer
4184  *
4185  * Enable the compute queues.
4186  * Returns 0 for success, error for failure.
4187  */
static int cik_cp_compute_start(struct radeon_device *rdev)
{
	/* unhalt the MECs; per-queue setup is done by the caller
	 * (cik_cp_compute_resume()) */
	cik_cp_compute_enable(rdev, true);

	return 0;
}
4194
4195 /**
4196  * cik_cp_compute_fini - stop the compute queues
4197  *
4198  * @rdev: radeon_device pointer
4199  *
4200  * Stop the compute queues and tear down the driver queue
4201  * info.
4202  */
4203 static void cik_cp_compute_fini(struct radeon_device *rdev)
4204 {
4205         int i, idx, r;
4206
4207         cik_cp_compute_enable(rdev, false);
4208
4209         for (i = 0; i < 2; i++) {
4210                 if (i == 0)
4211                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4212                 else
4213                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4214
4215                 if (rdev->ring[idx].mqd_obj) {
4216                         r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4217                         if (unlikely(r != 0))
4218                                 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4219
4220                         radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4221                         radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4222
4223                         radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4224                         rdev->ring[idx].mqd_obj = NULL;
4225                 }
4226         }
4227 }
4228
4229 static void cik_mec_fini(struct radeon_device *rdev)
4230 {
4231         int r;
4232
4233         if (rdev->mec.hpd_eop_obj) {
4234                 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4235                 if (unlikely(r != 0))
4236                         dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4237                 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4238                 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4239
4240                 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4241                 rdev->mec.hpd_eop_obj = NULL;
4242         }
4243 }
4244
4245 #define MEC_HPD_SIZE 2048
4246
/**
 * cik_mec_init - allocate the MEC HPD EOP buffer
 *
 * @rdev: radeon_device pointer
 *
 * Records the MEC/pipe/queue topology for this asic and allocates,
 * pins and zeroes a GTT buffer holding one HPD EOP area per pipe.
 * Returns 0 for success, error for failure.
 */
static int cik_mec_init(struct radeon_device *rdev)
{
	int r;
	u32 *hpd;

	/*
	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
	 */
	if (rdev->family == CHIP_KAVERI)
		rdev->mec.num_mec = 2;
	else
		rdev->mec.num_mec = 1;
	rdev->mec.num_pipe = 4;
	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;

	/* one 2*MEC_HPD_SIZE slot per pipe across all MECs */
	if (rdev->mec.hpd_eop_obj == NULL) {
		r = radeon_bo_create(rdev,
				     rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
				     PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_GTT, NULL,
				     &rdev->mec.hpd_eop_obj);
		if (r) {
			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
			return r;
		}
	}

	/* pin the buffer and record its GPU address for
	 * cik_cp_compute_resume(); cik_mec_fini() undoes all of this
	 * on any failure below */
	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
			  &rdev->mec.hpd_eop_gpu_addr);
	if (r) {
		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}

	/* clear memory.  Not sure if this is required or not */
	memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);

	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

	return 0;
}
4302
/* CPU-side shadow of the per-queue CP_MQD_* / CP_HQD_* registers.
 * cik_cp_compute_resume() fills these fields in as it programs the
 * corresponding registers.  NOTE(review): field order presumably also
 * matches the MQD layout the CP firmware expects -- do not reorder. */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
4341
/* Memory Queue Descriptor for Bonaire-class compute queues.  One instance
 * lives in a GTT buffer object per compute ring (see cik_cp_compute_resume());
 * queue_state shadows the HQD register block programmed for that queue.
 * NOTE(review): layout is presumably consumed by the CP firmware -- keep
 * field order and sizes unchanged. */
struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	struct hqd_registers queue_state;
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};
4369
4370 /**
4371  * cik_cp_compute_resume - setup the compute queue registers
4372  *
4373  * @rdev: radeon_device pointer
4374  *
4375  * Program the compute queues and test them to make sure they
4376  * are working.
4377  * Returns 0 for success, error for failure.
4378  */
4379 static int cik_cp_compute_resume(struct radeon_device *rdev)
4380 {
4381         int r, i, idx;
4382         u32 tmp;
4383         bool use_doorbell = true;
4384         u64 hqd_gpu_addr;
4385         u64 mqd_gpu_addr;
4386         u64 eop_gpu_addr;
4387         u64 wb_gpu_addr;
4388         u32 *buf;
4389         struct bonaire_mqd *mqd;
4390
4391         r = cik_cp_compute_start(rdev);
4392         if (r)
4393                 return r;
4394
4395         /* fix up chicken bits */
4396         tmp = RREG32(CP_CPF_DEBUG);
4397         tmp |= (1 << 23);
4398         WREG32(CP_CPF_DEBUG, tmp);
4399
4400         /* init the pipes */
4401         mutex_lock(&rdev->srbm_mutex);
4402         for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
4403                 int me = (i < 4) ? 1 : 2;
4404                 int pipe = (i < 4) ? i : (i - 4);
4405
4406                 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
4407
4408                 cik_srbm_select(rdev, me, pipe, 0, 0);
4409
4410                 /* write the EOP addr */
4411                 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4412                 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4413
4414                 /* set the VMID assigned */
4415                 WREG32(CP_HPD_EOP_VMID, 0);
4416
4417                 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4418                 tmp = RREG32(CP_HPD_EOP_CONTROL);
4419                 tmp &= ~EOP_SIZE_MASK;
4420                 tmp |= order_base_2(MEC_HPD_SIZE / 8);
4421                 WREG32(CP_HPD_EOP_CONTROL, tmp);
4422         }
4423         cik_srbm_select(rdev, 0, 0, 0, 0);
4424         mutex_unlock(&rdev->srbm_mutex);
4425
4426         /* init the queues.  Just two for now. */
4427         for (i = 0; i < 2; i++) {
4428                 if (i == 0)
4429                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4430                 else
4431                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4432
4433                 if (rdev->ring[idx].mqd_obj == NULL) {
4434                         r = radeon_bo_create(rdev,
4435                                              sizeof(struct bonaire_mqd),
4436                                              PAGE_SIZE, true,
4437                                              RADEON_GEM_DOMAIN_GTT, NULL,
4438                                              &rdev->ring[idx].mqd_obj);
4439                         if (r) {
4440                                 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4441                                 return r;
4442                         }
4443                 }
4444
4445                 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4446                 if (unlikely(r != 0)) {
4447                         cik_cp_compute_fini(rdev);
4448                         return r;
4449                 }
4450                 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4451                                   &mqd_gpu_addr);
4452                 if (r) {
4453                         dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4454                         cik_cp_compute_fini(rdev);
4455                         return r;
4456                 }
4457                 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4458                 if (r) {
4459                         dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4460                         cik_cp_compute_fini(rdev);
4461                         return r;
4462                 }
4463
4464                 /* init the mqd struct */
4465                 memset(buf, 0, sizeof(struct bonaire_mqd));
4466
4467                 mqd = (struct bonaire_mqd *)buf;
4468                 mqd->header = 0xC0310800;
4469                 mqd->static_thread_mgmt01[0] = 0xffffffff;
4470                 mqd->static_thread_mgmt01[1] = 0xffffffff;
4471                 mqd->static_thread_mgmt23[0] = 0xffffffff;
4472                 mqd->static_thread_mgmt23[1] = 0xffffffff;
4473
4474                 mutex_lock(&rdev->srbm_mutex);
4475                 cik_srbm_select(rdev, rdev->ring[idx].me,
4476                                 rdev->ring[idx].pipe,
4477                                 rdev->ring[idx].queue, 0);
4478
4479                 /* disable wptr polling */
4480                 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4481                 tmp &= ~WPTR_POLL_EN;
4482                 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4483
4484                 /* enable doorbell? */
4485                 mqd->queue_state.cp_hqd_pq_doorbell_control =
4486                         RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4487                 if (use_doorbell)
4488                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4489                 else
4490                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4491                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4492                        mqd->queue_state.cp_hqd_pq_doorbell_control);
4493
4494                 /* disable the queue if it's active */
4495                 mqd->queue_state.cp_hqd_dequeue_request = 0;
4496                 mqd->queue_state.cp_hqd_pq_rptr = 0;
4497                 mqd->queue_state.cp_hqd_pq_wptr= 0;
4498                 if (RREG32(CP_HQD_ACTIVE) & 1) {
4499                         WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4500                         for (i = 0; i < rdev->usec_timeout; i++) {
4501                                 if (!(RREG32(CP_HQD_ACTIVE) & 1))
4502                                         break;
4503                                 udelay(1);
4504                         }
4505                         WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4506                         WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4507                         WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4508                 }
4509
4510                 /* set the pointer to the MQD */
4511                 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4512                 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4513                 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4514                 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4515                 /* set MQD vmid to 0 */
4516                 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4517                 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4518                 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4519
4520                 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4521                 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4522                 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4523                 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4524                 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4525                 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4526
4527                 /* set up the HQD, this is similar to CP_RB0_CNTL */
4528                 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4529                 mqd->queue_state.cp_hqd_pq_control &=
4530                         ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4531
4532                 mqd->queue_state.cp_hqd_pq_control |=
4533                         order_base_2(rdev->ring[idx].ring_size / 8);
4534                 mqd->queue_state.cp_hqd_pq_control |=
4535                         (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4536 #ifdef __BIG_ENDIAN
4537                 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4538 #endif
4539                 mqd->queue_state.cp_hqd_pq_control &=
4540                         ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4541                 mqd->queue_state.cp_hqd_pq_control |=
4542                         PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4543                 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4544
4545                 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4546                 if (i == 0)
4547                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4548                 else
4549                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4550                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4551                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4552                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4553                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4554                        mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4555
4556                 /* set the wb address wether it's enabled or not */
4557                 if (i == 0)
4558                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4559                 else
4560                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4561                 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4562                 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4563                         upper_32_bits(wb_gpu_addr) & 0xffff;
4564                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4565                        mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4566                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4567                        mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4568
4569                 /* enable the doorbell if requested */
4570                 if (use_doorbell) {
4571                         mqd->queue_state.cp_hqd_pq_doorbell_control =
4572                                 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4573                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4574                         mqd->queue_state.cp_hqd_pq_doorbell_control |=
4575                                 DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4576                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4577                         mqd->queue_state.cp_hqd_pq_doorbell_control &=
4578                                 ~(DOORBELL_SOURCE | DOORBELL_HIT);
4579
4580                 } else {
4581                         mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4582                 }
4583                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4584                        mqd->queue_state.cp_hqd_pq_doorbell_control);
4585
4586                 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4587                 rdev->ring[idx].wptr = 0;
4588                 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4589                 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4590                 rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
4591                 mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
4592
4593                 /* set the vmid for the queue */
4594                 mqd->queue_state.cp_hqd_vmid = 0;
4595                 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4596
4597                 /* activate the queue */
4598                 mqd->queue_state.cp_hqd_active = 1;
4599                 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4600
4601                 cik_srbm_select(rdev, 0, 0, 0, 0);
4602                 mutex_unlock(&rdev->srbm_mutex);
4603
4604                 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4605                 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4606
4607                 rdev->ring[idx].ready = true;
4608                 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4609                 if (r)
4610                         rdev->ring[idx].ready = false;
4611         }
4612
4613         return 0;
4614 }
4615
/* Enable/disable both the gfx CP and the compute MECs together. */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
}
4621
/* Load the gfx (PFP/CE/ME) and compute (MEC) CP microcode.
 * Returns 0 on success, the first failing loader's error otherwise. */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int r;

	r = cik_cp_gfx_load_microcode(rdev);
	if (r == 0)
		r = cik_cp_compute_load_microcode(rdev);

	return r;
}
4635
/* Tear down both the gfx CP ring and the compute queues. */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
4641
4642 static int cik_cp_resume(struct radeon_device *rdev)
4643 {
4644         int r;
4645
4646         cik_enable_gui_idle_interrupt(rdev, false);
4647
4648         r = cik_cp_load_microcode(rdev);
4649         if (r)
4650                 return r;
4651
4652         r = cik_cp_gfx_resume(rdev);
4653         if (r)
4654                 return r;
4655         r = cik_cp_compute_resume(rdev);
4656         if (r)
4657                 return r;
4658
4659         cik_enable_gui_idle_interrupt(rdev, true);
4660
4661         return 0;
4662 }
4663
/* Dump the GRBM/SRBM/SDMA/CP status registers to the kernel log; used
 * for diagnosing hangs before a soft reset. */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
		RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
		RREG32(SRBM_STATUS2));
	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
4703
/**
 * cik_gpu_check_soft_reset - check which blocks are busy
 *
 * @rdev: radeon_device pointer
 *
 * Check which blocks are busy and return the relevant reset
 * mask to be used by cik_gpu_soft_reset().
 * Returns a mask of the blocks to be reset.
 */
u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS: any busy gfx pipeline block requests a GFX reset */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & RLC_BUSY)
		reset_mask |= RADEON_RESET_RLC;

	/* SDMA0_STATUS_REG: SDMA_IDLE is an idle flag, so the test is
	 * inverted relative to the busy checks above */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* SDMA1_STATUS_REG: second SDMA instance, same register layout */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & SDMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & SDMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS: system blocks (IH, semaphores, GRBM requests, VM, MC) */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* Skip MC reset as it's most likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
4784
/**
 * cik_gpu_soft_reset - soft reset GPU
 *
 * @rdev: radeon_device pointer
 * @reset_mask: mask of which blocks to reset
 *
 * Soft reset the blocks specified in @reset_mask.
 * Sequence: dump fault state, quiesce CG/PG/RLC/CP/SDMA, stop the MC,
 * pulse the GRBM/SRBM soft reset bits, then resume the MC.
 */
static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	/* dump status and VM fault info for debugging before we reset */
	cik_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* disable CG/PG */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* stop the rlc */
	cik_rlc_stop(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* halt only the SDMA engines that are actually being reset */
	if (reset_mask & RADEON_RESET_DMA) {
		/* sdma0 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* sdma1 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	}

	/* stop memory traffic before asserting the resets */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the generic reset mask into GRBM/SRBM soft reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_SDMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_SDMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	/* MC soft reset only on dGPUs; IGPs share the MC with the CPU */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		if (reset_mask & RADEON_RESET_MC)
			srbm_soft_reset |= SOFT_RESET_MC;
	}

	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);	/* read back to post the write */

		udelay(50);

		/* de-assert the reset bits */
		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);	/* read back to post the write */

		udelay(50);

		/* de-assert the reset bits */
		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	/* dump status again so the before/after can be compared in the log */
	cik_print_gpu_status_regs(rdev);
}
4915
/* GMCON register state saved across a KV (IGP) pci config reset,
 * filled in by kv_save_regs_for_reset() and written back by
 * kv_restore_regs_for_reset().
 */
struct kv_reset_save_regs {
	u32 gmcon_reng_execute;	/* GMCON_RENG_EXECUTE */
	u32 gmcon_misc;		/* GMCON_MISC */
	u32 gmcon_misc3;	/* GMCON_MISC3 */
};
4921
/* Save the GMCON registers that a pci config reset clobbers on KV/IGP
 * parts, then disable the register engine and stutter mode so they do
 * not fire while the asic is being reset.
 */
static void kv_save_regs_for_reset(struct radeon_device *rdev,
				   struct kv_reset_save_regs *save)
{
	/* stash current state for kv_restore_regs_for_reset() */
	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
	save->gmcon_misc = RREG32(GMCON_MISC);
	save->gmcon_misc3 = RREG32(GMCON_MISC3);

	/* quiesce: clear execute-on-power-up and register-update/stutter bits */
	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
						STCTRL_STUTTER_EN));
}
4933
4934 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
4935                                       struct kv_reset_save_regs *save)
4936 {
4937         int i;
4938
4939         WREG32(GMCON_PGFSM_WRITE, 0);
4940         WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
4941
4942         for (i = 0; i < 5; i++)
4943                 WREG32(GMCON_PGFSM_WRITE, 0);
4944
4945         WREG32(GMCON_PGFSM_WRITE, 0);
4946         WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
4947
4948         for (i = 0; i < 5; i++)
4949                 WREG32(GMCON_PGFSM_WRITE, 0);
4950
4951         WREG32(GMCON_PGFSM_WRITE, 0x210000);
4952         WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
4953
4954         for (i = 0; i < 5; i++)
4955                 WREG32(GMCON_PGFSM_WRITE, 0);
4956
4957         WREG32(GMCON_PGFSM_WRITE, 0x21003);
4958         WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
4959
4960         for (i = 0; i < 5; i++)
4961                 WREG32(GMCON_PGFSM_WRITE, 0);
4962
4963         WREG32(GMCON_PGFSM_WRITE, 0x2b00);
4964         WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
4965
4966         for (i = 0; i < 5; i++)
4967                 WREG32(GMCON_PGFSM_WRITE, 0);
4968
4969         WREG32(GMCON_PGFSM_WRITE, 0);
4970         WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
4971
4972         for (i = 0; i < 5; i++)
4973                 WREG32(GMCON_PGFSM_WRITE, 0);
4974
4975         WREG32(GMCON_PGFSM_WRITE, 0x420000);
4976         WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
4977
4978         for (i = 0; i < 5; i++)
4979                 WREG32(GMCON_PGFSM_WRITE, 0);
4980
4981         WREG32(GMCON_PGFSM_WRITE, 0x120202);
4982         WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
4983
4984         for (i = 0; i < 5; i++)
4985                 WREG32(GMCON_PGFSM_WRITE, 0);
4986
4987         WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
4988         WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
4989
4990         for (i = 0; i < 5; i++)
4991                 WREG32(GMCON_PGFSM_WRITE, 0);
4992
4993         WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
4994         WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
4995
4996         for (i = 0; i < 5; i++)
4997                 WREG32(GMCON_PGFSM_WRITE, 0);
4998
4999         WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5000         WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5001
5002         WREG32(GMCON_MISC3, save->gmcon_misc3);
5003         WREG32(GMCON_MISC, save->gmcon_misc);
5004         WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5005 }
5006
/* Full asic reset via the PCI config space "big hammer": quiesce every
 * engine, stop memory traffic, save IGP-only state, disable bus
 * mastering, trigger the config reset and wait for the asic to come
 * back (detected by CONFIG_MEMSIZE becoming readable again).
 */
static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	struct kv_reset_save_regs kv_save = { 0 };
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* sdma0 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	/* sdma1 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	cik_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	/* IGP-only: GMCON state does not survive the config reset */
	if (rdev->flags & RADEON_IS_IGP)
		kv_save_regs_for_reset(rdev, &kv_save);

	/* disable BM */
	pci_clear_master(rdev->pdev);
	/* reset */
	radeon_pci_config_reset(rdev);

	udelay(100);

	/* wait for asic to come out of reset; CONFIG_MEMSIZE reads as
	 * all-ones while the asic is still in reset */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}

	/* does asic init need to be run first??? */
	if (rdev->flags & RADEON_IS_IGP)
		kv_restore_regs_for_reset(rdev, &kv_save);
}
5069
5070 /**
5071  * cik_asic_reset - soft reset GPU
5072  *
5073  * @rdev: radeon_device pointer
5074  *
5075  * Look up which blocks are hung and attempt
5076  * to reset them.
5077  * Returns 0 for success.
5078  */
5079 int cik_asic_reset(struct radeon_device *rdev)
5080 {
5081         u32 reset_mask;
5082
5083         reset_mask = cik_gpu_check_soft_reset(rdev);
5084
5085         if (reset_mask)
5086                 r600_set_bios_scratch_engine_hung(rdev, true);
5087
5088         /* try soft reset */
5089         cik_gpu_soft_reset(rdev, reset_mask);
5090
5091         reset_mask = cik_gpu_check_soft_reset(rdev);
5092
5093         /* try pci config reset */
5094         if (reset_mask && radeon_hard_reset)
5095                 cik_gpu_pci_config_reset(rdev);
5096
5097         reset_mask = cik_gpu_check_soft_reset(rdev);
5098
5099         if (!reset_mask)
5100                 r600_set_bios_scratch_engine_hung(rdev, false);
5101
5102         return 0;
5103 }
5104
5105 /**
5106  * cik_gfx_is_lockup - check if the 3D engine is locked up
5107  *
5108  * @rdev: radeon_device pointer
5109  * @ring: radeon_ring structure holding ring information
5110  *
5111  * Check if the 3D engine is locked up (CIK).
5112  * Returns true if the engine is locked, false if not.
5113  */
5114 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5115 {
5116         u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5117
5118         if (!(reset_mask & (RADEON_RESET_GFX |
5119                             RADEON_RESET_COMPUTE |
5120                             RADEON_RESET_CP))) {
5121                 radeon_ring_lockup_update(ring);
5122                 return false;
5123         }
5124         /* force CP activities */
5125         radeon_ring_force_activity(rdev, ring);
5126         return radeon_ring_test_lockup(rdev, ring);
5127 }
5128
5129 /* MC */
/**
 * cik_mc_program - program the GPU memory controller
 *
 * @rdev: radeon_device pointer
 *
 * Set the location of vram, gart, and AGP in the GPU's
 * physical address space (CIK).
 */
static void cik_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	/* NOTE(review): 0x2c14..0x2c24 look like per-surface HDP regs,
	 * 32 sets with 0x18 stride - confirm against the register spec */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	/* stop memory traffic while the aperture regs are reprogrammed */
	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	/* Lockout access through VGA aperture*/
	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB location: end in the high 16 bits, start in the low 16,
	 * both in units of 16MB (>> 24) */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* no AGP on CIK: base 0, top below bottom disables the aperture */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	/* we need to own VRAM, so turn off the VGA renderer here
	 * to stop it overwriting our objects */
	rv515_vga_render_disable(rdev);
}
5185
5186 /**
5187  * cik_mc_init - initialize the memory controller driver params
5188  *
5189  * @rdev: radeon_device pointer
5190  *
5191  * Look up the amount of vram, vram width, and decide how to place
5192  * vram and gart within the GPU's physical address space (CIK).
5193  * Returns 0 for success.
5194  */
5195 static int cik_mc_init(struct radeon_device *rdev)
5196 {
5197         u32 tmp;
5198         int chansize, numchan;
5199
5200         /* Get VRAM informations */
5201         rdev->mc.vram_is_ddr = true;
5202         tmp = RREG32(MC_ARB_RAMCFG);
5203         if (tmp & CHANSIZE_MASK) {
5204                 chansize = 64;
5205         } else {
5206                 chansize = 32;
5207         }
5208         tmp = RREG32(MC_SHARED_CHMAP);
5209         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5210         case 0:
5211         default:
5212                 numchan = 1;
5213                 break;
5214         case 1:
5215                 numchan = 2;
5216                 break;
5217         case 2:
5218                 numchan = 4;
5219                 break;
5220         case 3:
5221                 numchan = 8;
5222                 break;
5223         case 4:
5224                 numchan = 3;
5225                 break;
5226         case 5:
5227                 numchan = 6;
5228                 break;
5229         case 6:
5230                 numchan = 10;
5231                 break;
5232         case 7:
5233                 numchan = 12;
5234                 break;
5235         case 8:
5236                 numchan = 16;
5237                 break;
5238         }
5239         rdev->mc.vram_width = numchan * chansize;
5240         /* Could aper size report 0 ? */
5241         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5242         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5243         /* size in MB on si */
5244         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5245         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5246         rdev->mc.visible_vram_size = rdev->mc.aper_size;
5247         si_vram_gtt_location(rdev, &rdev->mc);
5248         radeon_update_bandwidth_info(rdev);
5249
5250         return 0;
5251 }
5252
5253 /*
5254  * GART
5255  * VMID 0 is the physical GPU addresses as used by the kernel.
5256  * VMIDs 1-15 are used for userspace clients and are handled
5257  * by the radeon vm/hsa code.
5258  */
/**
 * cik_pcie_gart_tlb_flush - gart tlb flush callback
 *
 * @rdev: radeon_device pointer
 *
 * Flush the TLB for the VMID 0 page table (CIK).
 */
void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache so host writes are visible before invalidating */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);

	/* bits 0-15 are the VM contexts0-15; bit 0 = VMID 0 only */
	WREG32(VM_INVALIDATE_REQUEST, 0x1);
}
5274
/**
 * cik_pcie_gart_enable - gart enable
 *
 * @rdev: radeon_device pointer
 *
 * This sets up the TLBs, programs the page tables for VMID0,
 * sets up the hw for VMIDs 1-15 which are allocated on
 * demand, and sets up the global locations for the LDS, GDS,
 * and GPUVM for FSA64 clients (CIK).
 * Returns 0 for success, errors for failure.
 */
static int cik_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	radeon_gart_restore(rdev);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	/* setup context0: kernel mappings in the GTT range, faults fall
	 * through to the dummy page */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	/* NOTE(review): undocumented registers, purpose unknown here */
	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-15 */
	/* FIXME start with 4G, once using 2 level pt switch to full
	 * vm size space
	 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
	/* contexts 1-7 and 8-15 have their base addr regs in two separate
	 * banks; point them all at the VMID0 table for now */
	for (i = 1; i < 16; i++) {
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->gart.table_addr >> 12);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->gart.table_addr >> 12);
	}

	/* enable context1-15 with full protection-fault reporting */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	/* KV routes through the VM rather than bypassing it */
	if (rdev->family == CHIP_KAVERI) {
		u32 tmp = RREG32(CHUB_CONTROL);
		tmp &= ~BYPASS_VM;
		WREG32(CHUB_CONTROL, tmp);
	}

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&rdev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		/* program the SH_MEM/SDMA aperture regs for each VMID */
		cik_srbm_select(rdev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(SH_MEM_CONFIG, 0);
		WREG32(SH_MEM_APE1_BASE, 1);
		WREG32(SH_MEM_APE1_LIMIT, 0);
		WREG32(SH_MEM_BASES, 0);
		/* SDMA GFX */
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
		/* XXX SDMA RLC - todo */
	}
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);

	cik_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
5396
/**
 * cik_pcie_gart_disable - gart disable
 *
 * @rdev: radeon_device pointer
 *
 * This disables all VM page table (CIK).
 */
static void cik_pcie_gart_disable(struct radeon_device *rdev)
{
	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control: L1 TLB left disabled (no ENABLE_L1_TLB bit) */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache: same config as enable but without ENABLE_L2_CACHE */
	WREG32(VM_L2_CNTL,
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	/* the table is no longer referenced by the hw; unpin it */
	radeon_gart_table_vram_unpin(rdev);
}
5424
/**
 * cik_pcie_gart_fini - vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tears down the driver GART/VM setup (CIK).
 */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
	/* disable the hw first, then free the table and driver state */
	cik_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
5438
5439 /* vm parser */
/**
 * cik_ib_parse - vm ib_parse callback
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer pointer
 *
 * CIK uses hw IB checking so this is a nop (CIK).
 * Always returns 0 (success).
 */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	return 0;
}
5452
5453 /*
5454  * vm
5455  * VMID 0 is the physical GPU addresses as used by the kernel.
5456  * VMIDs 1-15 are used for userspace clients and are handled
5457  * by the radeon vm/hsa code.
5458  */
5459 /**
5460  * cik_vm_init - cik vm init callback
5461  *
5462  * @rdev: radeon_device pointer
5463  *
5464  * Inits cik specific vm parameters (number of VMs, base of vram for
5465  * VMIDs 1-15) (CIK).
5466  * Returns 0 for success.
5467  */
5468 int cik_vm_init(struct radeon_device *rdev)
5469 {
5470         /* number of VMs */
5471         rdev->vm_manager.nvm = 16;
5472         /* base offset of vram pages */
5473         if (rdev->flags & RADEON_IS_IGP) {
5474                 u64 tmp = RREG32(MC_VM_FB_OFFSET);
5475                 tmp <<= 22;
5476                 rdev->vm_manager.vram_base_offset = tmp;
5477         } else
5478                 rdev->vm_manager.vram_base_offset = 0;
5479
5480         return 0;
5481 }
5482
/**
 * cik_vm_fini - cik vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tear down any asic specific VM setup (CIK).
 * Intentionally empty: CIK has no asic-specific VM state to release.
 */
void cik_vm_fini(struct radeon_device *rdev)
{
}
5493
5494 /**
5495  * cik_vm_decode_fault - print human readable fault info
5496  *
5497  * @rdev: radeon_device pointer
5498  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5499  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5500  *
5501  * Print human readable fault information (CIK).
5502  */
5503 static void cik_vm_decode_fault(struct radeon_device *rdev,
5504                                 u32 status, u32 addr, u32 mc_client)
5505 {
5506         u32 mc_id;
5507         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5508         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5509         char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5510                 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5511
5512         if (rdev->family == CHIP_HAWAII)
5513                 mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5514         else
5515                 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5516
5517         printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5518                protections, vmid, addr,
5519                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5520                block, mc_client, mc_id);
5521 }
5522
/**
 * cik_vm_flush - cik vm flush using the CP
 *
 * @rdev: radeon_device pointer
 * @ridx: ring index to emit on
 * @vm: vm to flush; NULL is a no-op
 *
 * Update the page table base and flush the VM TLB
 * using the CP (CIK).
 */
void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	/* write the new page directory base; VMIDs 1-7 and 8-15 use two
	 * separate register banks */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	if (vm->id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* update SH_MEM_* regs */
	/* select the vm's VMID in SRBM so the SH_MEM writes below land
	 * in its register bank */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(vm->id));

	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SH_MEM_BASES >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */

	/* switch SRBM back to VMID 0 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(0));

	/* HDP flush */
	cik_hdp_flush_cp_ring_emit(rdev, ridx);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm->id);

	/* compute doesn't have PFP */
	if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		radeon_ring_write(ring, 0x0);
	}
}
5595
5596 /*
5597  * RLC
5598  * The RLC is a multi-purpose microengine that handles a
5599  * variety of functions, the most important of which is
5600  * the interrupt controller.
5601  */
5602 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5603                                           bool enable)
5604 {
5605         u32 tmp = RREG32(CP_INT_CNTL_RING0);
5606
5607         if (enable)
5608                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5609         else
5610                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5611         WREG32(CP_INT_CNTL_RING0, tmp);
5612 }
5613
5614 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5615 {
5616         u32 tmp;
5617
5618         tmp = RREG32(RLC_LB_CNTL);
5619         if (enable)
5620                 tmp |= LOAD_BALANCE_ENABLE;
5621         else
5622                 tmp &= ~LOAD_BALANCE_ENABLE;
5623         WREG32(RLC_LB_CNTL, tmp);
5624 }
5625
5626 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
5627 {
5628         u32 i, j, k;
5629         u32 mask;
5630
5631         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5632                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5633                         cik_select_se_sh(rdev, i, j);
5634                         for (k = 0; k < rdev->usec_timeout; k++) {
5635                                 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
5636                                         break;
5637                                 udelay(1);
5638                         }
5639                 }
5640         }
5641         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5642
5643         mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
5644         for (k = 0; k < rdev->usec_timeout; k++) {
5645                 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
5646                         break;
5647                 udelay(1);
5648         }
5649 }
5650
5651 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5652 {
5653         u32 tmp;
5654
5655         tmp = RREG32(RLC_CNTL);
5656         if (tmp != rlc)
5657                 WREG32(RLC_CNTL, rlc);
5658 }
5659
5660 static u32 cik_halt_rlc(struct radeon_device *rdev)
5661 {
5662         u32 data, orig;
5663
5664         orig = data = RREG32(RLC_CNTL);
5665
5666         if (data & RLC_ENABLE) {
5667                 u32 i;
5668
5669                 data &= ~RLC_ENABLE;
5670                 WREG32(RLC_CNTL, data);
5671
5672                 for (i = 0; i < rdev->usec_timeout; i++) {
5673                         if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5674                                 break;
5675                         udelay(1);
5676                 }
5677
5678                 cik_wait_for_rlc_serdes(rdev);
5679         }
5680
5681         return orig;
5682 }
5683
/* Ask the RLC to enter safe mode and wait for the handshake to complete. */
void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
{
	u32 tmp, i, mask;

	/* post the enter-safe-mode request */
	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
	WREG32(RLC_GPR_REG2, tmp);

	/* wait until both gfx power and clock status are reported */
	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
			break;
		udelay(1);
	}

	/* wait for the RLC to ack the request (REQ bit clears) */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
			break;
		udelay(1);
	}
}
5704
5705 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5706 {
5707         u32 tmp;
5708
5709         tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5710         WREG32(RLC_GPR_REG2, tmp);
5711 }
5712
/**
 * cik_rlc_stop - stop the RLC ME
 *
 * @rdev: radeon_device pointer
 *
 * Halt the RLC ME (MicroEngine) (CIK).
 */
static void cik_rlc_stop(struct radeon_device *rdev)
{
	/* clear RLC_ENABLE (and everything else) in RLC_CNTL */
	WREG32(RLC_CNTL, 0);

	cik_enable_gui_idle_interrupt(rdev, false);

	/* wait for the serdes masters to drain before returning */
	cik_wait_for_rlc_serdes(rdev);
}
5728
/**
 * cik_rlc_start - start the RLC ME
 *
 * @rdev: radeon_device pointer
 *
 * Unhalt the RLC ME (MicroEngine) (CIK).
 */
static void cik_rlc_start(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, RLC_ENABLE);

	cik_enable_gui_idle_interrupt(rdev, true);

	/* give the RLC a moment to come up before callers touch it */
	udelay(50);
}
5744
5745 /**
5746  * cik_rlc_resume - setup the RLC hw
5747  *
5748  * @rdev: radeon_device pointer
5749  *
5750  * Initialize the RLC registers, load the ucode,
5751  * and start the RLC (CIK).
5752  * Returns 0 for success, -EINVAL if the ucode is not available.
5753  */
5754 static int cik_rlc_resume(struct radeon_device *rdev)
5755 {
5756         u32 i, size, tmp;
5757         const __be32 *fw_data;
5758
5759         if (!rdev->rlc_fw)
5760                 return -EINVAL;
5761
5762         switch (rdev->family) {
5763         case CHIP_BONAIRE:
5764         case CHIP_HAWAII:
5765         default:
5766                 size = BONAIRE_RLC_UCODE_SIZE;
5767                 break;
5768         case CHIP_KAVERI:
5769                 size = KV_RLC_UCODE_SIZE;
5770                 break;
5771         case CHIP_KABINI:
5772                 size = KB_RLC_UCODE_SIZE;
5773                 break;
5774         }
5775
5776         cik_rlc_stop(rdev);
5777
5778         /* disable CG */
5779         tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5780         WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5781
5782         si_rlc_reset(rdev);
5783
5784         cik_init_pg(rdev);
5785
5786         cik_init_cg(rdev);
5787
5788         WREG32(RLC_LB_CNTR_INIT, 0);
5789         WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5790
5791         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5792         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5793         WREG32(RLC_LB_PARAMS, 0x00600408);
5794         WREG32(RLC_LB_CNTL, 0x80000004);
5795
5796         WREG32(RLC_MC_CNTL, 0);
5797         WREG32(RLC_UCODE_CNTL, 0);
5798
5799         fw_data = (const __be32 *)rdev->rlc_fw->data;
5800                 WREG32(RLC_GPM_UCODE_ADDR, 0);
5801         for (i = 0; i < size; i++)
5802                 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5803         WREG32(RLC_GPM_UCODE_ADDR, 0);
5804
5805         /* XXX - find out what chips support lbpw */
5806         cik_enable_lbpw(rdev, false);
5807
5808         if (rdev->family == CHIP_BONAIRE)
5809                 WREG32(RLC_DRIVER_DMA_STATUS, 0);
5810
5811         cik_rlc_start(rdev);
5812
5813         return 0;
5814 }
5815
/* Enable/disable coarse grain clock gating (CGCG) and coarse grain
 * light sleep (CGLS) for gfx.
 */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp, tmp2;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		cik_enable_gui_idle_interrupt(rdev, true);

		/* halt the RLC while programming the serdes, restore after */
		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
		WREG32(RLC_SERDES_WR_CTRL, tmp2);

		cik_update_rlc(rdev, tmp);

		data |= CGCG_EN | CGLS_EN;
	} else {
		cik_enable_gui_idle_interrupt(rdev, false);

		/* four back-to-back reads - presumably to let the CB sclk
		 * gating state settle before disabling; TODO confirm why */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);

}
5851
/* Enable/disable medium grain clock gating (MGCG) for gfx, including the
 * related CP/RLC memory light sleep and CGTS shader gating state.
 */
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		/* CP memory light sleep, if supported */
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
				orig = data = RREG32(CP_MEM_SLP_CNTL);
				data |= CP_MEM_LS_EN;
				if (orig != data)
					WREG32(CP_MEM_SLP_CNTL, data);
			}
		}

		/* clear the MGCG override (bit 1) */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data &= 0xfffffffd;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* halt the RLC while programming the serdes, restore after */
		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
		WREG32(RLC_SERDES_WR_CTRL, data);

		cik_update_rlc(rdev, tmp);

		/* CGTS shader clock gating, if supported */
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
			orig = data = RREG32(CGTS_SM_CTRL_REG);
			data &= ~SM_MODE_MASK;
			data |= SM_MODE(0x2);
			data |= SM_MODE_ENABLE;
			data &= ~CGTS_OVERRIDE;
			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_LS_OVERRIDE;
			data &= ~ON_MONITOR_ADD_MASK;
			data |= ON_MONITOR_ADD_EN;
			data |= ON_MONITOR_ADD(0x96);
			if (orig != data)
				WREG32(CGTS_SM_CTRL_REG, data);
		}
	} else {
		/* set the MGCG override (bit 1) */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000002;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* turn off RLC memory light sleep */
		data = RREG32(RLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_LS_EN) {
			data &= ~RLC_MEM_LS_EN;
			WREG32(RLC_MEM_SLP_CNTL, data);
		}

		/* turn off CP memory light sleep */
		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}

		/* force the CGTS overrides on */
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		/* halt the RLC while programming the serdes, restore after */
		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
		WREG32(RLC_SERDES_WR_CTRL, data);

		cik_update_rlc(rdev, tmp);
	}
}
5930
/* MC/VM registers that share the same clock gating / light sleep enable
 * bits; cik_enable_mc_ls() and cik_enable_mc_mgcg() iterate over this list.
 */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
5943
5944 static void cik_enable_mc_ls(struct radeon_device *rdev,
5945                              bool enable)
5946 {
5947         int i;
5948         u32 orig, data;
5949
5950         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5951                 orig = data = RREG32(mc_cg_registers[i]);
5952                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5953                         data |= MC_LS_ENABLE;
5954                 else
5955                         data &= ~MC_LS_ENABLE;
5956                 if (data != orig)
5957                         WREG32(mc_cg_registers[i], data);
5958         }
5959 }
5960
5961 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
5962                                bool enable)
5963 {
5964         int i;
5965         u32 orig, data;
5966
5967         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5968                 orig = data = RREG32(mc_cg_registers[i]);
5969                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5970                         data |= MC_CG_ENABLE;
5971                 else
5972                         data &= ~MC_CG_ENABLE;
5973                 if (data != orig)
5974                         WREG32(mc_cg_registers[i], data);
5975         }
5976 }
5977
5978 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
5979                                  bool enable)
5980 {
5981         u32 orig, data;
5982
5983         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5984                 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
5985                 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
5986         } else {
5987                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
5988                 data |= 0xff000000;
5989                 if (data != orig)
5990                         WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
5991
5992                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
5993                 data |= 0xff000000;
5994                 if (data != orig)
5995                         WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
5996         }
5997 }
5998
5999 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6000                                  bool enable)
6001 {
6002         u32 orig, data;
6003
6004         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6005                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6006                 data |= 0x100;
6007                 if (orig != data)
6008                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6009
6010                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6011                 data |= 0x100;
6012                 if (orig != data)
6013                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6014         } else {
6015                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6016                 data &= ~0x100;
6017                 if (orig != data)
6018                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6019
6020                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6021                 data &= ~0x100;
6022                 if (orig != data)
6023                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6024         }
6025 }
6026
/* Toggle UVD medium grain clock gating (DCM + memory gating bits). */
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
				bool enable)
{
	u32 orig, data;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		/* NOTE(review): the read above is discarded - the register is
		 * overwritten with 0xfff outright rather than OR-ing the bits
		 * in as the disable path's "&= ~0xfff" would suggest; confirm
		 * whether "data |= 0xfff" was intended. */
		data = 0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	} else {
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data &= ~0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	}
}
6052
6053 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6054                                bool enable)
6055 {
6056         u32 orig, data;
6057
6058         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6059
6060         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6061                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6062                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6063         else
6064                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6065                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6066
6067         if (orig != data)
6068                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
6069 }
6070
6071 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6072                                 bool enable)
6073 {
6074         u32 orig, data;
6075
6076         orig = data = RREG32(HDP_HOST_PATH_CNTL);
6077
6078         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6079                 data &= ~CLOCK_GATING_DIS;
6080         else
6081                 data |= CLOCK_GATING_DIS;
6082
6083         if (orig != data)
6084                 WREG32(HDP_HOST_PATH_CNTL, data);
6085 }
6086
6087 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6088                               bool enable)
6089 {
6090         u32 orig, data;
6091
6092         orig = data = RREG32(HDP_MEM_POWER_LS);
6093
6094         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6095                 data |= HDP_LS_ENABLE;
6096         else
6097                 data &= ~HDP_LS_ENABLE;
6098
6099         if (orig != data)
6100                 WREG32(HDP_MEM_POWER_LS, data);
6101 }
6102
/**
 * cik_update_cg - enable/disable clock gating for the requested IP blocks
 *
 * @rdev: radeon_device pointer
 * @block: bitmask of RADEON_CG_BLOCK_* values to update
 * @enable: enable (true) or disable (false) clock gating
 */
void cik_update_cg(struct radeon_device *rdev,
		   u32 block, bool enable)
{

	if (block & RADEON_CG_BLOCK_GFX) {
		cik_enable_gui_idle_interrupt(rdev, false);
		/* order matters! */
		if (enable) {
			cik_enable_mgcg(rdev, true);
			cik_enable_cgcg(rdev, true);
		} else {
			cik_enable_cgcg(rdev, false);
			cik_enable_mgcg(rdev, false);
		}
		cik_enable_gui_idle_interrupt(rdev, true);
	}

	if (block & RADEON_CG_BLOCK_MC) {
		/* MC gating only applies to discrete parts */
		if (!(rdev->flags & RADEON_IS_IGP)) {
			cik_enable_mc_mgcg(rdev, enable);
			cik_enable_mc_ls(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		cik_enable_sdma_mgcg(rdev, enable);
		cik_enable_sdma_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_BIF) {
		cik_enable_bif_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		if (rdev->has_uvd)
			cik_enable_uvd_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		cik_enable_hdp_mgcg(rdev, enable);
		cik_enable_hdp_ls(rdev, enable);
	}

	/* VCE clock gating is toggled around encoding sessions */
	if (block & RADEON_CG_BLOCK_VCE) {
		vce_v2_0_enable_mgcg(rdev, enable);
	}
}
6150
/* Enable clock gating for all CG blocks handled at init time
 * (note: VCE is intentionally not enabled here).
 */
static void cik_init_cg(struct radeon_device *rdev)
{

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);

	if (rdev->has_uvd)
		si_init_uvd_internal_cg(rdev);

	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), true);
}
6165
/* Disable clock gating in the reverse order of cik_init_cg():
 * the non-gfx blocks first, then gfx.
 */
static void cik_fini_cg(struct radeon_device *rdev)
{
	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), false);

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
}
6176
6177 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6178                                           bool enable)
6179 {
6180         u32 data, orig;
6181
6182         orig = data = RREG32(RLC_PG_CNTL);
6183         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6184                 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6185         else
6186                 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6187         if (orig != data)
6188                 WREG32(RLC_PG_CNTL, data);
6189 }
6190
6191 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6192                                           bool enable)
6193 {
6194         u32 data, orig;
6195
6196         orig = data = RREG32(RLC_PG_CNTL);
6197         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6198                 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6199         else
6200                 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6201         if (orig != data)
6202                 WREG32(RLC_PG_CNTL, data);
6203 }
6204
6205 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6206 {
6207         u32 data, orig;
6208
6209         orig = data = RREG32(RLC_PG_CNTL);
6210         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6211                 data &= ~DISABLE_CP_PG;
6212         else
6213                 data |= DISABLE_CP_PG;
6214         if (orig != data)
6215                 WREG32(RLC_PG_CNTL, data);
6216 }
6217
6218 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6219 {
6220         u32 data, orig;
6221
6222         orig = data = RREG32(RLC_PG_CNTL);
6223         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6224                 data &= ~DISABLE_GDS_PG;
6225         else
6226                 data |= DISABLE_GDS_PG;
6227         if (orig != data)
6228                 WREG32(RLC_PG_CNTL, data);
6229 }
6230
6231 #define CP_ME_TABLE_SIZE    96
6232 #define CP_ME_TABLE_OFFSET  2048
6233 #define CP_MEC_TABLE_OFFSET 4096
6234
/* Copy the per-ME power gating register tables out of the CP ucode images
 * (ce, pfp, me, mec - plus a second mec table on Kaveri) into the RLC
 * cp_table buffer.
 */
void cik_init_cp_pg_table(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset;	/* offset into the ucode image, in __be32 words */

	if (rdev->family == CHIP_KAVERI)
		max_me = 5;

	if (rdev->rlc.cp_table_ptr == NULL)
		return;

	/* write the cp table buffer */
	dst_ptr = rdev->rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (me == 0) {
			fw_data = (const __be32 *)rdev->ce_fw->data;
			table_offset = CP_ME_TABLE_OFFSET;
		} else if (me == 1) {
			fw_data = (const __be32 *)rdev->pfp_fw->data;
			table_offset = CP_ME_TABLE_OFFSET;
		} else if (me == 2) {
			fw_data = (const __be32 *)rdev->me_fw->data;
			table_offset = CP_ME_TABLE_OFFSET;
		} else {
			fw_data = (const __be32 *)rdev->mec_fw->data;
			table_offset = CP_MEC_TABLE_OFFSET;
		}

		/* byte-swap each word from firmware (big endian) to LE */
		for (i = 0; i < CP_ME_TABLE_SIZE; i ++) {
			dst_ptr[bo_offset + i] = cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
		}
		bo_offset += CP_ME_TABLE_SIZE;
	}
}
6272
/* Toggle gfx coarse grain power gating and the RLC auto power-gate
 * control together.
 */
static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
				bool enable)
{
	u32 data, orig;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		orig = data = RREG32(RLC_PG_CNTL);
		data |= GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data |= AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);
	} else {
		orig = data = RREG32(RLC_PG_CNTL);
		data &= ~GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data &= ~AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);

		/* NOTE(review): result discarded - presumably a posting read
		 * to make sure power gating has disengaged; confirm intent */
		data = RREG32(DB_RENDER_CONTROL);
	}
}
6302
6303 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6304 {
6305         u32 mask = 0, tmp, tmp1;
6306         int i;
6307
6308         cik_select_se_sh(rdev, se, sh);
6309         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6310         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6311         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6312
6313         tmp &= 0xffff0000;
6314
6315         tmp |= tmp1;
6316         tmp >>= 16;
6317
6318         for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6319                 mask <<= 1;
6320                 mask |= 1;
6321         }
6322
6323         return (~tmp) & mask;
6324 }
6325
6326 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6327 {
6328         u32 i, j, k, active_cu_number = 0;
6329         u32 mask, counter, cu_bitmap;
6330         u32 tmp = 0;
6331
6332         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6333                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6334                         mask = 1;
6335                         cu_bitmap = 0;
6336                         counter = 0;
6337                         for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6338                                 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6339                                         if (counter < 2)
6340                                                 cu_bitmap |= mask;
6341                                         counter ++;
6342                                 }
6343                                 mask <<= 1;
6344                         }
6345
6346                         active_cu_number += counter;
6347                         tmp |= (cu_bitmap << (i * 16 + j * 8));
6348                 }
6349         }
6350
6351         WREG32(RLC_PG_AO_CU_MASK, tmp);
6352
6353         tmp = RREG32(RLC_MAX_PG_CU);
6354         tmp &= ~MAX_PU_CU_MASK;
6355         tmp |= MAX_PU_CU(active_cu_number);
6356         WREG32(RLC_MAX_PG_CU, tmp);
6357 }
6358
6359 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6360                                        bool enable)
6361 {
6362         u32 data, orig;
6363
6364         orig = data = RREG32(RLC_PG_CNTL);
6365         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6366                 data |= STATIC_PER_CU_PG_ENABLE;
6367         else
6368                 data &= ~STATIC_PER_CU_PG_ENABLE;
6369         if (orig != data)
6370                 WREG32(RLC_PG_CNTL, data);
6371 }
6372
6373 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6374                                         bool enable)
6375 {
6376         u32 data, orig;
6377
6378         orig = data = RREG32(RLC_PG_CNTL);
6379         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6380                 data |= DYN_PER_CU_PG_ENABLE;
6381         else
6382                 data &= ~DYN_PER_CU_PG_ENABLE;
6383         if (orig != data)
6384                 WREG32(RLC_PG_CNTL, data);
6385 }
6386
6387 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6388 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6389
/* Program the RLC scratch area with the clear-state descriptor and the
 * save/restore register list, point it at the save/restore and cp_table
 * buffers, and set up the power gating delays/thresholds.
 */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 data, orig;
	u32 i;

	if (rdev->rlc.cs_data) {
		/* clear state descriptor: hi addr, lo addr, size */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
	} else {
		/* no clear state: zero out the descriptor */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		for (i = 0; i < 3; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, 0);
	}
	if (rdev->rlc.reg_list) {
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
		for (i = 0; i < rdev->rlc.reg_list_size; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
	}

	orig = data = RREG32(RLC_PG_CNTL);
	data |= GFX_PG_SRC;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);

	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

	data = RREG32(CP_RB_WPTR_POLL_CNTL);
	data &= ~IDLE_POLL_COUNT_MASK;
	data |= IDLE_POLL_COUNT(0x60);
	WREG32(CP_RB_WPTR_POLL_CNTL, data);

	data = 0x10101010;
	WREG32(RLC_PG_DELAY, data);

	data = RREG32(RLC_PG_DELAY_2);
	data &= ~0xff;
	data |= 0x3;
	WREG32(RLC_PG_DELAY_2, data);

	data = RREG32(RLC_AUTO_PG_CTRL);
	data &= ~GRBM_REG_SGIT_MASK;
	data |= GRBM_REG_SGIT(0x700);
	WREG32(RLC_AUTO_PG_CTRL, data);

}
6438
/**
 * cik_update_gfx_pg - enable/disable gfx power gating
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable power gating
 *
 * Toggle the gfx CGPG, static MGPG and dynamic MGPG helpers as a
 * group (CIK).
 */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
        cik_enable_gfx_cgpg(rdev, enable);
        cik_enable_gfx_static_mgpg(rdev, enable);
        cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
6445
6446 u32 cik_get_csb_size(struct radeon_device *rdev)
6447 {
6448         u32 count = 0;
6449         const struct cs_section_def *sect = NULL;
6450         const struct cs_extent_def *ext = NULL;
6451
6452         if (rdev->rlc.cs_data == NULL)
6453                 return 0;
6454
6455         /* begin clear state */
6456         count += 2;
6457         /* context control state */
6458         count += 3;
6459
6460         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6461                 for (ext = sect->section; ext->extent != NULL; ++ext) {
6462                         if (sect->id == SECT_CONTEXT)
6463                                 count += 2 + ext->reg_count;
6464                         else
6465                                 return 0;
6466                 }
6467         }
6468         /* pa_sc_raster_config/pa_sc_raster_config1 */
6469         count += 4;
6470         /* end clear state */
6471         count += 2;
6472         /* clear state */
6473         count += 2;
6474
6475         return count;
6476 }
6477
6478 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6479 {
6480         u32 count = 0, i;
6481         const struct cs_section_def *sect = NULL;
6482         const struct cs_extent_def *ext = NULL;
6483
6484         if (rdev->rlc.cs_data == NULL)
6485                 return;
6486         if (buffer == NULL)
6487                 return;
6488
6489         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6490         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6491
6492         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6493         buffer[count++] = cpu_to_le32(0x80000000);
6494         buffer[count++] = cpu_to_le32(0x80000000);
6495
6496         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6497                 for (ext = sect->section; ext->extent != NULL; ++ext) {
6498                         if (sect->id == SECT_CONTEXT) {
6499                                 buffer[count++] =
6500                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
6501                                 buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
6502                                 for (i = 0; i < ext->reg_count; i++)
6503                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
6504                         } else {
6505                                 return;
6506                         }
6507                 }
6508         }
6509
6510         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
6511         buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
6512         switch (rdev->family) {
6513         case CHIP_BONAIRE:
6514                 buffer[count++] = cpu_to_le32(0x16000012);
6515                 buffer[count++] = cpu_to_le32(0x00000000);
6516                 break;
6517         case CHIP_KAVERI:
6518                 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6519                 buffer[count++] = cpu_to_le32(0x00000000);
6520                 break;
6521         case CHIP_KABINI:
6522                 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6523                 buffer[count++] = cpu_to_le32(0x00000000);
6524                 break;
6525         case CHIP_HAWAII:
6526                 buffer[count++] = 0x3a00161a;
6527                 buffer[count++] = 0x0000002e;
6528                 break;
6529         default:
6530                 buffer[count++] = cpu_to_le32(0x00000000);
6531                 buffer[count++] = cpu_to_le32(0x00000000);
6532                 break;
6533         }
6534
6535         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6536         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
6537
6538         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
6539         buffer[count++] = cpu_to_le32(0);
6540 }
6541
/**
 * cik_init_pg - initialize powergating
 *
 * @rdev: radeon_device pointer
 *
 * When any pg flags are set: enable sck slowdown on both power up and
 * power down, set up RLC cgpg plus CP and GDS powergating when gfx PG
 * is supported, program the always-on CU mask, and finally enable gfx
 * powergating (CIK).
 */
static void cik_init_pg(struct radeon_device *rdev)
{
        if (rdev->pg_flags) {
                cik_enable_sck_slowdown_on_pu(rdev, true);
                cik_enable_sck_slowdown_on_pd(rdev, true);
                if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
                        cik_init_gfx_cgpg(rdev);
                        cik_enable_cp_pg(rdev, true);
                        cik_enable_gds_pg(rdev, true);
                }
                cik_init_ao_cu_mask(rdev);
                cik_update_gfx_pg(rdev, true);
        }
}
6556
/**
 * cik_fini_pg - tear down powergating
 *
 * @rdev: radeon_device pointer
 *
 * Disable gfx powergating and, when gfx PG is supported, the CP and
 * GDS powergating as well — mirroring cik_init_pg() in reverse order
 * (CIK).
 */
static void cik_fini_pg(struct radeon_device *rdev)
{
        if (rdev->pg_flags) {
                cik_update_gfx_pg(rdev, false);
                if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
                        cik_enable_cp_pg(rdev, false);
                        cik_enable_gds_pg(rdev, false);
                }
        }
}
6567
6568 /*
6569  * Interrupts
6570  * Starting with r6xx, interrupts are handled via a ring buffer.
6571  * Ring buffers are areas of GPU accessible memory that the GPU
6572  * writes interrupt vectors into and the host reads vectors out of.
6573  * There is a rptr (read pointer) that determines where the
6574  * host is currently reading, and a wptr (write pointer)
6575  * which determines where the GPU has written.  When the
6576  * pointers are equal, the ring is idle.  When the GPU
6577  * writes vectors to the ring buffer, it increments the
6578  * wptr.  When there is an interrupt, the host then starts
6579  * fetching commands and processing them until the pointers are
6580  * equal again at which point it updates the rptr.
6581  */
6582
6583 /**
6584  * cik_enable_interrupts - Enable the interrupt ring buffer
6585  *
6586  * @rdev: radeon_device pointer
6587  *
6588  * Enable the interrupt ring buffer (CIK).
6589  */
6590 static void cik_enable_interrupts(struct radeon_device *rdev)
6591 {
6592         u32 ih_cntl = RREG32(IH_CNTL);
6593         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6594
6595         ih_cntl |= ENABLE_INTR;
6596         ih_rb_cntl |= IH_RB_ENABLE;
6597         WREG32(IH_CNTL, ih_cntl);
6598         WREG32(IH_RB_CNTL, ih_rb_cntl);
6599         rdev->ih.enabled = true;
6600 }
6601
6602 /**
6603  * cik_disable_interrupts - Disable the interrupt ring buffer
6604  *
6605  * @rdev: radeon_device pointer
6606  *
6607  * Disable the interrupt ring buffer (CIK).
6608  */
6609 static void cik_disable_interrupts(struct radeon_device *rdev)
6610 {
6611         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6612         u32 ih_cntl = RREG32(IH_CNTL);
6613
6614         ih_rb_cntl &= ~IH_RB_ENABLE;
6615         ih_cntl &= ~ENABLE_INTR;
6616         WREG32(IH_RB_CNTL, ih_rb_cntl);
6617         WREG32(IH_CNTL, ih_cntl);
6618         /* set rptr, wptr to 0 */
6619         WREG32(IH_RB_RPTR, 0);
6620         WREG32(IH_RB_WPTR, 0);
6621         rdev->ih.enabled = false;
6622         rdev->ih.rptr = 0;
6623 }
6624
6625 /**
6626  * cik_disable_interrupt_state - Disable all interrupt sources
6627  *
6628  * @rdev: radeon_device pointer
6629  *
6630  * Clear all interrupt enable bits used by the driver (CIK).
6631  */
6632 static void cik_disable_interrupt_state(struct radeon_device *rdev)
6633 {
6634         u32 tmp;
6635
6636         /* gfx ring */
6637         tmp = RREG32(CP_INT_CNTL_RING0) &
6638                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6639         WREG32(CP_INT_CNTL_RING0, tmp);
6640         /* sdma */
6641         tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6642         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
6643         tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6644         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
6645         /* compute queues */
6646         WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
6647         WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
6648         WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
6649         WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
6650         WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
6651         WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
6652         WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
6653         WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
6654         /* grbm */
6655         WREG32(GRBM_INT_CNTL, 0);
6656         /* vline/vblank, etc. */
6657         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6658         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6659         if (rdev->num_crtc >= 4) {
6660                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6661                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6662         }
6663         if (rdev->num_crtc >= 6) {
6664                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6665                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6666         }
6667
6668         /* dac hotplug */
6669         WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
6670
6671         /* digital hotplug */
6672         tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6673         WREG32(DC_HPD1_INT_CONTROL, tmp);
6674         tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6675         WREG32(DC_HPD2_INT_CONTROL, tmp);
6676         tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6677         WREG32(DC_HPD3_INT_CONTROL, tmp);
6678         tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6679         WREG32(DC_HPD4_INT_CONTROL, tmp);
6680         tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6681         WREG32(DC_HPD5_INT_CONTROL, tmp);
6682         tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6683         WREG32(DC_HPD6_INT_CONTROL, tmp);
6684
6685 }
6686
6687 /**
6688  * cik_irq_init - init and enable the interrupt ring
6689  *
6690  * @rdev: radeon_device pointer
6691  *
6692  * Allocate a ring buffer for the interrupt controller,
6693  * enable the RLC, disable interrupts, enable the IH
6694  * ring buffer and enable it (CIK).
6695  * Called at device load and reume.
6696  * Returns 0 for success, errors for failure.
6697  */
6698 static int cik_irq_init(struct radeon_device *rdev)
6699 {
6700         int ret = 0;
6701         int rb_bufsz;
6702         u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
6703
6704         /* allocate ring */
6705         ret = r600_ih_ring_alloc(rdev);
6706         if (ret)
6707                 return ret;
6708
6709         /* disable irqs */
6710         cik_disable_interrupts(rdev);
6711
6712         /* init rlc */
6713         ret = cik_rlc_resume(rdev);
6714         if (ret) {
6715                 r600_ih_ring_fini(rdev);
6716                 return ret;
6717         }
6718
6719         /* setup interrupt control */
6720         /* XXX this should actually be a bus address, not an MC address. same on older asics */
6721         WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
6722         interrupt_cntl = RREG32(INTERRUPT_CNTL);
6723         /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6724          * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6725          */
6726         interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6727         /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6728         interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6729         WREG32(INTERRUPT_CNTL, interrupt_cntl);
6730
6731         WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6732         rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
6733
6734         ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6735                       IH_WPTR_OVERFLOW_CLEAR |
6736                       (rb_bufsz << 1));
6737
6738         if (rdev->wb.enabled)
6739                 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6740
6741         /* set the writeback address whether it's enabled or not */
6742         WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6743         WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6744
6745         WREG32(IH_RB_CNTL, ih_rb_cntl);
6746
6747         /* set rptr, wptr to 0 */
6748         WREG32(IH_RB_RPTR, 0);
6749         WREG32(IH_RB_WPTR, 0);
6750
6751         /* Default settings for IH_CNTL (disabled at first) */
6752         ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6753         /* RPTR_REARM only works if msi's are enabled */
6754         if (rdev->msi_enabled)
6755                 ih_cntl |= RPTR_REARM;
6756         WREG32(IH_CNTL, ih_cntl);
6757
6758         /* force the active interrupt state to all disabled */
6759         cik_disable_interrupt_state(rdev);
6760
6761         pci_set_master(rdev->pdev);
6762
6763         /* enable irqs */
6764         cik_enable_interrupts(rdev);
6765
6766         return ret;
6767 }
6768
6769 /**
6770  * cik_irq_set - enable/disable interrupt sources
6771  *
6772  * @rdev: radeon_device pointer
6773  *
6774  * Enable interrupt sources on the GPU (vblanks, hpd,
6775  * etc.) (CIK).
6776  * Returns 0 for success, errors for failure.
6777  */
6778 int cik_irq_set(struct radeon_device *rdev)
6779 {
6780         u32 cp_int_cntl;
6781         u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
6782         u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
6783         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6784         u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
6785         u32 grbm_int_cntl = 0;
6786         u32 dma_cntl, dma_cntl1;
6787         u32 thermal_int;
6788
6789         if (!rdev->irq.installed) {
6790                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6791                 return -EINVAL;
6792         }
6793         /* don't enable anything if the ih is disabled */
6794         if (!rdev->ih.enabled) {
6795                 cik_disable_interrupts(rdev);
6796                 /* force the active interrupt state to all disabled */
6797                 cik_disable_interrupt_state(rdev);
6798                 return 0;
6799         }
6800
6801         cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6802                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6803         cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
6804
6805         hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
6806         hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
6807         hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
6808         hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
6809         hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
6810         hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
6811
6812         dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6813         dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6814
6815         cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6816         cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6817         cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6818         cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6819         cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6820         cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6821         cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6822         cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6823
6824         if (rdev->flags & RADEON_IS_IGP)
6825                 thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
6826                         ~(THERM_INTH_MASK | THERM_INTL_MASK);
6827         else
6828                 thermal_int = RREG32_SMC(CG_THERMAL_INT) &
6829                         ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6830
6831         /* enable CP interrupts on all rings */
6832         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6833                 DRM_DEBUG("cik_irq_set: sw int gfx\n");
6834                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6835         }
6836         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6837                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6838                 DRM_DEBUG("si_irq_set: sw int cp1\n");
6839                 if (ring->me == 1) {
6840                         switch (ring->pipe) {
6841                         case 0:
6842                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6843                                 break;
6844                         case 1:
6845                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6846                                 break;
6847                         case 2:
6848                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6849                                 break;
6850                         case 3:
6851                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6852                                 break;
6853                         default:
6854                                 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6855                                 break;
6856                         }
6857                 } else if (ring->me == 2) {
6858                         switch (ring->pipe) {
6859                         case 0:
6860                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6861                                 break;
6862                         case 1:
6863                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6864                                 break;
6865                         case 2:
6866                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6867                                 break;
6868                         case 3:
6869                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6870                                 break;
6871                         default:
6872                                 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6873                                 break;
6874                         }
6875                 } else {
6876                         DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
6877                 }
6878         }
6879         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6880                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6881                 DRM_DEBUG("si_irq_set: sw int cp2\n");
6882                 if (ring->me == 1) {
6883                         switch (ring->pipe) {
6884                         case 0:
6885                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6886                                 break;
6887                         case 1:
6888                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6889                                 break;
6890                         case 2:
6891                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6892                                 break;
6893                         case 3:
6894                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6895                                 break;
6896                         default:
6897                                 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6898                                 break;
6899                         }
6900                 } else if (ring->me == 2) {
6901                         switch (ring->pipe) {
6902                         case 0:
6903                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6904                                 break;
6905                         case 1:
6906                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6907                                 break;
6908                         case 2:
6909                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6910                                 break;
6911                         case 3:
6912                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6913                                 break;
6914                         default:
6915                                 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6916                                 break;
6917                         }
6918                 } else {
6919                         DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
6920                 }
6921         }
6922
6923         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6924                 DRM_DEBUG("cik_irq_set: sw int dma\n");
6925                 dma_cntl |= TRAP_ENABLE;
6926         }
6927
6928         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6929                 DRM_DEBUG("cik_irq_set: sw int dma1\n");
6930                 dma_cntl1 |= TRAP_ENABLE;
6931         }
6932
6933         if (rdev->irq.crtc_vblank_int[0] ||
6934             atomic_read(&rdev->irq.pflip[0])) {
6935                 DRM_DEBUG("cik_irq_set: vblank 0\n");
6936                 crtc1 |= VBLANK_INTERRUPT_MASK;
6937         }
6938         if (rdev->irq.crtc_vblank_int[1] ||
6939             atomic_read(&rdev->irq.pflip[1])) {
6940                 DRM_DEBUG("cik_irq_set: vblank 1\n");
6941                 crtc2 |= VBLANK_INTERRUPT_MASK;
6942         }
6943         if (rdev->irq.crtc_vblank_int[2] ||
6944             atomic_read(&rdev->irq.pflip[2])) {
6945                 DRM_DEBUG("cik_irq_set: vblank 2\n");
6946                 crtc3 |= VBLANK_INTERRUPT_MASK;
6947         }
6948         if (rdev->irq.crtc_vblank_int[3] ||
6949             atomic_read(&rdev->irq.pflip[3])) {
6950                 DRM_DEBUG("cik_irq_set: vblank 3\n");
6951                 crtc4 |= VBLANK_INTERRUPT_MASK;
6952         }
6953         if (rdev->irq.crtc_vblank_int[4] ||
6954             atomic_read(&rdev->irq.pflip[4])) {
6955                 DRM_DEBUG("cik_irq_set: vblank 4\n");
6956                 crtc5 |= VBLANK_INTERRUPT_MASK;
6957         }
6958         if (rdev->irq.crtc_vblank_int[5] ||
6959             atomic_read(&rdev->irq.pflip[5])) {
6960                 DRM_DEBUG("cik_irq_set: vblank 5\n");
6961                 crtc6 |= VBLANK_INTERRUPT_MASK;
6962         }
6963         if (rdev->irq.hpd[0]) {
6964                 DRM_DEBUG("cik_irq_set: hpd 1\n");
6965                 hpd1 |= DC_HPDx_INT_EN;
6966         }
6967         if (rdev->irq.hpd[1]) {
6968                 DRM_DEBUG("cik_irq_set: hpd 2\n");
6969                 hpd2 |= DC_HPDx_INT_EN;
6970         }
6971         if (rdev->irq.hpd[2]) {
6972                 DRM_DEBUG("cik_irq_set: hpd 3\n");
6973                 hpd3 |= DC_HPDx_INT_EN;
6974         }
6975         if (rdev->irq.hpd[3]) {
6976                 DRM_DEBUG("cik_irq_set: hpd 4\n");
6977                 hpd4 |= DC_HPDx_INT_EN;
6978         }
6979         if (rdev->irq.hpd[4]) {
6980                 DRM_DEBUG("cik_irq_set: hpd 5\n");
6981                 hpd5 |= DC_HPDx_INT_EN;
6982         }
6983         if (rdev->irq.hpd[5]) {
6984                 DRM_DEBUG("cik_irq_set: hpd 6\n");
6985                 hpd6 |= DC_HPDx_INT_EN;
6986         }
6987
6988         if (rdev->irq.dpm_thermal) {
6989                 DRM_DEBUG("dpm thermal\n");
6990                 if (rdev->flags & RADEON_IS_IGP)
6991                         thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
6992                 else
6993                         thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
6994         }
6995
6996         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6997
6998         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
6999         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7000
7001         WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7002         WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
7003         WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
7004         WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
7005         WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
7006         WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
7007         WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
7008         WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
7009
7010         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7011
7012         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7013         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7014         if (rdev->num_crtc >= 4) {
7015                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7016                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7017         }
7018         if (rdev->num_crtc >= 6) {
7019                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7020                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7021         }
7022
7023         WREG32(DC_HPD1_INT_CONTROL, hpd1);
7024         WREG32(DC_HPD2_INT_CONTROL, hpd2);
7025         WREG32(DC_HPD3_INT_CONTROL, hpd3);
7026         WREG32(DC_HPD4_INT_CONTROL, hpd4);
7027         WREG32(DC_HPD5_INT_CONTROL, hpd5);
7028         WREG32(DC_HPD6_INT_CONTROL, hpd6);
7029
7030         if (rdev->flags & RADEON_IS_IGP)
7031                 WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
7032         else
7033                 WREG32_SMC(CG_THERMAL_INT, thermal_int);
7034
7035         return 0;
7036 }
7037
7038 /**
7039  * cik_irq_ack - ack interrupt sources
7040  *
7041  * @rdev: radeon_device pointer
7042  *
7043  * Ack interrupt sources on the GPU (vblanks, hpd,
7044  * etc.) (CIK).  Certain interrupts sources are sw
7045  * generated and do not require an explicit ack.
7046  */
7047 static inline void cik_irq_ack(struct radeon_device *rdev)
7048 {
7049         u32 tmp;
7050
7051         rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7052         rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7053         rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7054         rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7055         rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7056         rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7057         rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7058
7059         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7060                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7061         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7062                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7063         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7064                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7065         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7066                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7067
7068         if (rdev->num_crtc >= 4) {
7069                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7070                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7071                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7072                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7073                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7074                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7075                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7076                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7077         }
7078
7079         if (rdev->num_crtc >= 6) {
7080                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7081                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7082                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7083                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7084                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7085                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7086                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7087                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7088         }
7089
7090         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7091                 tmp = RREG32(DC_HPD1_INT_CONTROL);
7092                 tmp |= DC_HPDx_INT_ACK;
7093                 WREG32(DC_HPD1_INT_CONTROL, tmp);
7094         }
7095         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7096                 tmp = RREG32(DC_HPD2_INT_CONTROL);
7097                 tmp |= DC_HPDx_INT_ACK;
7098                 WREG32(DC_HPD2_INT_CONTROL, tmp);
7099         }
7100         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7101                 tmp = RREG32(DC_HPD3_INT_CONTROL);
7102                 tmp |= DC_HPDx_INT_ACK;
7103                 WREG32(DC_HPD3_INT_CONTROL, tmp);
7104         }
7105         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7106                 tmp = RREG32(DC_HPD4_INT_CONTROL);
7107                 tmp |= DC_HPDx_INT_ACK;
7108                 WREG32(DC_HPD4_INT_CONTROL, tmp);
7109         }
7110         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7111                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7112                 tmp |= DC_HPDx_INT_ACK;
7113                 WREG32(DC_HPD5_INT_CONTROL, tmp);
7114         }
7115         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7116                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7117                 tmp |= DC_HPDx_INT_ACK;
7118                 WREG32(DC_HPD6_INT_CONTROL, tmp);
7119         }
7120 }
7121
7122 /**
7123  * cik_irq_disable - disable interrupts
7124  *
7125  * @rdev: radeon_device pointer
7126  *
7127  * Disable interrupts on the hw (CIK).
7128  */
7129 static void cik_irq_disable(struct radeon_device *rdev)
7130 {
7131         cik_disable_interrupts(rdev);
7132         /* Wait and acknowledge irq */
7133         mdelay(1);
7134         cik_irq_ack(rdev);
7135         cik_disable_interrupt_state(rdev);
7136 }
7137
7138 /**
7139  * cik_irq_disable - disable interrupts for suspend
7140  *
7141  * @rdev: radeon_device pointer
7142  *
7143  * Disable interrupts and stop the RLC (CIK).
7144  * Used for suspend.
7145  */
7146 static void cik_irq_suspend(struct radeon_device *rdev)
7147 {
7148         cik_irq_disable(rdev);
7149         cik_rlc_stop(rdev);
7150 }
7151
7152 /**
7153  * cik_irq_fini - tear down interrupt support
7154  *
7155  * @rdev: radeon_device pointer
7156  *
7157  * Disable interrupts on the hw and free the IH ring
7158  * buffer (CIK).
7159  * Used for driver unload.
7160  */
7161 static void cik_irq_fini(struct radeon_device *rdev)
7162 {
7163         cik_irq_suspend(rdev);
7164         r600_ih_ring_fini(rdev);
7165 }
7166
7167 /**
7168  * cik_get_ih_wptr - get the IH ring buffer wptr
7169  *
7170  * @rdev: radeon_device pointer
7171  *
7172  * Get the IH ring buffer wptr from either the register
7173  * or the writeback memory buffer (CIK).  Also check for
7174  * ring buffer overflow and deal with it.
7175  * Used by cik_irq_process().
7176  * Returns the value of the wptr.
7177  */
7178 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7179 {
7180         u32 wptr, tmp;
7181
7182         if (rdev->wb.enabled)
7183                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7184         else
7185                 wptr = RREG32(IH_RB_WPTR);
7186
7187         if (wptr & RB_OVERFLOW) {
7188                 /* When a ring buffer overflow happen start parsing interrupt
7189                  * from the last not overwritten vector (wptr + 16). Hopefully
7190                  * this should allow us to catchup.
7191                  */
7192                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
7193                         wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
7194                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7195                 tmp = RREG32(IH_RB_CNTL);
7196                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
7197                 WREG32(IH_RB_CNTL, tmp);
7198         }
7199         return (wptr & rdev->ih.ptr_mask);
7200 }
7201
7202 /*        CIK IV Ring
7203  * Each IV ring entry is 128 bits:
7204  * [7:0]    - interrupt source id
7205  * [31:8]   - reserved
7206  * [59:32]  - interrupt source data
7207  * [63:60]  - reserved
7208  * [71:64]  - RINGID
7209  *            CP:
7210  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7211  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7212  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7213  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7214  *            PIPE_ID - ME0 0=3D
7215  *                    - ME1&2 compute dispatcher (4 pipes each)
7216  *            SDMA:
7217  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7218  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7219  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7220  * [79:72]  - VMID
7221  * [95:80]  - PASID
7222  * [127:96] - reserved
7223  */
7224 /**
7225  * cik_irq_process - interrupt handler
7226  *
7227  * @rdev: radeon_device pointer
7228  *
7229  * Interrupt hander (CIK).  Walk the IH ring,
7230  * ack interrupts and schedule work to handle
7231  * interrupt events.
7232  * Returns irq process return code.
7233  */
7234 int cik_irq_process(struct radeon_device *rdev)
7235 {
7236         struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7237         struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7238         u32 wptr;
7239         u32 rptr;
7240         u32 src_id, src_data, ring_id;
7241         u8 me_id, pipe_id, queue_id;
7242         u32 ring_index;
7243         bool queue_hotplug = false;
7244         bool queue_reset = false;
7245         u32 addr, status, mc_client;
7246         bool queue_thermal = false;
7247
7248         if (!rdev->ih.enabled || rdev->shutdown)
7249                 return IRQ_NONE;
7250
7251         wptr = cik_get_ih_wptr(rdev);
7252
7253 restart_ih:
7254         /* is somebody else already processing irqs? */
7255         if (atomic_xchg(&rdev->ih.lock, 1))
7256                 return IRQ_NONE;
7257
7258         rptr = rdev->ih.rptr;
7259         DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7260
7261         /* Order reading of wptr vs. reading of IH ring data */
7262         rmb();
7263
7264         /* display interrupts */
7265         cik_irq_ack(rdev);
7266
7267         while (rptr != wptr) {
7268                 /* wptr/rptr are in bytes! */
7269                 ring_index = rptr / 4;
7270                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7271                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7272                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7273
7274                 switch (src_id) {
7275                 case 1: /* D1 vblank/vline */
7276                         switch (src_data) {
7277                         case 0: /* D1 vblank */
7278                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
7279                                         if (rdev->irq.crtc_vblank_int[0]) {
7280                                                 drm_handle_vblank(rdev->ddev, 0);
7281                                                 rdev->pm.vblank_sync = true;
7282                                                 wake_up(&rdev->irq.vblank_queue);
7283                                         }
7284                                         if (atomic_read(&rdev->irq.pflip[0]))
7285                                                 radeon_crtc_handle_flip(rdev, 0);
7286                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7287                                         DRM_DEBUG("IH: D1 vblank\n");
7288                                 }
7289                                 break;
7290                         case 1: /* D1 vline */
7291                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
7292                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7293                                         DRM_DEBUG("IH: D1 vline\n");
7294                                 }
7295                                 break;
7296                         default:
7297                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7298                                 break;
7299                         }
7300                         break;
7301                 case 2: /* D2 vblank/vline */
7302                         switch (src_data) {
7303                         case 0: /* D2 vblank */
7304                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
7305                                         if (rdev->irq.crtc_vblank_int[1]) {
7306                                                 drm_handle_vblank(rdev->ddev, 1);
7307                                                 rdev->pm.vblank_sync = true;
7308                                                 wake_up(&rdev->irq.vblank_queue);
7309                                         }
7310                                         if (atomic_read(&rdev->irq.pflip[1]))
7311                                                 radeon_crtc_handle_flip(rdev, 1);
7312                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7313                                         DRM_DEBUG("IH: D2 vblank\n");
7314                                 }
7315                                 break;
7316                         case 1: /* D2 vline */
7317                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
7318                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7319                                         DRM_DEBUG("IH: D2 vline\n");
7320                                 }
7321                                 break;
7322                         default:
7323                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7324                                 break;
7325                         }
7326                         break;
7327                 case 3: /* D3 vblank/vline */
7328                         switch (src_data) {
7329                         case 0: /* D3 vblank */
7330                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
7331                                         if (rdev->irq.crtc_vblank_int[2]) {
7332                                                 drm_handle_vblank(rdev->ddev, 2);
7333                                                 rdev->pm.vblank_sync = true;
7334                                                 wake_up(&rdev->irq.vblank_queue);
7335                                         }
7336                                         if (atomic_read(&rdev->irq.pflip[2]))
7337                                                 radeon_crtc_handle_flip(rdev, 2);
7338                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7339                                         DRM_DEBUG("IH: D3 vblank\n");
7340                                 }
7341                                 break;
7342                         case 1: /* D3 vline */
7343                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
7344                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7345                                         DRM_DEBUG("IH: D3 vline\n");
7346                                 }
7347                                 break;
7348                         default:
7349                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7350                                 break;
7351                         }
7352                         break;
7353                 case 4: /* D4 vblank/vline */
7354                         switch (src_data) {
7355                         case 0: /* D4 vblank */
7356                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
7357                                         if (rdev->irq.crtc_vblank_int[3]) {
7358                                                 drm_handle_vblank(rdev->ddev, 3);
7359                                                 rdev->pm.vblank_sync = true;
7360                                                 wake_up(&rdev->irq.vblank_queue);
7361                                         }
7362                                         if (atomic_read(&rdev->irq.pflip[3]))
7363                                                 radeon_crtc_handle_flip(rdev, 3);
7364                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7365                                         DRM_DEBUG("IH: D4 vblank\n");
7366                                 }
7367                                 break;
7368                         case 1: /* D4 vline */
7369                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
7370                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7371                                         DRM_DEBUG("IH: D4 vline\n");
7372                                 }
7373                                 break;
7374                         default:
7375                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7376                                 break;
7377                         }
7378                         break;
7379                 case 5: /* D5 vblank/vline */
7380                         switch (src_data) {
7381                         case 0: /* D5 vblank */
7382                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
7383                                         if (rdev->irq.crtc_vblank_int[4]) {
7384                                                 drm_handle_vblank(rdev->ddev, 4);
7385                                                 rdev->pm.vblank_sync = true;
7386                                                 wake_up(&rdev->irq.vblank_queue);
7387                                         }
7388                                         if (atomic_read(&rdev->irq.pflip[4]))
7389                                                 radeon_crtc_handle_flip(rdev, 4);
7390                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7391                                         DRM_DEBUG("IH: D5 vblank\n");
7392                                 }
7393                                 break;
7394                         case 1: /* D5 vline */
7395                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
7396                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7397                                         DRM_DEBUG("IH: D5 vline\n");
7398                                 }
7399                                 break;
7400                         default:
7401                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7402                                 break;
7403                         }
7404                         break;
7405                 case 6: /* D6 vblank/vline */
7406                         switch (src_data) {
7407                         case 0: /* D6 vblank */
7408                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
7409                                         if (rdev->irq.crtc_vblank_int[5]) {
7410                                                 drm_handle_vblank(rdev->ddev, 5);
7411                                                 rdev->pm.vblank_sync = true;
7412                                                 wake_up(&rdev->irq.vblank_queue);
7413                                         }
7414                                         if (atomic_read(&rdev->irq.pflip[5]))
7415                                                 radeon_crtc_handle_flip(rdev, 5);
7416                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7417                                         DRM_DEBUG("IH: D6 vblank\n");
7418                                 }
7419                                 break;
7420                         case 1: /* D6 vline */
7421                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
7422                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7423                                         DRM_DEBUG("IH: D6 vline\n");
7424                                 }
7425                                 break;
7426                         default:
7427                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7428                                 break;
7429                         }
7430                         break;
7431                 case 42: /* HPD hotplug */
7432                         switch (src_data) {
7433                         case 0:
7434                                 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7435                                         rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7436                                         queue_hotplug = true;
7437                                         DRM_DEBUG("IH: HPD1\n");
7438                                 }
7439                                 break;
7440                         case 1:
7441                                 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7442                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7443                                         queue_hotplug = true;
7444                                         DRM_DEBUG("IH: HPD2\n");
7445                                 }
7446                                 break;
7447                         case 2:
7448                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7449                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7450                                         queue_hotplug = true;
7451                                         DRM_DEBUG("IH: HPD3\n");
7452                                 }
7453                                 break;
7454                         case 3:
7455                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7456                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7457                                         queue_hotplug = true;
7458                                         DRM_DEBUG("IH: HPD4\n");
7459                                 }
7460                                 break;
7461                         case 4:
7462                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7463                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7464                                         queue_hotplug = true;
7465                                         DRM_DEBUG("IH: HPD5\n");
7466                                 }
7467                                 break;
7468                         case 5:
7469                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7470                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7471                                         queue_hotplug = true;
7472                                         DRM_DEBUG("IH: HPD6\n");
7473                                 }
7474                                 break;
7475                         default:
7476                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7477                                 break;
7478                         }
7479                         break;
7480                 case 124: /* UVD */
7481                         DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7482                         radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7483                         break;
7484                 case 146:
7485                 case 147:
7486                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7487                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7488                         mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7489                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7490                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7491                                 addr);
7492                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7493                                 status);
7494                         cik_vm_decode_fault(rdev, status, addr, mc_client);
7495                         /* reset addr and status */
7496                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7497                         break;
7498                 case 167: /* VCE */
7499                         DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
7500                         switch (src_data) {
7501                         case 0:
7502                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
7503                                 break;
7504                         case 1:
7505                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
7506                                 break;
7507                         default:
7508                                 DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
7509                                 break;
7510                         }
7511                         break;
7512                 case 176: /* GFX RB CP_INT */
7513                 case 177: /* GFX IB CP_INT */
7514                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7515                         break;
7516                 case 181: /* CP EOP event */
7517                         DRM_DEBUG("IH: CP EOP\n");
7518                         /* XXX check the bitfield order! */
7519                         me_id = (ring_id & 0x60) >> 5;
7520                         pipe_id = (ring_id & 0x18) >> 3;
7521                         queue_id = (ring_id & 0x7) >> 0;
7522                         switch (me_id) {
7523                         case 0:
7524                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7525                                 break;
7526                         case 1:
7527                         case 2:
7528                                 if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
7529                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7530                                 if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
7531                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7532                                 break;
7533                         }
7534                         break;
7535                 case 184: /* CP Privileged reg access */
7536                         DRM_ERROR("Illegal register access in command stream\n");
7537                         /* XXX check the bitfield order! */
7538                         me_id = (ring_id & 0x60) >> 5;
7539                         pipe_id = (ring_id & 0x18) >> 3;
7540                         queue_id = (ring_id & 0x7) >> 0;
7541                         switch (me_id) {
7542                         case 0:
7543                                 /* This results in a full GPU reset, but all we need to do is soft
7544                                  * reset the CP for gfx
7545                                  */
7546                                 queue_reset = true;
7547                                 break;
7548                         case 1:
7549                                 /* XXX compute */
7550                                 queue_reset = true;
7551                                 break;
7552                         case 2:
7553                                 /* XXX compute */
7554                                 queue_reset = true;
7555                                 break;
7556                         }
7557                         break;
7558                 case 185: /* CP Privileged inst */
7559                         DRM_ERROR("Illegal instruction in command stream\n");
7560                         /* XXX check the bitfield order! */
7561                         me_id = (ring_id & 0x60) >> 5;
7562                         pipe_id = (ring_id & 0x18) >> 3;
7563                         queue_id = (ring_id & 0x7) >> 0;
7564                         switch (me_id) {
7565                         case 0:
7566                                 /* This results in a full GPU reset, but all we need to do is soft
7567                                  * reset the CP for gfx
7568                                  */
7569                                 queue_reset = true;
7570                                 break;
7571                         case 1:
7572                                 /* XXX compute */
7573                                 queue_reset = true;
7574                                 break;
7575                         case 2:
7576                                 /* XXX compute */
7577                                 queue_reset = true;
7578                                 break;
7579                         }
7580                         break;
7581                 case 224: /* SDMA trap event */
7582                         /* XXX check the bitfield order! */
7583                         me_id = (ring_id & 0x3) >> 0;
7584                         queue_id = (ring_id & 0xc) >> 2;
7585                         DRM_DEBUG("IH: SDMA trap\n");
7586                         switch (me_id) {
7587                         case 0:
7588                                 switch (queue_id) {
7589                                 case 0:
7590                                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
7591                                         break;
7592                                 case 1:
7593                                         /* XXX compute */
7594                                         break;
7595                                 case 2:
7596                                         /* XXX compute */
7597                                         break;
7598                                 }
7599                                 break;
7600                         case 1:
7601                                 switch (queue_id) {
7602                                 case 0:
7603                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7604                                         break;
7605                                 case 1:
7606                                         /* XXX compute */
7607                                         break;
7608                                 case 2:
7609                                         /* XXX compute */
7610                                         break;
7611                                 }
7612                                 break;
7613                         }
7614                         break;
7615                 case 230: /* thermal low to high */
7616                         DRM_DEBUG("IH: thermal low to high\n");
7617                         rdev->pm.dpm.thermal.high_to_low = false;
7618                         queue_thermal = true;
7619                         break;
7620                 case 231: /* thermal high to low */
7621                         DRM_DEBUG("IH: thermal high to low\n");
7622                         rdev->pm.dpm.thermal.high_to_low = true;
7623                         queue_thermal = true;
7624                         break;
7625                 case 233: /* GUI IDLE */
7626                         DRM_DEBUG("IH: GUI idle\n");
7627                         break;
7628                 case 241: /* SDMA Privileged inst */
7629                 case 247: /* SDMA Privileged inst */
7630                         DRM_ERROR("Illegal instruction in SDMA command stream\n");
7631                         /* XXX check the bitfield order! */
7632                         me_id = (ring_id & 0x3) >> 0;
7633                         queue_id = (ring_id & 0xc) >> 2;
7634                         switch (me_id) {
7635                         case 0:
7636                                 switch (queue_id) {
7637                                 case 0:
7638                                         queue_reset = true;
7639                                         break;
7640                                 case 1:
7641                                         /* XXX compute */
7642                                         queue_reset = true;
7643                                         break;
7644                                 case 2:
7645                                         /* XXX compute */
7646                                         queue_reset = true;
7647                                         break;
7648                                 }
7649                                 break;
7650                         case 1:
7651                                 switch (queue_id) {
7652                                 case 0:
7653                                         queue_reset = true;
7654                                         break;
7655                                 case 1:
7656                                         /* XXX compute */
7657                                         queue_reset = true;
7658                                         break;
7659                                 case 2:
7660                                         /* XXX compute */
7661                                         queue_reset = true;
7662                                         break;
7663                                 }
7664                                 break;
7665                         }
7666                         break;
7667                 default:
7668                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7669                         break;
7670                 }
7671
7672                 /* wptr/rptr are in bytes! */
7673                 rptr += 16;
7674                 rptr &= rdev->ih.ptr_mask;
7675         }
7676         if (queue_hotplug)
7677                 schedule_work(&rdev->hotplug_work);
7678         if (queue_reset)
7679                 schedule_work(&rdev->reset_work);
7680         if (queue_thermal)
7681                 schedule_work(&rdev->pm.dpm.thermal.work);
7682         rdev->ih.rptr = rptr;
7683         WREG32(IH_RB_RPTR, rdev->ih.rptr);
7684         atomic_set(&rdev->ih.lock, 0);
7685
7686         /* make sure wptr hasn't changed while processing */
7687         wptr = cik_get_ih_wptr(rdev);
7688         if (wptr != rptr)
7689                 goto restart_ih;
7690
7691         return IRQ_HANDLED;
7692 }
7693
7694 /*
7695  * startup/shutdown callbacks
7696  */
7697 /**
7698  * cik_startup - program the asic to a functional state
7699  *
7700  * @rdev: radeon_device pointer
7701  *
7702  * Programs the asic to a functional state (CIK).
7703  * Called by cik_init() and cik_resume().
7704  * Returns 0 for success, error for failure.
7705  */
7706 static int cik_startup(struct radeon_device *rdev)
7707 {
7708         struct radeon_ring *ring;
7709         int r;
7710
7711         /* enable pcie gen2/3 link */
7712         cik_pcie_gen3_enable(rdev);
7713         /* enable aspm */
7714         cik_program_aspm(rdev);
7715
7716         /* scratch needs to be initialized before MC */
7717         r = r600_vram_scratch_init(rdev);
7718         if (r)
7719                 return r;
7720
7721         cik_mc_program(rdev);
7722
7723         if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
7724                 r = ci_mc_load_microcode(rdev);
7725                 if (r) {
7726                         DRM_ERROR("Failed to load MC firmware!\n");
7727                         return r;
7728                 }
7729         }
7730
7731         r = cik_pcie_gart_enable(rdev);
7732         if (r)
7733                 return r;
7734         cik_gpu_init(rdev);
7735
7736         /* allocate rlc buffers */
7737         if (rdev->flags & RADEON_IS_IGP) {
7738                 if (rdev->family == CHIP_KAVERI) {
7739                         rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
7740                         rdev->rlc.reg_list_size =
7741                                 (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
7742                 } else {
7743                         rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
7744                         rdev->rlc.reg_list_size =
7745                                 (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
7746                 }
7747         }
7748         rdev->rlc.cs_data = ci_cs_data;
7749         rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
7750         r = sumo_rlc_init(rdev);
7751         if (r) {
7752                 DRM_ERROR("Failed to init rlc BOs!\n");
7753                 return r;
7754         }
7755
7756         /* allocate wb buffer */
7757         r = radeon_wb_init(rdev);
7758         if (r)
7759                 return r;
7760
7761         /* allocate mec buffers */
7762         r = cik_mec_init(rdev);
7763         if (r) {
7764                 DRM_ERROR("Failed to init MEC BOs!\n");
7765                 return r;
7766         }
7767
7768         r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
7769         if (r) {
7770                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7771                 return r;
7772         }
7773
7774         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7775         if (r) {
7776                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7777                 return r;
7778         }
7779
7780         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7781         if (r) {
7782                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7783                 return r;
7784         }
7785
7786         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
7787         if (r) {
7788                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7789                 return r;
7790         }
7791
7792         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7793         if (r) {
7794                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7795                 return r;
7796         }
7797
7798         r = radeon_uvd_resume(rdev);
7799         if (!r) {
7800                 r = uvd_v4_2_resume(rdev);
7801                 if (!r) {
7802                         r = radeon_fence_driver_start_ring(rdev,
7803                                                            R600_RING_TYPE_UVD_INDEX);
7804                         if (r)
7805                                 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
7806                 }
7807         }
7808         if (r)
7809                 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
7810
7811         r = radeon_vce_resume(rdev);
7812         if (!r) {
7813                 r = vce_v2_0_resume(rdev);
7814                 if (!r)
7815                         r = radeon_fence_driver_start_ring(rdev,
7816                                                            TN_RING_TYPE_VCE1_INDEX);
7817                 if (!r)
7818                         r = radeon_fence_driver_start_ring(rdev,
7819                                                            TN_RING_TYPE_VCE2_INDEX);
7820         }
7821         if (r) {
7822                 dev_err(rdev->dev, "VCE init error (%d).\n", r);
7823                 rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
7824                 rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
7825         }
7826
7827         /* Enable IRQ */
7828         if (!rdev->irq.installed) {
7829                 r = radeon_irq_kms_init(rdev);
7830                 if (r)
7831                         return r;
7832         }
7833
7834         r = cik_irq_init(rdev);
7835         if (r) {
7836                 DRM_ERROR("radeon: IH init failed (%d).\n", r);
7837                 radeon_irq_kms_fini(rdev);
7838                 return r;
7839         }
7840         cik_irq_set(rdev);
7841
7842         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7843         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
7844                              PACKET3(PACKET3_NOP, 0x3FFF));
7845         if (r)
7846                 return r;
7847
7848         /* set up the compute queues */
7849         /* type-2 packets are deprecated on MEC, use type-3 instead */
7850         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7851         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
7852                              PACKET3(PACKET3_NOP, 0x3FFF));
7853         if (r)
7854                 return r;
7855         ring->me = 1; /* first MEC */
7856         ring->pipe = 0; /* first pipe */
7857         ring->queue = 0; /* first queue */
7858         ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
7859
7860         /* type-2 packets are deprecated on MEC, use type-3 instead */
7861         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7862         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
7863                              PACKET3(PACKET3_NOP, 0x3FFF));
7864         if (r)
7865                 return r;
7866         /* dGPU only have 1 MEC */
7867         ring->me = 1; /* first MEC */
7868         ring->pipe = 0; /* first pipe */
7869         ring->queue = 1; /* second queue */
7870         ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
7871
7872         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7873         r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
7874                              SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7875         if (r)
7876                 return r;
7877
7878         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7879         r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
7880                              SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7881         if (r)
7882                 return r;
7883
7884         r = cik_cp_resume(rdev);
7885         if (r)
7886                 return r;
7887
7888         r = cik_sdma_resume(rdev);
7889         if (r)
7890                 return r;
7891
7892         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7893         if (ring->ring_size) {
7894                 r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
7895                                      RADEON_CP_PACKET2);
7896                 if (!r)
7897                         r = uvd_v1_0_init(rdev);
7898                 if (r)
7899                         DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
7900         }
7901
7902         r = -ENOENT;
7903
7904         ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
7905         if (ring->ring_size)
7906                 r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
7907                                      VCE_CMD_NO_OP);
7908
7909         ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
7910         if (ring->ring_size)
7911                 r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
7912                                      VCE_CMD_NO_OP);
7913
7914         if (!r)
7915                 r = vce_v1_0_init(rdev);
7916         else if (r != -ENOENT)
7917                 DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);
7918
7919         r = radeon_ib_pool_init(rdev);
7920         if (r) {
7921                 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
7922                 return r;
7923         }
7924
7925         r = radeon_vm_manager_init(rdev);
7926         if (r) {
7927                 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
7928                 return r;
7929         }
7930
7931         r = dce6_audio_init(rdev);
7932         if (r)
7933                 return r;
7934
7935         return 0;
7936 }
7937
7938 /**
7939  * cik_resume - resume the asic to a functional state
7940  *
7941  * @rdev: radeon_device pointer
7942  *
7943  * Programs the asic to a functional state (CIK).
7944  * Called at resume.
7945  * Returns 0 for success, error for failure.
7946  */
7947 int cik_resume(struct radeon_device *rdev)
7948 {
7949         int r;
7950
7951         /* post card */
7952         atom_asic_init(rdev->mode_info.atom_context);
7953
7954         /* init golden registers */
7955         cik_init_golden_registers(rdev);
7956
7957         radeon_pm_resume(rdev);
7958
7959         rdev->accel_working = true;
7960         r = cik_startup(rdev);
7961         if (r) {
7962                 DRM_ERROR("cik startup failed on resume\n");
7963                 rdev->accel_working = false;
7964                 return r;
7965         }
7966
7967         return r;
7968
7969 }
7970
7971 /**
7972  * cik_suspend - suspend the asic
7973  *
7974  * @rdev: radeon_device pointer
7975  *
7976  * Bring the chip into a state suitable for suspend (CIK).
7977  * Called at suspend.
7978  * Returns 0 for success.
7979  */
int cik_suspend(struct radeon_device *rdev)
{
	radeon_pm_suspend(rdev);
	dce6_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	/* halt the CP and SDMA engines */
	cik_cp_enable(rdev, false);
	cik_sdma_enable(rdev, false);
	/* quiesce UVD and VCE */
	uvd_v1_0_fini(rdev);
	radeon_uvd_suspend(rdev);
	radeon_vce_suspend(rdev);
	/* unwind powergating and clockgating setup */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_suspend(rdev);
	/* disable writeback and the GART last, after all engines are idle */
	radeon_wb_disable(rdev);
	cik_pcie_gart_disable(rdev);
	return 0;
}
7997
7998 /* Plan is to move initialization in that function and use
7999  * helper function so that radeon_device_init pretty much
8000  * do nothing more than calling asic specific function. This
8001  * should also allow to remove a bunch of callback function
8002  * like vram_info.
8003  */
8004 /**
8005  * cik_init - asic specific driver and hw init
8006  *
8007  * @rdev: radeon_device pointer
8008  *
8009  * Setup asic specific driver variables and program the hw
8010  * to a functional state (CIK).
8011  * Called at driver startup.
8012  * Returns 0 for success, errors for failure.
8013  */
8014 int cik_init(struct radeon_device *rdev)
8015 {
8016         struct radeon_ring *ring;
8017         int r;
8018
8019         /* Read BIOS */
8020         if (!radeon_get_bios(rdev)) {
8021                 if (ASIC_IS_AVIVO(rdev))
8022                         return -EINVAL;
8023         }
8024         /* Must be an ATOMBIOS */
8025         if (!rdev->is_atom_bios) {
8026                 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8027                 return -EINVAL;
8028         }
8029         r = radeon_atombios_init(rdev);
8030         if (r)
8031                 return r;
8032
8033         /* Post card if necessary */
8034         if (!radeon_card_posted(rdev)) {
8035                 if (!rdev->bios) {
8036                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8037                         return -EINVAL;
8038                 }
8039                 DRM_INFO("GPU not posted. posting now...\n");
8040                 atom_asic_init(rdev->mode_info.atom_context);
8041         }
8042         /* init golden registers */
8043         cik_init_golden_registers(rdev);
8044         /* Initialize scratch registers */
8045         cik_scratch_init(rdev);
8046         /* Initialize surface registers */
8047         radeon_surface_init(rdev);
8048         /* Initialize clocks */
8049         radeon_get_clock_info(rdev->ddev);
8050
8051         /* Fence driver */
8052         r = radeon_fence_driver_init(rdev);
8053         if (r)
8054                 return r;
8055
8056         /* initialize memory controller */
8057         r = cik_mc_init(rdev);
8058         if (r)
8059                 return r;
8060         /* Memory manager */
8061         r = radeon_bo_init(rdev);
8062         if (r)
8063                 return r;
8064
8065         if (rdev->flags & RADEON_IS_IGP) {
8066                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8067                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8068                         r = cik_init_microcode(rdev);
8069                         if (r) {
8070                                 DRM_ERROR("Failed to load firmware!\n");
8071                                 return r;
8072                         }
8073                 }
8074         } else {
8075                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8076                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8077                     !rdev->mc_fw) {
8078                         r = cik_init_microcode(rdev);
8079                         if (r) {
8080                                 DRM_ERROR("Failed to load firmware!\n");
8081                                 return r;
8082                         }
8083                 }
8084         }
8085
8086         /* Initialize power management */
8087         radeon_pm_init(rdev);
8088
8089         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8090         ring->ring_obj = NULL;
8091         r600_ring_init(rdev, ring, 1024 * 1024);
8092
8093         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8094         ring->ring_obj = NULL;
8095         r600_ring_init(rdev, ring, 1024 * 1024);
8096         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8097         if (r)
8098                 return r;
8099
8100         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8101         ring->ring_obj = NULL;
8102         r600_ring_init(rdev, ring, 1024 * 1024);
8103         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8104         if (r)
8105                 return r;
8106
8107         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8108         ring->ring_obj = NULL;
8109         r600_ring_init(rdev, ring, 256 * 1024);
8110
8111         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8112         ring->ring_obj = NULL;
8113         r600_ring_init(rdev, ring, 256 * 1024);
8114
8115         r = radeon_uvd_init(rdev);
8116         if (!r) {
8117                 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8118                 ring->ring_obj = NULL;
8119                 r600_ring_init(rdev, ring, 4096);
8120         }
8121
8122         r = radeon_vce_init(rdev);
8123         if (!r) {
8124                 ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8125                 ring->ring_obj = NULL;
8126                 r600_ring_init(rdev, ring, 4096);
8127
8128                 ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8129                 ring->ring_obj = NULL;
8130                 r600_ring_init(rdev, ring, 4096);
8131         }
8132
8133         rdev->ih.ring_obj = NULL;
8134         r600_ih_ring_init(rdev, 64 * 1024);
8135
8136         r = r600_pcie_gart_init(rdev);
8137         if (r)
8138                 return r;
8139
8140         rdev->accel_working = true;
8141         r = cik_startup(rdev);
8142         if (r) {
8143                 dev_err(rdev->dev, "disabling GPU acceleration\n");
8144                 cik_cp_fini(rdev);
8145                 cik_sdma_fini(rdev);
8146                 cik_irq_fini(rdev);
8147                 sumo_rlc_fini(rdev);
8148                 cik_mec_fini(rdev);
8149                 radeon_wb_fini(rdev);
8150                 radeon_ib_pool_fini(rdev);
8151                 radeon_vm_manager_fini(rdev);
8152                 radeon_irq_kms_fini(rdev);
8153                 cik_pcie_gart_fini(rdev);
8154                 rdev->accel_working = false;
8155         }
8156
8157         /* Don't start up if the MC ucode is missing.
8158          * The default clocks and voltages before the MC ucode
8159          * is loaded are not suffient for advanced operations.
8160          */
8161         if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8162                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
8163                 return -EINVAL;
8164         }
8165
8166         return 0;
8167 }
8168
8169 /**
8170  * cik_fini - asic specific driver and hw fini
8171  *
8172  * @rdev: radeon_device pointer
8173  *
8174  * Tear down the asic specific driver variables and program the hw
8175  * to an idle state (CIK).
8176  * Called at driver unload.
8177  */
void cik_fini(struct radeon_device *rdev)
{
	radeon_pm_fini(rdev);
	/* stop and free the CP and SDMA engines */
	cik_cp_fini(rdev);
	cik_sdma_fini(rdev);
	/* unwind powergating and clockgating state */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	cik_mec_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	/* tear down UVD/VCE before the GART they map into goes away */
	uvd_v1_0_fini(rdev);
	radeon_uvd_fini(rdev);
	radeon_vce_fini(rdev);
	cik_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	/* drop the cached BIOS image */
	kfree(rdev->bios);
	rdev->bios = NULL;
}
8204
8205 void dce8_program_fmt(struct drm_encoder *encoder)
8206 {
8207         struct drm_device *dev = encoder->dev;
8208         struct radeon_device *rdev = dev->dev_private;
8209         struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8210         struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8211         struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8212         int bpc = 0;
8213         u32 tmp = 0;
8214         enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8215
8216         if (connector) {
8217                 struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8218                 bpc = radeon_get_monitor_bpc(connector);
8219                 dither = radeon_connector->dither;
8220         }
8221
8222         /* LVDS/eDP FMT is set up by atom */
8223         if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8224                 return;
8225
8226         /* not needed for analog */
8227         if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8228             (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8229                 return;
8230
8231         if (bpc == 0)
8232                 return;
8233
8234         switch (bpc) {
8235         case 6:
8236                 if (dither == RADEON_FMT_DITHER_ENABLE)
8237                         /* XXX sort out optimal dither settings */
8238                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8239                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8240                 else
8241                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8242                 break;
8243         case 8:
8244                 if (dither == RADEON_FMT_DITHER_ENABLE)
8245                         /* XXX sort out optimal dither settings */
8246                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8247                                 FMT_RGB_RANDOM_ENABLE |
8248                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8249                 else
8250                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8251                 break;
8252         case 10:
8253                 if (dither == RADEON_FMT_DITHER_ENABLE)
8254                         /* XXX sort out optimal dither settings */
8255                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8256                                 FMT_RGB_RANDOM_ENABLE |
8257                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8258                 else
8259                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8260                 break;
8261         default:
8262                 /* not needed */
8263                 break;
8264         }
8265
8266         WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8267 }
8268
8269 /* display watermark setup */
8270 /**
8271  * dce8_line_buffer_adjust - Set up the line buffer
8272  *
8273  * @rdev: radeon_device pointer
8274  * @radeon_crtc: the selected display controller
8275  * @mode: the current display mode on the selected display
8276  * controller
8277  *
8278  * Setup up the line buffer allocation for
8279  * the selected display controller (CIK).
8280  * Returns the line buffer size in pixels.
8281  */
8282 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8283                                    struct radeon_crtc *radeon_crtc,
8284                                    struct drm_display_mode *mode)
8285 {
8286         u32 tmp, buffer_alloc, i;
8287         u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8288         /*
8289          * Line Buffer Setup
8290          * There are 6 line buffers, one for each display controllers.
8291          * There are 3 partitions per LB. Select the number of partitions
8292          * to enable based on the display width.  For display widths larger
8293          * than 4096, you need use to use 2 display controllers and combine
8294          * them using the stereo blender.
8295          */
8296         if (radeon_crtc->base.enabled && mode) {
8297                 if (mode->crtc_hdisplay < 1920) {
8298                         tmp = 1;
8299                         buffer_alloc = 2;
8300                 } else if (mode->crtc_hdisplay < 2560) {
8301                         tmp = 2;
8302                         buffer_alloc = 2;
8303                 } else if (mode->crtc_hdisplay < 4096) {
8304                         tmp = 0;
8305                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8306                 } else {
8307                         DRM_DEBUG_KMS("Mode too big for LB!\n");
8308                         tmp = 0;
8309                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8310                 }
8311         } else {
8312                 tmp = 1;
8313                 buffer_alloc = 0;
8314         }
8315
8316         WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8317                LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8318
8319         WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8320                DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8321         for (i = 0; i < rdev->usec_timeout; i++) {
8322                 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8323                     DMIF_BUFFERS_ALLOCATED_COMPLETED)
8324                         break;
8325                 udelay(1);
8326         }
8327
8328         if (radeon_crtc->base.enabled && mode) {
8329                 switch (tmp) {
8330                 case 0:
8331                 default:
8332                         return 4096 * 2;
8333                 case 1:
8334                         return 1920 * 2;
8335                 case 2:
8336                         return 2560 * 2;
8337                 }
8338         }
8339
8340         /* controller not enabled, so no lb used */
8341         return 0;
8342 }
8343
8344 /**
8345  * cik_get_number_of_dram_channels - get the number of dram channels
8346  *
8347  * @rdev: radeon_device pointer
8348  *
8349  * Look up the number of video ram channels (CIK).
8350  * Used for display watermark bandwidth calculations
8351  * Returns the number of dram channels
8352  */
8353 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8354 {
8355         u32 tmp = RREG32(MC_SHARED_CHMAP);
8356
8357         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8358         case 0:
8359         default:
8360                 return 1;
8361         case 1:
8362                 return 2;
8363         case 2:
8364                 return 4;
8365         case 3:
8366                 return 8;
8367         case 4:
8368                 return 3;
8369         case 5:
8370                 return 6;
8371         case 6:
8372                 return 10;
8373         case 7:
8374                 return 12;
8375         case 8:
8376                 return 16;
8377         }
8378 }
8379
/* dce8_wm_params - inputs to the DCE8 display watermark calculations */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
8395
8396 /**
8397  * dce8_dram_bandwidth - get the dram bandwidth
8398  *
8399  * @wm: watermark calculation data
8400  *
8401  * Calculate the raw dram bandwidth (CIK).
8402  * Used for display watermark bandwidth calculations
8403  * Returns the dram bandwidth in MBytes/s
8404  */
8405 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8406 {
8407         /* Calculate raw DRAM Bandwidth */
8408         fixed20_12 dram_efficiency; /* 0.7 */
8409         fixed20_12 yclk, dram_channels, bandwidth;
8410         fixed20_12 a;
8411
8412         a.full = dfixed_const(1000);
8413         yclk.full = dfixed_const(wm->yclk);
8414         yclk.full = dfixed_div(yclk, a);
8415         dram_channels.full = dfixed_const(wm->dram_channels * 4);
8416         a.full = dfixed_const(10);
8417         dram_efficiency.full = dfixed_const(7);
8418         dram_efficiency.full = dfixed_div(dram_efficiency, a);
8419         bandwidth.full = dfixed_mul(dram_channels, yclk);
8420         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8421
8422         return dfixed_trunc(bandwidth);
8423 }
8424
8425 /**
8426  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8427  *
8428  * @wm: watermark calculation data
8429  *
8430  * Calculate the dram bandwidth used for display (CIK).
8431  * Used for display watermark bandwidth calculations
8432  * Returns the dram bandwidth for display in MBytes/s
8433  */
8434 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8435 {
8436         /* Calculate DRAM Bandwidth and the part allocated to display. */
8437         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8438         fixed20_12 yclk, dram_channels, bandwidth;
8439         fixed20_12 a;
8440
8441         a.full = dfixed_const(1000);
8442         yclk.full = dfixed_const(wm->yclk);
8443         yclk.full = dfixed_div(yclk, a);
8444         dram_channels.full = dfixed_const(wm->dram_channels * 4);
8445         a.full = dfixed_const(10);
8446         disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
8447         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8448         bandwidth.full = dfixed_mul(dram_channels, yclk);
8449         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8450
8451         return dfixed_trunc(bandwidth);
8452 }
8453
8454 /**
8455  * dce8_data_return_bandwidth - get the data return bandwidth
8456  *
8457  * @wm: watermark calculation data
8458  *
8459  * Calculate the data return bandwidth used for display (CIK).
8460  * Used for display watermark bandwidth calculations
8461  * Returns the data return bandwidth in MBytes/s
8462  */
8463 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
8464 {
8465         /* Calculate the display Data return Bandwidth */
8466         fixed20_12 return_efficiency; /* 0.8 */
8467         fixed20_12 sclk, bandwidth;
8468         fixed20_12 a;
8469
8470         a.full = dfixed_const(1000);
8471         sclk.full = dfixed_const(wm->sclk);
8472         sclk.full = dfixed_div(sclk, a);
8473         a.full = dfixed_const(10);
8474         return_efficiency.full = dfixed_const(8);
8475         return_efficiency.full = dfixed_div(return_efficiency, a);
8476         a.full = dfixed_const(32);
8477         bandwidth.full = dfixed_mul(a, sclk);
8478         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
8479
8480         return dfixed_trunc(bandwidth);
8481 }
8482
8483 /**
8484  * dce8_dmif_request_bandwidth - get the dmif bandwidth
8485  *
8486  * @wm: watermark calculation data
8487  *
8488  * Calculate the dmif bandwidth used for display (CIK).
8489  * Used for display watermark bandwidth calculations
8490  * Returns the dmif bandwidth in MBytes/s
8491  */
8492 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
8493 {
8494         /* Calculate the DMIF Request Bandwidth */
8495         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
8496         fixed20_12 disp_clk, bandwidth;
8497         fixed20_12 a, b;
8498
8499         a.full = dfixed_const(1000);
8500         disp_clk.full = dfixed_const(wm->disp_clk);
8501         disp_clk.full = dfixed_div(disp_clk, a);
8502         a.full = dfixed_const(32);
8503         b.full = dfixed_mul(a, disp_clk);
8504
8505         a.full = dfixed_const(10);
8506         disp_clk_request_efficiency.full = dfixed_const(8);
8507         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
8508
8509         bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
8510
8511         return dfixed_trunc(bandwidth);
8512 }
8513
8514 /**
8515  * dce8_available_bandwidth - get the min available bandwidth
8516  *
8517  * @wm: watermark calculation data
8518  *
8519  * Calculate the min available bandwidth used for display (CIK).
8520  * Used for display watermark bandwidth calculations
8521  * Returns the min available bandwidth in MBytes/s
8522  */
8523 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
8524 {
8525         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
8526         u32 dram_bandwidth = dce8_dram_bandwidth(wm);
8527         u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
8528         u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
8529
8530         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
8531 }
8532
8533 /**
8534  * dce8_average_bandwidth - get the average available bandwidth
8535  *
8536  * @wm: watermark calculation data
8537  *
8538  * Calculate the average available bandwidth used for display (CIK).
8539  * Used for display watermark bandwidth calculations
8540  * Returns the average available bandwidth in MBytes/s
8541  */
8542 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
8543 {
8544         /* Calculate the display mode Average Bandwidth
8545          * DisplayMode should contain the source and destination dimensions,
8546          * timing, etc.
8547          */
8548         fixed20_12 bpp;
8549         fixed20_12 line_time;
8550         fixed20_12 src_width;
8551         fixed20_12 bandwidth;
8552         fixed20_12 a;
8553
8554         a.full = dfixed_const(1000);
8555         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
8556         line_time.full = dfixed_div(line_time, a);
8557         bpp.full = dfixed_const(wm->bytes_per_pixel);
8558         src_width.full = dfixed_const(wm->src_width);
8559         bandwidth.full = dfixed_mul(src_width, bpp);
8560         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
8561         bandwidth.full = dfixed_div(bandwidth, line_time);
8562
8563         return dfixed_trunc(bandwidth);
8564 }
8565
8566 /**
8567  * dce8_latency_watermark - get the latency watermark
8568  *
8569  * @wm: watermark calculation data
8570  *
8571  * Calculate the latency watermark (CIK).
8572  * Used for display watermark bandwidth calculations
8573  * Returns the latency watermark in ns
8574  */
static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce8_available_bandwidth(wm);
	/* NOTE(review): the two divisions below execute before the
	 * num_heads == 0 early-out; presumably available_bandwidth is
	 * always nonzero on real hardware -- verify */
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	if (wm->num_heads == 0)
		return 0;

	/* heavy vertical scaling, many scaler taps, or interlaced modes
	 * need more source lines buffered per destination line */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* a = this head's share of the available bandwidth */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	/* b = dmif_size / ((mc_latency + 512) / disp_clk) */
	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	/* b = disp_clk (MHz) * bytes_per_pixel */
	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	/* line buffer fill rate is capped by both limits above */
	lb_fill_bw = min(tmp, dfixed_trunc(b));

	/* line_fill_time = bytes per buffered line / fill rate (in ns) */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* if a line fills faster than the active display time, the base
	 * latency stands; otherwise pad it by the shortfall */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
8637
8638 /**
8639  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
8640  * average and available dram bandwidth
8641  *
8642  * @wm: watermark calculation data
8643  *
8644  * Check if the display average bandwidth fits in the display
8645  * dram bandwidth (CIK).
8646  * Used for display watermark bandwidth calculations
8647  * Returns true if the display fits, false if not.
8648  */
8649 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8650 {
8651         if (dce8_average_bandwidth(wm) <=
8652             (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
8653                 return true;
8654         else
8655                 return false;
8656 }
8657
8658 /**
8659  * dce8_average_bandwidth_vs_available_bandwidth - check
8660  * average and available bandwidth
8661  *
8662  * @wm: watermark calculation data
8663  *
8664  * Check if the display average bandwidth fits in the display
8665  * available bandwidth (CIK).
8666  * Used for display watermark bandwidth calculations
8667  * Returns true if the display fits, false if not.
8668  */
8669 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
8670 {
8671         if (dce8_average_bandwidth(wm) <=
8672             (dce8_available_bandwidth(wm) / wm->num_heads))
8673                 return true;
8674         else
8675                 return false;
8676 }
8677
8678 /**
8679  * dce8_check_latency_hiding - check latency hiding
8680  *
8681  * @wm: watermark calculation data
8682  *
8683  * Check latency hiding (CIK).
8684  * Used for display watermark bandwidth calculations
8685  * Returns true if the display fits, false if not.
8686  */
8687 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
8688 {
8689         u32 lb_partitions = wm->lb_size / wm->src_width;
8690         u32 line_time = wm->active_time + wm->blank_time;
8691         u32 latency_tolerant_lines;
8692         u32 latency_hiding;
8693         fixed20_12 a;
8694
8695         a.full = dfixed_const(1);
8696         if (wm->vsc.full > a.full)
8697                 latency_tolerant_lines = 1;
8698         else {
8699                 if (lb_partitions <= (wm->vtaps + 1))
8700                         latency_tolerant_lines = 1;
8701                 else
8702                         latency_tolerant_lines = 2;
8703         }
8704
8705         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
8706
8707         if (dce8_latency_watermark(wm) <= latency_hiding)
8708                 return true;
8709         else
8710                 return false;
8711 }
8712
8713 /**
8714  * dce8_program_watermarks - program display watermarks
8715  *
8716  * @rdev: radeon_device pointer
8717  * @radeon_crtc: the selected display controller
8718  * @lb_size: line buffer size
8719  * @num_heads: number of display controllers in use
8720  *
8721  * Calculate and program the display watermarks for the
8722  * selected display controller (CIK).
8723  */
8724 static void dce8_program_watermarks(struct radeon_device *rdev,
8725                                     struct radeon_crtc *radeon_crtc,
8726                                     u32 lb_size, u32 num_heads)
8727 {
8728         struct drm_display_mode *mode = &radeon_crtc->base.mode;
8729         struct dce8_wm_params wm_low, wm_high;
8730         u32 pixel_period;
8731         u32 line_time = 0;
8732         u32 latency_watermark_a = 0, latency_watermark_b = 0;
8733         u32 tmp, wm_mask;
8734
8735         if (radeon_crtc->base.enabled && num_heads && mode) {
8736                 pixel_period = 1000000 / (u32)mode->clock;
8737                 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
8738
8739                 /* watermark for high clocks */
8740                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8741                     rdev->pm.dpm_enabled) {
8742                         wm_high.yclk =
8743                                 radeon_dpm_get_mclk(rdev, false) * 10;
8744                         wm_high.sclk =
8745                                 radeon_dpm_get_sclk(rdev, false) * 10;
8746                 } else {
8747                         wm_high.yclk = rdev->pm.current_mclk * 10;
8748                         wm_high.sclk = rdev->pm.current_sclk * 10;
8749                 }
8750
8751                 wm_high.disp_clk = mode->clock;
8752                 wm_high.src_width = mode->crtc_hdisplay;
8753                 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
8754                 wm_high.blank_time = line_time - wm_high.active_time;
8755                 wm_high.interlaced = false;
8756                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
8757                         wm_high.interlaced = true;
8758                 wm_high.vsc = radeon_crtc->vsc;
8759                 wm_high.vtaps = 1;
8760                 if (radeon_crtc->rmx_type != RMX_OFF)
8761                         wm_high.vtaps = 2;
8762                 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
8763                 wm_high.lb_size = lb_size;
8764                 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
8765                 wm_high.num_heads = num_heads;
8766
8767                 /* set for high clocks */
8768                 latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
8769
8770                 /* possibly force display priority to high */
8771                 /* should really do this at mode validation time... */
8772                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
8773                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
8774                     !dce8_check_latency_hiding(&wm_high) ||
8775                     (rdev->disp_priority == 2)) {
8776                         DRM_DEBUG_KMS("force priority to high\n");
8777                 }
8778
8779                 /* watermark for low clocks */
8780                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8781                     rdev->pm.dpm_enabled) {
8782                         wm_low.yclk =
8783                                 radeon_dpm_get_mclk(rdev, true) * 10;
8784                         wm_low.sclk =
8785                                 radeon_dpm_get_sclk(rdev, true) * 10;
8786                 } else {
8787                         wm_low.yclk = rdev->pm.current_mclk * 10;
8788                         wm_low.sclk = rdev->pm.current_sclk * 10;
8789                 }
8790
8791                 wm_low.disp_clk = mode->clock;
8792                 wm_low.src_width = mode->crtc_hdisplay;
8793                 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
8794                 wm_low.blank_time = line_time - wm_low.active_time;
8795                 wm_low.interlaced = false;
8796                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
8797                         wm_low.interlaced = true;
8798                 wm_low.vsc = radeon_crtc->vsc;
8799                 wm_low.vtaps = 1;
8800                 if (radeon_crtc->rmx_type != RMX_OFF)
8801                         wm_low.vtaps = 2;
8802                 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
8803                 wm_low.lb_size = lb_size;
8804                 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
8805                 wm_low.num_heads = num_heads;
8806
8807                 /* set for low clocks */
8808                 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
8809
8810                 /* possibly force display priority to high */
8811                 /* should really do this at mode validation time... */
8812                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
8813                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
8814                     !dce8_check_latency_hiding(&wm_low) ||
8815                     (rdev->disp_priority == 2)) {
8816                         DRM_DEBUG_KMS("force priority to high\n");
8817                 }
8818         }
8819
8820         /* select wm A */
8821         wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
8822         tmp = wm_mask;
8823         tmp &= ~LATENCY_WATERMARK_MASK(3);
8824         tmp |= LATENCY_WATERMARK_MASK(1);
8825         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
8826         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
8827                (LATENCY_LOW_WATERMARK(latency_watermark_a) |
8828                 LATENCY_HIGH_WATERMARK(line_time)));
8829         /* select wm B */
8830         tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
8831         tmp &= ~LATENCY_WATERMARK_MASK(3);
8832         tmp |= LATENCY_WATERMARK_MASK(2);
8833         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
8834         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
8835                (LATENCY_LOW_WATERMARK(latency_watermark_b) |
8836                 LATENCY_HIGH_WATERMARK(line_time)));
8837         /* restore original selection */
8838         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
8839
8840         /* save values for DPM */
8841         radeon_crtc->line_time = line_time;
8842         radeon_crtc->wm_high = latency_watermark_a;
8843         radeon_crtc->wm_low = latency_watermark_b;
8844 }
8845
8846 /**
8847  * dce8_bandwidth_update - program display watermarks
8848  *
8849  * @rdev: radeon_device pointer
8850  *
8851  * Calculate and program the display watermarks and line
8852  * buffer allocation (CIK).
8853  */
8854 void dce8_bandwidth_update(struct radeon_device *rdev)
8855 {
8856         struct drm_display_mode *mode = NULL;
8857         u32 num_heads = 0, lb_size;
8858         int i;
8859
8860         radeon_update_display_priority(rdev);
8861
8862         for (i = 0; i < rdev->num_crtc; i++) {
8863                 if (rdev->mode_info.crtcs[i]->base.enabled)
8864                         num_heads++;
8865         }
8866         for (i = 0; i < rdev->num_crtc; i++) {
8867                 mode = &rdev->mode_info.crtcs[i]->base.mode;
8868                 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
8869                 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
8870         }
8871 }
8872
8873 /**
8874  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
8875  *
8876  * @rdev: radeon_device pointer
8877  *
8878  * Fetches a GPU clock counter snapshot (SI).
8879  * Returns the 64 bit clock counter snapshot.
8880  */
8881 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
8882 {
8883         uint64_t clock;
8884
8885         mutex_lock(&rdev->gpu_clock_mutex);
8886         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
8887         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
8888                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
8889         mutex_unlock(&rdev->gpu_clock_mutex);
8890         return clock;
8891 }
8892
8893 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
8894                               u32 cntl_reg, u32 status_reg)
8895 {
8896         int r, i;
8897         struct atom_clock_dividers dividers;
8898         uint32_t tmp;
8899
8900         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
8901                                            clock, false, &dividers);
8902         if (r)
8903                 return r;
8904
8905         tmp = RREG32_SMC(cntl_reg);
8906         tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
8907         tmp |= dividers.post_divider;
8908         WREG32_SMC(cntl_reg, tmp);
8909
8910         for (i = 0; i < 100; i++) {
8911                 if (RREG32_SMC(status_reg) & DCLK_STATUS)
8912                         break;
8913                 mdelay(10);
8914         }
8915         if (i == 100)
8916                 return -ETIMEDOUT;
8917
8918         return 0;
8919 }
8920
8921 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
8922 {
8923         int r = 0;
8924
8925         r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
8926         if (r)
8927                 return r;
8928
8929         r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
8930         return r;
8931 }
8932
8933 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
8934 {
8935         int r, i;
8936         struct atom_clock_dividers dividers;
8937         u32 tmp;
8938
8939         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
8940                                            ecclk, false, &dividers);
8941         if (r)
8942                 return r;
8943
8944         for (i = 0; i < 100; i++) {
8945                 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
8946                         break;
8947                 mdelay(10);
8948         }
8949         if (i == 100)
8950                 return -ETIMEDOUT;
8951
8952         tmp = RREG32_SMC(CG_ECLK_CNTL);
8953         tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
8954         tmp |= dividers.post_divider;
8955         WREG32_SMC(CG_ECLK_CNTL, tmp);
8956
8957         for (i = 0; i < 100; i++) {
8958                 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
8959                         break;
8960                 mdelay(10);
8961         }
8962         if (i == 100)
8963                 return -ETIMEDOUT;
8964
8965         return 0;
8966 }
8967
/* cik_pcie_gen3_enable - bring the PCIe link up to gen2/gen3 speeds.
 *
 * Checks that the platform (root port + GPU) supports higher link
 * speeds, retrains the link (including the gen3 equalization dance)
 * and initiates the speed change.  No-op for IGPs, non-PCIE parts,
 * or when disabled via the radeon.pcie_gen2 module parameter.
 */
static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	/* nothing to do if neither gen2 nor gen3 is supported */
	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	/* current_data_rate: 0 = gen1, 1 = gen2, 2 = gen3 */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* need the PCIe capability offsets of both link partners */
	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save link control on both ends, then set
			 * hardware autonomous width disable for the
			 * duration of the retraining */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* renegotiate to the full detected link width if
			 * we are currently running narrower */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* up to 10 equalization retries */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				/* quiesce the link, then request a redo of
				 * the gen3 equalization */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl: restore the saved HAWD bits */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2: restore enter-compliance and
				 * transmit-margin fields (bits 4, 9-11) */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* target link speed field (bits 3:0 of LNKCTL2) */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* wait for the hardware to clear the initiate bit */
	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
9124
/* cik_program_aspm - program PCIe ASPM (active state power management).
 *
 * Sets up L0s/L1 link power states and, when CLKREQ# is supported by
 * the root port, allows PLL powerdown in L1/L2.3 and switches various
 * clocks to the bypass source.  No-op for IGPs, non-PCIE parts, or
 * when disabled via the radeon.aspm module parameter.
 */
static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	/* policy knobs; currently all features are left enabled */
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* override the N_FTS (fast training sequence) count */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

	/* configure L0s/L1 inactivity timeouts */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* allow the PLLs to power down in the OFF and
			 * TXS2 link states, for both PIF blocks */
			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			/* CLKREQ# support is advertised by the root port */
			if (!disable_clkreq) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				/* switch thermal monitor clocks to the
				 * bypass source */
				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		/* L1 disabled: just write the L0s configuration */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	/* enable memory light sleep in the BIF */
	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);

	if (!disable_l0s) {
		/* if the saturated N_FTS value is in use and the link is
		 * reversed at both ends, turn L0s inactivity back off */
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}