drm/radeon: consolidate cp hdp flushing code for CIK
drivers/gpu/drm/radeon/cik.c (firefly-linux-kernel-4.4.55.git)
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include "drmP.h"
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "cikd.h"
31 #include "atom.h"
32 #include "cik_blit_shaders.h"
33 #include "radeon_ucode.h"
34 #include "clearstate_ci.h"
35
36 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
37 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
38 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
39 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
40 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
41 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
42 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
43 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
44 MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
45 MODULE_FIRMWARE("radeon/HAWAII_me.bin");
46 MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
47 MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
48 MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
49 MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
50 MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
51 MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
52 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
53 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
54 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
55 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
56 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
57 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
58 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
59 MODULE_FIRMWARE("radeon/KABINI_me.bin");
60 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
61 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
62 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
63 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
64
65 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
66 extern void r600_ih_ring_fini(struct radeon_device *rdev);
67 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
68 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
69 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
70 extern void sumo_rlc_fini(struct radeon_device *rdev);
71 extern int sumo_rlc_init(struct radeon_device *rdev);
72 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
73 extern void si_rlc_reset(struct radeon_device *rdev);
74 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
75 extern int cik_sdma_resume(struct radeon_device *rdev);
76 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
77 extern void cik_sdma_fini(struct radeon_device *rdev);
78 static void cik_rlc_stop(struct radeon_device *rdev);
79 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
80 static void cik_program_aspm(struct radeon_device *rdev);
81 static void cik_init_pg(struct radeon_device *rdev);
82 static void cik_init_cg(struct radeon_device *rdev);
83 static void cik_fini_pg(struct radeon_device *rdev);
84 static void cik_fini_cg(struct radeon_device *rdev);
85 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
86                                           bool enable);
87
88 /* get temperature in millidegrees */
89 int ci_get_temp(struct radeon_device *rdev)
90 {
91         u32 temp;
92         int actual_temp = 0;
93
94         temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
95                 CTF_TEMP_SHIFT;
96
97         if (temp & 0x200)
98                 actual_temp = 255;
99         else
100                 actual_temp = temp & 0x1ff;
101
102         actual_temp = actual_temp * 1000;
103
104         return actual_temp;
105 }
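/*
 * Worked example for ci_get_temp() above, derived only from the code it
 * documents: a raw CTF_TEMP field of 0x03f gives 63 * 1000 = 63000
 * millidegrees; if bit 0x200 is set (apparently an out-of-range
 * indicator), the result is clamped to 255 * 1000 = 255000 millidegrees.
 */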
106
107 /* get temperature in millidegrees */
108 int kv_get_temp(struct radeon_device *rdev)
109 {
110         u32 temp;
111         int actual_temp = 0;
112
113         temp = RREG32_SMC(0xC0300E0C);
114
115         if (temp)
116                 actual_temp = (temp / 8) - 49;
117         else
118                 actual_temp = 0;
119
120         actual_temp = actual_temp * 1000;
121
122         return actual_temp;
123 }
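/*
 * Worked example for kv_get_temp() above, derived from the arithmetic in
 * the function (8 counts per degree C with a -49 C offset): a raw SMC
 * reading of 416 yields (416 / 8 - 49) * 1000 = 3000 millidegrees, while
 * a reading of 0 is reported as 0.
 */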
124
125 /*
126  * Indirect register accessors
127  */
128 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
129 {
130         unsigned long flags;
131         u32 r;
132
133         spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
134         WREG32(PCIE_INDEX, reg);
135         (void)RREG32(PCIE_INDEX);
136         r = RREG32(PCIE_DATA);
137         spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
138         return r;
139 }
140
141 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
142 {
143         unsigned long flags;
144
145         spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
146         WREG32(PCIE_INDEX, reg);
147         (void)RREG32(PCIE_INDEX);
148         WREG32(PCIE_DATA, v);
149         (void)RREG32(PCIE_DATA);
150         spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
151 }
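/*
 * Usage sketch for the accessors above. Elsewhere in the driver these are
 * normally reached through the RREG32_PCIE_PORT()/WREG32_PCIE_PORT()
 * convenience macros (defined in radeon.h, cited from memory - verify
 * against the header) rather than called directly, e.g. a read-modify-write:
 *
 *   u32 tmp = RREG32_PCIE_PORT(reg);   // 'reg' and 'bit' are placeholders
 *   tmp |= bit;
 *   WREG32_PCIE_PORT(reg, tmp);
 *
 * Each individual access is serialized by rdev->pciep_idx_lock; a
 * read-modify-write sequence like the one above is not atomic as a whole.
 */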
152
153 static const u32 spectre_rlc_save_restore_register_list[] =
154 {
155         (0x0e00 << 16) | (0xc12c >> 2),
156         0x00000000,
157         (0x0e00 << 16) | (0xc140 >> 2),
158         0x00000000,
159         (0x0e00 << 16) | (0xc150 >> 2),
160         0x00000000,
161         (0x0e00 << 16) | (0xc15c >> 2),
162         0x00000000,
163         (0x0e00 << 16) | (0xc168 >> 2),
164         0x00000000,
165         (0x0e00 << 16) | (0xc170 >> 2),
166         0x00000000,
167         (0x0e00 << 16) | (0xc178 >> 2),
168         0x00000000,
169         (0x0e00 << 16) | (0xc204 >> 2),
170         0x00000000,
171         (0x0e00 << 16) | (0xc2b4 >> 2),
172         0x00000000,
173         (0x0e00 << 16) | (0xc2b8 >> 2),
174         0x00000000,
175         (0x0e00 << 16) | (0xc2bc >> 2),
176         0x00000000,
177         (0x0e00 << 16) | (0xc2c0 >> 2),
178         0x00000000,
179         (0x0e00 << 16) | (0x8228 >> 2),
180         0x00000000,
181         (0x0e00 << 16) | (0x829c >> 2),
182         0x00000000,
183         (0x0e00 << 16) | (0x869c >> 2),
184         0x00000000,
185         (0x0600 << 16) | (0x98f4 >> 2),
186         0x00000000,
187         (0x0e00 << 16) | (0x98f8 >> 2),
188         0x00000000,
189         (0x0e00 << 16) | (0x9900 >> 2),
190         0x00000000,
191         (0x0e00 << 16) | (0xc260 >> 2),
192         0x00000000,
193         (0x0e00 << 16) | (0x90e8 >> 2),
194         0x00000000,
195         (0x0e00 << 16) | (0x3c000 >> 2),
196         0x00000000,
197         (0x0e00 << 16) | (0x3c00c >> 2),
198         0x00000000,
199         (0x0e00 << 16) | (0x8c1c >> 2),
200         0x00000000,
201         (0x0e00 << 16) | (0x9700 >> 2),
202         0x00000000,
203         (0x0e00 << 16) | (0xcd20 >> 2),
204         0x00000000,
205         (0x4e00 << 16) | (0xcd20 >> 2),
206         0x00000000,
207         (0x5e00 << 16) | (0xcd20 >> 2),
208         0x00000000,
209         (0x6e00 << 16) | (0xcd20 >> 2),
210         0x00000000,
211         (0x7e00 << 16) | (0xcd20 >> 2),
212         0x00000000,
213         (0x8e00 << 16) | (0xcd20 >> 2),
214         0x00000000,
215         (0x9e00 << 16) | (0xcd20 >> 2),
216         0x00000000,
217         (0xae00 << 16) | (0xcd20 >> 2),
218         0x00000000,
219         (0xbe00 << 16) | (0xcd20 >> 2),
220         0x00000000,
221         (0x0e00 << 16) | (0x89bc >> 2),
222         0x00000000,
223         (0x0e00 << 16) | (0x8900 >> 2),
224         0x00000000,
225         0x3,
226         (0x0e00 << 16) | (0xc130 >> 2),
227         0x00000000,
228         (0x0e00 << 16) | (0xc134 >> 2),
229         0x00000000,
230         (0x0e00 << 16) | (0xc1fc >> 2),
231         0x00000000,
232         (0x0e00 << 16) | (0xc208 >> 2),
233         0x00000000,
234         (0x0e00 << 16) | (0xc264 >> 2),
235         0x00000000,
236         (0x0e00 << 16) | (0xc268 >> 2),
237         0x00000000,
238         (0x0e00 << 16) | (0xc26c >> 2),
239         0x00000000,
240         (0x0e00 << 16) | (0xc270 >> 2),
241         0x00000000,
242         (0x0e00 << 16) | (0xc274 >> 2),
243         0x00000000,
244         (0x0e00 << 16) | (0xc278 >> 2),
245         0x00000000,
246         (0x0e00 << 16) | (0xc27c >> 2),
247         0x00000000,
248         (0x0e00 << 16) | (0xc280 >> 2),
249         0x00000000,
250         (0x0e00 << 16) | (0xc284 >> 2),
251         0x00000000,
252         (0x0e00 << 16) | (0xc288 >> 2),
253         0x00000000,
254         (0x0e00 << 16) | (0xc28c >> 2),
255         0x00000000,
256         (0x0e00 << 16) | (0xc290 >> 2),
257         0x00000000,
258         (0x0e00 << 16) | (0xc294 >> 2),
259         0x00000000,
260         (0x0e00 << 16) | (0xc298 >> 2),
261         0x00000000,
262         (0x0e00 << 16) | (0xc29c >> 2),
263         0x00000000,
264         (0x0e00 << 16) | (0xc2a0 >> 2),
265         0x00000000,
266         (0x0e00 << 16) | (0xc2a4 >> 2),
267         0x00000000,
268         (0x0e00 << 16) | (0xc2a8 >> 2),
269         0x00000000,
270         (0x0e00 << 16) | (0xc2ac >> 2),
271         0x00000000,
272         (0x0e00 << 16) | (0xc2b0 >> 2),
273         0x00000000,
274         (0x0e00 << 16) | (0x301d0 >> 2),
275         0x00000000,
276         (0x0e00 << 16) | (0x30238 >> 2),
277         0x00000000,
278         (0x0e00 << 16) | (0x30250 >> 2),
279         0x00000000,
280         (0x0e00 << 16) | (0x30254 >> 2),
281         0x00000000,
282         (0x0e00 << 16) | (0x30258 >> 2),
283         0x00000000,
284         (0x0e00 << 16) | (0x3025c >> 2),
285         0x00000000,
286         (0x4e00 << 16) | (0xc900 >> 2),
287         0x00000000,
288         (0x5e00 << 16) | (0xc900 >> 2),
289         0x00000000,
290         (0x6e00 << 16) | (0xc900 >> 2),
291         0x00000000,
292         (0x7e00 << 16) | (0xc900 >> 2),
293         0x00000000,
294         (0x8e00 << 16) | (0xc900 >> 2),
295         0x00000000,
296         (0x9e00 << 16) | (0xc900 >> 2),
297         0x00000000,
298         (0xae00 << 16) | (0xc900 >> 2),
299         0x00000000,
300         (0xbe00 << 16) | (0xc900 >> 2),
301         0x00000000,
302         (0x4e00 << 16) | (0xc904 >> 2),
303         0x00000000,
304         (0x5e00 << 16) | (0xc904 >> 2),
305         0x00000000,
306         (0x6e00 << 16) | (0xc904 >> 2),
307         0x00000000,
308         (0x7e00 << 16) | (0xc904 >> 2),
309         0x00000000,
310         (0x8e00 << 16) | (0xc904 >> 2),
311         0x00000000,
312         (0x9e00 << 16) | (0xc904 >> 2),
313         0x00000000,
314         (0xae00 << 16) | (0xc904 >> 2),
315         0x00000000,
316         (0xbe00 << 16) | (0xc904 >> 2),
317         0x00000000,
318         (0x4e00 << 16) | (0xc908 >> 2),
319         0x00000000,
320         (0x5e00 << 16) | (0xc908 >> 2),
321         0x00000000,
322         (0x6e00 << 16) | (0xc908 >> 2),
323         0x00000000,
324         (0x7e00 << 16) | (0xc908 >> 2),
325         0x00000000,
326         (0x8e00 << 16) | (0xc908 >> 2),
327         0x00000000,
328         (0x9e00 << 16) | (0xc908 >> 2),
329         0x00000000,
330         (0xae00 << 16) | (0xc908 >> 2),
331         0x00000000,
332         (0xbe00 << 16) | (0xc908 >> 2),
333         0x00000000,
334         (0x4e00 << 16) | (0xc90c >> 2),
335         0x00000000,
336         (0x5e00 << 16) | (0xc90c >> 2),
337         0x00000000,
338         (0x6e00 << 16) | (0xc90c >> 2),
339         0x00000000,
340         (0x7e00 << 16) | (0xc90c >> 2),
341         0x00000000,
342         (0x8e00 << 16) | (0xc90c >> 2),
343         0x00000000,
344         (0x9e00 << 16) | (0xc90c >> 2),
345         0x00000000,
346         (0xae00 << 16) | (0xc90c >> 2),
347         0x00000000,
348         (0xbe00 << 16) | (0xc90c >> 2),
349         0x00000000,
350         (0x4e00 << 16) | (0xc910 >> 2),
351         0x00000000,
352         (0x5e00 << 16) | (0xc910 >> 2),
353         0x00000000,
354         (0x6e00 << 16) | (0xc910 >> 2),
355         0x00000000,
356         (0x7e00 << 16) | (0xc910 >> 2),
357         0x00000000,
358         (0x8e00 << 16) | (0xc910 >> 2),
359         0x00000000,
360         (0x9e00 << 16) | (0xc910 >> 2),
361         0x00000000,
362         (0xae00 << 16) | (0xc910 >> 2),
363         0x00000000,
364         (0xbe00 << 16) | (0xc910 >> 2),
365         0x00000000,
366         (0x0e00 << 16) | (0xc99c >> 2),
367         0x00000000,
368         (0x0e00 << 16) | (0x9834 >> 2),
369         0x00000000,
370         (0x0000 << 16) | (0x30f00 >> 2),
371         0x00000000,
372         (0x0001 << 16) | (0x30f00 >> 2),
373         0x00000000,
374         (0x0000 << 16) | (0x30f04 >> 2),
375         0x00000000,
376         (0x0001 << 16) | (0x30f04 >> 2),
377         0x00000000,
378         (0x0000 << 16) | (0x30f08 >> 2),
379         0x00000000,
380         (0x0001 << 16) | (0x30f08 >> 2),
381         0x00000000,
382         (0x0000 << 16) | (0x30f0c >> 2),
383         0x00000000,
384         (0x0001 << 16) | (0x30f0c >> 2),
385         0x00000000,
386         (0x0600 << 16) | (0x9b7c >> 2),
387         0x00000000,
388         (0x0e00 << 16) | (0x8a14 >> 2),
389         0x00000000,
390         (0x0e00 << 16) | (0x8a18 >> 2),
391         0x00000000,
392         (0x0600 << 16) | (0x30a00 >> 2),
393         0x00000000,
394         (0x0e00 << 16) | (0x8bf0 >> 2),
395         0x00000000,
396         (0x0e00 << 16) | (0x8bcc >> 2),
397         0x00000000,
398         (0x0e00 << 16) | (0x8b24 >> 2),
399         0x00000000,
400         (0x0e00 << 16) | (0x30a04 >> 2),
401         0x00000000,
402         (0x0600 << 16) | (0x30a10 >> 2),
403         0x00000000,
404         (0x0600 << 16) | (0x30a14 >> 2),
405         0x00000000,
406         (0x0600 << 16) | (0x30a18 >> 2),
407         0x00000000,
408         (0x0600 << 16) | (0x30a2c >> 2),
409         0x00000000,
410         (0x0e00 << 16) | (0xc700 >> 2),
411         0x00000000,
412         (0x0e00 << 16) | (0xc704 >> 2),
413         0x00000000,
414         (0x0e00 << 16) | (0xc708 >> 2),
415         0x00000000,
416         (0x0e00 << 16) | (0xc768 >> 2),
417         0x00000000,
418         (0x0400 << 16) | (0xc770 >> 2),
419         0x00000000,
420         (0x0400 << 16) | (0xc774 >> 2),
421         0x00000000,
422         (0x0400 << 16) | (0xc778 >> 2),
423         0x00000000,
424         (0x0400 << 16) | (0xc77c >> 2),
425         0x00000000,
426         (0x0400 << 16) | (0xc780 >> 2),
427         0x00000000,
428         (0x0400 << 16) | (0xc784 >> 2),
429         0x00000000,
430         (0x0400 << 16) | (0xc788 >> 2),
431         0x00000000,
432         (0x0400 << 16) | (0xc78c >> 2),
433         0x00000000,
434         (0x0400 << 16) | (0xc798 >> 2),
435         0x00000000,
436         (0x0400 << 16) | (0xc79c >> 2),
437         0x00000000,
438         (0x0400 << 16) | (0xc7a0 >> 2),
439         0x00000000,
440         (0x0400 << 16) | (0xc7a4 >> 2),
441         0x00000000,
442         (0x0400 << 16) | (0xc7a8 >> 2),
443         0x00000000,
444         (0x0400 << 16) | (0xc7ac >> 2),
445         0x00000000,
446         (0x0400 << 16) | (0xc7b0 >> 2),
447         0x00000000,
448         (0x0400 << 16) | (0xc7b4 >> 2),
449         0x00000000,
450         (0x0e00 << 16) | (0x9100 >> 2),
451         0x00000000,
452         (0x0e00 << 16) | (0x3c010 >> 2),
453         0x00000000,
454         (0x0e00 << 16) | (0x92a8 >> 2),
455         0x00000000,
456         (0x0e00 << 16) | (0x92ac >> 2),
457         0x00000000,
458         (0x0e00 << 16) | (0x92b4 >> 2),
459         0x00000000,
460         (0x0e00 << 16) | (0x92b8 >> 2),
461         0x00000000,
462         (0x0e00 << 16) | (0x92bc >> 2),
463         0x00000000,
464         (0x0e00 << 16) | (0x92c0 >> 2),
465         0x00000000,
466         (0x0e00 << 16) | (0x92c4 >> 2),
467         0x00000000,
468         (0x0e00 << 16) | (0x92c8 >> 2),
469         0x00000000,
470         (0x0e00 << 16) | (0x92cc >> 2),
471         0x00000000,
472         (0x0e00 << 16) | (0x92d0 >> 2),
473         0x00000000,
474         (0x0e00 << 16) | (0x8c00 >> 2),
475         0x00000000,
476         (0x0e00 << 16) | (0x8c04 >> 2),
477         0x00000000,
478         (0x0e00 << 16) | (0x8c20 >> 2),
479         0x00000000,
480         (0x0e00 << 16) | (0x8c38 >> 2),
481         0x00000000,
482         (0x0e00 << 16) | (0x8c3c >> 2),
483         0x00000000,
484         (0x0e00 << 16) | (0xae00 >> 2),
485         0x00000000,
486         (0x0e00 << 16) | (0x9604 >> 2),
487         0x00000000,
488         (0x0e00 << 16) | (0xac08 >> 2),
489         0x00000000,
490         (0x0e00 << 16) | (0xac0c >> 2),
491         0x00000000,
492         (0x0e00 << 16) | (0xac10 >> 2),
493         0x00000000,
494         (0x0e00 << 16) | (0xac14 >> 2),
495         0x00000000,
496         (0x0e00 << 16) | (0xac58 >> 2),
497         0x00000000,
498         (0x0e00 << 16) | (0xac68 >> 2),
499         0x00000000,
500         (0x0e00 << 16) | (0xac6c >> 2),
501         0x00000000,
502         (0x0e00 << 16) | (0xac70 >> 2),
503         0x00000000,
504         (0x0e00 << 16) | (0xac74 >> 2),
505         0x00000000,
506         (0x0e00 << 16) | (0xac78 >> 2),
507         0x00000000,
508         (0x0e00 << 16) | (0xac7c >> 2),
509         0x00000000,
510         (0x0e00 << 16) | (0xac80 >> 2),
511         0x00000000,
512         (0x0e00 << 16) | (0xac84 >> 2),
513         0x00000000,
514         (0x0e00 << 16) | (0xac88 >> 2),
515         0x00000000,
516         (0x0e00 << 16) | (0xac8c >> 2),
517         0x00000000,
518         (0x0e00 << 16) | (0x970c >> 2),
519         0x00000000,
520         (0x0e00 << 16) | (0x9714 >> 2),
521         0x00000000,
522         (0x0e00 << 16) | (0x9718 >> 2),
523         0x00000000,
524         (0x0e00 << 16) | (0x971c >> 2),
525         0x00000000,
526         (0x0e00 << 16) | (0x31068 >> 2),
527         0x00000000,
528         (0x4e00 << 16) | (0x31068 >> 2),
529         0x00000000,
530         (0x5e00 << 16) | (0x31068 >> 2),
531         0x00000000,
532         (0x6e00 << 16) | (0x31068 >> 2),
533         0x00000000,
534         (0x7e00 << 16) | (0x31068 >> 2),
535         0x00000000,
536         (0x8e00 << 16) | (0x31068 >> 2),
537         0x00000000,
538         (0x9e00 << 16) | (0x31068 >> 2),
539         0x00000000,
540         (0xae00 << 16) | (0x31068 >> 2),
541         0x00000000,
542         (0xbe00 << 16) | (0x31068 >> 2),
543         0x00000000,
544         (0x0e00 << 16) | (0xcd10 >> 2),
545         0x00000000,
546         (0x0e00 << 16) | (0xcd14 >> 2),
547         0x00000000,
548         (0x0e00 << 16) | (0x88b0 >> 2),
549         0x00000000,
550         (0x0e00 << 16) | (0x88b4 >> 2),
551         0x00000000,
552         (0x0e00 << 16) | (0x88b8 >> 2),
553         0x00000000,
554         (0x0e00 << 16) | (0x88bc >> 2),
555         0x00000000,
556         (0x0400 << 16) | (0x89c0 >> 2),
557         0x00000000,
558         (0x0e00 << 16) | (0x88c4 >> 2),
559         0x00000000,
560         (0x0e00 << 16) | (0x88c8 >> 2),
561         0x00000000,
562         (0x0e00 << 16) | (0x88d0 >> 2),
563         0x00000000,
564         (0x0e00 << 16) | (0x88d4 >> 2),
565         0x00000000,
566         (0x0e00 << 16) | (0x88d8 >> 2),
567         0x00000000,
568         (0x0e00 << 16) | (0x8980 >> 2),
569         0x00000000,
570         (0x0e00 << 16) | (0x30938 >> 2),
571         0x00000000,
572         (0x0e00 << 16) | (0x3093c >> 2),
573         0x00000000,
574         (0x0e00 << 16) | (0x30940 >> 2),
575         0x00000000,
576         (0x0e00 << 16) | (0x89a0 >> 2),
577         0x00000000,
578         (0x0e00 << 16) | (0x30900 >> 2),
579         0x00000000,
580         (0x0e00 << 16) | (0x30904 >> 2),
581         0x00000000,
582         (0x0e00 << 16) | (0x89b4 >> 2),
583         0x00000000,
584         (0x0e00 << 16) | (0x3c210 >> 2),
585         0x00000000,
586         (0x0e00 << 16) | (0x3c214 >> 2),
587         0x00000000,
588         (0x0e00 << 16) | (0x3c218 >> 2),
589         0x00000000,
590         (0x0e00 << 16) | (0x8904 >> 2),
591         0x00000000,
592         0x5,
593         (0x0e00 << 16) | (0x8c28 >> 2),
594         (0x0e00 << 16) | (0x8c2c >> 2),
595         (0x0e00 << 16) | (0x8c30 >> 2),
596         (0x0e00 << 16) | (0x8c34 >> 2),
597         (0x0e00 << 16) | (0x9600 >> 2),
598 };
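/*
 * Format note for the spectre (above) and kalindi (below) RLC save/restore
 * lists. The packing visible in the initializers is
 * ((select << 16) | (register byte offset >> 2)) followed by a data word.
 * Anything beyond that is an assumption, not documented in this file: the
 * high 16 bits look like a GRBM_GFX_INDEX-style SE/SH select, the trailing
 * 0x00000000 words look like placeholder slots filled in by the RLC
 * firmware, and the bare 0x3/0x5 entries look like counts introducing short
 * runs of registers listed without data slots.
 */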
599
600 static const u32 kalindi_rlc_save_restore_register_list[] =
601 {
602         (0x0e00 << 16) | (0xc12c >> 2),
603         0x00000000,
604         (0x0e00 << 16) | (0xc140 >> 2),
605         0x00000000,
606         (0x0e00 << 16) | (0xc150 >> 2),
607         0x00000000,
608         (0x0e00 << 16) | (0xc15c >> 2),
609         0x00000000,
610         (0x0e00 << 16) | (0xc168 >> 2),
611         0x00000000,
612         (0x0e00 << 16) | (0xc170 >> 2),
613         0x00000000,
614         (0x0e00 << 16) | (0xc204 >> 2),
615         0x00000000,
616         (0x0e00 << 16) | (0xc2b4 >> 2),
617         0x00000000,
618         (0x0e00 << 16) | (0xc2b8 >> 2),
619         0x00000000,
620         (0x0e00 << 16) | (0xc2bc >> 2),
621         0x00000000,
622         (0x0e00 << 16) | (0xc2c0 >> 2),
623         0x00000000,
624         (0x0e00 << 16) | (0x8228 >> 2),
625         0x00000000,
626         (0x0e00 << 16) | (0x829c >> 2),
627         0x00000000,
628         (0x0e00 << 16) | (0x869c >> 2),
629         0x00000000,
630         (0x0600 << 16) | (0x98f4 >> 2),
631         0x00000000,
632         (0x0e00 << 16) | (0x98f8 >> 2),
633         0x00000000,
634         (0x0e00 << 16) | (0x9900 >> 2),
635         0x00000000,
636         (0x0e00 << 16) | (0xc260 >> 2),
637         0x00000000,
638         (0x0e00 << 16) | (0x90e8 >> 2),
639         0x00000000,
640         (0x0e00 << 16) | (0x3c000 >> 2),
641         0x00000000,
642         (0x0e00 << 16) | (0x3c00c >> 2),
643         0x00000000,
644         (0x0e00 << 16) | (0x8c1c >> 2),
645         0x00000000,
646         (0x0e00 << 16) | (0x9700 >> 2),
647         0x00000000,
648         (0x0e00 << 16) | (0xcd20 >> 2),
649         0x00000000,
650         (0x4e00 << 16) | (0xcd20 >> 2),
651         0x00000000,
652         (0x5e00 << 16) | (0xcd20 >> 2),
653         0x00000000,
654         (0x6e00 << 16) | (0xcd20 >> 2),
655         0x00000000,
656         (0x7e00 << 16) | (0xcd20 >> 2),
657         0x00000000,
658         (0x0e00 << 16) | (0x89bc >> 2),
659         0x00000000,
660         (0x0e00 << 16) | (0x8900 >> 2),
661         0x00000000,
662         0x3,
663         (0x0e00 << 16) | (0xc130 >> 2),
664         0x00000000,
665         (0x0e00 << 16) | (0xc134 >> 2),
666         0x00000000,
667         (0x0e00 << 16) | (0xc1fc >> 2),
668         0x00000000,
669         (0x0e00 << 16) | (0xc208 >> 2),
670         0x00000000,
671         (0x0e00 << 16) | (0xc264 >> 2),
672         0x00000000,
673         (0x0e00 << 16) | (0xc268 >> 2),
674         0x00000000,
675         (0x0e00 << 16) | (0xc26c >> 2),
676         0x00000000,
677         (0x0e00 << 16) | (0xc270 >> 2),
678         0x00000000,
679         (0x0e00 << 16) | (0xc274 >> 2),
680         0x00000000,
681         (0x0e00 << 16) | (0xc28c >> 2),
682         0x00000000,
683         (0x0e00 << 16) | (0xc290 >> 2),
684         0x00000000,
685         (0x0e00 << 16) | (0xc294 >> 2),
686         0x00000000,
687         (0x0e00 << 16) | (0xc298 >> 2),
688         0x00000000,
689         (0x0e00 << 16) | (0xc2a0 >> 2),
690         0x00000000,
691         (0x0e00 << 16) | (0xc2a4 >> 2),
692         0x00000000,
693         (0x0e00 << 16) | (0xc2a8 >> 2),
694         0x00000000,
695         (0x0e00 << 16) | (0xc2ac >> 2),
696         0x00000000,
697         (0x0e00 << 16) | (0x301d0 >> 2),
698         0x00000000,
699         (0x0e00 << 16) | (0x30238 >> 2),
700         0x00000000,
701         (0x0e00 << 16) | (0x30250 >> 2),
702         0x00000000,
703         (0x0e00 << 16) | (0x30254 >> 2),
704         0x00000000,
705         (0x0e00 << 16) | (0x30258 >> 2),
706         0x00000000,
707         (0x0e00 << 16) | (0x3025c >> 2),
708         0x00000000,
709         (0x4e00 << 16) | (0xc900 >> 2),
710         0x00000000,
711         (0x5e00 << 16) | (0xc900 >> 2),
712         0x00000000,
713         (0x6e00 << 16) | (0xc900 >> 2),
714         0x00000000,
715         (0x7e00 << 16) | (0xc900 >> 2),
716         0x00000000,
717         (0x4e00 << 16) | (0xc904 >> 2),
718         0x00000000,
719         (0x5e00 << 16) | (0xc904 >> 2),
720         0x00000000,
721         (0x6e00 << 16) | (0xc904 >> 2),
722         0x00000000,
723         (0x7e00 << 16) | (0xc904 >> 2),
724         0x00000000,
725         (0x4e00 << 16) | (0xc908 >> 2),
726         0x00000000,
727         (0x5e00 << 16) | (0xc908 >> 2),
728         0x00000000,
729         (0x6e00 << 16) | (0xc908 >> 2),
730         0x00000000,
731         (0x7e00 << 16) | (0xc908 >> 2),
732         0x00000000,
733         (0x4e00 << 16) | (0xc90c >> 2),
734         0x00000000,
735         (0x5e00 << 16) | (0xc90c >> 2),
736         0x00000000,
737         (0x6e00 << 16) | (0xc90c >> 2),
738         0x00000000,
739         (0x7e00 << 16) | (0xc90c >> 2),
740         0x00000000,
741         (0x4e00 << 16) | (0xc910 >> 2),
742         0x00000000,
743         (0x5e00 << 16) | (0xc910 >> 2),
744         0x00000000,
745         (0x6e00 << 16) | (0xc910 >> 2),
746         0x00000000,
747         (0x7e00 << 16) | (0xc910 >> 2),
748         0x00000000,
749         (0x0e00 << 16) | (0xc99c >> 2),
750         0x00000000,
751         (0x0e00 << 16) | (0x9834 >> 2),
752         0x00000000,
753         (0x0000 << 16) | (0x30f00 >> 2),
754         0x00000000,
755         (0x0000 << 16) | (0x30f04 >> 2),
756         0x00000000,
757         (0x0000 << 16) | (0x30f08 >> 2),
758         0x00000000,
759         (0x0000 << 16) | (0x30f0c >> 2),
760         0x00000000,
761         (0x0600 << 16) | (0x9b7c >> 2),
762         0x00000000,
763         (0x0e00 << 16) | (0x8a14 >> 2),
764         0x00000000,
765         (0x0e00 << 16) | (0x8a18 >> 2),
766         0x00000000,
767         (0x0600 << 16) | (0x30a00 >> 2),
768         0x00000000,
769         (0x0e00 << 16) | (0x8bf0 >> 2),
770         0x00000000,
771         (0x0e00 << 16) | (0x8bcc >> 2),
772         0x00000000,
773         (0x0e00 << 16) | (0x8b24 >> 2),
774         0x00000000,
775         (0x0e00 << 16) | (0x30a04 >> 2),
776         0x00000000,
777         (0x0600 << 16) | (0x30a10 >> 2),
778         0x00000000,
779         (0x0600 << 16) | (0x30a14 >> 2),
780         0x00000000,
781         (0x0600 << 16) | (0x30a18 >> 2),
782         0x00000000,
783         (0x0600 << 16) | (0x30a2c >> 2),
784         0x00000000,
785         (0x0e00 << 16) | (0xc700 >> 2),
786         0x00000000,
787         (0x0e00 << 16) | (0xc704 >> 2),
788         0x00000000,
789         (0x0e00 << 16) | (0xc708 >> 2),
790         0x00000000,
791         (0x0e00 << 16) | (0xc768 >> 2),
792         0x00000000,
793         (0x0400 << 16) | (0xc770 >> 2),
794         0x00000000,
795         (0x0400 << 16) | (0xc774 >> 2),
796         0x00000000,
797         (0x0400 << 16) | (0xc798 >> 2),
798         0x00000000,
799         (0x0400 << 16) | (0xc79c >> 2),
800         0x00000000,
801         (0x0e00 << 16) | (0x9100 >> 2),
802         0x00000000,
803         (0x0e00 << 16) | (0x3c010 >> 2),
804         0x00000000,
805         (0x0e00 << 16) | (0x8c00 >> 2),
806         0x00000000,
807         (0x0e00 << 16) | (0x8c04 >> 2),
808         0x00000000,
809         (0x0e00 << 16) | (0x8c20 >> 2),
810         0x00000000,
811         (0x0e00 << 16) | (0x8c38 >> 2),
812         0x00000000,
813         (0x0e00 << 16) | (0x8c3c >> 2),
814         0x00000000,
815         (0x0e00 << 16) | (0xae00 >> 2),
816         0x00000000,
817         (0x0e00 << 16) | (0x9604 >> 2),
818         0x00000000,
819         (0x0e00 << 16) | (0xac08 >> 2),
820         0x00000000,
821         (0x0e00 << 16) | (0xac0c >> 2),
822         0x00000000,
823         (0x0e00 << 16) | (0xac10 >> 2),
824         0x00000000,
825         (0x0e00 << 16) | (0xac14 >> 2),
826         0x00000000,
827         (0x0e00 << 16) | (0xac58 >> 2),
828         0x00000000,
829         (0x0e00 << 16) | (0xac68 >> 2),
830         0x00000000,
831         (0x0e00 << 16) | (0xac6c >> 2),
832         0x00000000,
833         (0x0e00 << 16) | (0xac70 >> 2),
834         0x00000000,
835         (0x0e00 << 16) | (0xac74 >> 2),
836         0x00000000,
837         (0x0e00 << 16) | (0xac78 >> 2),
838         0x00000000,
839         (0x0e00 << 16) | (0xac7c >> 2),
840         0x00000000,
841         (0x0e00 << 16) | (0xac80 >> 2),
842         0x00000000,
843         (0x0e00 << 16) | (0xac84 >> 2),
844         0x00000000,
845         (0x0e00 << 16) | (0xac88 >> 2),
846         0x00000000,
847         (0x0e00 << 16) | (0xac8c >> 2),
848         0x00000000,
849         (0x0e00 << 16) | (0x970c >> 2),
850         0x00000000,
851         (0x0e00 << 16) | (0x9714 >> 2),
852         0x00000000,
853         (0x0e00 << 16) | (0x9718 >> 2),
854         0x00000000,
855         (0x0e00 << 16) | (0x971c >> 2),
856         0x00000000,
857         (0x0e00 << 16) | (0x31068 >> 2),
858         0x00000000,
859         (0x4e00 << 16) | (0x31068 >> 2),
860         0x00000000,
861         (0x5e00 << 16) | (0x31068 >> 2),
862         0x00000000,
863         (0x6e00 << 16) | (0x31068 >> 2),
864         0x00000000,
865         (0x7e00 << 16) | (0x31068 >> 2),
866         0x00000000,
867         (0x0e00 << 16) | (0xcd10 >> 2),
868         0x00000000,
869         (0x0e00 << 16) | (0xcd14 >> 2),
870         0x00000000,
871         (0x0e00 << 16) | (0x88b0 >> 2),
872         0x00000000,
873         (0x0e00 << 16) | (0x88b4 >> 2),
874         0x00000000,
875         (0x0e00 << 16) | (0x88b8 >> 2),
876         0x00000000,
877         (0x0e00 << 16) | (0x88bc >> 2),
878         0x00000000,
879         (0x0400 << 16) | (0x89c0 >> 2),
880         0x00000000,
881         (0x0e00 << 16) | (0x88c4 >> 2),
882         0x00000000,
883         (0x0e00 << 16) | (0x88c8 >> 2),
884         0x00000000,
885         (0x0e00 << 16) | (0x88d0 >> 2),
886         0x00000000,
887         (0x0e00 << 16) | (0x88d4 >> 2),
888         0x00000000,
889         (0x0e00 << 16) | (0x88d8 >> 2),
890         0x00000000,
891         (0x0e00 << 16) | (0x8980 >> 2),
892         0x00000000,
893         (0x0e00 << 16) | (0x30938 >> 2),
894         0x00000000,
895         (0x0e00 << 16) | (0x3093c >> 2),
896         0x00000000,
897         (0x0e00 << 16) | (0x30940 >> 2),
898         0x00000000,
899         (0x0e00 << 16) | (0x89a0 >> 2),
900         0x00000000,
901         (0x0e00 << 16) | (0x30900 >> 2),
902         0x00000000,
903         (0x0e00 << 16) | (0x30904 >> 2),
904         0x00000000,
905         (0x0e00 << 16) | (0x89b4 >> 2),
906         0x00000000,
907         (0x0e00 << 16) | (0x3e1fc >> 2),
908         0x00000000,
909         (0x0e00 << 16) | (0x3c210 >> 2),
910         0x00000000,
911         (0x0e00 << 16) | (0x3c214 >> 2),
912         0x00000000,
913         (0x0e00 << 16) | (0x3c218 >> 2),
914         0x00000000,
915         (0x0e00 << 16) | (0x8904 >> 2),
916         0x00000000,
917         0x5,
918         (0x0e00 << 16) | (0x8c28 >> 2),
919         (0x0e00 << 16) | (0x8c2c >> 2),
920         (0x0e00 << 16) | (0x8c30 >> 2),
921         (0x0e00 << 16) | (0x8c34 >> 2),
922         (0x0e00 << 16) | (0x9600 >> 2),
923 };
924
925 static const u32 bonaire_golden_spm_registers[] =
926 {
927         0x30800, 0xe0ffffff, 0xe0000000
928 };
929
930 static const u32 bonaire_golden_common_registers[] =
931 {
932         0xc770, 0xffffffff, 0x00000800,
933         0xc774, 0xffffffff, 0x00000800,
934         0xc798, 0xffffffff, 0x00007fbf,
935         0xc79c, 0xffffffff, 0x00007faf
936 };
937
938 static const u32 bonaire_golden_registers[] =
939 {
940         0x3354, 0x00000333, 0x00000333,
941         0x3350, 0x000c0fc0, 0x00040200,
942         0x9a10, 0x00010000, 0x00058208,
943         0x3c000, 0xffff1fff, 0x00140000,
944         0x3c200, 0xfdfc0fff, 0x00000100,
945         0x3c234, 0x40000000, 0x40000200,
946         0x9830, 0xffffffff, 0x00000000,
947         0x9834, 0xf00fffff, 0x00000400,
948         0x9838, 0x0002021c, 0x00020200,
949         0xc78, 0x00000080, 0x00000000,
950         0x5bb0, 0x000000f0, 0x00000070,
951         0x5bc0, 0xf0311fff, 0x80300000,
952         0x98f8, 0x73773777, 0x12010001,
953         0x350c, 0x00810000, 0x408af000,
954         0x7030, 0x31000111, 0x00000011,
955         0x2f48, 0x73773777, 0x12010001,
956         0x220c, 0x00007fb6, 0x0021a1b1,
957         0x2210, 0x00007fb6, 0x002021b1,
958         0x2180, 0x00007fb6, 0x00002191,
959         0x2218, 0x00007fb6, 0x002121b1,
960         0x221c, 0x00007fb6, 0x002021b1,
961         0x21dc, 0x00007fb6, 0x00002191,
962         0x21e0, 0x00007fb6, 0x00002191,
963         0x3628, 0x0000003f, 0x0000000a,
964         0x362c, 0x0000003f, 0x0000000a,
965         0x2ae4, 0x00073ffe, 0x000022a2,
966         0x240c, 0x000007ff, 0x00000000,
967         0x8a14, 0xf000003f, 0x00000007,
968         0x8bf0, 0x00002001, 0x00000001,
969         0x8b24, 0xffffffff, 0x00ffffff,
970         0x30a04, 0x0000ff0f, 0x00000000,
971         0x28a4c, 0x07ffffff, 0x06000000,
972         0x4d8, 0x00000fff, 0x00000100,
973         0x3e78, 0x00000001, 0x00000002,
974         0x9100, 0x03000000, 0x0362c688,
975         0x8c00, 0x000000ff, 0x00000001,
976         0xe40, 0x00001fff, 0x00001fff,
977         0x9060, 0x0000007f, 0x00000020,
978         0x9508, 0x00010000, 0x00010000,
979         0xac14, 0x000003ff, 0x000000f3,
980         0xac0c, 0xffffffff, 0x00001032
981 };
982
983 static const u32 bonaire_mgcg_cgcg_init[] =
984 {
985         0xc420, 0xffffffff, 0xfffffffc,
986         0x30800, 0xffffffff, 0xe0000000,
987         0x3c2a0, 0xffffffff, 0x00000100,
988         0x3c208, 0xffffffff, 0x00000100,
989         0x3c2c0, 0xffffffff, 0xc0000100,
990         0x3c2c8, 0xffffffff, 0xc0000100,
991         0x3c2c4, 0xffffffff, 0xc0000100,
992         0x55e4, 0xffffffff, 0x00600100,
993         0x3c280, 0xffffffff, 0x00000100,
994         0x3c214, 0xffffffff, 0x06000100,
995         0x3c220, 0xffffffff, 0x00000100,
996         0x3c218, 0xffffffff, 0x06000100,
997         0x3c204, 0xffffffff, 0x00000100,
998         0x3c2e0, 0xffffffff, 0x00000100,
999         0x3c224, 0xffffffff, 0x00000100,
1000         0x3c200, 0xffffffff, 0x00000100,
1001         0x3c230, 0xffffffff, 0x00000100,
1002         0x3c234, 0xffffffff, 0x00000100,
1003         0x3c250, 0xffffffff, 0x00000100,
1004         0x3c254, 0xffffffff, 0x00000100,
1005         0x3c258, 0xffffffff, 0x00000100,
1006         0x3c25c, 0xffffffff, 0x00000100,
1007         0x3c260, 0xffffffff, 0x00000100,
1008         0x3c27c, 0xffffffff, 0x00000100,
1009         0x3c278, 0xffffffff, 0x00000100,
1010         0x3c210, 0xffffffff, 0x06000100,
1011         0x3c290, 0xffffffff, 0x00000100,
1012         0x3c274, 0xffffffff, 0x00000100,
1013         0x3c2b4, 0xffffffff, 0x00000100,
1014         0x3c2b0, 0xffffffff, 0x00000100,
1015         0x3c270, 0xffffffff, 0x00000100,
1016         0x30800, 0xffffffff, 0xe0000000,
1017         0x3c020, 0xffffffff, 0x00010000,
1018         0x3c024, 0xffffffff, 0x00030002,
1019         0x3c028, 0xffffffff, 0x00040007,
1020         0x3c02c, 0xffffffff, 0x00060005,
1021         0x3c030, 0xffffffff, 0x00090008,
1022         0x3c034, 0xffffffff, 0x00010000,
1023         0x3c038, 0xffffffff, 0x00030002,
1024         0x3c03c, 0xffffffff, 0x00040007,
1025         0x3c040, 0xffffffff, 0x00060005,
1026         0x3c044, 0xffffffff, 0x00090008,
1027         0x3c048, 0xffffffff, 0x00010000,
1028         0x3c04c, 0xffffffff, 0x00030002,
1029         0x3c050, 0xffffffff, 0x00040007,
1030         0x3c054, 0xffffffff, 0x00060005,
1031         0x3c058, 0xffffffff, 0x00090008,
1032         0x3c05c, 0xffffffff, 0x00010000,
1033         0x3c060, 0xffffffff, 0x00030002,
1034         0x3c064, 0xffffffff, 0x00040007,
1035         0x3c068, 0xffffffff, 0x00060005,
1036         0x3c06c, 0xffffffff, 0x00090008,
1037         0x3c070, 0xffffffff, 0x00010000,
1038         0x3c074, 0xffffffff, 0x00030002,
1039         0x3c078, 0xffffffff, 0x00040007,
1040         0x3c07c, 0xffffffff, 0x00060005,
1041         0x3c080, 0xffffffff, 0x00090008,
1042         0x3c084, 0xffffffff, 0x00010000,
1043         0x3c088, 0xffffffff, 0x00030002,
1044         0x3c08c, 0xffffffff, 0x00040007,
1045         0x3c090, 0xffffffff, 0x00060005,
1046         0x3c094, 0xffffffff, 0x00090008,
1047         0x3c098, 0xffffffff, 0x00010000,
1048         0x3c09c, 0xffffffff, 0x00030002,
1049         0x3c0a0, 0xffffffff, 0x00040007,
1050         0x3c0a4, 0xffffffff, 0x00060005,
1051         0x3c0a8, 0xffffffff, 0x00090008,
1052         0x3c000, 0xffffffff, 0x96e00200,
1053         0x8708, 0xffffffff, 0x00900100,
1054         0xc424, 0xffffffff, 0x0020003f,
1055         0x38, 0xffffffff, 0x0140001c,
1056         0x3c, 0x000f0000, 0x000f0000,
1057         0x220, 0xffffffff, 0xc060000c,
1058         0x224, 0xc0000fff, 0x00000100,
1059         0xf90, 0xffffffff, 0x00000100,
1060         0xf98, 0x00000101, 0x00000000,
1061         0x20a8, 0xffffffff, 0x00000104,
1062         0x55e4, 0xff000fff, 0x00000100,
1063         0x30cc, 0xc0000fff, 0x00000104,
1064         0xc1e4, 0x00000001, 0x00000001,
1065         0xd00c, 0xff000ff0, 0x00000100,
1066         0xd80c, 0xff000ff0, 0x00000100
1067 };
1068
1069 static const u32 spectre_golden_spm_registers[] =
1070 {
1071         0x30800, 0xe0ffffff, 0xe0000000
1072 };
1073
1074 static const u32 spectre_golden_common_registers[] =
1075 {
1076         0xc770, 0xffffffff, 0x00000800,
1077         0xc774, 0xffffffff, 0x00000800,
1078         0xc798, 0xffffffff, 0x00007fbf,
1079         0xc79c, 0xffffffff, 0x00007faf
1080 };
1081
1082 static const u32 spectre_golden_registers[] =
1083 {
1084         0x3c000, 0xffff1fff, 0x96940200,
1085         0x3c00c, 0xffff0001, 0xff000000,
1086         0x3c200, 0xfffc0fff, 0x00000100,
1087         0x6ed8, 0x00010101, 0x00010000,
1088         0x9834, 0xf00fffff, 0x00000400,
1089         0x9838, 0xfffffffc, 0x00020200,
1090         0x5bb0, 0x000000f0, 0x00000070,
1091         0x5bc0, 0xf0311fff, 0x80300000,
1092         0x98f8, 0x73773777, 0x12010001,
1093         0x9b7c, 0x00ff0000, 0x00fc0000,
1094         0x2f48, 0x73773777, 0x12010001,
1095         0x8a14, 0xf000003f, 0x00000007,
1096         0x8b24, 0xffffffff, 0x00ffffff,
1097         0x28350, 0x3f3f3fff, 0x00000082,
1098         0x28354, 0x0000003f, 0x00000000,
1099         0x3e78, 0x00000001, 0x00000002,
1100         0x913c, 0xffff03df, 0x00000004,
1101         0xc768, 0x00000008, 0x00000008,
1102         0x8c00, 0x000008ff, 0x00000800,
1103         0x9508, 0x00010000, 0x00010000,
1104         0xac0c, 0xffffffff, 0x54763210,
1105         0x214f8, 0x01ff01ff, 0x00000002,
1106         0x21498, 0x007ff800, 0x00200000,
1107         0x2015c, 0xffffffff, 0x00000f40,
1108         0x30934, 0xffffffff, 0x00000001
1109 };
1110
1111 static const u32 spectre_mgcg_cgcg_init[] =
1112 {
1113         0xc420, 0xffffffff, 0xfffffffc,
1114         0x30800, 0xffffffff, 0xe0000000,
1115         0x3c2a0, 0xffffffff, 0x00000100,
1116         0x3c208, 0xffffffff, 0x00000100,
1117         0x3c2c0, 0xffffffff, 0x00000100,
1118         0x3c2c8, 0xffffffff, 0x00000100,
1119         0x3c2c4, 0xffffffff, 0x00000100,
1120         0x55e4, 0xffffffff, 0x00600100,
1121         0x3c280, 0xffffffff, 0x00000100,
1122         0x3c214, 0xffffffff, 0x06000100,
1123         0x3c220, 0xffffffff, 0x00000100,
1124         0x3c218, 0xffffffff, 0x06000100,
1125         0x3c204, 0xffffffff, 0x00000100,
1126         0x3c2e0, 0xffffffff, 0x00000100,
1127         0x3c224, 0xffffffff, 0x00000100,
1128         0x3c200, 0xffffffff, 0x00000100,
1129         0x3c230, 0xffffffff, 0x00000100,
1130         0x3c234, 0xffffffff, 0x00000100,
1131         0x3c250, 0xffffffff, 0x00000100,
1132         0x3c254, 0xffffffff, 0x00000100,
1133         0x3c258, 0xffffffff, 0x00000100,
1134         0x3c25c, 0xffffffff, 0x00000100,
1135         0x3c260, 0xffffffff, 0x00000100,
1136         0x3c27c, 0xffffffff, 0x00000100,
1137         0x3c278, 0xffffffff, 0x00000100,
1138         0x3c210, 0xffffffff, 0x06000100,
1139         0x3c290, 0xffffffff, 0x00000100,
1140         0x3c274, 0xffffffff, 0x00000100,
1141         0x3c2b4, 0xffffffff, 0x00000100,
1142         0x3c2b0, 0xffffffff, 0x00000100,
1143         0x3c270, 0xffffffff, 0x00000100,
1144         0x30800, 0xffffffff, 0xe0000000,
1145         0x3c020, 0xffffffff, 0x00010000,
1146         0x3c024, 0xffffffff, 0x00030002,
1147         0x3c028, 0xffffffff, 0x00040007,
1148         0x3c02c, 0xffffffff, 0x00060005,
1149         0x3c030, 0xffffffff, 0x00090008,
1150         0x3c034, 0xffffffff, 0x00010000,
1151         0x3c038, 0xffffffff, 0x00030002,
1152         0x3c03c, 0xffffffff, 0x00040007,
1153         0x3c040, 0xffffffff, 0x00060005,
1154         0x3c044, 0xffffffff, 0x00090008,
1155         0x3c048, 0xffffffff, 0x00010000,
1156         0x3c04c, 0xffffffff, 0x00030002,
1157         0x3c050, 0xffffffff, 0x00040007,
1158         0x3c054, 0xffffffff, 0x00060005,
1159         0x3c058, 0xffffffff, 0x00090008,
1160         0x3c05c, 0xffffffff, 0x00010000,
1161         0x3c060, 0xffffffff, 0x00030002,
1162         0x3c064, 0xffffffff, 0x00040007,
1163         0x3c068, 0xffffffff, 0x00060005,
1164         0x3c06c, 0xffffffff, 0x00090008,
1165         0x3c070, 0xffffffff, 0x00010000,
1166         0x3c074, 0xffffffff, 0x00030002,
1167         0x3c078, 0xffffffff, 0x00040007,
1168         0x3c07c, 0xffffffff, 0x00060005,
1169         0x3c080, 0xffffffff, 0x00090008,
1170         0x3c084, 0xffffffff, 0x00010000,
1171         0x3c088, 0xffffffff, 0x00030002,
1172         0x3c08c, 0xffffffff, 0x00040007,
1173         0x3c090, 0xffffffff, 0x00060005,
1174         0x3c094, 0xffffffff, 0x00090008,
1175         0x3c098, 0xffffffff, 0x00010000,
1176         0x3c09c, 0xffffffff, 0x00030002,
1177         0x3c0a0, 0xffffffff, 0x00040007,
1178         0x3c0a4, 0xffffffff, 0x00060005,
1179         0x3c0a8, 0xffffffff, 0x00090008,
1180         0x3c0ac, 0xffffffff, 0x00010000,
1181         0x3c0b0, 0xffffffff, 0x00030002,
1182         0x3c0b4, 0xffffffff, 0x00040007,
1183         0x3c0b8, 0xffffffff, 0x00060005,
1184         0x3c0bc, 0xffffffff, 0x00090008,
1185         0x3c000, 0xffffffff, 0x96e00200,
1186         0x8708, 0xffffffff, 0x00900100,
1187         0xc424, 0xffffffff, 0x0020003f,
1188         0x38, 0xffffffff, 0x0140001c,
1189         0x3c, 0x000f0000, 0x000f0000,
1190         0x220, 0xffffffff, 0xc060000c,
1191         0x224, 0xc0000fff, 0x00000100,
1192         0xf90, 0xffffffff, 0x00000100,
1193         0xf98, 0x00000101, 0x00000000,
1194         0x20a8, 0xffffffff, 0x00000104,
1195         0x55e4, 0xff000fff, 0x00000100,
1196         0x30cc, 0xc0000fff, 0x00000104,
1197         0xc1e4, 0x00000001, 0x00000001,
1198         0xd00c, 0xff000ff0, 0x00000100,
1199         0xd80c, 0xff000ff0, 0x00000100
1200 };
1201
1202 static const u32 kalindi_golden_spm_registers[] =
1203 {
1204         0x30800, 0xe0ffffff, 0xe0000000
1205 };
1206
1207 static const u32 kalindi_golden_common_registers[] =
1208 {
1209         0xc770, 0xffffffff, 0x00000800,
1210         0xc774, 0xffffffff, 0x00000800,
1211         0xc798, 0xffffffff, 0x00007fbf,
1212         0xc79c, 0xffffffff, 0x00007faf
1213 };
1214
1215 static const u32 kalindi_golden_registers[] =
1216 {
1217         0x3c000, 0xffffdfff, 0x6e944040,
1218         0x55e4, 0xff607fff, 0xfc000100,
1219         0x3c220, 0xff000fff, 0x00000100,
1220         0x3c224, 0xff000fff, 0x00000100,
1221         0x3c200, 0xfffc0fff, 0x00000100,
1222         0x6ed8, 0x00010101, 0x00010000,
1223         0x9830, 0xffffffff, 0x00000000,
1224         0x9834, 0xf00fffff, 0x00000400,
1225         0x5bb0, 0x000000f0, 0x00000070,
1226         0x5bc0, 0xf0311fff, 0x80300000,
1227         0x98f8, 0x73773777, 0x12010001,
1228         0x98fc, 0xffffffff, 0x00000010,
1229         0x9b7c, 0x00ff0000, 0x00fc0000,
1230         0x8030, 0x00001f0f, 0x0000100a,
1231         0x2f48, 0x73773777, 0x12010001,
1232         0x2408, 0x000fffff, 0x000c007f,
1233         0x8a14, 0xf000003f, 0x00000007,
1234         0x8b24, 0x3fff3fff, 0x00ffcfff,
1235         0x30a04, 0x0000ff0f, 0x00000000,
1236         0x28a4c, 0x07ffffff, 0x06000000,
1237         0x4d8, 0x00000fff, 0x00000100,
1238         0x3e78, 0x00000001, 0x00000002,
1239         0xc768, 0x00000008, 0x00000008,
1240         0x8c00, 0x000000ff, 0x00000003,
1241         0x214f8, 0x01ff01ff, 0x00000002,
1242         0x21498, 0x007ff800, 0x00200000,
1243         0x2015c, 0xffffffff, 0x00000f40,
1244         0x88c4, 0x001f3ae3, 0x00000082,
1245         0x88d4, 0x0000001f, 0x00000010,
1246         0x30934, 0xffffffff, 0x00000000
1247 };
1248
1249 static const u32 kalindi_mgcg_cgcg_init[] =
1250 {
1251         0xc420, 0xffffffff, 0xfffffffc,
1252         0x30800, 0xffffffff, 0xe0000000,
1253         0x3c2a0, 0xffffffff, 0x00000100,
1254         0x3c208, 0xffffffff, 0x00000100,
1255         0x3c2c0, 0xffffffff, 0x00000100,
1256         0x3c2c8, 0xffffffff, 0x00000100,
1257         0x3c2c4, 0xffffffff, 0x00000100,
1258         0x55e4, 0xffffffff, 0x00600100,
1259         0x3c280, 0xffffffff, 0x00000100,
1260         0x3c214, 0xffffffff, 0x06000100,
1261         0x3c220, 0xffffffff, 0x00000100,
1262         0x3c218, 0xffffffff, 0x06000100,
1263         0x3c204, 0xffffffff, 0x00000100,
1264         0x3c2e0, 0xffffffff, 0x00000100,
1265         0x3c224, 0xffffffff, 0x00000100,
1266         0x3c200, 0xffffffff, 0x00000100,
1267         0x3c230, 0xffffffff, 0x00000100,
1268         0x3c234, 0xffffffff, 0x00000100,
1269         0x3c250, 0xffffffff, 0x00000100,
1270         0x3c254, 0xffffffff, 0x00000100,
1271         0x3c258, 0xffffffff, 0x00000100,
1272         0x3c25c, 0xffffffff, 0x00000100,
1273         0x3c260, 0xffffffff, 0x00000100,
1274         0x3c27c, 0xffffffff, 0x00000100,
1275         0x3c278, 0xffffffff, 0x00000100,
1276         0x3c210, 0xffffffff, 0x06000100,
1277         0x3c290, 0xffffffff, 0x00000100,
1278         0x3c274, 0xffffffff, 0x00000100,
1279         0x3c2b4, 0xffffffff, 0x00000100,
1280         0x3c2b0, 0xffffffff, 0x00000100,
1281         0x3c270, 0xffffffff, 0x00000100,
1282         0x30800, 0xffffffff, 0xe0000000,
1283         0x3c020, 0xffffffff, 0x00010000,
1284         0x3c024, 0xffffffff, 0x00030002,
1285         0x3c028, 0xffffffff, 0x00040007,
1286         0x3c02c, 0xffffffff, 0x00060005,
1287         0x3c030, 0xffffffff, 0x00090008,
1288         0x3c034, 0xffffffff, 0x00010000,
1289         0x3c038, 0xffffffff, 0x00030002,
1290         0x3c03c, 0xffffffff, 0x00040007,
1291         0x3c040, 0xffffffff, 0x00060005,
1292         0x3c044, 0xffffffff, 0x00090008,
1293         0x3c000, 0xffffffff, 0x96e00200,
1294         0x8708, 0xffffffff, 0x00900100,
1295         0xc424, 0xffffffff, 0x0020003f,
1296         0x38, 0xffffffff, 0x0140001c,
1297         0x3c, 0x000f0000, 0x000f0000,
1298         0x220, 0xffffffff, 0xc060000c,
1299         0x224, 0xc0000fff, 0x00000100,
1300         0x20a8, 0xffffffff, 0x00000104,
1301         0x55e4, 0xff000fff, 0x00000100,
1302         0x30cc, 0xc0000fff, 0x00000104,
1303         0xc1e4, 0x00000001, 0x00000001,
1304         0xd00c, 0xff000ff0, 0x00000100,
1305         0xd80c, 0xff000ff0, 0x00000100
1306 };
1307
1308 static const u32 hawaii_golden_spm_registers[] =
1309 {
1310         0x30800, 0xe0ffffff, 0xe0000000
1311 };
1312
1313 static const u32 hawaii_golden_common_registers[] =
1314 {
1315         0x30800, 0xffffffff, 0xe0000000,
1316         0x28350, 0xffffffff, 0x3a00161a,
1317         0x28354, 0xffffffff, 0x0000002e,
1318         0x9a10, 0xffffffff, 0x00018208,
1319         0x98f8, 0xffffffff, 0x12011003
1320 };
1321
1322 static const u32 hawaii_golden_registers[] =
1323 {
1324         0x3354, 0x00000333, 0x00000333,
1325         0x9a10, 0x00010000, 0x00058208,
1326         0x9830, 0xffffffff, 0x00000000,
1327         0x9834, 0xf00fffff, 0x00000400,
1328         0x9838, 0x0002021c, 0x00020200,
1329         0xc78, 0x00000080, 0x00000000,
1330         0x5bb0, 0x000000f0, 0x00000070,
1331         0x5bc0, 0xf0311fff, 0x80300000,
1332         0x350c, 0x00810000, 0x408af000,
1333         0x7030, 0x31000111, 0x00000011,
1334         0x2f48, 0x73773777, 0x12010001,
1335         0x2120, 0x0000007f, 0x0000001b,
1336         0x21dc, 0x00007fb6, 0x00002191,
1337         0x3628, 0x0000003f, 0x0000000a,
1338         0x362c, 0x0000003f, 0x0000000a,
1339         0x2ae4, 0x00073ffe, 0x000022a2,
1340         0x240c, 0x000007ff, 0x00000000,
1341         0x8bf0, 0x00002001, 0x00000001,
1342         0x8b24, 0xffffffff, 0x00ffffff,
1343         0x30a04, 0x0000ff0f, 0x00000000,
1344         0x28a4c, 0x07ffffff, 0x06000000,
1345         0x3e78, 0x00000001, 0x00000002,
1346         0xc768, 0x00000008, 0x00000008,
1347         0xc770, 0x00000f00, 0x00000800,
1348         0xc774, 0x00000f00, 0x00000800,
1349         0xc798, 0x00ffffff, 0x00ff7fbf,
1350         0xc79c, 0x00ffffff, 0x00ff7faf,
1351         0x8c00, 0x000000ff, 0x00000800,
1352         0xe40, 0x00001fff, 0x00001fff,
1353         0x9060, 0x0000007f, 0x00000020,
1354         0x9508, 0x00010000, 0x00010000,
1355         0xae00, 0x00100000, 0x000ff07c,
1356         0xac14, 0x000003ff, 0x0000000f,
1357         0xac10, 0xffffffff, 0x7564fdec,
1358         0xac0c, 0xffffffff, 0x3120b9a8,
1359         0xac08, 0x20000000, 0x0f9c0000
1360 };
1361
1362 static const u32 hawaii_mgcg_cgcg_init[] =
1363 {
1364         0xc420, 0xffffffff, 0xfffffffd,
1365         0x30800, 0xffffffff, 0xe0000000,
1366         0x3c2a0, 0xffffffff, 0x00000100,
1367         0x3c208, 0xffffffff, 0x00000100,
1368         0x3c2c0, 0xffffffff, 0x00000100,
1369         0x3c2c8, 0xffffffff, 0x00000100,
1370         0x3c2c4, 0xffffffff, 0x00000100,
1371         0x55e4, 0xffffffff, 0x00200100,
1372         0x3c280, 0xffffffff, 0x00000100,
1373         0x3c214, 0xffffffff, 0x06000100,
1374         0x3c220, 0xffffffff, 0x00000100,
1375         0x3c218, 0xffffffff, 0x06000100,
1376         0x3c204, 0xffffffff, 0x00000100,
1377         0x3c2e0, 0xffffffff, 0x00000100,
1378         0x3c224, 0xffffffff, 0x00000100,
1379         0x3c200, 0xffffffff, 0x00000100,
1380         0x3c230, 0xffffffff, 0x00000100,
1381         0x3c234, 0xffffffff, 0x00000100,
1382         0x3c250, 0xffffffff, 0x00000100,
1383         0x3c254, 0xffffffff, 0x00000100,
1384         0x3c258, 0xffffffff, 0x00000100,
1385         0x3c25c, 0xffffffff, 0x00000100,
1386         0x3c260, 0xffffffff, 0x00000100,
1387         0x3c27c, 0xffffffff, 0x00000100,
1388         0x3c278, 0xffffffff, 0x00000100,
1389         0x3c210, 0xffffffff, 0x06000100,
1390         0x3c290, 0xffffffff, 0x00000100,
1391         0x3c274, 0xffffffff, 0x00000100,
1392         0x3c2b4, 0xffffffff, 0x00000100,
1393         0x3c2b0, 0xffffffff, 0x00000100,
1394         0x3c270, 0xffffffff, 0x00000100,
1395         0x30800, 0xffffffff, 0xe0000000,
1396         0x3c020, 0xffffffff, 0x00010000,
1397         0x3c024, 0xffffffff, 0x00030002,
1398         0x3c028, 0xffffffff, 0x00040007,
1399         0x3c02c, 0xffffffff, 0x00060005,
1400         0x3c030, 0xffffffff, 0x00090008,
1401         0x3c034, 0xffffffff, 0x00010000,
1402         0x3c038, 0xffffffff, 0x00030002,
1403         0x3c03c, 0xffffffff, 0x00040007,
1404         0x3c040, 0xffffffff, 0x00060005,
1405         0x3c044, 0xffffffff, 0x00090008,
1406         0x3c048, 0xffffffff, 0x00010000,
1407         0x3c04c, 0xffffffff, 0x00030002,
1408         0x3c050, 0xffffffff, 0x00040007,
1409         0x3c054, 0xffffffff, 0x00060005,
1410         0x3c058, 0xffffffff, 0x00090008,
1411         0x3c05c, 0xffffffff, 0x00010000,
1412         0x3c060, 0xffffffff, 0x00030002,
1413         0x3c064, 0xffffffff, 0x00040007,
1414         0x3c068, 0xffffffff, 0x00060005,
1415         0x3c06c, 0xffffffff, 0x00090008,
1416         0x3c070, 0xffffffff, 0x00010000,
1417         0x3c074, 0xffffffff, 0x00030002,
1418         0x3c078, 0xffffffff, 0x00040007,
1419         0x3c07c, 0xffffffff, 0x00060005,
1420         0x3c080, 0xffffffff, 0x00090008,
1421         0x3c084, 0xffffffff, 0x00010000,
1422         0x3c088, 0xffffffff, 0x00030002,
1423         0x3c08c, 0xffffffff, 0x00040007,
1424         0x3c090, 0xffffffff, 0x00060005,
1425         0x3c094, 0xffffffff, 0x00090008,
1426         0x3c098, 0xffffffff, 0x00010000,
1427         0x3c09c, 0xffffffff, 0x00030002,
1428         0x3c0a0, 0xffffffff, 0x00040007,
1429         0x3c0a4, 0xffffffff, 0x00060005,
1430         0x3c0a8, 0xffffffff, 0x00090008,
1431         0x3c0ac, 0xffffffff, 0x00010000,
1432         0x3c0b0, 0xffffffff, 0x00030002,
1433         0x3c0b4, 0xffffffff, 0x00040007,
1434         0x3c0b8, 0xffffffff, 0x00060005,
1435         0x3c0bc, 0xffffffff, 0x00090008,
1436         0x3c0c0, 0xffffffff, 0x00010000,
1437         0x3c0c4, 0xffffffff, 0x00030002,
1438         0x3c0c8, 0xffffffff, 0x00040007,
1439         0x3c0cc, 0xffffffff, 0x00060005,
1440         0x3c0d0, 0xffffffff, 0x00090008,
1441         0x3c0d4, 0xffffffff, 0x00010000,
1442         0x3c0d8, 0xffffffff, 0x00030002,
1443         0x3c0dc, 0xffffffff, 0x00040007,
1444         0x3c0e0, 0xffffffff, 0x00060005,
1445         0x3c0e4, 0xffffffff, 0x00090008,
1446         0x3c0e8, 0xffffffff, 0x00010000,
1447         0x3c0ec, 0xffffffff, 0x00030002,
1448         0x3c0f0, 0xffffffff, 0x00040007,
1449         0x3c0f4, 0xffffffff, 0x00060005,
1450         0x3c0f8, 0xffffffff, 0x00090008,
1451         0xc318, 0xffffffff, 0x00020200,
1452         0x3350, 0xffffffff, 0x00000200,
1453         0x15c0, 0xffffffff, 0x00000400,
1454         0x55e8, 0xffffffff, 0x00000000,
1455         0x2f50, 0xffffffff, 0x00000902,
1456         0x3c000, 0xffffffff, 0x96940200,
1457         0x8708, 0xffffffff, 0x00900100,
1458         0xc424, 0xffffffff, 0x0020003f,
1459         0x38, 0xffffffff, 0x0140001c,
1460         0x3c, 0x000f0000, 0x000f0000,
1461         0x220, 0xffffffff, 0xc060000c,
1462         0x224, 0xc0000fff, 0x00000100,
1463         0xf90, 0xffffffff, 0x00000100,
1464         0xf98, 0x00000101, 0x00000000,
1465         0x20a8, 0xffffffff, 0x00000104,
1466         0x55e4, 0xff000fff, 0x00000100,
1467         0x30cc, 0xc0000fff, 0x00000104,
1468         0xc1e4, 0x00000001, 0x00000001,
1469         0xd00c, 0xff000ff0, 0x00000100,
1470         0xd80c, 0xff000ff0, 0x00000100
1471 };
1472
1473 static void cik_init_golden_registers(struct radeon_device *rdev)
1474 {
1475         switch (rdev->family) {
1476         case CHIP_BONAIRE:
1477                 radeon_program_register_sequence(rdev,
1478                                                  bonaire_mgcg_cgcg_init,
1479                                                  (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1480                 radeon_program_register_sequence(rdev,
1481                                                  bonaire_golden_registers,
1482                                                  (const u32)ARRAY_SIZE(bonaire_golden_registers));
1483                 radeon_program_register_sequence(rdev,
1484                                                  bonaire_golden_common_registers,
1485                                                  (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1486                 radeon_program_register_sequence(rdev,
1487                                                  bonaire_golden_spm_registers,
1488                                                  (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1489                 break;
1490         case CHIP_KABINI:
1491                 radeon_program_register_sequence(rdev,
1492                                                  kalindi_mgcg_cgcg_init,
1493                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1494                 radeon_program_register_sequence(rdev,
1495                                                  kalindi_golden_registers,
1496                                                  (const u32)ARRAY_SIZE(kalindi_golden_registers));
1497                 radeon_program_register_sequence(rdev,
1498                                                  kalindi_golden_common_registers,
1499                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1500                 radeon_program_register_sequence(rdev,
1501                                                  kalindi_golden_spm_registers,
1502                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1503                 break;
1504         case CHIP_KAVERI:
1505                 radeon_program_register_sequence(rdev,
1506                                                  spectre_mgcg_cgcg_init,
1507                                                  (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1508                 radeon_program_register_sequence(rdev,
1509                                                  spectre_golden_registers,
1510                                                  (const u32)ARRAY_SIZE(spectre_golden_registers));
1511                 radeon_program_register_sequence(rdev,
1512                                                  spectre_golden_common_registers,
1513                                                  (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1514                 radeon_program_register_sequence(rdev,
1515                                                  spectre_golden_spm_registers,
1516                                                  (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1517                 break;
1518         case CHIP_HAWAII:
1519                 radeon_program_register_sequence(rdev,
1520                                                  hawaii_mgcg_cgcg_init,
1521                                                  (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1522                 radeon_program_register_sequence(rdev,
1523                                                  hawaii_golden_registers,
1524                                                  (const u32)ARRAY_SIZE(hawaii_golden_registers));
1525                 radeon_program_register_sequence(rdev,
1526                                                  hawaii_golden_common_registers,
1527                                                  (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1528                 radeon_program_register_sequence(rdev,
1529                                                  hawaii_golden_spm_registers,
1530                                                  (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1531                 break;
1532         default:
1533                 break;
1534         }
1535 }
1536
1537 /**
1538  * cik_get_xclk - get the xclk
1539  *
1540  * @rdev: radeon_device pointer
1541  *
1542  * Returns the reference clock used by the gfx engine
1543  * (CIK).
1544  */
1545 u32 cik_get_xclk(struct radeon_device *rdev)
1546 {
1547         u32 reference_clock = rdev->clock.spll.reference_freq;
1548
1549         if (rdev->flags & RADEON_IS_IGP) {
1550                 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1551                         return reference_clock / 2;
1552         } else {
1553                 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1554                         return reference_clock / 4;
1555         }
1556         return reference_clock;
1557 }
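
/*
 * cik_get_xclk() is wired up as the ASIC get_xclk callback
 * (radeon_get_xclk()); clock and power management code uses it as the
 * time base when computing dividers.  The divide-by-2/divide-by-4
 * cases above come from the counter/xtal divider bits read through the
 * indirect SMC register space.
 */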
1558
1559 /**
1560  * cik_mm_rdoorbell - read a doorbell dword
1561  *
1562  * @rdev: radeon_device pointer
1563  * @index: doorbell index
1564  *
1565  * Returns the value in the doorbell aperture at the
1566  * requested doorbell index (CIK).
1567  */
1568 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1569 {
1570         if (index < rdev->doorbell.num_doorbells) {
1571                 return readl(rdev->doorbell.ptr + index);
1572         } else {
1573                 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1574                 return 0;
1575         }
1576 }
1577
1578 /**
1579  * cik_mm_wdoorbell - write a doorbell dword
1580  *
1581  * @rdev: radeon_device pointer
1582  * @index: doorbell index
1583  * @v: value to write
1584  *
1585  * Writes @v to the doorbell aperture at the
1586  * requested doorbell index (CIK).
1587  */
1588 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1589 {
1590         if (index < rdev->doorbell.num_doorbells) {
1591                 writel(v, rdev->doorbell.ptr + index);
1592         } else {
1593                 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1594         }
1595 }
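
/*
 * Usage sketch (assuming the RDOORBELL32()/WDOORBELL32() wrappers in
 * radeon.h resolve to the helpers above): a compute ring commits its
 * new write pointer with
 *
 *      WDOORBELL32(ring->doorbell_index, ring->wptr);
 *
 * instead of poking the MMIO ring registers directly.
 */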
1596
1597 #define BONAIRE_IO_MC_REGS_SIZE 36
1598
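/*
 * The *_io_mc_regs tables below are {MC_SEQ_IO_DEBUG_INDEX,
 * MC_SEQ_IO_DEBUG_DATA} pairs; ci_mc_load_microcode() replays them
 * into the MC before streaming in the MC ucode itself.
 */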
1599 static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
1600 {
1601         {0x00000070, 0x04400000},
1602         {0x00000071, 0x80c01803},
1603         {0x00000072, 0x00004004},
1604         {0x00000073, 0x00000100},
1605         {0x00000074, 0x00ff0000},
1606         {0x00000075, 0x34000000},
1607         {0x00000076, 0x08000014},
1608         {0x00000077, 0x00cc08ec},
1609         {0x00000078, 0x00000400},
1610         {0x00000079, 0x00000000},
1611         {0x0000007a, 0x04090000},
1612         {0x0000007c, 0x00000000},
1613         {0x0000007e, 0x4408a8e8},
1614         {0x0000007f, 0x00000304},
1615         {0x00000080, 0x00000000},
1616         {0x00000082, 0x00000001},
1617         {0x00000083, 0x00000002},
1618         {0x00000084, 0xf3e4f400},
1619         {0x00000085, 0x052024e3},
1620         {0x00000087, 0x00000000},
1621         {0x00000088, 0x01000000},
1622         {0x0000008a, 0x1c0a0000},
1623         {0x0000008b, 0xff010000},
1624         {0x0000008d, 0xffffefff},
1625         {0x0000008e, 0xfff3efff},
1626         {0x0000008f, 0xfff3efbf},
1627         {0x00000092, 0xf7ffffff},
1628         {0x00000093, 0xffffff7f},
1629         {0x00000095, 0x00101101},
1630         {0x00000096, 0x00000fff},
1631         {0x00000097, 0x00116fff},
1632         {0x00000098, 0x60010000},
1633         {0x00000099, 0x10010000},
1634         {0x0000009a, 0x00006000},
1635         {0x0000009b, 0x00001000},
1636         {0x0000009f, 0x00b48000}
1637 };
1638
1639 #define HAWAII_IO_MC_REGS_SIZE 22
1640
1641 static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
1642 {
1643         {0x0000007d, 0x40000000},
1644         {0x0000007e, 0x40180304},
1645         {0x0000007f, 0x0000ff00},
1646         {0x00000081, 0x00000000},
1647         {0x00000083, 0x00000800},
1648         {0x00000086, 0x00000000},
1649         {0x00000087, 0x00000100},
1650         {0x00000088, 0x00020100},
1651         {0x00000089, 0x00000000},
1652         {0x0000008b, 0x00040000},
1653         {0x0000008c, 0x00000100},
1654         {0x0000008e, 0xff010000},
1655         {0x00000090, 0xffffefff},
1656         {0x00000091, 0xfff3efff},
1657         {0x00000092, 0xfff3efbf},
1658         {0x00000093, 0xf7ffffff},
1659         {0x00000094, 0xffffff7f},
1660         {0x00000095, 0x00000fff},
1661         {0x00000096, 0x00116fff},
1662         {0x00000097, 0x60010000},
1663         {0x00000098, 0x10010000},
1664         {0x0000009f, 0x00c79000}
1665 };
1666
1667
1668 /**
1669  * cik_srbm_select - select specific register instances
1670  *
1671  * @rdev: radeon_device pointer
1672  * @me: selected ME (micro engine)
1673  * @pipe: pipe
1674  * @queue: queue
1675  * @vmid: VMID
1676  *
1677  * Switches the currently active register instances.  Some
1678  * registers are instanced per VMID, others are instanced per
1679  * me/pipe/queue combination.
1680  */
1681 static void cik_srbm_select(struct radeon_device *rdev,
1682                             u32 me, u32 pipe, u32 queue, u32 vmid)
1683 {
1684         u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1685                              MEID(me & 0x3) |
1686                              VMID(vmid & 0xf) |
1687                              QUEUEID(queue & 0x7));
1688         WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1689 }
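
/*
 * Callers bracket accesses to instanced registers with a select/restore
 * pair, serialized against other users; a sketch of the pattern used
 * later in this file:
 *
 *      mutex_lock(&rdev->srbm_mutex);
 *      cik_srbm_select(rdev, me, pipe, queue, vmid);
 *      ... program the per-instance registers ...
 *      cik_srbm_select(rdev, 0, 0, 0, 0);
 *      mutex_unlock(&rdev->srbm_mutex);
 */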
1690
1691 /* ucode loading */
1692 /**
1693  * ci_mc_load_microcode - load MC ucode into the hw
1694  *
1695  * @rdev: radeon_device pointer
1696  *
1697  * Load the GDDR MC ucode into the hw (CIK).
1698  * Returns 0 on success, error on failure.
1699  */
1700 int ci_mc_load_microcode(struct radeon_device *rdev)
1701 {
1702         const __be32 *fw_data;
1703         u32 running, blackout = 0;
1704         u32 *io_mc_regs;
1705         int i, ucode_size, regs_size;
1706
1707         if (!rdev->mc_fw)
1708                 return -EINVAL;
1709
1710         switch (rdev->family) {
1711         case CHIP_BONAIRE:
1712                 io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1713                 ucode_size = CIK_MC_UCODE_SIZE;
1714                 regs_size = BONAIRE_IO_MC_REGS_SIZE;
1715                 break;
1716         case CHIP_HAWAII:
1717                 io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1718                 ucode_size = HAWAII_MC_UCODE_SIZE;
1719                 regs_size = HAWAII_IO_MC_REGS_SIZE;
1720                 break;
1721         default:
1722                 return -EINVAL;
1723         }
1724
1725         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1726
1727         if (running == 0) {
1728                 if (running) {
1729                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1730                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1731                 }
1732
1733                 /* reset the engine and set to writable */
1734                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1735                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1736
1737                 /* load mc io regs */
1738                 for (i = 0; i < regs_size; i++) {
1739                         WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1740                         WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1741                 }
1742                 /* load the MC ucode */
1743                 fw_data = (const __be32 *)rdev->mc_fw->data;
1744                 for (i = 0; i < ucode_size; i++)
1745                         WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1746
1747                 /* put the engine back into the active state */
1748                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1749                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1750                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1751
1752                 /* wait for training to complete */
1753                 for (i = 0; i < rdev->usec_timeout; i++) {
1754                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1755                                 break;
1756                         udelay(1);
1757                 }
1758                 for (i = 0; i < rdev->usec_timeout; i++) {
1759                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1760                                 break;
1761                         udelay(1);
1762                 }
1763
1764                 if (running)
1765                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1766         }
1767
1768         return 0;
1769 }
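
/*
 * A usage sketch (how the startup path later in this file is expected
 * to call this): load the MC ucode before enabling the memory
 * controller and treat a failure as fatal to the bring-up,
 *
 *      r = ci_mc_load_microcode(rdev);
 *      if (r)
 *              DRM_ERROR("Failed to load MC firmware!\n");
 */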
1770
1771 /**
1772  * cik_init_microcode - load ucode images from disk
1773  *
1774  * @rdev: radeon_device pointer
1775  *
1776  * Use the firmware interface to load the ucode images into
1777  * the driver (not loaded into hw).
1778  * Returns 0 on success, error on failure.
1779  */
1780 static int cik_init_microcode(struct radeon_device *rdev)
1781 {
1782         const char *chip_name;
1783         size_t pfp_req_size, me_req_size, ce_req_size,
1784                 mec_req_size, rlc_req_size, mc_req_size = 0,
1785                 sdma_req_size, smc_req_size = 0;
1786         char fw_name[30];
1787         int err;
1788
1789         DRM_DEBUG("\n");
1790
1791         switch (rdev->family) {
1792         case CHIP_BONAIRE:
1793                 chip_name = "BONAIRE";
1794                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1795                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1796                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1797                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1798                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1799                 mc_req_size = CIK_MC_UCODE_SIZE * 4;
1800                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1801                 smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1802                 break;
1803         case CHIP_HAWAII:
1804                 chip_name = "HAWAII";
1805                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1806                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1807                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1808                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1809                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1810                 mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
1811                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1812                 smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
1813                 break;
1814         case CHIP_KAVERI:
1815                 chip_name = "KAVERI";
1816                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1817                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1818                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1819                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1820                 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1821                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1822                 break;
1823         case CHIP_KABINI:
1824                 chip_name = "KABINI";
1825                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1826                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1827                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1828                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1829                 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1830                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1831                 break;
1832         default: BUG();
1833         }
1834
1835         DRM_INFO("Loading %s Microcode\n", chip_name);
1836
1837         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1838         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1839         if (err)
1840                 goto out;
1841         if (rdev->pfp_fw->size != pfp_req_size) {
1842                 printk(KERN_ERR
1843                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1844                        rdev->pfp_fw->size, fw_name);
1845                 err = -EINVAL;
1846                 goto out;
1847         }
1848
1849         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1850         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1851         if (err)
1852                 goto out;
1853         if (rdev->me_fw->size != me_req_size) {
1854                 printk(KERN_ERR
1855                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1856                        rdev->me_fw->size, fw_name);
1857                 err = -EINVAL;
                     goto out;
1858         }
1859
1860         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1861         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1862         if (err)
1863                 goto out;
1864         if (rdev->ce_fw->size != ce_req_size) {
1865                 printk(KERN_ERR
1866                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1867                        rdev->ce_fw->size, fw_name);
1868                 err = -EINVAL;
                     goto out;
1869         }
1870
1871         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
1872         err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
1873         if (err)
1874                 goto out;
1875         if (rdev->mec_fw->size != mec_req_size) {
1876                 printk(KERN_ERR
1877                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1878                        rdev->mec_fw->size, fw_name);
1879                 err = -EINVAL;
                     goto out;
1880         }
1881
1882         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1883         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1884         if (err)
1885                 goto out;
1886         if (rdev->rlc_fw->size != rlc_req_size) {
1887                 printk(KERN_ERR
1888                        "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
1889                        rdev->rlc_fw->size, fw_name);
1890                 err = -EINVAL;
                     goto out;
1891         }
1892
1893         snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
1894         err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
1895         if (err)
1896                 goto out;
1897         if (rdev->sdma_fw->size != sdma_req_size) {
1898                 printk(KERN_ERR
1899                        "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
1900                        rdev->sdma_fw->size, fw_name);
1901                 err = -EINVAL;
                     goto out;
1902         }
1903
1904         /* No SMC, MC ucode on APUs */
1905         if (!(rdev->flags & RADEON_IS_IGP)) {
1906                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1907                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1908                 if (err)
1909                         goto out;
1910                 if (rdev->mc_fw->size != mc_req_size) {
1911                         printk(KERN_ERR
1912                                "cik_mc: Bogus length %zu in firmware \"%s\"\n",
1913                                rdev->mc_fw->size, fw_name);
1914                         err = -EINVAL;
                             goto out;
1915                 }
1916
1917                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1918                 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1919                 if (err) {
1920                         printk(KERN_ERR
1921                                "smc: error loading firmware \"%s\"\n",
1922                                fw_name);
1923                         release_firmware(rdev->smc_fw);
1924                         rdev->smc_fw = NULL;
1925                         err = 0;
1926                 } else if (rdev->smc_fw->size != smc_req_size) {
1927                         printk(KERN_ERR
1928                                "cik_smc: Bogus length %zu in firmware \"%s\"\n",
1929                                rdev->smc_fw->size, fw_name);
1930                         err = -EINVAL;
1931                 }
1932         }
1933
1934 out:
1935         if (err) {
1936                 if (err != -EINVAL)
1937                         printk(KERN_ERR
1938                                "cik_cp: Failed to load firmware \"%s\"\n",
1939                                fw_name);
1940                 release_firmware(rdev->pfp_fw);
1941                 rdev->pfp_fw = NULL;
1942                 release_firmware(rdev->me_fw);
1943                 rdev->me_fw = NULL;
1944                 release_firmware(rdev->ce_fw);
1945                 rdev->ce_fw = NULL;
1946                 release_firmware(rdev->rlc_fw);
1947                 rdev->rlc_fw = NULL;
1948                 release_firmware(rdev->mc_fw);
1949                 rdev->mc_fw = NULL;
1950                 release_firmware(rdev->smc_fw);
1951                 rdev->smc_fw = NULL;
1952         }
1953         return err;
1954 }
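
/*
 * Note on the error path above: any failure releases every firmware
 * image fetched so far and clears the pointers, so callers can treat a
 * non-zero return as "no CIK firmware available" without further
 * cleanup.
 */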
1955
1956 /*
1957  * Core functions
1958  */
1959 /**
1960  * cik_tiling_mode_table_init - init the hw tiling table
1961  *
1962  * @rdev: radeon_device pointer
1963  *
1964  * Starting with SI, the tiling setup is done globally in a
1965  * set of 32 tiling modes.  Rather than selecting each set of
1966  * parameters per surface as on older asics, we just select
1967  * which index in the tiling table we want to use, and the
1968  * surface uses those parameters (CIK).
1969  */
1970 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
1971 {
1972         const u32 num_tile_mode_states = 32;
1973         const u32 num_secondary_tile_mode_states = 16;
1974         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
1975         u32 num_pipe_configs;
1976         u32 num_rbs = rdev->config.cik.max_backends_per_se *
1977                 rdev->config.cik.max_shader_engines;
1978
1979         switch (rdev->config.cik.mem_row_size_in_kb) {
1980         case 1:
1981                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
1982                 break;
1983         case 2:
1984         default:
1985                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
1986                 break;
1987         case 4:
1988                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
1989                 break;
1990         }
1991
1992         num_pipe_configs = rdev->config.cik.max_tile_pipes;
1993         if (num_pipe_configs > 8)
1994                 num_pipe_configs = 16;
1995
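        /*
         * Each entry programmed below is also cached in
         * rdev->config.cik.tile_mode_array[] /
         * rdev->config.cik.macrotile_mode_array[] so it can be reported
         * to userspace; surfaces then reference an index into these
         * tables rather than carrying full tiling parameters.
         */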
1996         if (num_pipe_configs == 16) {
1997                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1998                         switch (reg_offset) {
1999                         case 0:
2000                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2001                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2002                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2003                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2004                                 break;
2005                         case 1:
2006                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2007                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2008                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2009                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2010                                 break;
2011                         case 2:
2012                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2013                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2014                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2015                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2016                                 break;
2017                         case 3:
2018                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2019                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2020                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2021                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2022                                 break;
2023                         case 4:
2024                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2025                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2026                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2027                                                  TILE_SPLIT(split_equal_to_row_size));
2028                                 break;
2029                         case 5:
2030                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2031                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2032                                 break;
2033                         case 6:
2034                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2035                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2036                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2037                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2038                                 break;
2039                         case 7:
2040                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2041                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2042                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2043                                                  TILE_SPLIT(split_equal_to_row_size));
2044                                 break;
2045                         case 8:
2046                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2047                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2048                                 break;
2049                         case 9:
2050                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2051                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2052                                 break;
2053                         case 10:
2054                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2055                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2056                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2057                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2058                                 break;
2059                         case 11:
2060                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2061                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2062                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2063                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2064                                 break;
2065                         case 12:
2066                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2067                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2068                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2069                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2070                                 break;
2071                         case 13:
2072                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2073                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2074                                 break;
2075                         case 14:
2076                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2077                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2078                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2079                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2080                                 break;
2081                         case 16:
2082                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2083                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2084                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2085                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2086                                 break;
2087                         case 17:
2088                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2089                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2090                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2091                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2092                                 break;
2093                         case 27:
2094                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2095                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2096                                 break;
2097                         case 28:
2098                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2099                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2100                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2101                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2102                                 break;
2103                         case 29:
2104                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2105                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2106                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2107                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2108                                 break;
2109                         case 30:
2110                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2111                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2112                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2113                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2114                                 break;
2115                         default:
2116                                 gb_tile_moden = 0;
2117                                 break;
2118                         }
2119                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2120                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2121                 }
2122                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2123                         switch (reg_offset) {
2124                         case 0:
2125                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2126                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2127                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2128                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2129                                 break;
2130                         case 1:
2131                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2132                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2133                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2134                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2135                                 break;
2136                         case 2:
2137                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2138                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2139                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2140                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2141                                 break;
2142                         case 3:
2143                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2144                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2145                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2146                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2147                                 break;
2148                         case 4:
2149                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2150                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2151                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2152                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2153                                 break;
2154                         case 5:
2155                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2156                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2157                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2158                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2159                                 break;
2160                         case 6:
2161                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2162                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2163                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2164                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2165                                 break;
2166                         case 8:
2167                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2168                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2169                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2170                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2171                                 break;
2172                         case 9:
2173                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2174                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2175                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2176                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2177                                 break;
2178                         case 10:
2179                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2180                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2181                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2182                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2183                                 break;
2184                         case 11:
2185                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2186                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2187                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2188                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2189                                 break;
2190                         case 12:
2191                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2192                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2193                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2194                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2195                                 break;
2196                         case 13:
2197                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2198                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2199                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2200                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2201                                 break;
2202                         case 14:
2203                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2204                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2205                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2206                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2207                                 break;
2208                         default:
2209                                 gb_tile_moden = 0;
2210                                 break;
2211                         }
                             rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2212                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2213                 }
2214         } else if (num_pipe_configs == 8) {
2215                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2216                         switch (reg_offset) {
2217                         case 0:
2218                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2219                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2220                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2221                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2222                                 break;
2223                         case 1:
2224                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2225                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2226                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2227                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2228                                 break;
2229                         case 2:
2230                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2231                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2232                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2233                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2234                                 break;
2235                         case 3:
2236                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2237                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2238                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2239                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2240                                 break;
2241                         case 4:
2242                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2243                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2244                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2245                                                  TILE_SPLIT(split_equal_to_row_size));
2246                                 break;
2247                         case 5:
2248                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2249                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2250                                 break;
2251                         case 6:
2252                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2253                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2254                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2255                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2256                                 break;
2257                         case 7:
2258                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2259                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2260                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2261                                                  TILE_SPLIT(split_equal_to_row_size));
2262                                 break;
2263                         case 8:
2264                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2265                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2266                                 break;
2267                         case 9:
2268                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2269                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2270                                 break;
2271                         case 10:
2272                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2273                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2274                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2275                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2276                                 break;
2277                         case 11:
2278                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2279                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2280                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2281                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2282                                 break;
2283                         case 12:
2284                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2285                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2286                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2287                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2288                                 break;
2289                         case 13:
2290                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2291                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2292                                 break;
2293                         case 14:
2294                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2295                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2296                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2297                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2298                                 break;
2299                         case 16:
2300                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2301                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2302                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2303                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2304                                 break;
2305                         case 17:
2306                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2307                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2308                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2309                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2310                                 break;
2311                         case 27:
2312                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2313                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2314                                 break;
2315                         case 28:
2316                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2317                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2318                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2319                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2320                                 break;
2321                         case 29:
2322                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2323                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2324                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2325                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2326                                 break;
2327                         case 30:
2328                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2329                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2330                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2331                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2332                                 break;
2333                         default:
2334                                 gb_tile_moden = 0;
2335                                 break;
2336                         }
2337                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2338                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2339                 }
2340                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2341                         switch (reg_offset) {
2342                         case 0:
2343                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2344                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2345                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2346                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2347                                 break;
2348                         case 1:
2349                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2350                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2351                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2352                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2353                                 break;
2354                         case 2:
2355                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2356                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2357                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2358                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2359                                 break;
2360                         case 3:
2361                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2362                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2363                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2364                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2365                                 break;
2366                         case 4:
2367                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2368                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2369                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2370                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2371                                 break;
2372                         case 5:
2373                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2374                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2375                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2376                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2377                                 break;
2378                         case 6:
2379                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2380                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2381                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2382                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2383                                 break;
2384                         case 8:
2385                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2386                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2387                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2388                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2389                                 break;
2390                         case 9:
2391                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2392                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2393                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2394                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2395                                 break;
2396                         case 10:
2397                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2398                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2399                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2400                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2401                                 break;
2402                         case 11:
2403                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2404                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2405                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2406                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2407                                 break;
2408                         case 12:
2409                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2410                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2411                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2412                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2413                                 break;
2414                         case 13:
2415                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2416                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2417                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2418                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2419                                 break;
2420                         case 14:
2421                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2422                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2423                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2424                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2425                                 break;
2426                         default:
2427                                 gb_tile_moden = 0;
2428                                 break;
2429                         }
2430                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2431                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2432                 }
2433         } else if (num_pipe_configs == 4) {
2434                 if (num_rbs == 4) {
2435                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2436                                 switch (reg_offset) {
2437                                 case 0:
2438                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2439                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2440                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2441                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2442                                         break;
2443                                 case 1:
2444                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2445                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2446                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2447                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2448                                         break;
2449                                 case 2:
2450                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2451                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2452                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2453                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2454                                         break;
2455                                 case 3:
2456                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2457                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2458                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2459                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2460                                         break;
2461                                 case 4:
2462                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2463                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2464                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2465                                                          TILE_SPLIT(split_equal_to_row_size));
2466                                         break;
2467                                 case 5:
2468                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2469                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2470                                         break;
2471                                 case 6:
2472                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2473                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2474                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2475                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2476                                         break;
2477                                 case 7:
2478                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2479                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2480                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2481                                                          TILE_SPLIT(split_equal_to_row_size));
2482                                         break;
2483                                 case 8:
2484                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2485                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16));
2486                                         break;
2487                                 case 9:
2488                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2489                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2490                                         break;
2491                                 case 10:
2492                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2493                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2494                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2495                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2496                                         break;
2497                                 case 11:
2498                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2499                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2500                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2501                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2502                                         break;
2503                                 case 12:
2504                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2505                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2506                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2507                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2508                                         break;
2509                                 case 13:
2510                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2511                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2512                                         break;
2513                                 case 14:
2514                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2515                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2516                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2517                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2518                                         break;
2519                                 case 16:
2520                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2521                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2522                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2523                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2524                                         break;
2525                                 case 17:
2526                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2527                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2528                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2529                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2530                                         break;
2531                                 case 27:
2532                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2533                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2534                                         break;
2535                                 case 28:
2536                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2537                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2538                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2539                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2540                                         break;
2541                                 case 29:
2542                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2543                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2544                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2545                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2546                                         break;
2547                                 case 30:
2548                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2549                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2550                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2551                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2552                                         break;
2553                                 default:
2554                                         gb_tile_moden = 0;
2555                                         break;
2556                                 }
2557                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2558                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2559                         }
2560                 } else if (num_rbs < 4) {
2561                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2562                                 switch (reg_offset) {
2563                                 case 0:
2564                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2565                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2566                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2567                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2568                                         break;
2569                                 case 1:
2570                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2571                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2572                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2573                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2574                                         break;
2575                                 case 2:
2576                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2577                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2578                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2579                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2580                                         break;
2581                                 case 3:
2582                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2583                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2584                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2585                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2586                                         break;
2587                                 case 4:
2588                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2589                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2590                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2591                                                          TILE_SPLIT(split_equal_to_row_size));
2592                                         break;
2593                                 case 5:
2594                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2595                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2596                                         break;
2597                                 case 6:
2598                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2599                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2600                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2601                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2602                                         break;
2603                                 case 7:
2604                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2605                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2606                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2607                                                          TILE_SPLIT(split_equal_to_row_size));
2608                                         break;
2609                                 case 8:
2610                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2611                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16));
2612                                         break;
2613                                 case 9:
2614                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2615                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2616                                         break;
2617                                 case 10:
2618                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2619                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2620                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2621                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2622                                         break;
2623                                 case 11:
2624                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2625                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2626                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2627                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2628                                         break;
2629                                 case 12:
2630                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2631                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2632                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2633                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2634                                         break;
2635                                 case 13:
2636                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2637                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2638                                         break;
2639                                 case 14:
2640                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2641                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2642                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2643                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2644                                         break;
2645                                 case 16:
2646                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2647                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2648                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2649                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2650                                         break;
2651                                 case 17:
2652                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2653                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2654                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2655                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2656                                         break;
2657                                 case 27:
2658                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2659                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2660                                         break;
2661                                 case 28:
2662                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2663                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2664                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2665                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2666                                         break;
2667                                 case 29:
2668                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2669                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2670                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2671                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2672                                         break;
2673                                 case 30:
2674                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2675                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2676                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2677                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2678                                         break;
2679                                 default:
2680                                         gb_tile_moden = 0;
2681                                         break;
2682                                 }
2683                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2684                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2685                         }
2686                 }
2687                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2688                         switch (reg_offset) {
2689                         case 0:
2690                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2691                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2692                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2693                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2694                                 break;
2695                         case 1:
2696                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2697                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2698                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2699                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2700                                 break;
2701                         case 2:
2702                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2703                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2704                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2705                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2706                                 break;
2707                         case 3:
2708                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2709                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2710                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2711                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2712                                 break;
2713                         case 4:
2714                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2715                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2716                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2717                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2718                                 break;
2719                         case 5:
2720                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2721                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2722                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2723                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2724                                 break;
2725                         case 6:
2726                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2727                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2728                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2729                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2730                                 break;
2731                         case 8:
2732                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2733                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2734                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2735                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2736                                 break;
2737                         case 9:
2738                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2739                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2740                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2741                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2742                                 break;
2743                         case 10:
2744                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2745                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2746                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2747                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2748                                 break;
2749                         case 11:
2750                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2751                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2752                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2753                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2754                                 break;
2755                         case 12:
2756                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2757                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2758                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2759                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2760                                 break;
2761                         case 13:
2762                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2763                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2764                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2765                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2766                                 break;
2767                         case 14:
2768                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2769                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2770                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2771                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2772                                 break;
2773                         default:
2774                                 gb_tile_moden = 0;
2775                                 break;
2776                         }
2777                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2778                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2779                 }
2780         } else if (num_pipe_configs == 2) {
2781                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2782                         switch (reg_offset) {
2783                         case 0:
2784                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2785                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2786                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2787                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2788                                 break;
2789                         case 1:
2790                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2791                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2792                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2793                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2794                                 break;
2795                         case 2:
2796                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2797                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2798                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2799                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2800                                 break;
2801                         case 3:
2802                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2803                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2804                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2805                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2806                                 break;
2807                         case 4:
2808                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2809                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2810                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2811                                                  TILE_SPLIT(split_equal_to_row_size));
2812                                 break;
2813                         case 5:
2814                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2815                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2816                                 break;
2817                         case 6:
2818                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2819                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2820                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2821                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2822                                 break;
2823                         case 7:
2824                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2825                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2826                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2827                                                  TILE_SPLIT(split_equal_to_row_size));
2828                                 break;
2829                         case 8:
2830                                 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
2831                                 break;
2832                         case 9:
2833                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2834                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2835                                 break;
2836                         case 10:
2837                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2838                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2839                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2840                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2841                                 break;
2842                         case 11:
2843                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2844                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2845                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2846                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2847                                 break;
2848                         case 12:
2849                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2850                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2851                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2852                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2853                                 break;
2854                         case 13:
2855                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2856                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2857                                 break;
2858                         case 14:
2859                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2860                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2861                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2862                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2863                                 break;
2864                         case 16:
2865                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2866                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2867                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2868                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2869                                 break;
2870                         case 17:
2871                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2872                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2873                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2874                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2875                                 break;
2876                         case 27:
2877                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2878                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2879                                 break;
2880                         case 28:
2881                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2882                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2883                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2884                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2885                                 break;
2886                         case 29:
2887                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2888                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2889                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2890                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2891                                 break;
2892                         case 30:
2893                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2894                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2895                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2896                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2897                                 break;
2898                         default:
2899                                 gb_tile_moden = 0;
2900                                 break;
2901                         }
2902                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2903                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2904                 }
2905                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2906                         switch (reg_offset) {
2907                         case 0:
2908                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2909                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2910                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2911                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2912                                 break;
2913                         case 1:
2914                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2915                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2916                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2917                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2918                                 break;
2919                         case 2:
2920                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2921                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2922                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2923                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2924                                 break;
2925                         case 3:
2926                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2927                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2928                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2929                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2930                                 break;
2931                         case 4:
2932                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2933                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2934                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2935                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2936                                 break;
2937                         case 5:
2938                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2939                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2940                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2941                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2942                                 break;
2943                         case 6:
2944                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2945                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2946                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2947                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2948                                 break;
2949                         case 8:
2950                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2951                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2952                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2953                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2954                                 break;
2955                         case 9:
2956                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2957                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2958                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2959                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2960                                 break;
2961                         case 10:
2962                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2963                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2964                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2965                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2966                                 break;
2967                         case 11:
2968                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2969                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2970                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2971                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2972                                 break;
2973                         case 12:
2974                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2975                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2976                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2977                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2978                                 break;
2979                         case 13:
2980                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2981                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2982                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2983                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2984                                 break;
2985                         case 14:
2986                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2987                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2988                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2989                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2990                                 break;
2991                         default:
2992                                 gb_tile_moden = 0;
2993                                 break;
2994                         }
2995                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2996                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2997                 }
2998         } else
2999                 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3000 }
3001
3002 /**
3003  * cik_select_se_sh - select which SE, SH to address
3004  *
3005  * @rdev: radeon_device pointer
3006  * @se_num: shader engine to address
3007  * @sh_num: sh block to address
3008  *
3009  * Select which SE, SH combinations to address. Certain
3010  * registers are instanced per SE or SH.  0xffffffff means
3011  * broadcast to all SEs or SHs (CIK).
3012  */
3013 static void cik_select_se_sh(struct radeon_device *rdev,
3014                              u32 se_num, u32 sh_num)
3015 {
3016         u32 data = INSTANCE_BROADCAST_WRITES;
3017
3018         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3019                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3020         else if (se_num == 0xffffffff)
3021                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3022         else if (sh_num == 0xffffffff)
3023                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3024         else
3025                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3026         WREG32(GRBM_GFX_INDEX, data);
3027 }
3028
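/*
 * Illustrative usage sketch (not part of the original source; se_idx and
 * sh_idx are placeholder indices): instanced GRBM registers are typically
 * accessed by selecting one SE/SH pair, touching the per-instance register,
 * and then restoring broadcast mode, mirroring the pattern used in
 * cik_setup_rb() below:
 *
 *	cik_select_se_sh(rdev, se_idx, sh_idx);
 *	data = RREG32(CC_RB_BACKEND_DISABLE);
 *	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
 */
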
3029 /**
3030  * cik_create_bitmask - create a bitmask
3031  *
3032  * @bit_width: length of the mask
3033  *
3034  * Create a variable length bit mask (CIK).
3035  * Returns the bitmask.
3036  */
3037 static u32 cik_create_bitmask(u32 bit_width)
3038 {
3039         u32 i, mask = 0;
3040
3041         for (i = 0; i < bit_width; i++) {
3042                 mask <<= 1;
3043                 mask |= 1;
3044         }
3045         return mask;
3046 }
3047
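/*
 * Worked example (illustrative): the loop above sets the low bit_width bits,
 * i.e. the equivalent of (1 << bit_width) - 1 for bit_width < 32, so
 * cik_create_bitmask(2) returns 0x3 and cik_create_bitmask(4) returns 0xf.
 */
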
3048 /**
3049  * cik_get_rb_disabled - get the disabled render backend (RB) bitmask
3050  *
3051  * @rdev: radeon_device pointer
3052  * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3053  * @sh_per_se: number of SH blocks per SE for the asic
3054  *
3055  * Calculates the bitmask of disabled RBs for the currently selected
3056  * SE/SH (CIK).
3057  * Returns the disabled RB bitmask.
3058  */
3059 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3060                               u32 max_rb_num_per_se,
3061                               u32 sh_per_se)
3062 {
3063         u32 data, mask;
3064
3065         data = RREG32(CC_RB_BACKEND_DISABLE);
3066         if (data & 1)
3067                 data &= BACKEND_DISABLE_MASK;
3068         else
3069                 data = 0;
3070         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3071
3072         data >>= BACKEND_DISABLE_SHIFT;
3073
3074         mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3075
3076         return data & mask;
3077 }
3078
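/*
 * Worked example (illustrative): cik_setup_rb() below calls this with
 * max_rb_num_per_se = rdev->config.cik.max_backends_per_se.  On Bonaire
 * (2 RBs per SE, 1 SH per SE) the mask is cik_create_bitmask(2 / 1) = 0x3,
 * so the return value has one bit set for each disabled RB of the currently
 * selected SE/SH.
 */
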
3079 /**
3080  * cik_setup_rb - setup the RBs on the asic
3081  *
3082  * @rdev: radeon_device pointer
3083  * @se_num: number of SEs (shader engines) for the asic
3084  * @sh_per_se: number of SH blocks per SE for the asic
3085  * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3086  *
3087  * Configures per-SE/SH RB registers (CIK).
3088  */
3089 static void cik_setup_rb(struct radeon_device *rdev,
3090                          u32 se_num, u32 sh_per_se,
3091                          u32 max_rb_num_per_se)
3092 {
3093         int i, j;
3094         u32 data, mask;
3095         u32 disabled_rbs = 0;
3096         u32 enabled_rbs = 0;
3097
3098         for (i = 0; i < se_num; i++) {
3099                 for (j = 0; j < sh_per_se; j++) {
3100                         cik_select_se_sh(rdev, i, j);
3101                         data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3102                         if (rdev->family == CHIP_HAWAII)
3103                                 disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3104                         else
3105                                 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3106                 }
3107         }
3108         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3109
3110         mask = 1;
3111         for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3112                 if (!(disabled_rbs & mask))
3113                         enabled_rbs |= mask;
3114                 mask <<= 1;
3115         }
3116
3117         rdev->config.cik.backend_enable_mask = enabled_rbs;
3118
3119         for (i = 0; i < se_num; i++) {
3120                 cik_select_se_sh(rdev, i, 0xffffffff);
3121                 data = 0;
3122                 for (j = 0; j < sh_per_se; j++) {
3123                         switch (enabled_rbs & 3) {
3124                         case 0:
3125                                 if (j == 0)
3126                                         data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3127                                 else
3128                                         data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3129                                 break;
3130                         case 1:
3131                                 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3132                                 break;
3133                         case 2:
3134                                 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3135                                 break;
3136                         case 3:
3137                         default:
3138                                 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3139                                 break;
3140                         }
3141                         enabled_rbs >>= 2;
3142                 }
3143                 WREG32(PA_SC_RASTER_CONFIG, data);
3144         }
3145         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3146 }
3147
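/*
 * Worked example (illustrative): on Bonaire (2 SEs, 1 SH/SE, 2 RBs/SE) with
 * no RBs fused off, disabled_rbs is 0 and enabled_rbs is 0xf.  Each SH then
 * consumes two bits of enabled_rbs; since both bits are set, the default
 * case selects RASTER_CONFIG_RB_MAP_2, which is written to
 * PA_SC_RASTER_CONFIG for each SE.  Note that the per-SH field width used
 * when packing disabled_rbs differs between Hawaii and the other CIK parts
 * (HAWAII_RB_BITMAP_WIDTH_PER_SH vs CIK_RB_BITMAP_WIDTH_PER_SH).
 */
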
3148 /**
3149  * cik_gpu_init - setup the 3D engine
3150  *
3151  * @rdev: radeon_device pointer
3152  *
3153  * Configures the 3D engine and tiling configuration
3154  * registers so that the 3D engine is usable.
3155  */
3156 static void cik_gpu_init(struct radeon_device *rdev)
3157 {
3158         u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3159         u32 mc_shared_chmap, mc_arb_ramcfg;
3160         u32 hdp_host_path_cntl;
3161         u32 tmp;
3162         int i, j;
3163
3164         switch (rdev->family) {
3165         case CHIP_BONAIRE:
3166                 rdev->config.cik.max_shader_engines = 2;
3167                 rdev->config.cik.max_tile_pipes = 4;
3168                 rdev->config.cik.max_cu_per_sh = 7;
3169                 rdev->config.cik.max_sh_per_se = 1;
3170                 rdev->config.cik.max_backends_per_se = 2;
3171                 rdev->config.cik.max_texture_channel_caches = 4;
3172                 rdev->config.cik.max_gprs = 256;
3173                 rdev->config.cik.max_gs_threads = 32;
3174                 rdev->config.cik.max_hw_contexts = 8;
3175
3176                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3177                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3178                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3179                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3180                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3181                 break;
3182         case CHIP_HAWAII:
3183                 rdev->config.cik.max_shader_engines = 4;
3184                 rdev->config.cik.max_tile_pipes = 16;
3185                 rdev->config.cik.max_cu_per_sh = 11;
3186                 rdev->config.cik.max_sh_per_se = 1;
3187                 rdev->config.cik.max_backends_per_se = 4;
3188                 rdev->config.cik.max_texture_channel_caches = 16;
3189                 rdev->config.cik.max_gprs = 256;
3190                 rdev->config.cik.max_gs_threads = 32;
3191                 rdev->config.cik.max_hw_contexts = 8;
3192
3193                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3194                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3195                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3196                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3197                 gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3198                 break;
3199         case CHIP_KAVERI:
3200                 rdev->config.cik.max_shader_engines = 1;
3201                 rdev->config.cik.max_tile_pipes = 4;
3202                 if ((rdev->pdev->device == 0x1304) ||
3203                     (rdev->pdev->device == 0x1305) ||
3204                     (rdev->pdev->device == 0x130C) ||
3205                     (rdev->pdev->device == 0x130F) ||
3206                     (rdev->pdev->device == 0x1310) ||
3207                     (rdev->pdev->device == 0x1311) ||
3208                     (rdev->pdev->device == 0x131C)) {
3209                         rdev->config.cik.max_cu_per_sh = 8;
3210                         rdev->config.cik.max_backends_per_se = 2;
3211                 } else if ((rdev->pdev->device == 0x1309) ||
3212                            (rdev->pdev->device == 0x130A) ||
3213                            (rdev->pdev->device == 0x130D) ||
3214                            (rdev->pdev->device == 0x1313) ||
3215                            (rdev->pdev->device == 0x131D)) {
3216                         rdev->config.cik.max_cu_per_sh = 6;
3217                         rdev->config.cik.max_backends_per_se = 2;
3218                 } else if ((rdev->pdev->device == 0x1306) ||
3219                            (rdev->pdev->device == 0x1307) ||
3220                            (rdev->pdev->device == 0x130B) ||
3221                            (rdev->pdev->device == 0x130E) ||
3222                            (rdev->pdev->device == 0x1315) ||
3223                            (rdev->pdev->device == 0x131B)) {
3224                         rdev->config.cik.max_cu_per_sh = 4;
3225                         rdev->config.cik.max_backends_per_se = 1;
3226                 } else {
3227                         rdev->config.cik.max_cu_per_sh = 3;
3228                         rdev->config.cik.max_backends_per_se = 1;
3229                 }
3230                 rdev->config.cik.max_sh_per_se = 1;
3231                 rdev->config.cik.max_texture_channel_caches = 4;
3232                 rdev->config.cik.max_gprs = 256;
3233                 rdev->config.cik.max_gs_threads = 16;
3234                 rdev->config.cik.max_hw_contexts = 8;
3235
3236                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3237                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3238                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3239                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3240                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3241                 break;
3242         case CHIP_KABINI:
3243         default:
3244                 rdev->config.cik.max_shader_engines = 1;
3245                 rdev->config.cik.max_tile_pipes = 2;
3246                 rdev->config.cik.max_cu_per_sh = 2;
3247                 rdev->config.cik.max_sh_per_se = 1;
3248                 rdev->config.cik.max_backends_per_se = 1;
3249                 rdev->config.cik.max_texture_channel_caches = 2;
3250                 rdev->config.cik.max_gprs = 256;
3251                 rdev->config.cik.max_gs_threads = 16;
3252                 rdev->config.cik.max_hw_contexts = 8;
3253
3254                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3255                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3256                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3257                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3258                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3259                 break;
3260         }
3261
3262         /* Initialize HDP */
3263         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3264                 WREG32((0x2c14 + j), 0x00000000);
3265                 WREG32((0x2c18 + j), 0x00000000);
3266                 WREG32((0x2c1c + j), 0x00000000);
3267                 WREG32((0x2c20 + j), 0x00000000);
3268                 WREG32((0x2c24 + j), 0x00000000);
3269         }
3270
3271         WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3272
3273         WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3274
3275         mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3276         mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3277
3278         rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3279         rdev->config.cik.mem_max_burst_length_bytes = 256;
3280         tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3281         rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3282         if (rdev->config.cik.mem_row_size_in_kb > 4)
3283                 rdev->config.cik.mem_row_size_in_kb = 4;
3284         /* XXX use MC settings? */
3285         rdev->config.cik.shader_engine_tile_size = 32;
3286         rdev->config.cik.num_gpus = 1;
3287         rdev->config.cik.multi_gpu_tile_size = 64;
3288
3289         /* fix up row size */
3290         gb_addr_config &= ~ROW_SIZE_MASK;
3291         switch (rdev->config.cik.mem_row_size_in_kb) {
3292         case 1:
3293         default:
3294                 gb_addr_config |= ROW_SIZE(0);
3295                 break;
3296         case 2:
3297                 gb_addr_config |= ROW_SIZE(1);
3298                 break;
3299         case 4:
3300                 gb_addr_config |= ROW_SIZE(2);
3301                 break;
3302         }
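
	/*
	 * Worked example (illustrative): with NOOFCOLS = 1 above, the row
	 * size is 4 * (1 << (8 + 1)) / 1024 = 2 KB, so ROW_SIZE(1) is
	 * programmed; NOOFCOLS = 0 yields 1 KB rows, and anything larger
	 * than 4 KB is clamped to the 4 KB case.
	 */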
3303
3304         /* setup tiling info dword.  gb_addr_config is not adequate since it does
3305          * not have bank info, so create a custom tiling dword.
3306          * bits 3:0   num_pipes
3307          * bits 7:4   num_banks
3308          * bits 11:8  group_size
3309          * bits 15:12 row_size
3310          */
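	/* Worked example (illustrative): Bonaire has num_tile_pipes = 4, so
	 * the switch below places 2 in bits 3:0; bits 7:4 come from the
	 * NOOFBANK field of mc_arb_ramcfg, bits 11:8 from the pipe
	 * interleave size in gb_addr_config and bits 15:12 from its
	 * ROW_SIZE field.
	 */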
3311         rdev->config.cik.tile_config = 0;
3312         switch (rdev->config.cik.num_tile_pipes) {
3313         case 1:
3314                 rdev->config.cik.tile_config |= (0 << 0);
3315                 break;
3316         case 2:
3317                 rdev->config.cik.tile_config |= (1 << 0);
3318                 break;
3319         case 4:
3320                 rdev->config.cik.tile_config |= (2 << 0);
3321                 break;
3322         case 8:
3323         default:
3324                 /* XXX what about 12? */
3325                 rdev->config.cik.tile_config |= (3 << 0);
3326                 break;
3327         }
3328         rdev->config.cik.tile_config |=
3329                 ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3330         rdev->config.cik.tile_config |=
3331                 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3332         rdev->config.cik.tile_config |=
3333                 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3334
3335         WREG32(GB_ADDR_CONFIG, gb_addr_config);
3336         WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3337         WREG32(DMIF_ADDR_CALC, gb_addr_config);
3338         WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3339         WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3340         WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3341         WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3342         WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3343
3344         cik_tiling_mode_table_init(rdev);
3345
3346         cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3347                      rdev->config.cik.max_sh_per_se,
3348                      rdev->config.cik.max_backends_per_se);
3349
3350         /* set HW defaults for 3D engine */
3351         WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3352
3353         WREG32(SX_DEBUG_1, 0x20);
3354
3355         WREG32(TA_CNTL_AUX, 0x00010000);
3356
3357         tmp = RREG32(SPI_CONFIG_CNTL);
3358         tmp |= 0x03000000;
3359         WREG32(SPI_CONFIG_CNTL, tmp);
3360
3361         WREG32(SQ_CONFIG, 1);
3362
3363         WREG32(DB_DEBUG, 0);
3364
3365         tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3366         tmp |= 0x00000400;
3367         WREG32(DB_DEBUG2, tmp);
3368
3369         tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3370         tmp |= 0x00020200;
3371         WREG32(DB_DEBUG3, tmp);
3372
3373         tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3374         tmp |= 0x00018208;
3375         WREG32(CB_HW_CONTROL, tmp);
3376
3377         WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3378
3379         WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3380                                  SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3381                                  SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3382                                  SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3383
3384         WREG32(VGT_NUM_INSTANCES, 1);
3385
3386         WREG32(CP_PERFMON_CNTL, 0);
3387
3388         WREG32(SQ_CONFIG, 0);
3389
3390         WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3391                                           FORCE_EOV_MAX_REZ_CNT(255)));
3392
3393         WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3394                AUTO_INVLD_EN(ES_AND_GS_AUTO));
3395
3396         WREG32(VGT_GS_VERTEX_REUSE, 16);
3397         WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3398
3399         tmp = RREG32(HDP_MISC_CNTL);
3400         tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3401         WREG32(HDP_MISC_CNTL, tmp);
3402
3403         hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3404         WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3405
3406         WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3407         WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3408
3409         udelay(50);
3410 }
3411
3412 /*
3413  * GPU scratch registers helper functions.
3414  */
3415 /**
3416  * cik_scratch_init - setup driver info for CP scratch regs
3417  *
3418  * @rdev: radeon_device pointer
3419  *
3420  * Set up the number and offset of the CP scratch registers.
3421  * NOTE: use of CP scratch registers is a legacy interface and
3422  * is not used by default on newer asics (r6xx+).  On newer asics,
3423  * memory buffers are used for fences rather than scratch regs.
3424  */
3425 static void cik_scratch_init(struct radeon_device *rdev)
3426 {
3427         int i;
3428
3429         rdev->scratch.num_reg = 7;
3430         rdev->scratch.reg_base = SCRATCH_REG0;
3431         for (i = 0; i < rdev->scratch.num_reg; i++) {
3432                 rdev->scratch.free[i] = true;
3433                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3434         }
3435 }
3436
3437 /**
3438  * cik_ring_test - basic gfx ring test
3439  *
3440  * @rdev: radeon_device pointer
3441  * @ring: radeon_ring structure holding ring information
3442  *
3443  * Allocate a scratch register and write to it using the gfx ring (CIK).
3444  * Provides a basic gfx ring test to verify that the ring is working.
3445  * Used by cik_cp_gfx_resume().
3446  * Returns 0 on success, error on failure.
3447  */
3448 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3449 {
3450         uint32_t scratch;
3451         uint32_t tmp = 0;
3452         unsigned i;
3453         int r;
3454
3455         r = radeon_scratch_get(rdev, &scratch);
3456         if (r) {
3457                 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3458                 return r;
3459         }
3460         WREG32(scratch, 0xCAFEDEAD);
3461         r = radeon_ring_lock(rdev, ring, 3);
3462         if (r) {
3463                 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3464                 radeon_scratch_free(rdev, scratch);
3465                 return r;
3466         }
3467         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3468         radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3469         radeon_ring_write(ring, 0xDEADBEEF);
3470         radeon_ring_unlock_commit(rdev, ring);
3471
3472         for (i = 0; i < rdev->usec_timeout; i++) {
3473                 tmp = RREG32(scratch);
3474                 if (tmp == 0xDEADBEEF)
3475                         break;
3476                 DRM_UDELAY(1);
3477         }
3478         if (i < rdev->usec_timeout) {
3479                 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3480         } else {
3481                 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3482                           ring->idx, scratch, tmp);
3483                 r = -EINVAL;
3484         }
3485         radeon_scratch_free(rdev, scratch);
3486         return r;
3487 }
3488
3489 /**
3490  * cik_hdp_flush_cp_ring_emit - emit an HDP flush on the CP
3491  *
3492  * @rdev: radeon_device pointer
3493  * @ridx: radeon ring index
3494  *
3495  * Emits an HDP flush on the CP; shared by the gfx and compute fence emit paths below.
3496  */
3497 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3498                                        int ridx)
3499 {
3500         struct radeon_ring *ring = &rdev->ring[ridx];
3501
3502         /* We should be using the new WAIT_REG_MEM special op packet here
3503          * but it causes the CP to hang
3504          */
3505         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3506         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3507                                  WRITE_DATA_DST_SEL(0)));
3508         radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
3509         radeon_ring_write(ring, 0);
3510         radeon_ring_write(ring, 0);
3511 }
3512
3513 /**
3514  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3515  *
3516  * @rdev: radeon_device pointer
3517  * @fence: radeon fence object
3518  *
3519  * Emits a fence sequence number on the gfx ring and flushes
3520  * GPU caches.
3521  */
3522 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3523                              struct radeon_fence *fence)
3524 {
3525         struct radeon_ring *ring = &rdev->ring[fence->ring];
3526         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3527
3528         /* EVENT_WRITE_EOP - flush caches, send int */
3529         radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3530         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3531                                  EOP_TC_ACTION_EN |
3532                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3533                                  EVENT_INDEX(5)));
3534         radeon_ring_write(ring, addr & 0xfffffffc);
3535         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3536         radeon_ring_write(ring, fence->seq);
3537         radeon_ring_write(ring, 0);
3538         /* HDP flush */
3539         cik_hdp_flush_cp_ring_emit(rdev, fence->ring);
3540 }
3541
3542 /**
3543  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3544  *
3545  * @rdev: radeon_device pointer
3546  * @fence: radeon fence object
3547  *
3548  * Emits a fence sequence number on the compute ring and flushes
3549  * GPU caches.
3550  */
3551 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3552                                  struct radeon_fence *fence)
3553 {
3554         struct radeon_ring *ring = &rdev->ring[fence->ring];
3555         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3556
3557         /* RELEASE_MEM - flush caches, send int */
3558         radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3559         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3560                                  EOP_TC_ACTION_EN |
3561                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3562                                  EVENT_INDEX(5)));
3563         radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3564         radeon_ring_write(ring, addr & 0xfffffffc);
3565         radeon_ring_write(ring, upper_32_bits(addr));
3566         radeon_ring_write(ring, fence->seq);
3567         radeon_ring_write(ring, 0);
3568         /* HDP flush */
3569         cik_hdp_flush_cp_ring_emit(rdev, fence->ring);
3570 }
3571
3572 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3573                              struct radeon_ring *ring,
3574                              struct radeon_semaphore *semaphore,
3575                              bool emit_wait)
3576 {
3577         uint64_t addr = semaphore->gpu_addr;
3578         unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3579
3580         radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3581         radeon_ring_write(ring, addr & 0xffffffff);
3582         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3583
3584         return true;
3585 }
3586
3587 /**
3588  * cik_copy_cpdma - copy pages using the CP DMA engine
3589  *
3590  * @rdev: radeon_device pointer
3591  * @src_offset: src GPU address
3592  * @dst_offset: dst GPU address
3593  * @num_gpu_pages: number of GPU pages to xfer
3594  * @fence: radeon fence object
3595  *
3596  * Copy GPU pages using the CP DMA engine (CIK+).
3597  * Used by the radeon ttm implementation to move pages if
3598  * registered as the asic copy callback.
3599  */
3600 int cik_copy_cpdma(struct radeon_device *rdev,
3601                    uint64_t src_offset, uint64_t dst_offset,
3602                    unsigned num_gpu_pages,
3603                    struct radeon_fence **fence)
3604 {
3605         struct radeon_semaphore *sem = NULL;
3606         int ring_index = rdev->asic->copy.blit_ring_index;
3607         struct radeon_ring *ring = &rdev->ring[ring_index];
3608         u32 size_in_bytes, cur_size_in_bytes, control;
3609         int i, num_loops;
3610         int r = 0;
3611
3612         r = radeon_semaphore_create(rdev, &sem);
3613         if (r) {
3614                 DRM_ERROR("radeon: moving bo (%d).\n", r);
3615                 return r;
3616         }
3617
3618         size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3619         num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3620         r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3621         if (r) {
3622                 DRM_ERROR("radeon: moving bo (%d).\n", r);
3623                 radeon_semaphore_free(rdev, &sem, NULL);
3624                 return r;
3625         }
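        /* Worked example (illustrative, assuming 4 KiB GPU pages): copying
         * 1024 pages is 0x400000 bytes; each DMA_DATA packet below moves at
         * most 0x1fffff bytes, so num_loops = 3 and the ring is locked for
         * 3 * 7 + 18 = 39 dwords (7 dwords per packet plus headroom for the
         * semaphore sync and fence emission).
         */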
3626
3627         radeon_semaphore_sync_to(sem, *fence);
3628         radeon_semaphore_sync_rings(rdev, sem, ring->idx);
3629
3630         for (i = 0; i < num_loops; i++) {
3631                 cur_size_in_bytes = size_in_bytes;
3632                 if (cur_size_in_bytes > 0x1fffff)
3633                         cur_size_in_bytes = 0x1fffff;
3634                 size_in_bytes -= cur_size_in_bytes;
3635                 control = 0;
3636                 if (size_in_bytes == 0)
3637                         control |= PACKET3_DMA_DATA_CP_SYNC;
3638                 radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3639                 radeon_ring_write(ring, control);
3640                 radeon_ring_write(ring, lower_32_bits(src_offset));
3641                 radeon_ring_write(ring, upper_32_bits(src_offset));
3642                 radeon_ring_write(ring, lower_32_bits(dst_offset));
3643                 radeon_ring_write(ring, upper_32_bits(dst_offset));
3644                 radeon_ring_write(ring, cur_size_in_bytes);
3645                 src_offset += cur_size_in_bytes;
3646                 dst_offset += cur_size_in_bytes;
3647         }
3648
3649         r = radeon_fence_emit(rdev, fence, ring->idx);
3650         if (r) {
3651                 radeon_ring_unlock_undo(rdev, ring);
3652                 return r;
3653         }
3654
3655         radeon_ring_unlock_commit(rdev, ring);
3656         radeon_semaphore_free(rdev, &sem, *fence);
3657
3658         return r;
3659 }
3660
3661 /*
3662  * IB stuff
3663  */
3664 /**
3665  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3666  *
3667  * @rdev: radeon_device pointer
3668  * @ib: radeon indirect buffer object
3669  *
3670  * Emits a DE (drawing engine) or CE (constant engine) IB
3671  * on the gfx ring.  IBs are usually generated by userspace
3672  * acceleration drivers and submitted to the kernel for
3673  * scheduling on the ring.  This function schedules the IB
3674  * on the gfx ring for execution by the GPU.
3675  */
3676 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3677 {
3678         struct radeon_ring *ring = &rdev->ring[ib->ring];
3679         u32 header, control = INDIRECT_BUFFER_VALID;
3680
3681         if (ib->is_const_ib) {
3682                 /* set switch buffer packet before const IB */
3683                 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3684                 radeon_ring_write(ring, 0);
3685
3686                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3687         } else {
3688                 u32 next_rptr;
3689                 if (ring->rptr_save_reg) {
3690                         next_rptr = ring->wptr + 3 + 4;
3691                         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3692                         radeon_ring_write(ring, ((ring->rptr_save_reg -
3693                                                   PACKET3_SET_UCONFIG_REG_START) >> 2));
3694                         radeon_ring_write(ring, next_rptr);
3695                 } else if (rdev->wb.enabled) {
3696                         next_rptr = ring->wptr + 5 + 4;
3697                         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3698                         radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3699                         radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3700                         radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3701                         radeon_ring_write(ring, next_rptr);
3702                 }
3703
3704                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3705         }
3706
3707         control |= ib->length_dw |
3708                 (ib->vm ? (ib->vm->id << 24) : 0);
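        /* Example (illustrative): a 256-dword IB bound to VM id 4 yields
         * control = INDIRECT_BUFFER_VALID | 256 | (4 << 24); an IB without
         * a VM leaves the VM id bits at 0.
         */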
3709
3710         radeon_ring_write(ring, header);
3711         radeon_ring_write(ring,
3712 #ifdef __BIG_ENDIAN
3713                           (2 << 0) |
3714 #endif
3715                           (ib->gpu_addr & 0xFFFFFFFC));
3716         radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3717         radeon_ring_write(ring, control);
3718 }
3719
3720 /**
3721  * cik_ib_test - basic gfx ring IB test
3722  *
3723  * @rdev: radeon_device pointer
3724  * @ring: radeon_ring structure holding ring information
3725  *
3726  * Allocate an IB and execute it on the gfx ring (CIK).
3727  * Provides a basic gfx ring test to verify that IBs are working.
3728  * Returns 0 on success, error on failure.
3729  */
3730 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3731 {
3732         struct radeon_ib ib;
3733         uint32_t scratch;
3734         uint32_t tmp = 0;
3735         unsigned i;
3736         int r;
3737
3738         r = radeon_scratch_get(rdev, &scratch);
3739         if (r) {
3740                 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3741                 return r;
3742         }
3743         WREG32(scratch, 0xCAFEDEAD);
3744         r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3745         if (r) {
3746                 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3747                 radeon_scratch_free(rdev, scratch);
3748                 return r;
3749         }
3750         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3751         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3752         ib.ptr[2] = 0xDEADBEEF;
3753         ib.length_dw = 3;
3754         r = radeon_ib_schedule(rdev, &ib, NULL);
3755         if (r) {
3756                 radeon_scratch_free(rdev, scratch);
3757                 radeon_ib_free(rdev, &ib);
3758                 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3759                 return r;
3760         }
3761         r = radeon_fence_wait(ib.fence, false);
3762         if (r) {
3763                 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3764                 radeon_scratch_free(rdev, scratch);
3765                 radeon_ib_free(rdev, &ib);
3766                 return r;
3767         }
3768         for (i = 0; i < rdev->usec_timeout; i++) {
3769                 tmp = RREG32(scratch);
3770                 if (tmp == 0xDEADBEEF)
3771                         break;
3772                 DRM_UDELAY(1);
3773         }
3774         if (i < rdev->usec_timeout) {
3775                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3776         } else {
3777                 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3778                           scratch, tmp);
3779                 r = -EINVAL;
3780         }
3781         radeon_scratch_free(rdev, scratch);
3782         radeon_ib_free(rdev, &ib);
3783         return r;
3784 }
3785
3786 /*
3787  * CP.
3788  * On CIK, gfx and compute now have independent command processors.
3789  *
3790  * GFX
3791  * Gfx consists of a single ring and can process both gfx jobs and
3792  * compute jobs.  The gfx CP consists of three microengines (ME):
3793  * PFP - Pre-Fetch Parser
3794  * ME - Micro Engine
3795  * CE - Constant Engine
3796  * The PFP and ME make up what is considered the Drawing Engine (DE).
3797  * The CE is an asynchronous engine used for updating buffer descriptors
3798  * used by the DE so that they can be loaded into cache in parallel
3799  * while the DE is processing state update packets.
3800  *
3801  * Compute
3802  * The compute CP consists of two microengines (ME):
3803  * MEC1 - Compute MicroEngine 1
3804  * MEC2 - Compute MicroEngine 2
3805  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3806  * The queues are exposed to userspace and are programmed directly
3807  * by the compute runtime.
3808  */
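/* Illustrative sketch (not part of the driver): per-queue HQD registers are
 * reached by selecting the (ME, pipe, queue) triple through SRBM before the
 * register access, as the compute rptr/wptr helpers further below do:
 *
 *	mutex_lock(&rdev->srbm_mutex);
 *	cik_srbm_select(rdev, me, pipe, queue, 0);
 *	... RREG32()/WREG32() on the CP_HQD_* registers ...
 *	cik_srbm_select(rdev, 0, 0, 0, 0);
 *	mutex_unlock(&rdev->srbm_mutex);
 */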
3809 /**
3810  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3811  *
3812  * @rdev: radeon_device pointer
3813  * @enable: enable or disable the MEs
3814  *
3815  * Halts or unhalts the gfx MEs.
3816  */
3817 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3818 {
3819         if (enable)
3820                 WREG32(CP_ME_CNTL, 0);
3821         else {
3822                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3823                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3824         }
3825         udelay(50);
3826 }
3827
3828 /**
3829  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3830  *
3831  * @rdev: radeon_device pointer
3832  *
3833  * Loads the gfx PFP, ME, and CE ucode.
3834  * Returns 0 for success, -EINVAL if the ucode is not available.
3835  */
3836 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3837 {
3838         const __be32 *fw_data;
3839         int i;
3840
3841         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3842                 return -EINVAL;
3843
3844         cik_cp_gfx_enable(rdev, false);
3845
3846         /* PFP */
3847         fw_data = (const __be32 *)rdev->pfp_fw->data;
3848         WREG32(CP_PFP_UCODE_ADDR, 0);
3849         for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3850                 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3851         WREG32(CP_PFP_UCODE_ADDR, 0);
3852
3853         /* CE */
3854         fw_data = (const __be32 *)rdev->ce_fw->data;
3855         WREG32(CP_CE_UCODE_ADDR, 0);
3856         for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3857                 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3858         WREG32(CP_CE_UCODE_ADDR, 0);
3859
3860         /* ME */
3861         fw_data = (const __be32 *)rdev->me_fw->data;
3862         WREG32(CP_ME_RAM_WADDR, 0);
3863         for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3864                 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3865         WREG32(CP_ME_RAM_WADDR, 0);
3866
3867         WREG32(CP_PFP_UCODE_ADDR, 0);
3868         WREG32(CP_CE_UCODE_ADDR, 0);
3869         WREG32(CP_ME_RAM_WADDR, 0);
3870         WREG32(CP_ME_RAM_RADDR, 0);
3871         return 0;
3872 }
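/* The three ucode uploads above share the same write pattern; a hypothetical
 * helper (illustrative sketch only, not used by the driver) would look like
 * this:
 */
static inline void cik_example_write_ucode(struct radeon_device *rdev,
					   u32 addr_reg, u32 data_reg,
					   const __be32 *fw_data, unsigned size)
{
	unsigned i;

	/* reset the write address, stream the big-endian words, reset again */
	WREG32(addr_reg, 0);
	for (i = 0; i < size; i++)
		WREG32(data_reg, be32_to_cpup(fw_data++));
	WREG32(addr_reg, 0);
}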
3873
3874 /**
3875  * cik_cp_gfx_start - start the gfx ring
3876  *
3877  * @rdev: radeon_device pointer
3878  *
3879  * Enables the ring and loads the clear state context and other
3880  * packets required to init the ring.
3881  * Returns 0 for success, error for failure.
3882  */
3883 static int cik_cp_gfx_start(struct radeon_device *rdev)
3884 {
3885         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3886         int r, i;
3887
3888         /* init the CP */
3889         WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
3890         WREG32(CP_ENDIAN_SWAP, 0);
3891         WREG32(CP_DEVICE_ID, 1);
3892
3893         cik_cp_gfx_enable(rdev, true);
3894
3895         r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
3896         if (r) {
3897                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3898                 return r;
3899         }
3900
3901         /* init the CE partitions.  CE only used for gfx on CIK */
3902         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3903         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3904         radeon_ring_write(ring, 0xc000);
3905         radeon_ring_write(ring, 0xc000);
3906
3907         /* setup clear context state */
3908         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3909         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3910
3911         radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3912         radeon_ring_write(ring, 0x80000000);
3913         radeon_ring_write(ring, 0x80000000);
3914
3915         for (i = 0; i < cik_default_size; i++)
3916                 radeon_ring_write(ring, cik_default_state[i]);
3917
3918         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3919         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3920
3921         /* set clear context state */
3922         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3923         radeon_ring_write(ring, 0);
3924
3925         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3926         radeon_ring_write(ring, 0x00000316);
3927         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3928         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3929
3930         radeon_ring_unlock_commit(rdev, ring);
3931
3932         return 0;
3933 }
3934
3935 /**
3936  * cik_cp_gfx_fini - stop the gfx ring
3937  *
3938  * @rdev: radeon_device pointer
3939  *
3940  * Stop the gfx ring and tear down the driver ring
3941  * info.
3942  */
3943 static void cik_cp_gfx_fini(struct radeon_device *rdev)
3944 {
3945         cik_cp_gfx_enable(rdev, false);
3946         radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3947 }
3948
3949 /**
3950  * cik_cp_gfx_resume - setup the gfx ring buffer registers
3951  *
3952  * @rdev: radeon_device pointer
3953  *
3954  * Program the location and size of the gfx ring buffer
3955  * and test it to make sure it's working.
3956  * Returns 0 for success, error for failure.
3957  */
3958 static int cik_cp_gfx_resume(struct radeon_device *rdev)
3959 {
3960         struct radeon_ring *ring;
3961         u32 tmp;
3962         u32 rb_bufsz;
3963         u64 rb_addr;
3964         int r;
3965
3966         WREG32(CP_SEM_WAIT_TIMER, 0x0);
3967         if (rdev->family != CHIP_HAWAII)
3968                 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3969
3970         /* Set the write pointer delay */
3971         WREG32(CP_RB_WPTR_DELAY, 0);
3972
3973         /* set the RB to use vmid 0 */
3974         WREG32(CP_RB_VMID, 0);
3975
3976         WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3977
3978         /* ring 0 - compute and gfx */
3979         /* Set ring buffer size */
3980         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3981         rb_bufsz = order_base_2(ring->ring_size / 8);
3982         tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3983 #ifdef __BIG_ENDIAN
3984         tmp |= BUF_SWAP_32BIT;
3985 #endif
3986         WREG32(CP_RB0_CNTL, tmp);
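        /* Worked example (illustrative, assuming a 1 MiB ring and 4 KiB GPU
         * pages): rb_bufsz = order_base_2(1048576 / 8) = 17 and the rptr
         * block size field is order_base_2(4096 / 8) = 9, so the value
         * written above is (9 << 8) | 17 = 0x911 (plus BUF_SWAP_32BIT on
         * big-endian).
         */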
3987
3988         /* Initialize the ring buffer's read and write pointers */
3989         WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3990         ring->wptr = 0;
3991         WREG32(CP_RB0_WPTR, ring->wptr);
3992
3993         /* set the wb address whether it's enabled or not */
3994         WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3995         WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3996
3997         /* scratch register shadowing is no longer supported */
3998         WREG32(SCRATCH_UMSK, 0);
3999
4000         if (!rdev->wb.enabled)
4001                 tmp |= RB_NO_UPDATE;
4002
4003         mdelay(1);
4004         WREG32(CP_RB0_CNTL, tmp);
4005
4006         rb_addr = ring->gpu_addr >> 8;
4007         WREG32(CP_RB0_BASE, rb_addr);
4008         WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4009
4010         ring->rptr = RREG32(CP_RB0_RPTR);
4011
4012         /* start the ring */
4013         cik_cp_gfx_start(rdev);
4014         rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4015         r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4016         if (r) {
4017                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4018                 return r;
4019         }
4020         return 0;
4021 }
4022
4023 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4024                      struct radeon_ring *ring)
4025 {
4026         u32 rptr;
4027
4028         if (rdev->wb.enabled)
4029                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4030         else
4031                 rptr = RREG32(CP_RB0_RPTR);
4032
4033         return rptr;
4034 }
4035
4036 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4037                      struct radeon_ring *ring)
4038 {
4039         u32 wptr;
4040
4041         wptr = RREG32(CP_RB0_WPTR);
4042
4043         return wptr;
4044 }
4045
4046 void cik_gfx_set_wptr(struct radeon_device *rdev,
4047                       struct radeon_ring *ring)
4048 {
4049         WREG32(CP_RB0_WPTR, ring->wptr);
4050         (void)RREG32(CP_RB0_WPTR);
4051 }
4052
4053 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4054                          struct radeon_ring *ring)
4055 {
4056         u32 rptr;
4057
4058         if (rdev->wb.enabled) {
4059                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4060         } else {
4061                 mutex_lock(&rdev->srbm_mutex);
4062                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4063                 rptr = RREG32(CP_HQD_PQ_RPTR);
4064                 cik_srbm_select(rdev, 0, 0, 0, 0);
4065                 mutex_unlock(&rdev->srbm_mutex);
4066         }
4067
4068         return rptr;
4069 }
4070
4071 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4072                          struct radeon_ring *ring)
4073 {
4074         u32 wptr;
4075
4076         if (rdev->wb.enabled) {
4077                 /* XXX check if swapping is necessary on BE */
4078                 wptr = rdev->wb.wb[ring->wptr_offs/4];
4079         } else {
4080                 mutex_lock(&rdev->srbm_mutex);
4081                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4082                 wptr = RREG32(CP_HQD_PQ_WPTR);
4083                 cik_srbm_select(rdev, 0, 0, 0, 0);
4084                 mutex_unlock(&rdev->srbm_mutex);
4085         }
4086
4087         return wptr;
4088 }
4089
4090 void cik_compute_set_wptr(struct radeon_device *rdev,
4091                           struct radeon_ring *ring)
4092 {
4093         /* XXX check if swapping is necessary on BE */
4094         rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4095         WDOORBELL32(ring->doorbell_index, ring->wptr);
4096 }
4097
4098 /**
4099  * cik_cp_compute_enable - enable/disable the compute CP MEs
4100  *
4101  * @rdev: radeon_device pointer
4102  * @enable: enable or disable the MEs
4103  *
4104  * Halts or unhalts the compute MEs.
4105  */
4106 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4107 {
4108         if (enable)
4109                 WREG32(CP_MEC_CNTL, 0);
4110         else
4111                 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4112         udelay(50);
4113 }
4114
4115 /**
4116  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4117  *
4118  * @rdev: radeon_device pointer
4119  *
4120  * Loads the compute MEC1&2 ucode.
4121  * Returns 0 for success, -EINVAL if the ucode is not available.
4122  */
4123 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4124 {
4125         const __be32 *fw_data;
4126         int i;
4127
4128         if (!rdev->mec_fw)
4129                 return -EINVAL;
4130
4131         cik_cp_compute_enable(rdev, false);
4132
4133         /* MEC1 */
4134         fw_data = (const __be32 *)rdev->mec_fw->data;
4135         WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4136         for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4137                 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4138         WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4139
4140         if (rdev->family == CHIP_KAVERI) {
4141                 /* MEC2 */
4142                 fw_data = (const __be32 *)rdev->mec_fw->data;
4143                 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4144                 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4145                         WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4146                 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4147         }
4148
4149         return 0;
4150 }
4151
4152 /**
4153  * cik_cp_compute_start - start the compute queues
4154  *
4155  * @rdev: radeon_device pointer
4156  *
4157  * Enable the compute queues.
4158  * Returns 0 for success, error for failure.
4159  */
4160 static int cik_cp_compute_start(struct radeon_device *rdev)
4161 {
4162         cik_cp_compute_enable(rdev, true);
4163
4164         return 0;
4165 }
4166
4167 /**
4168  * cik_cp_compute_fini - stop the compute queues
4169  *
4170  * @rdev: radeon_device pointer
4171  *
4172  * Stop the compute queues and tear down the driver queue
4173  * info.
4174  */
4175 static void cik_cp_compute_fini(struct radeon_device *rdev)
4176 {
4177         int i, idx, r;
4178
4179         cik_cp_compute_enable(rdev, false);
4180
4181         for (i = 0; i < 2; i++) {
4182                 if (i == 0)
4183                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4184                 else
4185                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4186
4187                 if (rdev->ring[idx].mqd_obj) {
4188                         r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4189                         if (unlikely(r != 0))
4190                                 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4191
4192                         radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4193                         radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4194
4195                         radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4196                         rdev->ring[idx].mqd_obj = NULL;
4197                 }
4198         }
4199 }
4200
4201 static void cik_mec_fini(struct radeon_device *rdev)
4202 {
4203         int r;
4204
4205         if (rdev->mec.hpd_eop_obj) {
4206                 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4207                 if (unlikely(r != 0))
4208                         dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4209                 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4210                 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4211
4212                 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4213                 rdev->mec.hpd_eop_obj = NULL;
4214         }
4215 }
4216
4217 #define MEC_HPD_SIZE 2048
4218
4219 static int cik_mec_init(struct radeon_device *rdev)
4220 {
4221         int r;
4222         u32 *hpd;
4223
4224         /*
4225          * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4226          * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4227          */
4228         if (rdev->family == CHIP_KAVERI)
4229                 rdev->mec.num_mec = 2;
4230         else
4231                 rdev->mec.num_mec = 1;
4232         rdev->mec.num_pipe = 4;
4233         rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4234
4235         if (rdev->mec.hpd_eop_obj == NULL) {
4236                 r = radeon_bo_create(rdev,
4237                                      rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4238                                      PAGE_SIZE, true,
4239                                      RADEON_GEM_DOMAIN_GTT, NULL,
4240                                      &rdev->mec.hpd_eop_obj);
4241                 if (r) {
4242                         dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
4243                         return r;
4244                 }
4245         }
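        /* Sizing example (illustrative): with MEC_HPD_SIZE = 2048 the
         * allocation above is 2 * 4 * 2048 * 2 = 32 KiB on KV and
         * 1 * 4 * 2048 * 2 = 16 KiB on CI/KB.
         */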
4246
4247         r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4248         if (unlikely(r != 0)) {
4249                 cik_mec_fini(rdev);
4250                 return r;
4251         }
4252         r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4253                           &rdev->mec.hpd_eop_gpu_addr);
4254         if (r) {
4255                 dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
4256                 cik_mec_fini(rdev);
4257                 return r;
4258         }
4259         r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4260         if (r) {
4261                 dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
4262                 cik_mec_fini(rdev);
4263                 return r;
4264         }
4265
4266         /* clear memory.  Not sure if this is required or not */
4267         memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4268
4269         radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4270         radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4271
4272         return 0;
4273 }
4274
4275 struct hqd_registers
4276 {
4277         u32 cp_mqd_base_addr;
4278         u32 cp_mqd_base_addr_hi;
4279         u32 cp_hqd_active;
4280         u32 cp_hqd_vmid;
4281         u32 cp_hqd_persistent_state;
4282         u32 cp_hqd_pipe_priority;
4283         u32 cp_hqd_queue_priority;
4284         u32 cp_hqd_quantum;
4285         u32 cp_hqd_pq_base;
4286         u32 cp_hqd_pq_base_hi;
4287         u32 cp_hqd_pq_rptr;
4288         u32 cp_hqd_pq_rptr_report_addr;
4289         u32 cp_hqd_pq_rptr_report_addr_hi;
4290         u32 cp_hqd_pq_wptr_poll_addr;
4291         u32 cp_hqd_pq_wptr_poll_addr_hi;
4292         u32 cp_hqd_pq_doorbell_control;
4293         u32 cp_hqd_pq_wptr;
4294         u32 cp_hqd_pq_control;
4295         u32 cp_hqd_ib_base_addr;
4296         u32 cp_hqd_ib_base_addr_hi;
4297         u32 cp_hqd_ib_rptr;
4298         u32 cp_hqd_ib_control;
4299         u32 cp_hqd_iq_timer;
4300         u32 cp_hqd_iq_rptr;
4301         u32 cp_hqd_dequeue_request;
4302         u32 cp_hqd_dma_offload;
4303         u32 cp_hqd_sema_cmd;
4304         u32 cp_hqd_msg_type;
4305         u32 cp_hqd_atomic0_preop_lo;
4306         u32 cp_hqd_atomic0_preop_hi;
4307         u32 cp_hqd_atomic1_preop_lo;
4308         u32 cp_hqd_atomic1_preop_hi;
4309         u32 cp_hqd_hq_scheduler0;
4310         u32 cp_hqd_hq_scheduler1;
4311         u32 cp_mqd_control;
4312 };
4313
4314 struct bonaire_mqd
4315 {
4316         u32 header;
4317         u32 dispatch_initiator;
4318         u32 dimensions[3];
4319         u32 start_idx[3];
4320         u32 num_threads[3];
4321         u32 pipeline_stat_enable;
4322         u32 perf_counter_enable;
4323         u32 pgm[2];
4324         u32 tba[2];
4325         u32 tma[2];
4326         u32 pgm_rsrc[2];
4327         u32 vmid;
4328         u32 resource_limits;
4329         u32 static_thread_mgmt01[2];
4330         u32 tmp_ring_size;
4331         u32 static_thread_mgmt23[2];
4332         u32 restart[3];
4333         u32 thread_trace_enable;
4334         u32 reserved1;
4335         u32 user_data[16];
4336         u32 vgtcs_invoke_count[2];
4337         struct hqd_registers queue_state;
4338         u32 dequeue_cntr;
4339         u32 interrupt_queue[64];
4340 };
4341
4342 /**
4343  * cik_cp_compute_resume - setup the compute queue registers
4344  *
4345  * @rdev: radeon_device pointer
4346  *
4347  * Program the compute queues and test them to make sure they
4348  * are working.
4349  * Returns 0 for success, error for failure.
4350  */
4351 static int cik_cp_compute_resume(struct radeon_device *rdev)
4352 {
4353         int r, i, idx;
4354         u32 tmp;
4355         bool use_doorbell = true;
4356         u64 hqd_gpu_addr;
4357         u64 mqd_gpu_addr;
4358         u64 eop_gpu_addr;
4359         u64 wb_gpu_addr;
4360         u32 *buf;
4361         struct bonaire_mqd *mqd;
4362
4363         r = cik_cp_compute_start(rdev);
4364         if (r)
4365                 return r;
4366
4367         /* fix up chicken bits */
4368         tmp = RREG32(CP_CPF_DEBUG);
4369         tmp |= (1 << 23);
4370         WREG32(CP_CPF_DEBUG, tmp);
4371
4372         /* init the pipes */
4373         mutex_lock(&rdev->srbm_mutex);
4374         for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
4375                 int me = (i < 4) ? 1 : 2;
4376                 int pipe = (i < 4) ? i : (i - 4);
4377
4378                 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
4379
4380                 cik_srbm_select(rdev, me, pipe, 0, 0);
4381
4382                 /* write the EOP addr */
4383                 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4384                 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4385
4386                 /* set the VMID assigned */
4387                 WREG32(CP_HPD_EOP_VMID, 0);
4388
4389                 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4390                 tmp = RREG32(CP_HPD_EOP_CONTROL);
4391                 tmp &= ~EOP_SIZE_MASK;
4392                 tmp |= order_base_2(MEC_HPD_SIZE / 8);
4393                 WREG32(CP_HPD_EOP_CONTROL, tmp);
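                /* Worked example (illustrative): MEC_HPD_SIZE is 2048 bytes,
                 * so the field written above is order_base_2(2048 / 8) = 8,
                 * i.e. 2^(8+1) = 512 dwords = 2048 bytes, matching
                 * MEC_HPD_SIZE.
                 */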
4394         }
4395         cik_srbm_select(rdev, 0, 0, 0, 0);
4396         mutex_unlock(&rdev->srbm_mutex);
4397
4398         /* init the queues.  Just two for now. */
4399         for (i = 0; i < 2; i++) {
4400                 if (i == 0)
4401                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4402                 else
4403                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4404
4405                 if (rdev->ring[idx].mqd_obj == NULL) {
4406                         r = radeon_bo_create(rdev,
4407                                              sizeof(struct bonaire_mqd),
4408                                              PAGE_SIZE, true,
4409                                              RADEON_GEM_DOMAIN_GTT, NULL,
4410                                              &rdev->ring[idx].mqd_obj);
4411                         if (r) {
4412                                 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4413                                 return r;
4414                         }
4415                 }
4416
4417                 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4418                 if (unlikely(r != 0)) {
4419                         cik_cp_compute_fini(rdev);
4420                         return r;
4421                 }
4422                 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4423                                   &mqd_gpu_addr);
4424                 if (r) {
4425                         dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4426                         cik_cp_compute_fini(rdev);
4427                         return r;
4428                 }
4429                 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4430                 if (r) {
4431                         dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4432                         cik_cp_compute_fini(rdev);
4433                         return r;
4434                 }
4435
4436                 /* init the mqd struct */
4437                 memset(buf, 0, sizeof(struct bonaire_mqd));
4438
4439                 mqd = (struct bonaire_mqd *)buf;
4440                 mqd->header = 0xC0310800;
4441                 mqd->static_thread_mgmt01[0] = 0xffffffff;
4442                 mqd->static_thread_mgmt01[1] = 0xffffffff;
4443                 mqd->static_thread_mgmt23[0] = 0xffffffff;
4444                 mqd->static_thread_mgmt23[1] = 0xffffffff;
4445
4446                 mutex_lock(&rdev->srbm_mutex);
4447                 cik_srbm_select(rdev, rdev->ring[idx].me,
4448                                 rdev->ring[idx].pipe,
4449                                 rdev->ring[idx].queue, 0);
4450
4451                 /* disable wptr polling */
4452                 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4453                 tmp &= ~WPTR_POLL_EN;
4454                 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4455
4456                 /* enable doorbell? */
4457                 mqd->queue_state.cp_hqd_pq_doorbell_control =
4458                         RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4459                 if (use_doorbell)
4460                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4461                 else
4462                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4463                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4464                        mqd->queue_state.cp_hqd_pq_doorbell_control);
4465
4466                 /* disable the queue if it's active */
4467                 mqd->queue_state.cp_hqd_dequeue_request = 0;
4468                 mqd->queue_state.cp_hqd_pq_rptr = 0;
4469                 mqd->queue_state.cp_hqd_pq_wptr = 0;
4470                 if (RREG32(CP_HQD_ACTIVE) & 1) {
4471                         WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4472                         for (i = 0; i < rdev->usec_timeout; i++) {
4473                                 if (!(RREG32(CP_HQD_ACTIVE) & 1))
4474                                         break;
4475                                 udelay(1);
4476                         }
4477                         WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4478                         WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4479                         WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4480                 }
4481
4482                 /* set the pointer to the MQD */
4483                 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4484                 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4485                 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4486                 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4487                 /* set MQD vmid to 0 */
4488                 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4489                 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4490                 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4491
4492                 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4493                 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4494                 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4495                 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4496                 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4497                 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4498
4499                 /* set up the HQD, this is similar to CP_RB0_CNTL */
4500                 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4501                 mqd->queue_state.cp_hqd_pq_control &=
4502                         ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4503
4504                 mqd->queue_state.cp_hqd_pq_control |=
4505                         order_base_2(rdev->ring[idx].ring_size / 8);
4506                 mqd->queue_state.cp_hqd_pq_control |=
4507                         (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4508 #ifdef __BIG_ENDIAN
4509                 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4510 #endif
4511                 mqd->queue_state.cp_hqd_pq_control &=
4512                         ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4513                 mqd->queue_state.cp_hqd_pq_control |=
4514                         PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4515                 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4516
4517                 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4518                 if (i == 0)
4519                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4520                 else
4521                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4522                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4523                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4524                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4525                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4526                        mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4527
4528                 /* set the wb address whether it's enabled or not */
4529                 if (i == 0)
4530                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4531                 else
4532                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4533                 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4534                 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4535                         upper_32_bits(wb_gpu_addr) & 0xffff;
4536                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4537                        mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4538                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4539                        mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4540
4541                 /* enable the doorbell if requested */
4542                 if (use_doorbell) {
4543                         mqd->queue_state.cp_hqd_pq_doorbell_control =
4544                                 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4545                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4546                         mqd->queue_state.cp_hqd_pq_doorbell_control |=
4547                                 DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4548                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4549                         mqd->queue_state.cp_hqd_pq_doorbell_control &=
4550                                 ~(DOORBELL_SOURCE | DOORBELL_HIT);
4551
4552                 } else {
4553                         mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4554                 }
4555                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4556                        mqd->queue_state.cp_hqd_pq_doorbell_control);
4557
4558                 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4559                 rdev->ring[idx].wptr = 0;
4560                 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4561                 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4562                 rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
4563                 mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
4564
4565                 /* set the vmid for the queue */
4566                 mqd->queue_state.cp_hqd_vmid = 0;
4567                 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4568
4569                 /* activate the queue */
4570                 mqd->queue_state.cp_hqd_active = 1;
4571                 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4572
4573                 cik_srbm_select(rdev, 0, 0, 0, 0);
4574                 mutex_unlock(&rdev->srbm_mutex);
4575
4576                 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4577                 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4578
4579                 rdev->ring[idx].ready = true;
4580                 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4581                 if (r)
4582                         rdev->ring[idx].ready = false;
4583         }
4584
4585         return 0;
4586 }
4587
4588 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
4589 {
4590         cik_cp_gfx_enable(rdev, enable);
4591         cik_cp_compute_enable(rdev, enable);
4592 }
4593
4594 static int cik_cp_load_microcode(struct radeon_device *rdev)
4595 {
4596         int r;
4597
4598         r = cik_cp_gfx_load_microcode(rdev);
4599         if (r)
4600                 return r;
4601         r = cik_cp_compute_load_microcode(rdev);
4602         if (r)
4603                 return r;
4604
4605         return 0;
4606 }
4607
4608 static void cik_cp_fini(struct radeon_device *rdev)
4609 {
4610         cik_cp_gfx_fini(rdev);
4611         cik_cp_compute_fini(rdev);
4612 }
4613
4614 static int cik_cp_resume(struct radeon_device *rdev)
4615 {
4616         int r;
4617
4618         cik_enable_gui_idle_interrupt(rdev, false);
4619
4620         r = cik_cp_load_microcode(rdev);
4621         if (r)
4622                 return r;
4623
4624         r = cik_cp_gfx_resume(rdev);
4625         if (r)
4626                 return r;
4627         r = cik_cp_compute_resume(rdev);
4628         if (r)
4629                 return r;
4630
4631         cik_enable_gui_idle_interrupt(rdev, true);
4632
4633         return 0;
4634 }
4635
4636 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
4637 {
4638         dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
4639                 RREG32(GRBM_STATUS));
4640         dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
4641                 RREG32(GRBM_STATUS2));
4642         dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
4643                 RREG32(GRBM_STATUS_SE0));
4644         dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
4645                 RREG32(GRBM_STATUS_SE1));
4646         dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
4647                 RREG32(GRBM_STATUS_SE2));
4648         dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
4649                 RREG32(GRBM_STATUS_SE3));
4650         dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
4651                 RREG32(SRBM_STATUS));
4652         dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
4653                 RREG32(SRBM_STATUS2));
4654         dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
4655                 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
4656         dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
4657                  RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
4658         dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
4659         dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
4660                  RREG32(CP_STALLED_STAT1));
4661         dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
4662                  RREG32(CP_STALLED_STAT2));
4663         dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
4664                  RREG32(CP_STALLED_STAT3));
4665         dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
4666                  RREG32(CP_CPF_BUSY_STAT));
4667         dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
4668                  RREG32(CP_CPF_STALLED_STAT1));
4669         dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
4670         dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
4671         dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
4672                  RREG32(CP_CPC_STALLED_STAT1));
4673         dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
4674 }
4675
4676 /**
4677  * cik_gpu_check_soft_reset - check which blocks are busy
4678  *
4679  * @rdev: radeon_device pointer
4680  *
4681  * Check which blocks are busy and return the relevant reset
4682  * mask to be used by cik_gpu_soft_reset().
4683  * Returns a mask of the blocks to be reset.
4684  */
4685 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4686 {
4687         u32 reset_mask = 0;
4688         u32 tmp;
4689
4690         /* GRBM_STATUS */
4691         tmp = RREG32(GRBM_STATUS);
4692         if (tmp & (PA_BUSY | SC_BUSY |
4693                    BCI_BUSY | SX_BUSY |
4694                    TA_BUSY | VGT_BUSY |
4695                    DB_BUSY | CB_BUSY |
4696                    GDS_BUSY | SPI_BUSY |
4697                    IA_BUSY | IA_BUSY_NO_DMA))
4698                 reset_mask |= RADEON_RESET_GFX;
4699
4700         if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4701                 reset_mask |= RADEON_RESET_CP;
4702
4703         /* GRBM_STATUS2 */
4704         tmp = RREG32(GRBM_STATUS2);
4705         if (tmp & RLC_BUSY)
4706                 reset_mask |= RADEON_RESET_RLC;
4707
4708         /* SDMA0_STATUS_REG */
4709         tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4710         if (!(tmp & SDMA_IDLE))
4711                 reset_mask |= RADEON_RESET_DMA;
4712
4713         /* SDMA1_STATUS_REG */
4714         tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4715         if (!(tmp & SDMA_IDLE))
4716                 reset_mask |= RADEON_RESET_DMA1;
4717
4718         /* SRBM_STATUS2 */
4719         tmp = RREG32(SRBM_STATUS2);
4720         if (tmp & SDMA_BUSY)
4721                 reset_mask |= RADEON_RESET_DMA;
4722
4723         if (tmp & SDMA1_BUSY)
4724                 reset_mask |= RADEON_RESET_DMA1;
4725
4726         /* SRBM_STATUS */
4727         tmp = RREG32(SRBM_STATUS);
4728
4729         if (tmp & IH_BUSY)
4730                 reset_mask |= RADEON_RESET_IH;
4731
4732         if (tmp & SEM_BUSY)
4733                 reset_mask |= RADEON_RESET_SEM;
4734
4735         if (tmp & GRBM_RQ_PENDING)
4736                 reset_mask |= RADEON_RESET_GRBM;
4737
4738         if (tmp & VMC_BUSY)
4739                 reset_mask |= RADEON_RESET_VMC;
4740
4741         if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4742                    MCC_BUSY | MCD_BUSY))
4743                 reset_mask |= RADEON_RESET_MC;
4744
4745         if (evergreen_is_display_hung(rdev))
4746                 reset_mask |= RADEON_RESET_DISPLAY;
4747
4748         /* Skip MC reset as it's most likely not hung, just busy */
4749         if (reset_mask & RADEON_RESET_MC) {
4750                 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4751                 reset_mask &= ~RADEON_RESET_MC;
4752         }
4753
4754         return reset_mask;
4755 }
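/* Typical usage (illustrative sketch): a reset path pairs this check with the
 * soft reset below, e.g.:
 *
 *	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
 *	if (reset_mask)
 *		cik_gpu_soft_reset(rdev, reset_mask);
 */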
4756
4757 /**
4758  * cik_gpu_soft_reset - soft reset GPU
4759  *
4760  * @rdev: radeon_device pointer
4761  * @reset_mask: mask of which blocks to reset
4762  *
4763  * Soft reset the blocks specified in @reset_mask.
4764  */
4765 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4766 {
4767         struct evergreen_mc_save save;
4768         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4769         u32 tmp;
4770
4771         if (reset_mask == 0)
4772                 return;
4773
4774         dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4775
4776         cik_print_gpu_status_regs(rdev);
4777         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4778                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4779         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4780                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4781
4782         /* disable CG/PG */
4783         cik_fini_pg(rdev);
4784         cik_fini_cg(rdev);
4785
4786         /* stop the rlc */
4787         cik_rlc_stop(rdev);
4788
4789         /* Disable GFX parsing/prefetching */
4790         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4791
4792         /* Disable MEC parsing/prefetching */
4793         WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4794
4795         if (reset_mask & RADEON_RESET_DMA) {
4796                 /* sdma0 */
4797                 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4798                 tmp |= SDMA_HALT;
4799                 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4800         }
4801         if (reset_mask & RADEON_RESET_DMA1) {
4802                 /* sdma1 */
4803                 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4804                 tmp |= SDMA_HALT;
4805                 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4806         }
4807
4808         evergreen_mc_stop(rdev, &save);
4809         if (evergreen_mc_wait_for_idle(rdev)) {
4810                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4811         }
4812
4813         if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4814                 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4815
4816         if (reset_mask & RADEON_RESET_CP) {
4817                 grbm_soft_reset |= SOFT_RESET_CP;
4818
4819                 srbm_soft_reset |= SOFT_RESET_GRBM;
4820         }
4821
4822         if (reset_mask & RADEON_RESET_DMA)
4823                 srbm_soft_reset |= SOFT_RESET_SDMA;
4824
4825         if (reset_mask & RADEON_RESET_DMA1)
4826                 srbm_soft_reset |= SOFT_RESET_SDMA1;
4827
4828         if (reset_mask & RADEON_RESET_DISPLAY)
4829                 srbm_soft_reset |= SOFT_RESET_DC;
4830
4831         if (reset_mask & RADEON_RESET_RLC)
4832                 grbm_soft_reset |= SOFT_RESET_RLC;
4833
4834         if (reset_mask & RADEON_RESET_SEM)
4835                 srbm_soft_reset |= SOFT_RESET_SEM;
4836
4837         if (reset_mask & RADEON_RESET_IH)
4838                 srbm_soft_reset |= SOFT_RESET_IH;
4839
4840         if (reset_mask & RADEON_RESET_GRBM)
4841                 srbm_soft_reset |= SOFT_RESET_GRBM;
4842
4843         if (reset_mask & RADEON_RESET_VMC)
4844                 srbm_soft_reset |= SOFT_RESET_VMC;
4845
4846         if (!(rdev->flags & RADEON_IS_IGP)) {
4847                 if (reset_mask & RADEON_RESET_MC)
4848                         srbm_soft_reset |= SOFT_RESET_MC;
4849         }
4850
4851         if (grbm_soft_reset) {
4852                 tmp = RREG32(GRBM_SOFT_RESET);
4853                 tmp |= grbm_soft_reset;
4854                 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4855                 WREG32(GRBM_SOFT_RESET, tmp);
4856                 tmp = RREG32(GRBM_SOFT_RESET);
4857
4858                 udelay(50);
4859
4860                 tmp &= ~grbm_soft_reset;
4861                 WREG32(GRBM_SOFT_RESET, tmp);
4862                 tmp = RREG32(GRBM_SOFT_RESET);
4863         }
4864
4865         if (srbm_soft_reset) {
4866                 tmp = RREG32(SRBM_SOFT_RESET);
4867                 tmp |= srbm_soft_reset;
4868                 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4869                 WREG32(SRBM_SOFT_RESET, tmp);
4870                 tmp = RREG32(SRBM_SOFT_RESET);
4871
4872                 udelay(50);
4873
4874                 tmp &= ~srbm_soft_reset;
4875                 WREG32(SRBM_SOFT_RESET, tmp);
4876                 tmp = RREG32(SRBM_SOFT_RESET);
4877         }
4878
4879         /* Wait a little for things to settle down */
4880         udelay(50);
4881
4882         evergreen_mc_resume(rdev, &save);
4883         udelay(50);
4884
4885         cik_print_gpu_status_regs(rdev);
4886 }
4887
4888 struct kv_reset_save_regs {
4889         u32 gmcon_reng_execute;
4890         u32 gmcon_misc;
4891         u32 gmcon_misc3;
4892 };
4893
4894 static void kv_save_regs_for_reset(struct radeon_device *rdev,
4895                                    struct kv_reset_save_regs *save)
4896 {
4897         save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
4898         save->gmcon_misc = RREG32(GMCON_MISC);
4899         save->gmcon_misc3 = RREG32(GMCON_MISC3);
4900
4901         WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
4902         WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
4903                                                 STCTRL_STUTTER_EN));
4904 }
4905
4906 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
4907                                       struct kv_reset_save_regs *save)
4908 {
4909         int i;
4910
4911         WREG32(GMCON_PGFSM_WRITE, 0);
4912         WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
4913
4914         for (i = 0; i < 5; i++)
4915                 WREG32(GMCON_PGFSM_WRITE, 0);
4916
4917         WREG32(GMCON_PGFSM_WRITE, 0);
4918         WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
4919
4920         for (i = 0; i < 5; i++)
4921                 WREG32(GMCON_PGFSM_WRITE, 0);
4922
4923         WREG32(GMCON_PGFSM_WRITE, 0x210000);
4924         WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
4925
4926         for (i = 0; i < 5; i++)
4927                 WREG32(GMCON_PGFSM_WRITE, 0);
4928
4929         WREG32(GMCON_PGFSM_WRITE, 0x21003);
4930         WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
4931
4932         for (i = 0; i < 5; i++)
4933                 WREG32(GMCON_PGFSM_WRITE, 0);
4934
4935         WREG32(GMCON_PGFSM_WRITE, 0x2b00);
4936         WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
4937
4938         for (i = 0; i < 5; i++)
4939                 WREG32(GMCON_PGFSM_WRITE, 0);
4940
4941         WREG32(GMCON_PGFSM_WRITE, 0);
4942         WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
4943
4944         for (i = 0; i < 5; i++)
4945                 WREG32(GMCON_PGFSM_WRITE, 0);
4946
4947         WREG32(GMCON_PGFSM_WRITE, 0x420000);
4948         WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
4949
4950         for (i = 0; i < 5; i++)
4951                 WREG32(GMCON_PGFSM_WRITE, 0);
4952
4953         WREG32(GMCON_PGFSM_WRITE, 0x120202);
4954         WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
4955
4956         for (i = 0; i < 5; i++)
4957                 WREG32(GMCON_PGFSM_WRITE, 0);
4958
4959         WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
4960         WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
4961
4962         for (i = 0; i < 5; i++)
4963                 WREG32(GMCON_PGFSM_WRITE, 0);
4964
4965         WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
4966         WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
4967
4968         for (i = 0; i < 5; i++)
4969                 WREG32(GMCON_PGFSM_WRITE, 0);
4970
4971         WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
4972         WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
4973
4974         WREG32(GMCON_MISC3, save->gmcon_misc3);
4975         WREG32(GMCON_MISC, save->gmcon_misc);
4976         WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
4977 }
4978
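/*
 * Editor's note: the restore sequence above repeats one pattern --
 * program GMCON_PGFSM_WRITE, program GMCON_PGFSM_CONFIG, then pad with
 * five zero writes (except after the final pair).  A table-driven
 * sketch of the same sequence is shown below for illustration only;
 * the helper name is hypothetical and this is not the form used by
 * the driver.
 */
#if 0	/* illustrative sketch, not built */
static void kv_run_pgfsm_sequence(struct radeon_device *rdev)
{
        static const u32 pgfsm_seq[][2] = {
                { 0x000000, 0x200010ff }, { 0x000000, 0x300010ff },
                { 0x210000, 0xa00010ff }, { 0x021003, 0xb00010ff },
                { 0x002b00, 0xc00010ff }, { 0x000000, 0xd00010ff },
                { 0x420000, 0x100010ff }, { 0x120202, 0x500010ff },
                { 0x3e3e36, 0x600010ff }, { 0x373f3e, 0x700010ff },
                { 0x3e1332, 0xe00010ff },
        };
        int i, j;

        for (i = 0; i < ARRAY_SIZE(pgfsm_seq); i++) {
                WREG32(GMCON_PGFSM_WRITE, pgfsm_seq[i][0]);
                WREG32(GMCON_PGFSM_CONFIG, pgfsm_seq[i][1]);
                /* the final pair is not followed by the zero padding */
                if (i == ARRAY_SIZE(pgfsm_seq) - 1)
                        break;
                for (j = 0; j < 5; j++)
                        WREG32(GMCON_PGFSM_WRITE, 0);
        }
}
#endif
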
4979 static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
4980 {
4981         struct evergreen_mc_save save;
4982         struct kv_reset_save_regs kv_save = { 0 };
4983         u32 tmp, i;
4984
4985         dev_info(rdev->dev, "GPU pci config reset\n");
4986
4987         /* disable dpm? */
4988
4989         /* disable cg/pg */
4990         cik_fini_pg(rdev);
4991         cik_fini_cg(rdev);
4992
4993         /* Disable GFX parsing/prefetching */
4994         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4995
4996         /* Disable MEC parsing/prefetching */
4997         WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4998
4999         /* sdma0 */
5000         tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5001         tmp |= SDMA_HALT;
5002         WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5003         /* sdma1 */
5004         tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5005         tmp |= SDMA_HALT;
5006         WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5007         /* XXX other engines? */
5008
5009         /* halt the rlc, disable cp internal ints */
5010         cik_rlc_stop(rdev);
5011
5012         udelay(50);
5013
5014         /* disable mem access */
5015         evergreen_mc_stop(rdev, &save);
5016         if (evergreen_mc_wait_for_idle(rdev)) {
5017                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5018         }
5019
5020         if (rdev->flags & RADEON_IS_IGP)
5021                 kv_save_regs_for_reset(rdev, &kv_save);
5022
5023         /* disable BM */
5024         pci_clear_master(rdev->pdev);
5025         /* reset */
5026         radeon_pci_config_reset(rdev);
5027
5028         udelay(100);
5029
5030         /* wait for asic to come out of reset */
5031         for (i = 0; i < rdev->usec_timeout; i++) {
5032                 if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
5033                         break;
5034                 udelay(1);
5035         }
5036
5037         /* does asic init need to be run first??? */
5038         if (rdev->flags & RADEON_IS_IGP)
5039                 kv_restore_regs_for_reset(rdev, &kv_save);
5040 }
5041
5042 /**
5043  * cik_asic_reset - soft reset GPU
5044  *
5045  * @rdev: radeon_device pointer
5046  *
5047  * Look up which blocks are hung and attempt
5048  * to reset them.
5049  * Returns 0 for success.
5050  */
5051 int cik_asic_reset(struct radeon_device *rdev)
5052 {
5053         u32 reset_mask;
5054
5055         reset_mask = cik_gpu_check_soft_reset(rdev);
5056
5057         if (reset_mask)
5058                 r600_set_bios_scratch_engine_hung(rdev, true);
5059
5060         /* try soft reset */
5061         cik_gpu_soft_reset(rdev, reset_mask);
5062
5063         reset_mask = cik_gpu_check_soft_reset(rdev);
5064
5065         /* try pci config reset */
5066         if (reset_mask && radeon_hard_reset)
5067                 cik_gpu_pci_config_reset(rdev);
5068
5069         reset_mask = cik_gpu_check_soft_reset(rdev);
5070
5071         if (!reset_mask)
5072                 r600_set_bios_scratch_engine_hung(rdev, false);
5073
5074         return 0;
5075 }
5076
5077 /**
5078  * cik_gfx_is_lockup - check if the 3D engine is locked up
5079  *
5080  * @rdev: radeon_device pointer
5081  * @ring: radeon_ring structure holding ring information
5082  *
5083  * Check if the 3D engine is locked up (CIK).
5084  * Returns true if the engine is locked, false if not.
5085  */
5086 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5087 {
5088         u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5089
5090         if (!(reset_mask & (RADEON_RESET_GFX |
5091                             RADEON_RESET_COMPUTE |
5092                             RADEON_RESET_CP))) {
5093                 radeon_ring_lockup_update(ring);
5094                 return false;
5095         }
5096         /* force CP activities */
5097         radeon_ring_force_activity(rdev, ring);
5098         return radeon_ring_test_lockup(rdev, ring);
5099 }
5100
5101 /* MC */
5102 /**
5103  * cik_mc_program - program the GPU memory controller
5104  *
5105  * @rdev: radeon_device pointer
5106  *
5107  * Set the location of vram, gart, and AGP in the GPU's
5108  * physical address space (CIK).
5109  */
5110 static void cik_mc_program(struct radeon_device *rdev)
5111 {
5112         struct evergreen_mc_save save;
5113         u32 tmp;
5114         int i, j;
5115
5116         /* Initialize HDP */
5117         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5118                 WREG32((0x2c14 + j), 0x00000000);
5119                 WREG32((0x2c18 + j), 0x00000000);
5120                 WREG32((0x2c1c + j), 0x00000000);
5121                 WREG32((0x2c20 + j), 0x00000000);
5122                 WREG32((0x2c24 + j), 0x00000000);
5123         }
5124         WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5125
5126         evergreen_mc_stop(rdev, &save);
5127         if (radeon_mc_wait_for_idle(rdev)) {
5128                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5129         }
5130         /* Lockout access through VGA aperture*/
5131         WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5132         /* Update configuration */
5133         WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5134                rdev->mc.vram_start >> 12);
5135         WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5136                rdev->mc.vram_end >> 12);
5137         WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5138                rdev->vram_scratch.gpu_addr >> 12);
5139         tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5140         tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5141         WREG32(MC_VM_FB_LOCATION, tmp);
5142         /* XXX double check these! */
5143         WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5144         WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5145         WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5146         WREG32(MC_VM_AGP_BASE, 0);
5147         WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5148         WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5149         if (radeon_mc_wait_for_idle(rdev)) {
5150                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5151         }
5152         evergreen_mc_resume(rdev, &save);
5153         /* we need to own VRAM, so turn off the VGA renderer here
5154          * to stop it overwriting our objects */
5155         rv515_vga_render_disable(rdev);
5156 }
5157
5158 /**
5159  * cik_mc_init - initialize the memory controller driver params
5160  *
5161  * @rdev: radeon_device pointer
5162  *
5163  * Look up the amount of vram, vram width, and decide how to place
5164  * vram and gart within the GPU's physical address space (CIK).
5165  * Returns 0 for success.
5166  */
5167 static int cik_mc_init(struct radeon_device *rdev)
5168 {
5169         u32 tmp;
5170         int chansize, numchan;
5171
5172         /* Get VRAM information */
5173         rdev->mc.vram_is_ddr = true;
5174         tmp = RREG32(MC_ARB_RAMCFG);
5175         if (tmp & CHANSIZE_MASK) {
5176                 chansize = 64;
5177         } else {
5178                 chansize = 32;
5179         }
5180         tmp = RREG32(MC_SHARED_CHMAP);
5181         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5182         case 0:
5183         default:
5184                 numchan = 1;
5185                 break;
5186         case 1:
5187                 numchan = 2;
5188                 break;
5189         case 2:
5190                 numchan = 4;
5191                 break;
5192         case 3:
5193                 numchan = 8;
5194                 break;
5195         case 4:
5196                 numchan = 3;
5197                 break;
5198         case 5:
5199                 numchan = 6;
5200                 break;
5201         case 6:
5202                 numchan = 10;
5203                 break;
5204         case 7:
5205                 numchan = 12;
5206                 break;
5207         case 8:
5208                 numchan = 16;
5209                 break;
5210         }
5211         rdev->mc.vram_width = numchan * chansize;
5212         /* Could aperture size report 0? */
5213         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5214         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5215         /* size in MB on CIK */
5216         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5217         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5218         rdev->mc.visible_vram_size = rdev->mc.aper_size;
5219         si_vram_gtt_location(rdev, &rdev->mc);
5220         radeon_update_bandwidth_info(rdev);
5221
5222         return 0;
5223 }
5224
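/*
 * Editor's note: the NOOFCHAN decode in cik_mc_init() above is a plain
 * value-to-count mapping.  A table-driven sketch is shown below for
 * illustration only; the helper name is hypothetical and this is not
 * the form used by the driver.
 */
#if 0	/* illustrative sketch, not built */
static int cik_decode_numchan(u32 mc_shared_chmap)
{
        /* index = NOOFCHAN field value, entry = number of channels */
        static const int chan_table[] = { 1, 2, 4, 8, 3, 6, 10, 12, 16 };
        u32 field = (mc_shared_chmap & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT;

        if (field >= ARRAY_SIZE(chan_table))
                return 1;	/* matches the default case above */
        return chan_table[field];
}
#endif
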
5225 /*
5226  * GART
5227  * VMID 0 maps the physical GPU addresses as used by the kernel.
5228  * VMIDs 1-15 are used for userspace clients and are handled
5229  * by the radeon vm/hsa code.
5230  */
5231 /**
5232  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5233  *
5234  * @rdev: radeon_device pointer
5235  *
5236  * Flush the TLB for the VMID 0 page table (CIK).
5237  */
5238 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5239 {
5240         /* flush hdp cache */
5241         WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5242
5243         /* bits 0-15 are the VM contexts0-15 */
5244         WREG32(VM_INVALIDATE_REQUEST, 0x1);
5245 }
5246
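/*
 * Editor's note: VM_INVALIDATE_REQUEST takes one bit per VMID, so the
 * write of 0x1 above flushes only VMID 0 (the kernel's GART mapping);
 * cik_vm_flush() further below writes 1 << vm->id to the same register
 * through the CP to flush a single user VMID.
 */
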
5247 /**
5248  * cik_pcie_gart_enable - gart enable
5249  *
5250  * @rdev: radeon_device pointer
5251  *
5252  * This sets up the TLBs, programs the page tables for VMID0,
5253  * sets up the hw for VMIDs 1-15 which are allocated on
5254  * demand, and sets up the global locations for the LDS, GDS,
5255  * and GPUVM for FSA64 clients (CIK).
5256  * Returns 0 for success, errors for failure.
5257  */
5258 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5259 {
5260         int r, i;
5261
5262         if (rdev->gart.robj == NULL) {
5263                 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5264                 return -EINVAL;
5265         }
5266         r = radeon_gart_table_vram_pin(rdev);
5267         if (r)
5268                 return r;
5269         radeon_gart_restore(rdev);
5270         /* Setup TLB control */
5271         WREG32(MC_VM_MX_L1_TLB_CNTL,
5272                (0xA << 7) |
5273                ENABLE_L1_TLB |
5274                SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5275                ENABLE_ADVANCED_DRIVER_MODEL |
5276                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5277         /* Setup L2 cache */
5278         WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5279                ENABLE_L2_FRAGMENT_PROCESSING |
5280                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5281                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5282                EFFECTIVE_L2_QUEUE_SIZE(7) |
5283                CONTEXT1_IDENTITY_ACCESS_MODE(1));
5284         WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5285         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5286                L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5287         /* setup context0 */
5288         WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5289         WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5290         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5291         WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5292                         (u32)(rdev->dummy_page.addr >> 12));
5293         WREG32(VM_CONTEXT0_CNTL2, 0);
5294         WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5295                                   RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5296
5297         WREG32(0x15D4, 0);
5298         WREG32(0x15D8, 0);
5299         WREG32(0x15DC, 0);
5300
5301         /* empty context1-15 */
5302         /* FIXME start with 4G, once using 2 level pt switch to full
5303          * vm size space
5304          */
5305         /* set vm size, must be a multiple of 4 */
5306         WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5307         WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
5308         for (i = 1; i < 16; i++) {
5309                 if (i < 8)
5310                         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5311                                rdev->gart.table_addr >> 12);
5312                 else
5313                         WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5314                                rdev->gart.table_addr >> 12);
5315         }
5316
5317         /* enable context1-15 */
5318         WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5319                (u32)(rdev->dummy_page.addr >> 12));
5320         WREG32(VM_CONTEXT1_CNTL2, 4);
5321         WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5322                                 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5323                                 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5324                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5325                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5326                                 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5327                                 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5328                                 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5329                                 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5330                                 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5331                                 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5332                                 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5333                                 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5334
5335         /* TC cache setup ??? */
5336         WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
5337         WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
5338         WREG32(TC_CFG_L1_STORE_POLICY, 0);
5339
5340         WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
5341         WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
5342         WREG32(TC_CFG_L2_STORE_POLICY0, 0);
5343         WREG32(TC_CFG_L2_STORE_POLICY1, 0);
5344         WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
5345
5346         WREG32(TC_CFG_L1_VOLATILE, 0);
5347         WREG32(TC_CFG_L2_VOLATILE, 0);
5348
5349         if (rdev->family == CHIP_KAVERI) {
5350                 u32 tmp = RREG32(CHUB_CONTROL);
5351                 tmp &= ~BYPASS_VM;
5352                 WREG32(CHUB_CONTROL, tmp);
5353         }
5354
5355         /* XXX SH_MEM regs */
5356         /* where to put LDS, scratch, GPUVM in FSA64 space */
5357         mutex_lock(&rdev->srbm_mutex);
5358         for (i = 0; i < 16; i++) {
5359                 cik_srbm_select(rdev, 0, 0, 0, i);
5360                 /* CP and shaders */
5361                 WREG32(SH_MEM_CONFIG, 0);
5362                 WREG32(SH_MEM_APE1_BASE, 1);
5363                 WREG32(SH_MEM_APE1_LIMIT, 0);
5364                 WREG32(SH_MEM_BASES, 0);
5365                 /* SDMA GFX */
5366                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5367                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5368                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5369                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5370                 /* XXX SDMA RLC - todo */
5371         }
5372         cik_srbm_select(rdev, 0, 0, 0, 0);
5373         mutex_unlock(&rdev->srbm_mutex);
5374
5375         cik_pcie_gart_tlb_flush(rdev);
5376         DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5377                  (unsigned)(rdev->mc.gtt_size >> 20),
5378                  (unsigned long long)rdev->gart.table_addr);
5379         rdev->gart.ready = true;
5380         return 0;
5381 }
5382
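/*
 * Editor's note: VMIDs 1-7 and 8-15 use two separate banks of page
 * table base registers, and the same offset calculation appears again
 * in cik_vm_flush() below.  A minimal helper sketch, for illustration
 * only (the helper name is hypothetical):
 */
#if 0	/* illustrative sketch, not built */
static u32 cik_vm_pt_base_reg(unsigned int vm_id)
{
        if (vm_id < 8)
                return VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2);
        return VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2);
}
#endif
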
5383 /**
5384  * cik_pcie_gart_disable - gart disable
5385  *
5386  * @rdev: radeon_device pointer
5387  *
5388  * This disables all VM page tables (CIK).
5389  */
5390 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5391 {
5392         /* Disable all tables */
5393         WREG32(VM_CONTEXT0_CNTL, 0);
5394         WREG32(VM_CONTEXT1_CNTL, 0);
5395         /* Setup TLB control */
5396         WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5397                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5398         /* Setup L2 cache */
5399         WREG32(VM_L2_CNTL,
5400                ENABLE_L2_FRAGMENT_PROCESSING |
5401                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5402                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5403                EFFECTIVE_L2_QUEUE_SIZE(7) |
5404                CONTEXT1_IDENTITY_ACCESS_MODE(1));
5405         WREG32(VM_L2_CNTL2, 0);
5406         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5407                L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5408         radeon_gart_table_vram_unpin(rdev);
5409 }
5410
5411 /**
5412  * cik_pcie_gart_fini - vm fini callback
5413  *
5414  * @rdev: radeon_device pointer
5415  *
5416  * Tears down the driver GART/VM setup (CIK).
5417  */
5418 static void cik_pcie_gart_fini(struct radeon_device *rdev)
5419 {
5420         cik_pcie_gart_disable(rdev);
5421         radeon_gart_table_vram_free(rdev);
5422         radeon_gart_fini(rdev);
5423 }
5424
5425 /* vm parser */
5426 /**
5427  * cik_ib_parse - vm ib_parse callback
5428  *
5429  * @rdev: radeon_device pointer
5430  * @ib: indirect buffer pointer
5431  *
5432  * CIK uses hw IB checking so this is a nop (CIK).
5433  */
5434 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
5435 {
5436         return 0;
5437 }
5438
5439 /*
5440  * vm
5441  * VMID 0 maps the physical GPU addresses as used by the kernel.
5442  * VMIDs 1-15 are used for userspace clients and are handled
5443  * by the radeon vm/hsa code.
5444  */
5445 /**
5446  * cik_vm_init - cik vm init callback
5447  *
5448  * @rdev: radeon_device pointer
5449  *
5450  * Inits cik specific vm parameters (number of VMs, base of vram for
5451  * VMIDs 1-15) (CIK).
5452  * Returns 0 for success.
5453  */
5454 int cik_vm_init(struct radeon_device *rdev)
5455 {
5456         /* number of VMs */
5457         rdev->vm_manager.nvm = 16;
5458         /* base offset of vram pages */
5459         if (rdev->flags & RADEON_IS_IGP) {
5460                 u64 tmp = RREG32(MC_VM_FB_OFFSET);
5461                 tmp <<= 22;
5462                 rdev->vm_manager.vram_base_offset = tmp;
5463         } else
5464                 rdev->vm_manager.vram_base_offset = 0;
5465
5466         return 0;
5467 }
5468
5469 /**
5470  * cik_vm_fini - cik vm fini callback
5471  *
5472  * @rdev: radeon_device pointer
5473  *
5474  * Tear down any asic specific VM setup (CIK).
5475  */
5476 void cik_vm_fini(struct radeon_device *rdev)
5477 {
5478 }
5479
5480 /**
5481  * cik_vm_decode_fault - print human readable fault info
5482  *
5483  * @rdev: radeon_device pointer
5484  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5485  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
 * @mc_client: memory client tag of the faulting client
5486  *
5487  * Print human readable fault information (CIK).
5488  */
5489 static void cik_vm_decode_fault(struct radeon_device *rdev,
5490                                 u32 status, u32 addr, u32 mc_client)
5491 {
5492         u32 mc_id;
5493         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5494         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5495         char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5496                 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5497
5498         if (rdev->family == CHIP_HAWAII)
5499                 mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5500         else
5501                 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5502
5503         printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5504                protections, vmid, addr,
5505                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5506                block, mc_client, mc_id);
5507 }
5508
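/*
 * Editor's note: mc_client packs a four-character ASCII tag which the
 * function above unpacks byte by byte into block[].  As a hypothetical
 * example (the value is illustrative, not taken from hardware
 * documentation), mc_client = 0x43504720 decodes to 'C' 'P' 'G' ' ',
 * so the fault message would report the block as "CPG ".
 */
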
5509 /**
5510  * cik_vm_flush - cik vm flush using the CP
5511  *
5512  * @rdev: radeon_device pointer
 * @ridx: radeon ring index
 * @vm: radeon_vm pointer
5513  *
5514  * Update the page table base and flush the VM TLB
5515  * using the CP (CIK).
5516  */
5517 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
5518 {
5519         struct radeon_ring *ring = &rdev->ring[ridx];
5520
5521         if (vm == NULL)
5522                 return;
5523
5524         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5525         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5526                                  WRITE_DATA_DST_SEL(0)));
5527         if (vm->id < 8) {
5528                 radeon_ring_write(ring,
5529                                   (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
5530         } else {
5531                 radeon_ring_write(ring,
5532                                   (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
5533         }
5534         radeon_ring_write(ring, 0);
5535         radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
5536
5537         /* update SH_MEM_* regs */
5538         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5539         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5540                                  WRITE_DATA_DST_SEL(0)));
5541         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5542         radeon_ring_write(ring, 0);
5543         radeon_ring_write(ring, VMID(vm->id));
5544
5545         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
5546         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5547                                  WRITE_DATA_DST_SEL(0)));
5548         radeon_ring_write(ring, SH_MEM_BASES >> 2);
5549         radeon_ring_write(ring, 0);
5550
5551         radeon_ring_write(ring, 0); /* SH_MEM_BASES */
5552         radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
5553         radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
5554         radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
5555
5556         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5557         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5558                                  WRITE_DATA_DST_SEL(0)));
5559         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5560         radeon_ring_write(ring, 0);
5561         radeon_ring_write(ring, VMID(0));
5562
5563         /* HDP flush */
5564         cik_hdp_flush_cp_ring_emit(rdev, ridx);
5565
5566         /* bits 0-15 are the VM contexts0-15 */
5567         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5568         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5569                                  WRITE_DATA_DST_SEL(0)));
5570         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5571         radeon_ring_write(ring, 0);
5572         radeon_ring_write(ring, 1 << vm->id);
5573
5574         /* compute doesn't have PFP */
5575         if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
5576                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5577                 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5578                 radeon_ring_write(ring, 0x0);
5579         }
5580 }
5581
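/*
 * Editor's note on the WRITE_DATA packets above: the PACKET3 count is
 * one less than the number of body dwords, so count 3 carries the
 * control word, two address dwords and one data dword, while count 6
 * carries the same three leading dwords plus the four data dwords
 * (SH_MEM_BASES, SH_MEM_CONFIG, SH_MEM_APE1_BASE, SH_MEM_APE1_LIMIT).
 */
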
5582 /*
5583  * RLC
5584  * The RLC is a multi-purpose microengine that handles a
5585  * variety of functions, the most important of which is
5586  * the interrupt controller.
5587  */
5588 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5589                                           bool enable)
5590 {
5591         u32 tmp = RREG32(CP_INT_CNTL_RING0);
5592
5593         if (enable)
5594                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5595         else
5596                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5597         WREG32(CP_INT_CNTL_RING0, tmp);
5598 }
5599
5600 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5601 {
5602         u32 tmp;
5603
5604         tmp = RREG32(RLC_LB_CNTL);
5605         if (enable)
5606                 tmp |= LOAD_BALANCE_ENABLE;
5607         else
5608                 tmp &= ~LOAD_BALANCE_ENABLE;
5609         WREG32(RLC_LB_CNTL, tmp);
5610 }
5611
5612 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
5613 {
5614         u32 i, j, k;
5615         u32 mask;
5616
5617         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5618                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5619                         cik_select_se_sh(rdev, i, j);
5620                         for (k = 0; k < rdev->usec_timeout; k++) {
5621                                 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
5622                                         break;
5623                                 udelay(1);
5624                         }
5625                 }
5626         }
5627         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5628
5629         mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
5630         for (k = 0; k < rdev->usec_timeout; k++) {
5631                 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
5632                         break;
5633                 udelay(1);
5634         }
5635 }
5636
5637 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5638 {
5639         u32 tmp;
5640
5641         tmp = RREG32(RLC_CNTL);
5642         if (tmp != rlc)
5643                 WREG32(RLC_CNTL, rlc);
5644 }
5645
5646 static u32 cik_halt_rlc(struct radeon_device *rdev)
5647 {
5648         u32 data, orig;
5649
5650         orig = data = RREG32(RLC_CNTL);
5651
5652         if (data & RLC_ENABLE) {
5653                 u32 i;
5654
5655                 data &= ~RLC_ENABLE;
5656                 WREG32(RLC_CNTL, data);
5657
5658                 for (i = 0; i < rdev->usec_timeout; i++) {
5659                         if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5660                                 break;
5661                         udelay(1);
5662                 }
5663
5664                 cik_wait_for_rlc_serdes(rdev);
5665         }
5666
5667         return orig;
5668 }
5669
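/*
 * Editor's note: cik_halt_rlc() returns the RLC_CNTL value read before
 * halting, so the clockgating paths below pair it with
 * cik_update_rlc(rdev, tmp) to put the RLC back into whatever enable
 * state it had before the serdes writes.
 */
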
5670 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
5671 {
5672         u32 tmp, i, mask;
5673
5674         tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
5675         WREG32(RLC_GPR_REG2, tmp);
5676
5677         mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
5678         for (i = 0; i < rdev->usec_timeout; i++) {
5679                 if ((RREG32(RLC_GPM_STAT) & mask) == mask)
5680                         break;
5681                 udelay(1);
5682         }
5683
5684         for (i = 0; i < rdev->usec_timeout; i++) {
5685                 if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
5686                         break;
5687                 udelay(1);
5688         }
5689 }
5690
5691 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5692 {
5693         u32 tmp;
5694
5695         tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5696         WREG32(RLC_GPR_REG2, tmp);
5697 }
5698
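/*
 * Editor's note: the enter/exit helpers above are meant to bracket
 * register updates that must not race with a running RLC.  A minimal
 * usage sketch, for illustration only (the function name is
 * hypothetical; real callers live elsewhere in the driver):
 */
#if 0	/* illustrative sketch, not built */
static void cik_example_safe_update(struct radeon_device *rdev)
{
        cik_enter_rlc_safe_mode(rdev);
        /* ... update RLC-sensitive registers here ... */
        cik_exit_rlc_safe_mode(rdev);
}
#endif
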
5699 /**
5700  * cik_rlc_stop - stop the RLC ME
5701  *
5702  * @rdev: radeon_device pointer
5703  *
5704  * Halt the RLC ME (MicroEngine) (CIK).
5705  */
5706 static void cik_rlc_stop(struct radeon_device *rdev)
5707 {
5708         WREG32(RLC_CNTL, 0);
5709
5710         cik_enable_gui_idle_interrupt(rdev, false);
5711
5712         cik_wait_for_rlc_serdes(rdev);
5713 }
5714
5715 /**
5716  * cik_rlc_start - start the RLC ME
5717  *
5718  * @rdev: radeon_device pointer
5719  *
5720  * Unhalt the RLC ME (MicroEngine) (CIK).
5721  */
5722 static void cik_rlc_start(struct radeon_device *rdev)
5723 {
5724         WREG32(RLC_CNTL, RLC_ENABLE);
5725
5726         cik_enable_gui_idle_interrupt(rdev, true);
5727
5728         udelay(50);
5729 }
5730
5731 /**
5732  * cik_rlc_resume - setup the RLC hw
5733  *
5734  * @rdev: radeon_device pointer
5735  *
5736  * Initialize the RLC registers, load the ucode,
5737  * and start the RLC (CIK).
5738  * Returns 0 for success, -EINVAL if the ucode is not available.
5739  */
5740 static int cik_rlc_resume(struct radeon_device *rdev)
5741 {
5742         u32 i, size, tmp;
5743         const __be32 *fw_data;
5744
5745         if (!rdev->rlc_fw)
5746                 return -EINVAL;
5747
5748         switch (rdev->family) {
5749         case CHIP_BONAIRE:
5750         case CHIP_HAWAII:
5751         default:
5752                 size = BONAIRE_RLC_UCODE_SIZE;
5753                 break;
5754         case CHIP_KAVERI:
5755                 size = KV_RLC_UCODE_SIZE;
5756                 break;
5757         case CHIP_KABINI:
5758                 size = KB_RLC_UCODE_SIZE;
5759                 break;
5760         }
5761
5762         cik_rlc_stop(rdev);
5763
5764         /* disable CG */
5765         tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5766         WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5767
5768         si_rlc_reset(rdev);
5769
5770         cik_init_pg(rdev);
5771
5772         cik_init_cg(rdev);
5773
5774         WREG32(RLC_LB_CNTR_INIT, 0);
5775         WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5776
5777         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5778         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5779         WREG32(RLC_LB_PARAMS, 0x00600408);
5780         WREG32(RLC_LB_CNTL, 0x80000004);
5781
5782         WREG32(RLC_MC_CNTL, 0);
5783         WREG32(RLC_UCODE_CNTL, 0);
5784
5785         fw_data = (const __be32 *)rdev->rlc_fw->data;
5786         WREG32(RLC_GPM_UCODE_ADDR, 0);
5787         for (i = 0; i < size; i++)
5788                 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5789         WREG32(RLC_GPM_UCODE_ADDR, 0);
5790
5791         /* XXX - find out what chips support lbpw */
5792         cik_enable_lbpw(rdev, false);
5793
5794         if (rdev->family == CHIP_BONAIRE)
5795                 WREG32(RLC_DRIVER_DMA_STATUS, 0);
5796
5797         cik_rlc_start(rdev);
5798
5799         return 0;
5800 }
5801
5802 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
5803 {
5804         u32 data, orig, tmp, tmp2;
5805
5806         orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5807
5808         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5809                 cik_enable_gui_idle_interrupt(rdev, true);
5810
5811                 tmp = cik_halt_rlc(rdev);
5812
5813                 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5814                 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5815                 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5816                 tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
5817                 WREG32(RLC_SERDES_WR_CTRL, tmp2);
5818
5819                 cik_update_rlc(rdev, tmp);
5820
5821                 data |= CGCG_EN | CGLS_EN;
5822         } else {
5823                 cik_enable_gui_idle_interrupt(rdev, false);
5824
5825                 RREG32(CB_CGTT_SCLK_CTRL);
5826                 RREG32(CB_CGTT_SCLK_CTRL);
5827                 RREG32(CB_CGTT_SCLK_CTRL);
5828                 RREG32(CB_CGTT_SCLK_CTRL);
5829
5830                 data &= ~(CGCG_EN | CGLS_EN);
5831         }
5832
5833         if (orig != data)
5834                 WREG32(RLC_CGCG_CGLS_CTRL, data);
5835
5836 }
5837
5838 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
5839 {
5840         u32 data, orig, tmp = 0;
5841
5842         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
5843                 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
5844                         if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
5845                                 orig = data = RREG32(CP_MEM_SLP_CNTL);
5846                                 data |= CP_MEM_LS_EN;
5847                                 if (orig != data)
5848                                         WREG32(CP_MEM_SLP_CNTL, data);
5849                         }
5850                 }
5851
5852                 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5853                 data &= 0xfffffffd;
5854                 if (orig != data)
5855                         WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5856
5857                 tmp = cik_halt_rlc(rdev);
5858
5859                 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5860                 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5861                 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5862                 data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
5863                 WREG32(RLC_SERDES_WR_CTRL, data);
5864
5865                 cik_update_rlc(rdev, tmp);
5866
5867                 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
5868                         orig = data = RREG32(CGTS_SM_CTRL_REG);
5869                         data &= ~SM_MODE_MASK;
5870                         data |= SM_MODE(0x2);
5871                         data |= SM_MODE_ENABLE;
5872                         data &= ~CGTS_OVERRIDE;
5873                         if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
5874                             (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
5875                                 data &= ~CGTS_LS_OVERRIDE;
5876                         data &= ~ON_MONITOR_ADD_MASK;
5877                         data |= ON_MONITOR_ADD_EN;
5878                         data |= ON_MONITOR_ADD(0x96);
5879                         if (orig != data)
5880                                 WREG32(CGTS_SM_CTRL_REG, data);
5881                 }
5882         } else {
5883                 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5884                 data |= 0x00000002;
5885                 if (orig != data)
5886                         WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5887
5888                 data = RREG32(RLC_MEM_SLP_CNTL);
5889                 if (data & RLC_MEM_LS_EN) {
5890                         data &= ~RLC_MEM_LS_EN;
5891                         WREG32(RLC_MEM_SLP_CNTL, data);
5892                 }
5893
5894                 data = RREG32(CP_MEM_SLP_CNTL);
5895                 if (data & CP_MEM_LS_EN) {
5896                         data &= ~CP_MEM_LS_EN;
5897                         WREG32(CP_MEM_SLP_CNTL, data);
5898                 }
5899
5900                 orig = data = RREG32(CGTS_SM_CTRL_REG);
5901                 data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
5902                 if (orig != data)
5903                         WREG32(CGTS_SM_CTRL_REG, data);
5904
5905                 tmp = cik_halt_rlc(rdev);
5906
5907                 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5908                 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5909                 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5910                 data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
5911                 WREG32(RLC_SERDES_WR_CTRL, data);
5912
5913                 cik_update_rlc(rdev, tmp);
5914         }
5915 }
5916
5917 static const u32 mc_cg_registers[] =
5918 {
5919         MC_HUB_MISC_HUB_CG,
5920         MC_HUB_MISC_SIP_CG,
5921         MC_HUB_MISC_VM_CG,
5922         MC_XPB_CLK_GAT,
5923         ATC_MISC_CG,
5924         MC_CITF_MISC_WR_CG,
5925         MC_CITF_MISC_RD_CG,
5926         MC_CITF_MISC_VM_CG,
5927         VM_L2_CG,
5928 };
5929
5930 static void cik_enable_mc_ls(struct radeon_device *rdev,
5931                              bool enable)
5932 {
5933         int i;
5934         u32 orig, data;
5935
5936         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5937                 orig = data = RREG32(mc_cg_registers[i]);
5938                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5939                         data |= MC_LS_ENABLE;
5940                 else
5941                         data &= ~MC_LS_ENABLE;
5942                 if (data != orig)
5943                         WREG32(mc_cg_registers[i], data);
5944         }
5945 }
5946
5947 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
5948                                bool enable)
5949 {
5950         int i;
5951         u32 orig, data;
5952
5953         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5954                 orig = data = RREG32(mc_cg_registers[i]);
5955                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5956                         data |= MC_CG_ENABLE;
5957                 else
5958                         data &= ~MC_CG_ENABLE;
5959                 if (data != orig)
5960                         WREG32(mc_cg_registers[i], data);
5961         }
5962 }
5963
5964 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
5965                                  bool enable)
5966 {
5967         u32 orig, data;
5968
5969         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5970                 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
5971                 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
5972         } else {
5973                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
5974                 data |= 0xff000000;
5975                 if (data != orig)
5976                         WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
5977
5978                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
5979                 data |= 0xff000000;
5980                 if (data != orig)
5981                         WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
5982         }
5983 }
5984
5985 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
5986                                  bool enable)
5987 {
5988         u32 orig, data;
5989
5990         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
5991                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5992                 data |= 0x100;
5993                 if (orig != data)
5994                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5995
5996                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5997                 data |= 0x100;
5998                 if (orig != data)
5999                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6000         } else {
6001                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6002                 data &= ~0x100;
6003                 if (orig != data)
6004                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6005
6006                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6007                 data &= ~0x100;
6008                 if (orig != data)
6009                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6010         }
6011 }
6012
6013 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6014                                 bool enable)
6015 {
6016         u32 orig, data;
6017
6018         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6019                 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6020                 data = 0xfff;
6021                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6022
6023                 orig = data = RREG32(UVD_CGC_CTRL);
6024                 data |= DCM;
6025                 if (orig != data)
6026                         WREG32(UVD_CGC_CTRL, data);
6027         } else {
6028                 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6029                 data &= ~0xfff;
6030                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6031
6032                 orig = data = RREG32(UVD_CGC_CTRL);
6033                 data &= ~DCM;
6034                 if (orig != data)
6035                         WREG32(UVD_CGC_CTRL, data);
6036         }
6037 }
6038
6039 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6040                                bool enable)
6041 {
6042         u32 orig, data;
6043
6044         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6045
6046         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6047                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6048                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6049         else
6050                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6051                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6052
6053         if (orig != data)
6054                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
6055 }
6056
6057 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6058                                 bool enable)
6059 {
6060         u32 orig, data;
6061
6062         orig = data = RREG32(HDP_HOST_PATH_CNTL);
6063
6064         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6065                 data &= ~CLOCK_GATING_DIS;
6066         else
6067                 data |= CLOCK_GATING_DIS;
6068
6069         if (orig != data)
6070                 WREG32(HDP_HOST_PATH_CNTL, data);
6071 }
6072
6073 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6074                               bool enable)
6075 {
6076         u32 orig, data;
6077
6078         orig = data = RREG32(HDP_MEM_POWER_LS);
6079
6080         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6081                 data |= HDP_LS_ENABLE;
6082         else
6083                 data &= ~HDP_LS_ENABLE;
6084
6085         if (orig != data)
6086                 WREG32(HDP_MEM_POWER_LS, data);
6087 }
6088
6089 void cik_update_cg(struct radeon_device *rdev,
6090                    u32 block, bool enable)
6091 {
6092
6093         if (block & RADEON_CG_BLOCK_GFX) {
6094                 cik_enable_gui_idle_interrupt(rdev, false);
6095                 /* order matters! */
6096                 if (enable) {
6097                         cik_enable_mgcg(rdev, true);
6098                         cik_enable_cgcg(rdev, true);
6099                 } else {
6100                         cik_enable_cgcg(rdev, false);
6101                         cik_enable_mgcg(rdev, false);
6102                 }
6103                 cik_enable_gui_idle_interrupt(rdev, true);
6104         }
6105
6106         if (block & RADEON_CG_BLOCK_MC) {
6107                 if (!(rdev->flags & RADEON_IS_IGP)) {
6108                         cik_enable_mc_mgcg(rdev, enable);
6109                         cik_enable_mc_ls(rdev, enable);
6110                 }
6111         }
6112
6113         if (block & RADEON_CG_BLOCK_SDMA) {
6114                 cik_enable_sdma_mgcg(rdev, enable);
6115                 cik_enable_sdma_mgls(rdev, enable);
6116         }
6117
6118         if (block & RADEON_CG_BLOCK_BIF) {
6119                 cik_enable_bif_mgls(rdev, enable);
6120         }
6121
6122         if (block & RADEON_CG_BLOCK_UVD) {
6123                 if (rdev->has_uvd)
6124                         cik_enable_uvd_mgcg(rdev, enable);
6125         }
6126
6127         if (block & RADEON_CG_BLOCK_HDP) {
6128                 cik_enable_hdp_mgcg(rdev, enable);
6129                 cik_enable_hdp_ls(rdev, enable);
6130         }
6131 }
6132
6133 static void cik_init_cg(struct radeon_device *rdev)
6134 {
6135
6136         cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6137
6138         if (rdev->has_uvd)
6139                 si_init_uvd_internal_cg(rdev);
6140
6141         cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6142                              RADEON_CG_BLOCK_SDMA |
6143                              RADEON_CG_BLOCK_BIF |
6144                              RADEON_CG_BLOCK_UVD |
6145                              RADEON_CG_BLOCK_HDP), true);
6146 }
6147
6148 static void cik_fini_cg(struct radeon_device *rdev)
6149 {
6150         cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6151                              RADEON_CG_BLOCK_SDMA |
6152                              RADEON_CG_BLOCK_BIF |
6153                              RADEON_CG_BLOCK_UVD |
6154                              RADEON_CG_BLOCK_HDP), false);
6155
6156         cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6157 }
6158
6159 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6160                                           bool enable)
6161 {
6162         u32 data, orig;
6163
6164         orig = data = RREG32(RLC_PG_CNTL);
6165         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6166                 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6167         else
6168                 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6169         if (orig != data)
6170                 WREG32(RLC_PG_CNTL, data);
6171 }
6172
6173 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6174                                           bool enable)
6175 {
6176         u32 data, orig;
6177
6178         orig = data = RREG32(RLC_PG_CNTL);
6179         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6180                 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6181         else
6182                 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6183         if (orig != data)
6184                 WREG32(RLC_PG_CNTL, data);
6185 }
6186
6187 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6188 {
6189         u32 data, orig;
6190
6191         orig = data = RREG32(RLC_PG_CNTL);
6192         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6193                 data &= ~DISABLE_CP_PG;
6194         else
6195                 data |= DISABLE_CP_PG;
6196         if (orig != data)
6197                 WREG32(RLC_PG_CNTL, data);
6198 }
6199
6200 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6201 {
6202         u32 data, orig;
6203
6204         orig = data = RREG32(RLC_PG_CNTL);
6205         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6206                 data &= ~DISABLE_GDS_PG;
6207         else
6208                 data |= DISABLE_GDS_PG;
6209         if (orig != data)
6210                 WREG32(RLC_PG_CNTL, data);
6211 }
6212
6213 #define CP_ME_TABLE_SIZE    96
6214 #define CP_ME_TABLE_OFFSET  2048
6215 #define CP_MEC_TABLE_OFFSET 4096
6216
6217 void cik_init_cp_pg_table(struct radeon_device *rdev)
6218 {
6219         const __be32 *fw_data;
6220         volatile u32 *dst_ptr;
6221         int me, i, max_me = 4;
6222         u32 bo_offset = 0;
6223         u32 table_offset;
6224
6225         if (rdev->family == CHIP_KAVERI)
6226                 max_me = 5;
6227
6228         if (rdev->rlc.cp_table_ptr == NULL)
6229                 return;
6230
6231         /* write the cp table buffer */
6232         dst_ptr = rdev->rlc.cp_table_ptr;
6233         for (me = 0; me < max_me; me++) {
6234                 if (me == 0) {
6235                         fw_data = (const __be32 *)rdev->ce_fw->data;
6236                         table_offset = CP_ME_TABLE_OFFSET;
6237                 } else if (me == 1) {
6238                         fw_data = (const __be32 *)rdev->pfp_fw->data;
6239                         table_offset = CP_ME_TABLE_OFFSET;
6240                 } else if (me == 2) {
6241                         fw_data = (const __be32 *)rdev->me_fw->data;
6242                         table_offset = CP_ME_TABLE_OFFSET;
6243                 } else {
6244                         fw_data = (const __be32 *)rdev->mec_fw->data;
6245                         table_offset = CP_MEC_TABLE_OFFSET;
6246                 }
6247
6248                 for (i = 0; i < CP_ME_TABLE_SIZE; i++) {
6249                         dst_ptr[bo_offset + i] = cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6250                 }
6251                 bo_offset += CP_ME_TABLE_SIZE;
6252         }
6253 }
6254
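/*
 * Editor's note: the resulting cp_table layout implied by the loop
 * above, in dwords (CP_ME_TABLE_SIZE = 96 dwords per entry):
 *
 *   [  0.. 95]  CE table   (ce_fw  at CP_ME_TABLE_OFFSET)
 *   [ 96..191]  PFP table  (pfp_fw at CP_ME_TABLE_OFFSET)
 *   [192..287]  ME table   (me_fw  at CP_ME_TABLE_OFFSET)
 *   [288..383]  MEC table  (mec_fw at CP_MEC_TABLE_OFFSET)
 *   [384..479]  second MEC entry (also mec_fw), KAVERI only (max_me = 5)
 */
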
6255 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6256                                 bool enable)
6257 {
6258         u32 data, orig;
6259
6260         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6261                 orig = data = RREG32(RLC_PG_CNTL);
6262                 data |= GFX_PG_ENABLE;
6263                 if (orig != data)
6264                         WREG32(RLC_PG_CNTL, data);
6265
6266                 orig = data = RREG32(RLC_AUTO_PG_CTRL);
6267                 data |= AUTO_PG_EN;
6268                 if (orig != data)
6269                         WREG32(RLC_AUTO_PG_CTRL, data);
6270         } else {
6271                 orig = data = RREG32(RLC_PG_CNTL);
6272                 data &= ~GFX_PG_ENABLE;
6273                 if (orig != data)
6274                         WREG32(RLC_PG_CNTL, data);
6275
6276                 orig = data = RREG32(RLC_AUTO_PG_CTRL);
6277                 data &= ~AUTO_PG_EN;
6278                 if (orig != data)
6279                         WREG32(RLC_AUTO_PG_CTRL, data);
6280
6281                 data = RREG32(DB_RENDER_CONTROL);
6282         }
6283 }
6284
6285 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6286 {
6287         u32 mask = 0, tmp, tmp1;
6288         int i;
6289
6290         cik_select_se_sh(rdev, se, sh);
6291         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6292         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6293         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6294
6295         tmp &= 0xffff0000;
6296
6297         tmp |= tmp1;
6298         tmp >>= 16;
6299
6300         for (i = 0; i < rdev->config.cik.max_cu_per_sh; i++) {
6301                 mask <<= 1;
6302                 mask |= 1;
6303         }
6304
6305         return (~tmp) & mask;
6306 }
6307
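/*
 * Editor's note: a hypothetical worked example of the decode above,
 * assuming max_cu_per_sh = 8 (so mask = 0xff).  If the registers read
 * back as CC_GC_SHADER_ARRAY_CONFIG = 0x00030000 and
 * GC_USER_SHADER_ARRAY_CONFIG = 0x00010000, then after the mask, OR
 * and shift tmp = 0x0003, and the function returns (~0x0003) & 0xff =
 * 0xfc, i.e. CUs 2-7 active and CUs 0-1 disabled.  The register values
 * are illustrative only.
 */
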
6308 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6309 {
6310         u32 i, j, k, active_cu_number = 0;
6311         u32 mask, counter, cu_bitmap;
6312         u32 tmp = 0;
6313
6314         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6315                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6316                         mask = 1;
6317                         cu_bitmap = 0;
6318                         counter = 0;
6319                         for (k = 0; k < rdev->config.cik.max_cu_per_sh; k++) {
6320                                 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6321                                         if (counter < 2)
6322                                                 cu_bitmap |= mask;
6323                                         counter++;
6324                                 }
6325                                 mask <<= 1;
6326                         }
6327
6328                         active_cu_number += counter;
6329                         tmp |= (cu_bitmap << (i * 16 + j * 8));
6330                 }
6331         }
6332
6333         WREG32(RLC_PG_AO_CU_MASK, tmp);
6334
6335         tmp = RREG32(RLC_MAX_PG_CU);
6336         tmp &= ~MAX_PU_CU_MASK;
6337         tmp |= MAX_PU_CU(active_cu_number);
6338         WREG32(RLC_MAX_PG_CU, tmp);
6339 }
6340
6341 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6342                                        bool enable)
6343 {
6344         u32 data, orig;
6345
6346         orig = data = RREG32(RLC_PG_CNTL);
6347         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6348                 data |= STATIC_PER_CU_PG_ENABLE;
6349         else
6350                 data &= ~STATIC_PER_CU_PG_ENABLE;
6351         if (orig != data)
6352                 WREG32(RLC_PG_CNTL, data);
6353 }
6354
6355 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6356                                         bool enable)
6357 {
6358         u32 data, orig;
6359
6360         orig = data = RREG32(RLC_PG_CNTL);
6361         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6362                 data |= DYN_PER_CU_PG_ENABLE;
6363         else
6364                 data &= ~DYN_PER_CU_PG_ENABLE;
6365         if (orig != data)
6366                 WREG32(RLC_PG_CNTL, data);
6367 }
6368
6369 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6370 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6371
6372 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
6373 {
6374         u32 data, orig;
6375         u32 i;
6376
6377         if (rdev->rlc.cs_data) {
6378                 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6379                 WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
6380                 WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
6381                 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
6382         } else {
6383                 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6384                 for (i = 0; i < 3; i++)
6385                         WREG32(RLC_GPM_SCRATCH_DATA, 0);
6386         }
6387         if (rdev->rlc.reg_list) {
6388                 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
6389                 for (i = 0; i < rdev->rlc.reg_list_size; i++)
6390                         WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
6391         }
6392
6393         orig = data = RREG32(RLC_PG_CNTL);
6394         data |= GFX_PG_SRC;
6395         if (orig != data)
6396                 WREG32(RLC_PG_CNTL, data);
6397
6398         WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
6399         WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
6400
6401         data = RREG32(CP_RB_WPTR_POLL_CNTL);
6402         data &= ~IDLE_POLL_COUNT_MASK;
6403         data |= IDLE_POLL_COUNT(0x60);
6404         WREG32(CP_RB_WPTR_POLL_CNTL, data);
6405
6406         data = 0x10101010;
6407         WREG32(RLC_PG_DELAY, data);
6408
6409         data = RREG32(RLC_PG_DELAY_2);
6410         data &= ~0xff;
6411         data |= 0x3;
6412         WREG32(RLC_PG_DELAY_2, data);
6413
6414         data = RREG32(RLC_AUTO_PG_CTRL);
6415         data &= ~GRBM_REG_SGIT_MASK;
6416         data |= GRBM_REG_SGIT(0x700);
6417         WREG32(RLC_AUTO_PG_CTRL, data);
6418
6419 }
6420
6421 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
6422 {
6423         cik_enable_gfx_cgpg(rdev, enable);
6424         cik_enable_gfx_static_mgpg(rdev, enable);
6425         cik_enable_gfx_dynamic_mgpg(rdev, enable);
6426 }
6427
6428 u32 cik_get_csb_size(struct radeon_device *rdev)
6429 {
6430         u32 count = 0;
6431         const struct cs_section_def *sect = NULL;
6432         const struct cs_extent_def *ext = NULL;
6433
6434         if (rdev->rlc.cs_data == NULL)
6435                 return 0;
6436
6437         /* begin clear state */
6438         count += 2;
6439         /* context control state */
6440         count += 3;
6441
6442         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6443                 for (ext = sect->section; ext->extent != NULL; ++ext) {
6444                         if (sect->id == SECT_CONTEXT)
6445                                 count += 2 + ext->reg_count;
6446                         else
6447                                 return 0;
6448                 }
6449         }
6450         /* pa_sc_raster_config/pa_sc_raster_config1 */
6451         count += 4;
6452         /* end clear state */
6453         count += 2;
6454         /* clear state */
6455         count += 2;
6456
6457         return count;
6458 }
6459
6460 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6461 {
6462         u32 count = 0, i;
6463         const struct cs_section_def *sect = NULL;
6464         const struct cs_extent_def *ext = NULL;
6465
6466         if (rdev->rlc.cs_data == NULL)
6467                 return;
6468         if (buffer == NULL)
6469                 return;
6470
6471         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6472         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6473
6474         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6475         buffer[count++] = cpu_to_le32(0x80000000);
6476         buffer[count++] = cpu_to_le32(0x80000000);
6477
6478         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6479                 for (ext = sect->section; ext->extent != NULL; ++ext) {
6480                         if (sect->id == SECT_CONTEXT) {
6481                                 buffer[count++] =
6482                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
6483                                 buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
6484                                 for (i = 0; i < ext->reg_count; i++)
6485                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
6486                         } else {
6487                                 return;
6488                         }
6489                 }
6490         }
6491
6492         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
6493         buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
6494         switch (rdev->family) {
6495         case CHIP_BONAIRE:
6496                 buffer[count++] = cpu_to_le32(0x16000012);
6497                 buffer[count++] = cpu_to_le32(0x00000000);
6498                 break;
6499         case CHIP_KAVERI:
6500                 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6501                 buffer[count++] = cpu_to_le32(0x00000000);
6502                 break;
6503         case CHIP_KABINI:
6504                 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6505                 buffer[count++] = cpu_to_le32(0x00000000);
6506                 break;
6507         case CHIP_HAWAII:
6508                 buffer[count++] = cpu_to_le32(0x3a00161a);
6509                 buffer[count++] = cpu_to_le32(0x0000002e);
6510                 break;
6511         default:
6512                 buffer[count++] = cpu_to_le32(0x00000000);
6513                 buffer[count++] = cpu_to_le32(0x00000000);
6514                 break;
6515         }
6516
6517         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6518         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
6519
6520         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
6521         buffer[count++] = cpu_to_le32(0);
6522 }
6523
6524 static void cik_init_pg(struct radeon_device *rdev)
6525 {
6526         if (rdev->pg_flags) {
6527                 cik_enable_sck_slowdown_on_pu(rdev, true);
6528                 cik_enable_sck_slowdown_on_pd(rdev, true);
6529                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6530                         cik_init_gfx_cgpg(rdev);
6531                         cik_enable_cp_pg(rdev, true);
6532                         cik_enable_gds_pg(rdev, true);
6533                 }
6534                 cik_init_ao_cu_mask(rdev);
6535                 cik_update_gfx_pg(rdev, true);
6536         }
6537 }
6538
6539 static void cik_fini_pg(struct radeon_device *rdev)
6540 {
6541         if (rdev->pg_flags) {
6542                 cik_update_gfx_pg(rdev, false);
6543                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6544                         cik_enable_cp_pg(rdev, false);
6545                         cik_enable_gds_pg(rdev, false);
6546                 }
6547         }
6548 }
6549
6550 /*
6551  * Interrupts
6552  * Starting with r6xx, interrupts are handled via a ring buffer.
6553  * Ring buffers are areas of GPU accessible memory that the GPU
6554  * writes interrupt vectors into and the host reads vectors out of.
6555  * There is a rptr (read pointer) that determines where the
6556  * host is currently reading, and a wptr (write pointer)
6557  * which determines where the GPU has written.  When the
6558  * pointers are equal, the ring is idle.  When the GPU
6559  * writes vectors to the ring buffer, it increments the
6560  * wptr.  When there is an interrupt, the host then starts
6561  * fetching vectors and processing them until the pointers are
6562  * equal again, at which point it updates the rptr.
6563  */
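
/* A minimal host-side sketch (not driver code, kept under "#if 0" so it is
 * never compiled) of the rptr/wptr scheme described above.  ih_read_wptr()
 * and ih_handle_vector() are hypothetical helpers used only for illustration;
 * the real implementation is cik_irq_process() further below.
 */
#if 0
static void example_drain_ih_ring(struct radeon_device *rdev)
{
        u32 wptr = ih_read_wptr(rdev);  /* hypothetical: register or writeback copy */
        u32 rptr = rdev->ih.rptr;

        while (rptr != wptr) {
                /* each IV entry is 16 bytes (128 bits) */
                ih_handle_vector(rdev, rptr / 4);       /* hypothetical decode/dispatch */
                rptr = (rptr + 16) & rdev->ih.ptr_mask;
        }
        /* update the rptr so the GPU knows how far the host has read */
        rdev->ih.rptr = rptr;
        WREG32(IH_RB_RPTR, rptr);
}
#endif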
6564
6565 /**
6566  * cik_enable_interrupts - Enable the interrupt ring buffer
6567  *
6568  * @rdev: radeon_device pointer
6569  *
6570  * Enable the interrupt ring buffer (CIK).
6571  */
6572 static void cik_enable_interrupts(struct radeon_device *rdev)
6573 {
6574         u32 ih_cntl = RREG32(IH_CNTL);
6575         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6576
6577         ih_cntl |= ENABLE_INTR;
6578         ih_rb_cntl |= IH_RB_ENABLE;
6579         WREG32(IH_CNTL, ih_cntl);
6580         WREG32(IH_RB_CNTL, ih_rb_cntl);
6581         rdev->ih.enabled = true;
6582 }
6583
6584 /**
6585  * cik_disable_interrupts - Disable the interrupt ring buffer
6586  *
6587  * @rdev: radeon_device pointer
6588  *
6589  * Disable the interrupt ring buffer (CIK).
6590  */
6591 static void cik_disable_interrupts(struct radeon_device *rdev)
6592 {
6593         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6594         u32 ih_cntl = RREG32(IH_CNTL);
6595
6596         ih_rb_cntl &= ~IH_RB_ENABLE;
6597         ih_cntl &= ~ENABLE_INTR;
6598         WREG32(IH_RB_CNTL, ih_rb_cntl);
6599         WREG32(IH_CNTL, ih_cntl);
6600         /* set rptr, wptr to 0 */
6601         WREG32(IH_RB_RPTR, 0);
6602         WREG32(IH_RB_WPTR, 0);
6603         rdev->ih.enabled = false;
6604         rdev->ih.rptr = 0;
6605 }
6606
6607 /**
6608  * cik_disable_interrupt_state - Disable all interrupt sources
6609  *
6610  * @rdev: radeon_device pointer
6611  *
6612  * Clear all interrupt enable bits used by the driver (CIK).
6613  */
6614 static void cik_disable_interrupt_state(struct radeon_device *rdev)
6615 {
6616         u32 tmp;
6617
6618         /* gfx ring */
6619         tmp = RREG32(CP_INT_CNTL_RING0) &
6620                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6621         WREG32(CP_INT_CNTL_RING0, tmp);
6622         /* sdma */
6623         tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6624         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
6625         tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6626         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
6627         /* compute queues */
6628         WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
6629         WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
6630         WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
6631         WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
6632         WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
6633         WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
6634         WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
6635         WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
6636         /* grbm */
6637         WREG32(GRBM_INT_CNTL, 0);
6638         /* vline/vblank, etc. */
6639         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6640         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6641         if (rdev->num_crtc >= 4) {
6642                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6643                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6644         }
6645         if (rdev->num_crtc >= 6) {
6646                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6647                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6648         }
6649
6650         /* dac hotplug */
6651         WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
6652
6653         /* digital hotplug */
6654         tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6655         WREG32(DC_HPD1_INT_CONTROL, tmp);
6656         tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6657         WREG32(DC_HPD2_INT_CONTROL, tmp);
6658         tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6659         WREG32(DC_HPD3_INT_CONTROL, tmp);
6660         tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6661         WREG32(DC_HPD4_INT_CONTROL, tmp);
6662         tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6663         WREG32(DC_HPD5_INT_CONTROL, tmp);
6664         tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6665         WREG32(DC_HPD6_INT_CONTROL, tmp);
6666
6667 }
6668
6669 /**
6670  * cik_irq_init - init and enable the interrupt ring
6671  *
6672  * @rdev: radeon_device pointer
6673  *
6674  * Allocate a ring buffer for the interrupt controller,
6675  * enable the RLC, disable interrupts, set up the IH
6676  * ring buffer and enable it (CIK).
6677  * Called at device load and resume.
6678  * Returns 0 for success, errors for failure.
6679  */
6680 static int cik_irq_init(struct radeon_device *rdev)
6681 {
6682         int ret = 0;
6683         int rb_bufsz;
6684         u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
6685
6686         /* allocate ring */
6687         ret = r600_ih_ring_alloc(rdev);
6688         if (ret)
6689                 return ret;
6690
6691         /* disable irqs */
6692         cik_disable_interrupts(rdev);
6693
6694         /* init rlc */
6695         ret = cik_rlc_resume(rdev);
6696         if (ret) {
6697                 r600_ih_ring_fini(rdev);
6698                 return ret;
6699         }
6700
6701         /* setup interrupt control */
6702         /* XXX this should actually be a bus address, not an MC address. same on older asics */
6703         WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
6704         interrupt_cntl = RREG32(INTERRUPT_CNTL);
6705         /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6706          * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6707          */
6708         interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6709         /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6710         interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6711         WREG32(INTERRUPT_CNTL, interrupt_cntl);
6712
6713         WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6714         rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
6715
6716         ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6717                       IH_WPTR_OVERFLOW_CLEAR |
6718                       (rb_bufsz << 1));
6719
6720         if (rdev->wb.enabled)
6721                 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6722
6723         /* set the writeback address whether it's enabled or not */
6724         WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6725         WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6726
6727         WREG32(IH_RB_CNTL, ih_rb_cntl);
6728
6729         /* set rptr, wptr to 0 */
6730         WREG32(IH_RB_RPTR, 0);
6731         WREG32(IH_RB_WPTR, 0);
6732
6733         /* Default settings for IH_CNTL (disabled at first) */
6734         ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6735         /* RPTR_REARM only works if msi's are enabled */
6736         if (rdev->msi_enabled)
6737                 ih_cntl |= RPTR_REARM;
6738         WREG32(IH_CNTL, ih_cntl);
6739
6740         /* force the active interrupt state to all disabled */
6741         cik_disable_interrupt_state(rdev);
6742
6743         pci_set_master(rdev->pdev);
6744
6745         /* enable irqs */
6746         cik_enable_interrupts(rdev);
6747
6748         return ret;
6749 }
6750
6751 /**
6752  * cik_irq_set - enable/disable interrupt sources
6753  *
6754  * @rdev: radeon_device pointer
6755  *
6756  * Enable interrupt sources on the GPU (vblanks, hpd,
6757  * etc.) (CIK).
6758  * Returns 0 for success, errors for failure.
6759  */
6760 int cik_irq_set(struct radeon_device *rdev)
6761 {
6762         u32 cp_int_cntl;
6763         u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
6764         u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
6765         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6766         u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
6767         u32 grbm_int_cntl = 0;
6768         u32 dma_cntl, dma_cntl1;
6769         u32 thermal_int;
6770
6771         if (!rdev->irq.installed) {
6772                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6773                 return -EINVAL;
6774         }
6775         /* don't enable anything if the ih is disabled */
6776         if (!rdev->ih.enabled) {
6777                 cik_disable_interrupts(rdev);
6778                 /* force the active interrupt state to all disabled */
6779                 cik_disable_interrupt_state(rdev);
6780                 return 0;
6781         }
6782
6783         cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6784                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6785         cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
6786
6787         hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
6788         hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
6789         hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
6790         hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
6791         hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
6792         hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
6793
6794         dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6795         dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6796
6797         cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6798         cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6799         cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6800         cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6801         cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6802         cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6803         cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6804         cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6805
6806         if (rdev->flags & RADEON_IS_IGP)
6807                 thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
6808                         ~(THERM_INTH_MASK | THERM_INTL_MASK);
6809         else
6810                 thermal_int = RREG32_SMC(CG_THERMAL_INT) &
6811                         ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6812
6813         /* enable CP interrupts on all rings */
6814         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6815                 DRM_DEBUG("cik_irq_set: sw int gfx\n");
6816                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6817         }
6818         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6819                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6820                 DRM_DEBUG("cik_irq_set: sw int cp1\n");
6821                 if (ring->me == 1) {
6822                         switch (ring->pipe) {
6823                         case 0:
6824                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6825                                 break;
6826                         case 1:
6827                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6828                                 break;
6829                         case 2:
6830                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6831                                 break;
6832                         case 3:
6833                                 cp_m1p3 |= TIME_STAMP_INT_ENABLE;
6834                                 break;
6835                         default:
6836                                 DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6837                                 break;
6838                         }
6839                 } else if (ring->me == 2) {
6840                         switch (ring->pipe) {
6841                         case 0:
6842                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6843                                 break;
6844                         case 1:
6845                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6846                                 break;
6847                         case 2:
6848                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6849                                 break;
6850                         case 3:
6851                                 cp_m2p3 |= TIME_STAMP_INT_ENABLE;
6852                                 break;
6853                         default:
6854                                 DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6855                                 break;
6856                         }
6857                 } else {
6858                         DRM_DEBUG("cik_irq_set: sw int cp1 invalid me %d\n", ring->me);
6859                 }
6860         }
6861         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6862                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6863                 DRM_DEBUG("cik_irq_set: sw int cp2\n");
6864                 if (ring->me == 1) {
6865                         switch (ring->pipe) {
6866                         case 0:
6867                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6868                                 break;
6869                         case 1:
6870                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6871                                 break;
6872                         case 2:
6873                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6874                                 break;
6875                         case 3:
6876                                 cp_m1p3 |= TIME_STAMP_INT_ENABLE;
6877                                 break;
6878                         default:
6879                                 DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6880                                 break;
6881                         }
6882                 } else if (ring->me == 2) {
6883                         switch (ring->pipe) {
6884                         case 0:
6885                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6886                                 break;
6887                         case 1:
6888                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6889                                 break;
6890                         case 2:
6891                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6892                                 break;
6893                         case 3:
6894                                 cp_m2p3 |= TIME_STAMP_INT_ENABLE;
6895                                 break;
6896                         default:
6897                                 DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6898                                 break;
6899                         }
6900                 } else {
6901                         DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me);
6902                 }
6903         }
6904
6905         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6906                 DRM_DEBUG("cik_irq_set: sw int dma\n");
6907                 dma_cntl |= TRAP_ENABLE;
6908         }
6909
6910         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6911                 DRM_DEBUG("cik_irq_set: sw int dma1\n");
6912                 dma_cntl1 |= TRAP_ENABLE;
6913         }
6914
6915         if (rdev->irq.crtc_vblank_int[0] ||
6916             atomic_read(&rdev->irq.pflip[0])) {
6917                 DRM_DEBUG("cik_irq_set: vblank 0\n");
6918                 crtc1 |= VBLANK_INTERRUPT_MASK;
6919         }
6920         if (rdev->irq.crtc_vblank_int[1] ||
6921             atomic_read(&rdev->irq.pflip[1])) {
6922                 DRM_DEBUG("cik_irq_set: vblank 1\n");
6923                 crtc2 |= VBLANK_INTERRUPT_MASK;
6924         }
6925         if (rdev->irq.crtc_vblank_int[2] ||
6926             atomic_read(&rdev->irq.pflip[2])) {
6927                 DRM_DEBUG("cik_irq_set: vblank 2\n");
6928                 crtc3 |= VBLANK_INTERRUPT_MASK;
6929         }
6930         if (rdev->irq.crtc_vblank_int[3] ||
6931             atomic_read(&rdev->irq.pflip[3])) {
6932                 DRM_DEBUG("cik_irq_set: vblank 3\n");
6933                 crtc4 |= VBLANK_INTERRUPT_MASK;
6934         }
6935         if (rdev->irq.crtc_vblank_int[4] ||
6936             atomic_read(&rdev->irq.pflip[4])) {
6937                 DRM_DEBUG("cik_irq_set: vblank 4\n");
6938                 crtc5 |= VBLANK_INTERRUPT_MASK;
6939         }
6940         if (rdev->irq.crtc_vblank_int[5] ||
6941             atomic_read(&rdev->irq.pflip[5])) {
6942                 DRM_DEBUG("cik_irq_set: vblank 5\n");
6943                 crtc6 |= VBLANK_INTERRUPT_MASK;
6944         }
6945         if (rdev->irq.hpd[0]) {
6946                 DRM_DEBUG("cik_irq_set: hpd 1\n");
6947                 hpd1 |= DC_HPDx_INT_EN;
6948         }
6949         if (rdev->irq.hpd[1]) {
6950                 DRM_DEBUG("cik_irq_set: hpd 2\n");
6951                 hpd2 |= DC_HPDx_INT_EN;
6952         }
6953         if (rdev->irq.hpd[2]) {
6954                 DRM_DEBUG("cik_irq_set: hpd 3\n");
6955                 hpd3 |= DC_HPDx_INT_EN;
6956         }
6957         if (rdev->irq.hpd[3]) {
6958                 DRM_DEBUG("cik_irq_set: hpd 4\n");
6959                 hpd4 |= DC_HPDx_INT_EN;
6960         }
6961         if (rdev->irq.hpd[4]) {
6962                 DRM_DEBUG("cik_irq_set: hpd 5\n");
6963                 hpd5 |= DC_HPDx_INT_EN;
6964         }
6965         if (rdev->irq.hpd[5]) {
6966                 DRM_DEBUG("cik_irq_set: hpd 6\n");
6967                 hpd6 |= DC_HPDx_INT_EN;
6968         }
6969
6970         if (rdev->irq.dpm_thermal) {
6971                 DRM_DEBUG("dpm thermal\n");
6972                 if (rdev->flags & RADEON_IS_IGP)
6973                         thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
6974                 else
6975                         thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
6976         }
6977
6978         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6979
6980         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
6981         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
6982
6983         WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
6984         WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
6985         WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
6986         WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
6987         WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
6988         WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
6989         WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
6990         WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
6991
6992         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6993
6994         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
6995         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
6996         if (rdev->num_crtc >= 4) {
6997                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
6998                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
6999         }
7000         if (rdev->num_crtc >= 6) {
7001                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7002                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7003         }
7004
7005         WREG32(DC_HPD1_INT_CONTROL, hpd1);
7006         WREG32(DC_HPD2_INT_CONTROL, hpd2);
7007         WREG32(DC_HPD3_INT_CONTROL, hpd3);
7008         WREG32(DC_HPD4_INT_CONTROL, hpd4);
7009         WREG32(DC_HPD5_INT_CONTROL, hpd5);
7010         WREG32(DC_HPD6_INT_CONTROL, hpd6);
7011
7012         if (rdev->flags & RADEON_IS_IGP)
7013                 WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
7014         else
7015                 WREG32_SMC(CG_THERMAL_INT, thermal_int);
7016
7017         return 0;
7018 }
7019
7020 /**
7021  * cik_irq_ack - ack interrupt sources
7022  *
7023  * @rdev: radeon_device pointer
7024  *
7025  * Ack interrupt sources on the GPU (vblanks, hpd,
7026  * etc.) (CIK).  Certain interrupt sources are sw
7027  * generated and do not require an explicit ack.
7028  */
7029 static inline void cik_irq_ack(struct radeon_device *rdev)
7030 {
7031         u32 tmp;
7032
7033         rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7034         rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7035         rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7036         rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7037         rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7038         rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7039         rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7040
7041         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7042                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7043         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7044                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7045         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7046                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7047         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7048                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7049
7050         if (rdev->num_crtc >= 4) {
7051                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7052                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7053                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7054                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7055                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7056                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7057                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7058                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7059         }
7060
7061         if (rdev->num_crtc >= 6) {
7062                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7063                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7064                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7065                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7066                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7067                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7068                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7069                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7070         }
7071
7072         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7073                 tmp = RREG32(DC_HPD1_INT_CONTROL);
7074                 tmp |= DC_HPDx_INT_ACK;
7075                 WREG32(DC_HPD1_INT_CONTROL, tmp);
7076         }
7077         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7078                 tmp = RREG32(DC_HPD2_INT_CONTROL);
7079                 tmp |= DC_HPDx_INT_ACK;
7080                 WREG32(DC_HPD2_INT_CONTROL, tmp);
7081         }
7082         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7083                 tmp = RREG32(DC_HPD3_INT_CONTROL);
7084                 tmp |= DC_HPDx_INT_ACK;
7085                 WREG32(DC_HPD3_INT_CONTROL, tmp);
7086         }
7087         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7088                 tmp = RREG32(DC_HPD4_INT_CONTROL);
7089                 tmp |= DC_HPDx_INT_ACK;
7090                 WREG32(DC_HPD4_INT_CONTROL, tmp);
7091         }
7092         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7093                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7094                 tmp |= DC_HPDx_INT_ACK;
7095                 WREG32(DC_HPD5_INT_CONTROL, tmp);
7096         }
7097         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7098                 tmp = RREG32(DC_HPD6_INT_CONTROL);
7099                 tmp |= DC_HPDx_INT_ACK;
7100                 WREG32(DC_HPD6_INT_CONTROL, tmp);
7101         }
7102 }
7103
7104 /**
7105  * cik_irq_disable - disable interrupts
7106  *
7107  * @rdev: radeon_device pointer
7108  *
7109  * Disable interrupts on the hw (CIK).
7110  */
7111 static void cik_irq_disable(struct radeon_device *rdev)
7112 {
7113         cik_disable_interrupts(rdev);
7114         /* Wait and acknowledge irq */
7115         mdelay(1);
7116         cik_irq_ack(rdev);
7117         cik_disable_interrupt_state(rdev);
7118 }
7119
7120 /**
7121  * cik_irq_suspend - disable interrupts for suspend
7122  *
7123  * @rdev: radeon_device pointer
7124  *
7125  * Disable interrupts and stop the RLC (CIK).
7126  * Used for suspend.
7127  */
7128 static void cik_irq_suspend(struct radeon_device *rdev)
7129 {
7130         cik_irq_disable(rdev);
7131         cik_rlc_stop(rdev);
7132 }
7133
7134 /**
7135  * cik_irq_fini - tear down interrupt support
7136  *
7137  * @rdev: radeon_device pointer
7138  *
7139  * Disable interrupts on the hw and free the IH ring
7140  * buffer (CIK).
7141  * Used for driver unload.
7142  */
7143 static void cik_irq_fini(struct radeon_device *rdev)
7144 {
7145         cik_irq_suspend(rdev);
7146         r600_ih_ring_fini(rdev);
7147 }
7148
7149 /**
7150  * cik_get_ih_wptr - get the IH ring buffer wptr
7151  *
7152  * @rdev: radeon_device pointer
7153  *
7154  * Get the IH ring buffer wptr from either the register
7155  * or the writeback memory buffer (CIK).  Also check for
7156  * ring buffer overflow and deal with it.
7157  * Used by cik_irq_process().
7158  * Returns the value of the wptr.
7159  */
7160 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7161 {
7162         u32 wptr, tmp;
7163
7164         if (rdev->wb.enabled)
7165                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7166         else
7167                 wptr = RREG32(IH_RB_WPTR);
7168
7169         if (wptr & RB_OVERFLOW) {
7170                 /* When a ring buffer overflow happens, start parsing interrupts
7171                  * from the last vector that was not overwritten (wptr + 16).
7172                  * Hopefully this should allow us to catch up.
7173                  */
7174                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
7175                         wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7176                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7177                 tmp = RREG32(IH_RB_CNTL);
7178                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
7179                 WREG32(IH_RB_CNTL, tmp);
7180         }
7181         return (wptr & rdev->ih.ptr_mask);
7182 }
7183
7184 /*        CIK IV Ring
7185  * Each IV ring entry is 128 bits:
7186  * [7:0]    - interrupt source id
7187  * [31:8]   - reserved
7188  * [59:32]  - interrupt source data
7189  * [63:60]  - reserved
7190  * [71:64]  - RINGID
7191  *            CP:
7192  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7193  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7194  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7195  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7196  *            PIPE_ID - ME0 0=3D
7197  *                    - ME1&2 compute dispatcher (4 pipes each)
7198  *            SDMA:
7199  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7200  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7201  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7202  * [79:72]  - VMID
7203  * [95:80]  - PASID
7204  * [127:96] - reserved
7205  */
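
/* An illustrative decode (not driver code, kept under "#if 0" so it is never
 * compiled) of a single IV ring entry laid out as above, read as four
 * little-endian dwords.  The dword[] parameter and the function itself are
 * hypothetical; the real decode is done inline in cik_irq_process() below.
 */
#if 0
static void example_decode_iv_entry(const u32 dword[4])
{
        u32 src_id   = le32_to_cpu(dword[0]) & 0xff;            /* [7:0]   source id */
        u32 src_data = le32_to_cpu(dword[1]) & 0xfffffff;       /* [59:32] source data */
        u32 ring_id  = le32_to_cpu(dword[2]) & 0xff;            /* [71:64] RINGID */
        u32 vmid     = (le32_to_cpu(dword[2]) >> 8) & 0xff;     /* [79:72] VMID */
        u32 pasid    = (le32_to_cpu(dword[2]) >> 16) & 0xffff;  /* [95:80] PASID */
        /* for CP sources, RINGID packs ME_ID[1:0], PIPE_ID[1:0], QUEUE_ID[2:0] */
        u32 me_id    = (ring_id & 0x60) >> 5;
        u32 pipe_id  = (ring_id & 0x18) >> 3;
        u32 queue_id = ring_id & 0x7;
}
#endif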
7206 /**
7207  * cik_irq_process - interrupt handler
7208  *
7209  * @rdev: radeon_device pointer
7210  *
7211  * Interrupt handler (CIK).  Walk the IH ring,
7212  * ack interrupts and schedule work to handle
7213  * interrupt events.
7214  * Returns irq process return code.
7215  */
7216 int cik_irq_process(struct radeon_device *rdev)
7217 {
7218         struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7219         struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7220         u32 wptr;
7221         u32 rptr;
7222         u32 src_id, src_data, ring_id;
7223         u8 me_id, pipe_id, queue_id;
7224         u32 ring_index;
7225         bool queue_hotplug = false;
7226         bool queue_reset = false;
7227         u32 addr, status, mc_client;
7228         bool queue_thermal = false;
7229
7230         if (!rdev->ih.enabled || rdev->shutdown)
7231                 return IRQ_NONE;
7232
7233         wptr = cik_get_ih_wptr(rdev);
7234
7235 restart_ih:
7236         /* is somebody else already processing irqs? */
7237         if (atomic_xchg(&rdev->ih.lock, 1))
7238                 return IRQ_NONE;
7239
7240         rptr = rdev->ih.rptr;
7241         DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7242
7243         /* Order reading of wptr vs. reading of IH ring data */
7244         rmb();
7245
7246         /* display interrupts */
7247         cik_irq_ack(rdev);
7248
7249         while (rptr != wptr) {
7250                 /* wptr/rptr are in bytes! */
7251                 ring_index = rptr / 4;
7252                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7253                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7254                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7255
7256                 switch (src_id) {
7257                 case 1: /* D1 vblank/vline */
7258                         switch (src_data) {
7259                         case 0: /* D1 vblank */
7260                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
7261                                         if (rdev->irq.crtc_vblank_int[0]) {
7262                                                 drm_handle_vblank(rdev->ddev, 0);
7263                                                 rdev->pm.vblank_sync = true;
7264                                                 wake_up(&rdev->irq.vblank_queue);
7265                                         }
7266                                         if (atomic_read(&rdev->irq.pflip[0]))
7267                                                 radeon_crtc_handle_flip(rdev, 0);
7268                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7269                                         DRM_DEBUG("IH: D1 vblank\n");
7270                                 }
7271                                 break;
7272                         case 1: /* D1 vline */
7273                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
7274                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7275                                         DRM_DEBUG("IH: D1 vline\n");
7276                                 }
7277                                 break;
7278                         default:
7279                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7280                                 break;
7281                         }
7282                         break;
7283                 case 2: /* D2 vblank/vline */
7284                         switch (src_data) {
7285                         case 0: /* D2 vblank */
7286                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
7287                                         if (rdev->irq.crtc_vblank_int[1]) {
7288                                                 drm_handle_vblank(rdev->ddev, 1);
7289                                                 rdev->pm.vblank_sync = true;
7290                                                 wake_up(&rdev->irq.vblank_queue);
7291                                         }
7292                                         if (atomic_read(&rdev->irq.pflip[1]))
7293                                                 radeon_crtc_handle_flip(rdev, 1);
7294                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7295                                         DRM_DEBUG("IH: D2 vblank\n");
7296                                 }
7297                                 break;
7298                         case 1: /* D2 vline */
7299                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
7300                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7301                                         DRM_DEBUG("IH: D2 vline\n");
7302                                 }
7303                                 break;
7304                         default:
7305                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7306                                 break;
7307                         }
7308                         break;
7309                 case 3: /* D3 vblank/vline */
7310                         switch (src_data) {
7311                         case 0: /* D3 vblank */
7312                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
7313                                         if (rdev->irq.crtc_vblank_int[2]) {
7314                                                 drm_handle_vblank(rdev->ddev, 2);
7315                                                 rdev->pm.vblank_sync = true;
7316                                                 wake_up(&rdev->irq.vblank_queue);
7317                                         }
7318                                         if (atomic_read(&rdev->irq.pflip[2]))
7319                                                 radeon_crtc_handle_flip(rdev, 2);
7320                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7321                                         DRM_DEBUG("IH: D3 vblank\n");
7322                                 }
7323                                 break;
7324                         case 1: /* D3 vline */
7325                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
7326                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7327                                         DRM_DEBUG("IH: D3 vline\n");
7328                                 }
7329                                 break;
7330                         default:
7331                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7332                                 break;
7333                         }
7334                         break;
7335                 case 4: /* D4 vblank/vline */
7336                         switch (src_data) {
7337                         case 0: /* D4 vblank */
7338                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
7339                                         if (rdev->irq.crtc_vblank_int[3]) {
7340                                                 drm_handle_vblank(rdev->ddev, 3);
7341                                                 rdev->pm.vblank_sync = true;
7342                                                 wake_up(&rdev->irq.vblank_queue);
7343                                         }
7344                                         if (atomic_read(&rdev->irq.pflip[3]))
7345                                                 radeon_crtc_handle_flip(rdev, 3);
7346                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7347                                         DRM_DEBUG("IH: D4 vblank\n");
7348                                 }
7349                                 break;
7350                         case 1: /* D4 vline */
7351                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
7352                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7353                                         DRM_DEBUG("IH: D4 vline\n");
7354                                 }
7355                                 break;
7356                         default:
7357                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7358                                 break;
7359                         }
7360                         break;
7361                 case 5: /* D5 vblank/vline */
7362                         switch (src_data) {
7363                         case 0: /* D5 vblank */
7364                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
7365                                         if (rdev->irq.crtc_vblank_int[4]) {
7366                                                 drm_handle_vblank(rdev->ddev, 4);
7367                                                 rdev->pm.vblank_sync = true;
7368                                                 wake_up(&rdev->irq.vblank_queue);
7369                                         }
7370                                         if (atomic_read(&rdev->irq.pflip[4]))
7371                                                 radeon_crtc_handle_flip(rdev, 4);
7372                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7373                                         DRM_DEBUG("IH: D5 vblank\n");
7374                                 }
7375                                 break;
7376                         case 1: /* D5 vline */
7377                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
7378                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7379                                         DRM_DEBUG("IH: D5 vline\n");
7380                                 }
7381                                 break;
7382                         default:
7383                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7384                                 break;
7385                         }
7386                         break;
7387                 case 6: /* D6 vblank/vline */
7388                         switch (src_data) {
7389                         case 0: /* D6 vblank */
7390                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
7391                                         if (rdev->irq.crtc_vblank_int[5]) {
7392                                                 drm_handle_vblank(rdev->ddev, 5);
7393                                                 rdev->pm.vblank_sync = true;
7394                                                 wake_up(&rdev->irq.vblank_queue);
7395                                         }
7396                                         if (atomic_read(&rdev->irq.pflip[5]))
7397                                                 radeon_crtc_handle_flip(rdev, 5);
7398                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7399                                         DRM_DEBUG("IH: D6 vblank\n");
7400                                 }
7401                                 break;
7402                         case 1: /* D6 vline */
7403                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
7404                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7405                                         DRM_DEBUG("IH: D6 vline\n");
7406                                 }
7407                                 break;
7408                         default:
7409                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7410                                 break;
7411                         }
7412                         break;
7413                 case 42: /* HPD hotplug */
7414                         switch (src_data) {
7415                         case 0:
7416                                 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7417                                         rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7418                                         queue_hotplug = true;
7419                                         DRM_DEBUG("IH: HPD1\n");
7420                                 }
7421                                 break;
7422                         case 1:
7423                                 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7424                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7425                                         queue_hotplug = true;
7426                                         DRM_DEBUG("IH: HPD2\n");
7427                                 }
7428                                 break;
7429                         case 2:
7430                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7431                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7432                                         queue_hotplug = true;
7433                                         DRM_DEBUG("IH: HPD3\n");
7434                                 }
7435                                 break;
7436                         case 3:
7437                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7438                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7439                                         queue_hotplug = true;
7440                                         DRM_DEBUG("IH: HPD4\n");
7441                                 }
7442                                 break;
7443                         case 4:
7444                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7445                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7446                                         queue_hotplug = true;
7447                                         DRM_DEBUG("IH: HPD5\n");
7448                                 }
7449                                 break;
7450                         case 5:
7451                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7452                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7453                                         queue_hotplug = true;
7454                                         DRM_DEBUG("IH: HPD6\n");
7455                                 }
7456                                 break;
7457                         default:
7458                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7459                                 break;
7460                         }
7461                         break;
7462                 case 124: /* UVD */
7463                         DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7464                         radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7465                         break;
7466                 case 146:
7467                 case 147:
7468                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7469                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7470                         mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7471                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7472                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7473                                 addr);
7474                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7475                                 status);
7476                         cik_vm_decode_fault(rdev, status, addr, mc_client);
7477                         /* reset addr and status */
7478                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7479                         break;
7480                 case 176: /* GFX RB CP_INT */
7481                 case 177: /* GFX IB CP_INT */
7482                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7483                         break;
7484                 case 181: /* CP EOP event */
7485                         DRM_DEBUG("IH: CP EOP\n");
7486                         /* XXX check the bitfield order! */
7487                         me_id = (ring_id & 0x60) >> 5;
7488                         pipe_id = (ring_id & 0x18) >> 3;
7489                         queue_id = (ring_id & 0x7) >> 0;
7490                         switch (me_id) {
7491                         case 0:
7492                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7493                                 break;
7494                         case 1:
7495                         case 2:
7496                                 if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
7497                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7498                                 if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
7499                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7500                                 break;
7501                         }
7502                         break;
7503                 case 184: /* CP Privileged reg access */
7504                         DRM_ERROR("Illegal register access in command stream\n");
7505                         /* XXX check the bitfield order! */
7506                         me_id = (ring_id & 0x60) >> 5;
7507                         pipe_id = (ring_id & 0x18) >> 3;
7508                         queue_id = (ring_id & 0x7) >> 0;
7509                         switch (me_id) {
7510                         case 0:
7511                                 /* This results in a full GPU reset, but all we need to do is soft
7512                                  * reset the CP for gfx
7513                                  */
7514                                 queue_reset = true;
7515                                 break;
7516                         case 1:
7517                                 /* XXX compute */
7518                                 queue_reset = true;
7519                                 break;
7520                         case 2:
7521                                 /* XXX compute */
7522                                 queue_reset = true;
7523                                 break;
7524                         }
7525                         break;
7526                 case 185: /* CP Privileged inst */
7527                         DRM_ERROR("Illegal instruction in command stream\n");
7528                         /* XXX check the bitfield order! */
7529                         me_id = (ring_id & 0x60) >> 5;
7530                         pipe_id = (ring_id & 0x18) >> 3;
7531                         queue_id = (ring_id & 0x7) >> 0;
7532                         switch (me_id) {
7533                         case 0:
7534                                 /* This results in a full GPU reset, but all we need to do is soft
7535                                  * reset the CP for gfx
7536                                  */
7537                                 queue_reset = true;
7538                                 break;
7539                         case 1:
7540                                 /* XXX compute */
7541                                 queue_reset = true;
7542                                 break;
7543                         case 2:
7544                                 /* XXX compute */
7545                                 queue_reset = true;
7546                                 break;
7547                         }
7548                         break;
7549                 case 224: /* SDMA trap event */
7550                         /* XXX check the bitfield order! */
7551                         me_id = (ring_id & 0x3) >> 0;
7552                         queue_id = (ring_id & 0xc) >> 2;
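                             /* per the masks above, ring_id bits [1:0] select the SDMA
                              * engine (me_id) and bits [3:2] the queue; the XXX note
                              * flags this layout as unverified.
                              */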
7553                         DRM_DEBUG("IH: SDMA trap\n");
7554                         switch (me_id) {
7555                         case 0:
7556                                 switch (queue_id) {
7557                                 case 0:
7558                                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
7559                                         break;
7560                                 case 1:
7561                                         /* XXX compute */
7562                                         break;
7563                                 case 2:
7564                                         /* XXX compute */
7565                                         break;
7566                                 }
7567                                 break;
7568                         case 1:
7569                                 switch (queue_id) {
7570                                 case 0:
7571                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7572                                         break;
7573                                 case 1:
7574                                         /* XXX compute */
7575                                         break;
7576                                 case 2:
7577                                         /* XXX compute */
7578                                         break;
7579                                 }
7580                                 break;
7581                         }
7582                         break;
7583                 case 230: /* thermal low to high */
7584                         DRM_DEBUG("IH: thermal low to high\n");
7585                         rdev->pm.dpm.thermal.high_to_low = false;
7586                         queue_thermal = true;
7587                         break;
7588                 case 231: /* thermal high to low */
7589                         DRM_DEBUG("IH: thermal high to low\n");
7590                         rdev->pm.dpm.thermal.high_to_low = true;
7591                         queue_thermal = true;
7592                         break;
7593                 case 233: /* GUI IDLE */
7594                         DRM_DEBUG("IH: GUI idle\n");
7595                         break;
7596                 case 241: /* SDMA Privileged inst */
7597                 case 247: /* SDMA Privileged inst */
7598                         DRM_ERROR("Illegal instruction in SDMA command stream\n");
7599                         /* XXX check the bitfield order! */
7600                         me_id = (ring_id & 0x3) >> 0;
7601                         queue_id = (ring_id & 0xc) >> 2;
7602                         switch (me_id) {
7603                         case 0:
7604                                 switch (queue_id) {
7605                                 case 0:
7606                                         queue_reset = true;
7607                                         break;
7608                                 case 1:
7609                                         /* XXX compute */
7610                                         queue_reset = true;
7611                                         break;
7612                                 case 2:
7613                                         /* XXX compute */
7614                                         queue_reset = true;
7615                                         break;
7616                                 }
7617                                 break;
7618                         case 1:
7619                                 switch (queue_id) {
7620                                 case 0:
7621                                         queue_reset = true;
7622                                         break;
7623                                 case 1:
7624                                         /* XXX compute */
7625                                         queue_reset = true;
7626                                         break;
7627                                 case 2:
7628                                         /* XXX compute */
7629                                         queue_reset = true;
7630                                         break;
7631                                 }
7632                                 break;
7633                         }
7634                         break;
7635                 default:
7636                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7637                         break;
7638                 }
7639
7640                 /* wptr/rptr are in bytes! */
7641                 rptr += 16;
7642                 rptr &= rdev->ih.ptr_mask;
7643         }
7644         if (queue_hotplug)
7645                 schedule_work(&rdev->hotplug_work);
7646         if (queue_reset)
7647                 schedule_work(&rdev->reset_work);
7648         if (queue_thermal)
7649                 schedule_work(&rdev->pm.dpm.thermal.work);
7650         rdev->ih.rptr = rptr;
7651         WREG32(IH_RB_RPTR, rdev->ih.rptr);
7652         atomic_set(&rdev->ih.lock, 0);
7653
7654         /* make sure wptr hasn't changed while processing */
7655         wptr = cik_get_ih_wptr(rdev);
7656         if (wptr != rptr)
7657                 goto restart_ih;
7658
7659         return IRQ_HANDLED;
7660 }
7661
7662 /*
7663  * startup/shutdown callbacks
7664  */
7665 /**
7666  * cik_startup - program the asic to a functional state
7667  *
7668  * @rdev: radeon_device pointer
7669  *
7670  * Programs the asic to a functional state (CIK).
7671  * Called by cik_init() and cik_resume().
7672  * Returns 0 for success, error for failure.
7673  */
7674 static int cik_startup(struct radeon_device *rdev)
7675 {
7676         struct radeon_ring *ring;
7677         int r;
7678
7679         /* enable pcie gen2/3 link */
7680         cik_pcie_gen3_enable(rdev);
7681         /* enable aspm */
7682         cik_program_aspm(rdev);
7683
7684         /* scratch needs to be initialized before MC */
7685         r = r600_vram_scratch_init(rdev);
7686         if (r)
7687                 return r;
7688
7689         cik_mc_program(rdev);
7690
7691         if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
7692                 r = ci_mc_load_microcode(rdev);
7693                 if (r) {
7694                         DRM_ERROR("Failed to load MC firmware!\n");
7695                         return r;
7696                 }
7697         }
7698
7699         r = cik_pcie_gart_enable(rdev);
7700         if (r)
7701                 return r;
7702         cik_gpu_init(rdev);
7703
7704         /* allocate rlc buffers */
7705         if (rdev->flags & RADEON_IS_IGP) {
7706                 if (rdev->family == CHIP_KAVERI) {
7707                         rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
7708                         rdev->rlc.reg_list_size =
7709                                 (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
7710                 } else {
7711                         rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
7712                         rdev->rlc.reg_list_size =
7713                                 (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
7714                 }
7715         }
7716         rdev->rlc.cs_data = ci_cs_data;
7717         rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
7718         r = sumo_rlc_init(rdev);
7719         if (r) {
7720                 DRM_ERROR("Failed to init rlc BOs!\n");
7721                 return r;
7722         }
7723
7724         /* allocate wb buffer */
7725         r = radeon_wb_init(rdev);
7726         if (r)
7727                 return r;
7728
7729         /* allocate mec buffers */
7730         r = cik_mec_init(rdev);
7731         if (r) {
7732                 DRM_ERROR("Failed to init MEC BOs!\n");
7733                 return r;
7734         }
7735
7736         r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
7737         if (r) {
7738                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7739                 return r;
7740         }
7741
7742         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7743         if (r) {
7744                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7745                 return r;
7746         }
7747
7748         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7749         if (r) {
7750                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7751                 return r;
7752         }
7753
7754         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
7755         if (r) {
7756                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7757                 return r;
7758         }
7759
7760         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7761         if (r) {
7762                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7763                 return r;
7764         }
7765
7766         r = radeon_uvd_resume(rdev);
7767         if (!r) {
7768                 r = uvd_v4_2_resume(rdev);
7769                 if (!r) {
7770                         r = radeon_fence_driver_start_ring(rdev,
7771                                                            R600_RING_TYPE_UVD_INDEX);
7772                         if (r)
7773                                 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
7774                 }
7775         }
7776         if (r)
7777                 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
7778
7779         /* Enable IRQ */
7780         if (!rdev->irq.installed) {
7781                 r = radeon_irq_kms_init(rdev);
7782                 if (r)
7783                         return r;
7784         }
7785
7786         r = cik_irq_init(rdev);
7787         if (r) {
7788                 DRM_ERROR("radeon: IH init failed (%d).\n", r);
7789                 radeon_irq_kms_fini(rdev);
7790                 return r;
7791         }
7792         cik_irq_set(rdev);
7793
7794         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7795         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
7796                              PACKET3(PACKET3_NOP, 0x3FFF));
7797         if (r)
7798                 return r;
7799
7800         /* set up the compute queues */
7801         /* type-2 packets are deprecated on MEC, use type-3 instead */
7802         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7803         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
7804                              PACKET3(PACKET3_NOP, 0x3FFF));
7805         if (r)
7806                 return r;
7807         ring->me = 1; /* first MEC */
7808         ring->pipe = 0; /* first pipe */
7809         ring->queue = 0; /* first queue */
7810         ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
7811
7812         /* type-2 packets are deprecated on MEC, use type-3 instead */
7813         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7814         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
7815                              PACKET3(PACKET3_NOP, 0x3FFF));
7816         if (r)
7817                 return r;
7818         /* dGPUs only have 1 MEC */
7819         ring->me = 1; /* first MEC */
7820         ring->pipe = 0; /* first pipe */
7821         ring->queue = 1; /* second queue */
7822         ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
7823
7824         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7825         r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
7826                              SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7827         if (r)
7828                 return r;
7829
7830         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7831         r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
7832                              SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7833         if (r)
7834                 return r;
7835
7836         r = cik_cp_resume(rdev);
7837         if (r)
7838                 return r;
7839
7840         r = cik_sdma_resume(rdev);
7841         if (r)
7842                 return r;
7843
7844         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7845         if (ring->ring_size) {
7846                 r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
7847                                      RADEON_CP_PACKET2);
7848                 if (!r)
7849                         r = uvd_v1_0_init(rdev);
7850                 if (r)
7851                         DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
7852         }
7853
7854         r = radeon_ib_pool_init(rdev);
7855         if (r) {
7856                 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
7857                 return r;
7858         }
7859
7860         r = radeon_vm_manager_init(rdev);
7861         if (r) {
7862                 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
7863                 return r;
7864         }
7865
7866         r = dce6_audio_init(rdev);
7867         if (r)
7868                 return r;
7869
7870         return 0;
7871 }
7872
7873 /**
7874  * cik_resume - resume the asic to a functional state
7875  *
7876  * @rdev: radeon_device pointer
7877  *
7878  * Programs the asic to a functional state (CIK).
7879  * Called at resume.
7880  * Returns 0 for success, error for failure.
7881  */
7882 int cik_resume(struct radeon_device *rdev)
7883 {
7884         int r;
7885
7886         /* post card */
7887         atom_asic_init(rdev->mode_info.atom_context);
7888
7889         /* init golden registers */
7890         cik_init_golden_registers(rdev);
7891
7892         radeon_pm_resume(rdev);
7893
7894         rdev->accel_working = true;
7895         r = cik_startup(rdev);
7896         if (r) {
7897                 DRM_ERROR("cik startup failed on resume\n");
7898                 rdev->accel_working = false;
7899                 return r;
7900         }
7901
7902         return r;
7903
7904 }
7905
7906 /**
7907  * cik_suspend - suspend the asic
7908  *
7909  * @rdev: radeon_device pointer
7910  *
7911  * Bring the chip into a state suitable for suspend (CIK).
7912  * Called at suspend.
7913  * Returns 0 for success.
7914  */
7915 int cik_suspend(struct radeon_device *rdev)
7916 {
7917         radeon_pm_suspend(rdev);
7918         dce6_audio_fini(rdev);
7919         radeon_vm_manager_fini(rdev);
7920         cik_cp_enable(rdev, false);
7921         cik_sdma_enable(rdev, false);
7922         uvd_v1_0_fini(rdev);
7923         radeon_uvd_suspend(rdev);
7924         cik_fini_pg(rdev);
7925         cik_fini_cg(rdev);
7926         cik_irq_suspend(rdev);
7927         radeon_wb_disable(rdev);
7928         cik_pcie_gart_disable(rdev);
7929         return 0;
7930 }
7931
7932 /* The plan is to move initialization into this function and use
7933  * helper functions so that radeon_device_init does pretty much
7934  * nothing more than call asic specific functions. This should
7935  * also allow us to remove a bunch of callback functions
7936  * like vram_info.
7937  */
7938 /**
7939  * cik_init - asic specific driver and hw init
7940  *
7941  * @rdev: radeon_device pointer
7942  *
7943  * Setup asic specific driver variables and program the hw
7944  * to a functional state (CIK).
7945  * Called at driver startup.
7946  * Returns 0 for success, errors for failure.
7947  */
7948 int cik_init(struct radeon_device *rdev)
7949 {
7950         struct radeon_ring *ring;
7951         int r;
7952
7953         /* Read BIOS */
7954         if (!radeon_get_bios(rdev)) {
7955                 if (ASIC_IS_AVIVO(rdev))
7956                         return -EINVAL;
7957         }
7958         /* Must be an ATOMBIOS */
7959         if (!rdev->is_atom_bios) {
7960                 dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
7961                 return -EINVAL;
7962         }
7963         r = radeon_atombios_init(rdev);
7964         if (r)
7965                 return r;
7966
7967         /* Post card if necessary */
7968         if (!radeon_card_posted(rdev)) {
7969                 if (!rdev->bios) {
7970                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
7971                         return -EINVAL;
7972                 }
7973                 DRM_INFO("GPU not posted. posting now...\n");
7974                 atom_asic_init(rdev->mode_info.atom_context);
7975         }
7976         /* init golden registers */
7977         cik_init_golden_registers(rdev);
7978         /* Initialize scratch registers */
7979         cik_scratch_init(rdev);
7980         /* Initialize surface registers */
7981         radeon_surface_init(rdev);
7982         /* Initialize clocks */
7983         radeon_get_clock_info(rdev->ddev);
7984
7985         /* Fence driver */
7986         r = radeon_fence_driver_init(rdev);
7987         if (r)
7988                 return r;
7989
7990         /* initialize memory controller */
7991         r = cik_mc_init(rdev);
7992         if (r)
7993                 return r;
7994         /* Memory manager */
7995         r = radeon_bo_init(rdev);
7996         if (r)
7997                 return r;
7998
7999         if (rdev->flags & RADEON_IS_IGP) {
8000                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8001                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8002                         r = cik_init_microcode(rdev);
8003                         if (r) {
8004                                 DRM_ERROR("Failed to load firmware!\n");
8005                                 return r;
8006                         }
8007                 }
8008         } else {
8009                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8010                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8011                     !rdev->mc_fw) {
8012                         r = cik_init_microcode(rdev);
8013                         if (r) {
8014                                 DRM_ERROR("Failed to load firmware!\n");
8015                                 return r;
8016                         }
8017                 }
8018         }
8019
8020         /* Initialize power management */
8021         radeon_pm_init(rdev);
8022
8023         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8024         ring->ring_obj = NULL;
8025         r600_ring_init(rdev, ring, 1024 * 1024);
8026
8027         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8028         ring->ring_obj = NULL;
8029         r600_ring_init(rdev, ring, 1024 * 1024);
8030         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8031         if (r)
8032                 return r;
8033
8034         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8035         ring->ring_obj = NULL;
8036         r600_ring_init(rdev, ring, 1024 * 1024);
8037         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8038         if (r)
8039                 return r;
8040
8041         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8042         ring->ring_obj = NULL;
8043         r600_ring_init(rdev, ring, 256 * 1024);
8044
8045         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8046         ring->ring_obj = NULL;
8047         r600_ring_init(rdev, ring, 256 * 1024);
8048
8049         r = radeon_uvd_init(rdev);
8050         if (!r) {
8051                 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8052                 ring->ring_obj = NULL;
8053                 r600_ring_init(rdev, ring, 4096);
8054         }
8055
8056         rdev->ih.ring_obj = NULL;
8057         r600_ih_ring_init(rdev, 64 * 1024);
8058
8059         r = r600_pcie_gart_init(rdev);
8060         if (r)
8061                 return r;
8062
8063         rdev->accel_working = true;
8064         r = cik_startup(rdev);
8065         if (r) {
8066                 dev_err(rdev->dev, "disabling GPU acceleration\n");
8067                 cik_cp_fini(rdev);
8068                 cik_sdma_fini(rdev);
8069                 cik_irq_fini(rdev);
8070                 sumo_rlc_fini(rdev);
8071                 cik_mec_fini(rdev);
8072                 radeon_wb_fini(rdev);
8073                 radeon_ib_pool_fini(rdev);
8074                 radeon_vm_manager_fini(rdev);
8075                 radeon_irq_kms_fini(rdev);
8076                 cik_pcie_gart_fini(rdev);
8077                 rdev->accel_working = false;
8078         }
8079
8080         /* Don't start up if the MC ucode is missing.
8081          * The default clocks and voltages before the MC ucode
8082          * is loaded are not sufficient for advanced operations.
8083          */
8084         if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8085                 DRM_ERROR("radeon: MC ucode required for CIK dGPUs.\n");
8086                 return -EINVAL;
8087         }
8088
8089         return 0;
8090 }
8091
8092 /**
8093  * cik_fini - asic specific driver and hw fini
8094  *
8095  * @rdev: radeon_device pointer
8096  *
8097  * Tear down the asic specific driver variables and program the hw
8098  * to an idle state (CIK).
8099  * Called at driver unload.
8100  */
8101 void cik_fini(struct radeon_device *rdev)
8102 {
8103         radeon_pm_fini(rdev);
8104         cik_cp_fini(rdev);
8105         cik_sdma_fini(rdev);
8106         cik_fini_pg(rdev);
8107         cik_fini_cg(rdev);
8108         cik_irq_fini(rdev);
8109         sumo_rlc_fini(rdev);
8110         cik_mec_fini(rdev);
8111         radeon_wb_fini(rdev);
8112         radeon_vm_manager_fini(rdev);
8113         radeon_ib_pool_fini(rdev);
8114         radeon_irq_kms_fini(rdev);
8115         uvd_v1_0_fini(rdev);
8116         radeon_uvd_fini(rdev);
8117         cik_pcie_gart_fini(rdev);
8118         r600_vram_scratch_fini(rdev);
8119         radeon_gem_fini(rdev);
8120         radeon_fence_driver_fini(rdev);
8121         radeon_bo_fini(rdev);
8122         radeon_atombios_fini(rdev);
8123         kfree(rdev->bios);
8124         rdev->bios = NULL;
8125 }
8126
8127 void dce8_program_fmt(struct drm_encoder *encoder)
8128 {
8129         struct drm_device *dev = encoder->dev;
8130         struct radeon_device *rdev = dev->dev_private;
8131         struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8132         struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8133         struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8134         int bpc = 0;
8135         u32 tmp = 0;
8136         enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8137
8138         if (connector) {
8139                 struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8140                 bpc = radeon_get_monitor_bpc(connector);
8141                 dither = radeon_connector->dither;
8142         }
8143
8144         /* LVDS/eDP FMT is set up by atom */
8145         if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8146                 return;
8147
8148         /* not needed for analog */
8149         if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8150             (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8151                 return;
8152
8153         if (bpc == 0)
8154                 return;
8155
8156         switch (bpc) {
8157         case 6:
8158                 if (dither == RADEON_FMT_DITHER_ENABLE)
8159                         /* XXX sort out optimal dither settings */
8160                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8161                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8162                 else
8163                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8164                 break;
8165         case 8:
8166                 if (dither == RADEON_FMT_DITHER_ENABLE)
8167                         /* XXX sort out optimal dither settings */
8168                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8169                                 FMT_RGB_RANDOM_ENABLE |
8170                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8171                 else
8172                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8173                 break;
8174         case 10:
8175                 if (dither == RADEON_FMT_DITHER_ENABLE)
8176                         /* XXX sort out optimal dither settings */
8177                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8178                                 FMT_RGB_RANDOM_ENABLE |
8179                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8180                 else
8181                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8182                 break;
8183         default:
8184                 /* not needed */
8185                 break;
8186         }
8187
8188         WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8189 }
8190
8191 /* display watermark setup */
8192 /**
8193  * dce8_line_buffer_adjust - Set up the line buffer
8194  *
8195  * @rdev: radeon_device pointer
8196  * @radeon_crtc: the selected display controller
8197  * @mode: the current display mode on the selected display
8198  * controller
8199  *
8200  * Set up the line buffer allocation for
8201  * the selected display controller (CIK).
8202  * Returns the line buffer size in pixels.
8203  */
8204 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8205                                    struct radeon_crtc *radeon_crtc,
8206                                    struct drm_display_mode *mode)
8207 {
8208         u32 tmp, buffer_alloc, i;
8209         u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8210         /*
8211          * Line Buffer Setup
8212          * There are 6 line buffers, one for each display controller.
8213          * There are 3 partitions per LB. Select the number of partitions
8214          * to enable based on the display width.  For display widths larger
8215          * than 4096, you need to use 2 display controllers and combine
8216          * them using the stereo blender.
8217          */
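             /*
              * Worked example (illustrative only): a 1920x1080 mode has
              * crtc_hdisplay == 1920, so it takes the "< 2560" branch below:
              * tmp = 2 (two partitions), buffer_alloc = 2, and the function
              * reports 2560 * 2 pixels of line buffer to the caller.
              */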
8218         if (radeon_crtc->base.enabled && mode) {
8219                 if (mode->crtc_hdisplay < 1920) {
8220                         tmp = 1;
8221                         buffer_alloc = 2;
8222                 } else if (mode->crtc_hdisplay < 2560) {
8223                         tmp = 2;
8224                         buffer_alloc = 2;
8225                 } else if (mode->crtc_hdisplay < 4096) {
8226                         tmp = 0;
8227                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8228                 } else {
8229                         DRM_DEBUG_KMS("Mode too big for LB!\n");
8230                         tmp = 0;
8231                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8232                 }
8233         } else {
8234                 tmp = 1;
8235                 buffer_alloc = 0;
8236         }
8237
8238         WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8239                LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8240
8241         WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8242                DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8243         for (i = 0; i < rdev->usec_timeout; i++) {
8244                 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8245                     DMIF_BUFFERS_ALLOCATED_COMPLETED)
8246                         break;
8247                 udelay(1);
8248         }
8249
8250         if (radeon_crtc->base.enabled && mode) {
8251                 switch (tmp) {
8252                 case 0:
8253                 default:
8254                         return 4096 * 2;
8255                 case 1:
8256                         return 1920 * 2;
8257                 case 2:
8258                         return 2560 * 2;
8259                 }
8260         }
8261
8262         /* controller not enabled, so no lb used */
8263         return 0;
8264 }
8265
8266 /**
8267  * cik_get_number_of_dram_channels - get the number of dram channels
8268  *
8269  * @rdev: radeon_device pointer
8270  *
8271  * Look up the number of video ram channels (CIK).
8272  * Used for display watermark bandwidth calculations
8273  * Returns the number of dram channels
8274  */
8275 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8276 {
8277         u32 tmp = RREG32(MC_SHARED_CHMAP);
8278
8279         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8280         case 0:
8281         default:
8282                 return 1;
8283         case 1:
8284                 return 2;
8285         case 2:
8286                 return 4;
8287         case 3:
8288                 return 8;
8289         case 4:
8290                 return 3;
8291         case 5:
8292                 return 6;
8293         case 6:
8294                 return 10;
8295         case 7:
8296                 return 12;
8297         case 8:
8298                 return 16;
8299         }
8300 }
8301
8302 struct dce8_wm_params {
8303         u32 dram_channels; /* number of dram channels */
8304         u32 yclk;          /* bandwidth per dram data pin in kHz */
8305         u32 sclk;          /* engine clock in kHz */
8306         u32 disp_clk;      /* display clock in kHz */
8307         u32 src_width;     /* viewport width */
8308         u32 active_time;   /* active display time in ns */
8309         u32 blank_time;    /* blank time in ns */
8310         bool interlaced;    /* mode is interlaced */
8311         fixed20_12 vsc;    /* vertical scale ratio */
8312         u32 num_heads;     /* number of active crtcs */
8313         u32 bytes_per_pixel; /* bytes per pixel display + overlay */
8314         u32 lb_size;       /* line buffer allocated to pipe */
8315         u32 vtaps;         /* vertical scaler taps */
8316 };
8317
8318 /**
8319  * dce8_dram_bandwidth - get the dram bandwidth
8320  *
8321  * @wm: watermark calculation data
8322  *
8323  * Calculate the raw dram bandwidth (CIK).
8324  * Used for display watermark bandwidth calculations
8325  * Returns the dram bandwidth in MBytes/s
8326  */
8327 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8328 {
8329         /* Calculate raw DRAM Bandwidth */
8330         fixed20_12 dram_efficiency; /* 0.7 */
8331         fixed20_12 yclk, dram_channels, bandwidth;
8332         fixed20_12 a;
8333
8334         a.full = dfixed_const(1000);
8335         yclk.full = dfixed_const(wm->yclk);
8336         yclk.full = dfixed_div(yclk, a);
8337         dram_channels.full = dfixed_const(wm->dram_channels * 4);
8338         a.full = dfixed_const(10);
8339         dram_efficiency.full = dfixed_const(7);
8340         dram_efficiency.full = dfixed_div(dram_efficiency, a);
8341         bandwidth.full = dfixed_mul(dram_channels, yclk);
8342         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8343
8344         return dfixed_trunc(bandwidth);
8345 }
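
/*
 * Illustrative numbers only (hypothetical values): with wm->yclk = 1000000
 * (1 GHz effective per pin) and wm->dram_channels = 2, the math above gives
 * 2 * 4 * (1000000 / 1000) * 0.7 = 5600 MBytes/s of raw DRAM bandwidth.
 */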
8346
8347 /**
8348  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8349  *
8350  * @wm: watermark calculation data
8351  *
8352  * Calculate the dram bandwidth used for display (CIK).
8353  * Used for display watermark bandwidth calculations
8354  * Returns the dram bandwidth for display in MBytes/s
8355  */
8356 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8357 {
8358         /* Calculate DRAM Bandwidth and the part allocated to display. */
8359         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8360         fixed20_12 yclk, dram_channels, bandwidth;
8361         fixed20_12 a;
8362
8363         a.full = dfixed_const(1000);
8364         yclk.full = dfixed_const(wm->yclk);
8365         yclk.full = dfixed_div(yclk, a);
8366         dram_channels.full = dfixed_const(wm->dram_channels * 4);
8367         a.full = dfixed_const(10);
8368         disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
8369         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8370         bandwidth.full = dfixed_mul(dram_channels, yclk);
8371         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8372
8373         return dfixed_trunc(bandwidth);
8374 }
8375
8376 /**
8377  * dce8_data_return_bandwidth - get the data return bandwidth
8378  *
8379  * @wm: watermark calculation data
8380  *
8381  * Calculate the data return bandwidth used for display (CIK).
8382  * Used for display watermark bandwidth calculations
8383  * Returns the data return bandwidth in MBytes/s
8384  */
8385 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
8386 {
8387         /* Calculate the display Data return Bandwidth */
8388         fixed20_12 return_efficiency; /* 0.8 */
8389         fixed20_12 sclk, bandwidth;
8390         fixed20_12 a;
8391
8392         a.full = dfixed_const(1000);
8393         sclk.full = dfixed_const(wm->sclk);
8394         sclk.full = dfixed_div(sclk, a);
8395         a.full = dfixed_const(10);
8396         return_efficiency.full = dfixed_const(8);
8397         return_efficiency.full = dfixed_div(return_efficiency, a);
8398         a.full = dfixed_const(32);
8399         bandwidth.full = dfixed_mul(a, sclk);
8400         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
8401
8402         return dfixed_trunc(bandwidth);
8403 }
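
/*
 * Illustrative numbers only: with a hypothetical wm->sclk = 800000
 * (an 800 MHz engine clock), the calculation above gives
 * 32 * (800000 / 1000) * 0.8 = 20480 MBytes/s of data return bandwidth.
 */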
8404
8405 /**
8406  * dce8_dmif_request_bandwidth - get the dmif bandwidth
8407  *
8408  * @wm: watermark calculation data
8409  *
8410  * Calculate the dmif bandwidth used for display (CIK).
8411  * Used for display watermark bandwidth calculations
8412  * Returns the dmif bandwidth in MBytes/s
8413  */
8414 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
8415 {
8416         /* Calculate the DMIF Request Bandwidth */
8417         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
8418         fixed20_12 disp_clk, bandwidth;
8419         fixed20_12 a, b;
8420
8421         a.full = dfixed_const(1000);
8422         disp_clk.full = dfixed_const(wm->disp_clk);
8423         disp_clk.full = dfixed_div(disp_clk, a);
8424         a.full = dfixed_const(32);
8425         b.full = dfixed_mul(a, disp_clk);
8426
8427         a.full = dfixed_const(10);
8428         disp_clk_request_efficiency.full = dfixed_const(8);
8429         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
8430
8431         bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
8432
8433         return dfixed_trunc(bandwidth);
8434 }
8435
8436 /**
8437  * dce8_available_bandwidth - get the min available bandwidth
8438  *
8439  * @wm: watermark calculation data
8440  *
8441  * Calculate the min available bandwidth used for display (CIK).
8442  * Used for display watermark bandwidth calculations
8443  * Returns the min available bandwidth in MBytes/s
8444  */
8445 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
8446 {
8447         /* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
8448         u32 dram_bandwidth = dce8_dram_bandwidth(wm);
8449         u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
8450         u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
8451
8452         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
8453 }
8454
8455 /**
8456  * dce8_average_bandwidth - get the average available bandwidth
8457  *
8458  * @wm: watermark calculation data
8459  *
8460  * Calculate the average available bandwidth used for display (CIK).
8461  * Used for display watermark bandwidth calculations
8462  * Returns the average available bandwidth in MBytes/s
8463  */
8464 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
8465 {
8466         /* Calculate the display mode Average Bandwidth
8467          * DisplayMode should contain the source and destination dimensions,
8468          * timing, etc.
8469          */
8470         fixed20_12 bpp;
8471         fixed20_12 line_time;
8472         fixed20_12 src_width;
8473         fixed20_12 bandwidth;
8474         fixed20_12 a;
8475
8476         a.full = dfixed_const(1000);
8477         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
8478         line_time.full = dfixed_div(line_time, a);
8479         bpp.full = dfixed_const(wm->bytes_per_pixel);
8480         src_width.full = dfixed_const(wm->src_width);
8481         bandwidth.full = dfixed_mul(src_width, bpp);
8482         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
8483         bandwidth.full = dfixed_div(bandwidth, line_time);
8484
8485         return dfixed_trunc(bandwidth);
8486 }
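
/*
 * Illustrative numbers only: for a hypothetical 1920-pixel wide source at
 * 4 bytes per pixel, vsc = 1 and a 16000 ns line time, the formula above
 * gives 1920 * 4 * 1 / 16 = 480 MBytes/s of average display bandwidth.
 */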
8487
8488 /**
8489  * dce8_latency_watermark - get the latency watermark
8490  *
8491  * @wm: watermark calculation data
8492  *
8493  * Calculate the latency watermark (CIK).
8494  * Used for display watermark bandwidth calculations
8495  * Returns the latency watermark in ns
8496  */
8497 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
8498 {
8499         /* First calculate the latency in ns */
8500         u32 mc_latency = 2000; /* 2000 ns. */
8501         u32 available_bandwidth = dce8_available_bandwidth(wm);
8502         u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
8503         u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
8504         u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
8505         u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
8506                 (wm->num_heads * cursor_line_pair_return_time);
8507         u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
8508         u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
8509         u32 tmp, dmif_size = 12288;
8510         fixed20_12 a, b, c;
8511
8512         if (wm->num_heads == 0)
8513                 return 0;
8514
8515         a.full = dfixed_const(2);
8516         b.full = dfixed_const(1);
8517         if ((wm->vsc.full > a.full) ||
8518             ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
8519             (wm->vtaps >= 5) ||
8520             ((wm->vsc.full >= a.full) && wm->interlaced))
8521                 max_src_lines_per_dst_line = 4;
8522         else
8523                 max_src_lines_per_dst_line = 2;
8524
8525         a.full = dfixed_const(available_bandwidth);
8526         b.full = dfixed_const(wm->num_heads);
8527         a.full = dfixed_div(a, b);
8528
8529         b.full = dfixed_const(mc_latency + 512);
8530         c.full = dfixed_const(wm->disp_clk);
8531         b.full = dfixed_div(b, c);
8532
8533         c.full = dfixed_const(dmif_size);
8534         b.full = dfixed_div(c, b);
8535
8536         tmp = min(dfixed_trunc(a), dfixed_trunc(b));
8537
8538         b.full = dfixed_const(1000);
8539         c.full = dfixed_const(wm->disp_clk);
8540         b.full = dfixed_div(c, b);
8541         c.full = dfixed_const(wm->bytes_per_pixel);
8542         b.full = dfixed_mul(b, c);
8543
8544         lb_fill_bw = min(tmp, dfixed_trunc(b));
8545
8546         a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
8547         b.full = dfixed_const(1000);
8548         c.full = dfixed_const(lb_fill_bw);
8549         b.full = dfixed_div(c, b);
8550         a.full = dfixed_div(a, b);
8551         line_fill_time = dfixed_trunc(a);
8552
8553         if (line_fill_time < wm->active_time)
8554                 return latency;
8555         else
8556                 return latency + (line_fill_time - wm->active_time);
8557
8558 }
8559
8560 /**
8561  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
8562  * average and available dram bandwidth
8563  *
8564  * @wm: watermark calculation data
8565  *
8566  * Check if the display average bandwidth fits in the display
8567  * dram bandwidth (CIK).
8568  * Used for display watermark bandwidth calculations
8569  * Returns true if the display fits, false if not.
8570  */
8571 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8572 {
8573         if (dce8_average_bandwidth(wm) <=
8574             (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
8575                 return true;
8576         else
8577                 return false;
8578 }
8579
8580 /**
8581  * dce8_average_bandwidth_vs_available_bandwidth - check
8582  * average and available bandwidth
8583  *
8584  * @wm: watermark calculation data
8585  *
8586  * Check if the display average bandwidth fits in the display
8587  * available bandwidth (CIK).
8588  * Used for display watermark bandwidth calculations
8589  * Returns true if the display fits, false if not.
8590  */
8591 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
8592 {
8593         if (dce8_average_bandwidth(wm) <=
8594             (dce8_available_bandwidth(wm) / wm->num_heads))
8595                 return true;
8596         else
8597                 return false;
8598 }
8599
8600 /**
8601  * dce8_check_latency_hiding - check latency hiding
8602  *
8603  * @wm: watermark calculation data
8604  *
8605  * Check latency hiding (CIK).
8606  * Used for display watermark bandwidth calculations
8607  * Returns true if the display fits, false if not.
8608  */
8609 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
8610 {
8611         u32 lb_partitions = wm->lb_size / wm->src_width;
8612         u32 line_time = wm->active_time + wm->blank_time;
8613         u32 latency_tolerant_lines;
8614         u32 latency_hiding;
8615         fixed20_12 a;
8616
8617         a.full = dfixed_const(1);
8618         if (wm->vsc.full > a.full)
8619                 latency_tolerant_lines = 1;
8620         else {
8621                 if (lb_partitions <= (wm->vtaps + 1))
8622                         latency_tolerant_lines = 1;
8623                 else
8624                         latency_tolerant_lines = 2;
8625         }
8626
8627         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
8628
8629         if (dce8_latency_watermark(wm) <= latency_hiding)
8630                 return true;
8631         else
8632                 return false;
8633 }
8634
8635 /**
8636  * dce8_program_watermarks - program display watermarks
8637  *
8638  * @rdev: radeon_device pointer
8639  * @radeon_crtc: the selected display controller
8640  * @lb_size: line buffer size
8641  * @num_heads: number of display controllers in use
8642  *
8643  * Calculate and program the display watermarks for the
8644  * selected display controller (CIK).
8645  */
8646 static void dce8_program_watermarks(struct radeon_device *rdev,
8647                                     struct radeon_crtc *radeon_crtc,
8648                                     u32 lb_size, u32 num_heads)
8649 {
8650         struct drm_display_mode *mode = &radeon_crtc->base.mode;
8651         struct dce8_wm_params wm_low, wm_high;
8652         u32 pixel_period;
8653         u32 line_time = 0;
8654         u32 latency_watermark_a = 0, latency_watermark_b = 0;
8655         u32 tmp, wm_mask;
8656
8657         if (radeon_crtc->base.enabled && num_heads && mode) {
8658                 pixel_period = 1000000 / (u32)mode->clock;
8659                 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
8660
8661                 /* watermark for high clocks */
8662                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8663                     rdev->pm.dpm_enabled) {
8664                         wm_high.yclk =
8665                                 radeon_dpm_get_mclk(rdev, false) * 10;
8666                         wm_high.sclk =
8667                                 radeon_dpm_get_sclk(rdev, false) * 10;
8668                 } else {
8669                         wm_high.yclk = rdev->pm.current_mclk * 10;
8670                         wm_high.sclk = rdev->pm.current_sclk * 10;
8671                 }
8672
8673                 wm_high.disp_clk = mode->clock;
8674                 wm_high.src_width = mode->crtc_hdisplay;
8675                 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
8676                 wm_high.blank_time = line_time - wm_high.active_time;
8677                 wm_high.interlaced = false;
8678                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
8679                         wm_high.interlaced = true;
8680                 wm_high.vsc = radeon_crtc->vsc;
8681                 wm_high.vtaps = 1;
8682                 if (radeon_crtc->rmx_type != RMX_OFF)
8683                         wm_high.vtaps = 2;
8684                 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
8685                 wm_high.lb_size = lb_size;
8686                 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
8687                 wm_high.num_heads = num_heads;
8688
8689                 /* set for high clocks */
8690                 latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
8691
8692                 /* possibly force display priority to high */
8693                 /* should really do this at mode validation time... */
8694                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
8695                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
8696                     !dce8_check_latency_hiding(&wm_high) ||
8697                     (rdev->disp_priority == 2)) {
8698                         DRM_DEBUG_KMS("force priority to high\n");
8699                 }
8700
8701                 /* watermark for low clocks */
8702                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8703                     rdev->pm.dpm_enabled) {
8704                         wm_low.yclk =
8705                                 radeon_dpm_get_mclk(rdev, true) * 10;
8706                         wm_low.sclk =
8707                                 radeon_dpm_get_sclk(rdev, true) * 10;
8708                 } else {
8709                         wm_low.yclk = rdev->pm.current_mclk * 10;
8710                         wm_low.sclk = rdev->pm.current_sclk * 10;
8711                 }
8712
8713                 wm_low.disp_clk = mode->clock;
8714                 wm_low.src_width = mode->crtc_hdisplay;
8715                 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
8716                 wm_low.blank_time = line_time - wm_low.active_time;
8717                 wm_low.interlaced = false;
8718                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
8719                         wm_low.interlaced = true;
8720                 wm_low.vsc = radeon_crtc->vsc;
8721                 wm_low.vtaps = 1;
8722                 if (radeon_crtc->rmx_type != RMX_OFF)
8723                         wm_low.vtaps = 2;
8724                 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
8725                 wm_low.lb_size = lb_size;
8726                 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
8727                 wm_low.num_heads = num_heads;
8728
8729                 /* set for low clocks */
8730                 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
8731
8732                 /* possibly force display priority to high */
8733                 /* should really do this at mode validation time... */
8734                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
8735                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
8736                     !dce8_check_latency_hiding(&wm_low) ||
8737                     (rdev->disp_priority == 2)) {
8738                         DRM_DEBUG_KMS("force priority to high\n");
8739                 }
8740         }
8741
8742         /* select wm A */
8743         wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
8744         tmp = wm_mask;
8745         tmp &= ~LATENCY_WATERMARK_MASK(3);
8746         tmp |= LATENCY_WATERMARK_MASK(1);
8747         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
8748         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
8749                (LATENCY_LOW_WATERMARK(latency_watermark_a) |
8750                 LATENCY_HIGH_WATERMARK(line_time)));
8751         /* select wm B */
8752         tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
8753         tmp &= ~LATENCY_WATERMARK_MASK(3);
8754         tmp |= LATENCY_WATERMARK_MASK(2);
8755         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
8756         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
8757                (LATENCY_LOW_WATERMARK(latency_watermark_b) |
8758                 LATENCY_HIGH_WATERMARK(line_time)));
8759         /* restore original selection */
8760         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
8761
8762         /* save values for DPM */
8763         radeon_crtc->line_time = line_time;
8764         radeon_crtc->wm_high = latency_watermark_a;
8765         radeon_crtc->wm_low = latency_watermark_b;
8766 }
8767
8768 /**
8769  * dce8_bandwidth_update - program display watermarks
8770  *
8771  * @rdev: radeon_device pointer
8772  *
8773  * Calculate and program the display watermarks and line
8774  * buffer allocation (CIK).
8775  */
8776 void dce8_bandwidth_update(struct radeon_device *rdev)
8777 {
8778         struct drm_display_mode *mode = NULL;
8779         u32 num_heads = 0, lb_size;
8780         int i;
8781
8782         radeon_update_display_priority(rdev);
8783
8784         for (i = 0; i < rdev->num_crtc; i++) {
8785                 if (rdev->mode_info.crtcs[i]->base.enabled)
8786                         num_heads++;
8787         }
8788         for (i = 0; i < rdev->num_crtc; i++) {
8789                 mode = &rdev->mode_info.crtcs[i]->base.mode;
8790                 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
8791                 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
8792         }
8793 }
8794
8795 /**
8796  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
8797  *
8798  * @rdev: radeon_device pointer
8799  *
8800  * Fetches a GPU clock counter snapshot (CIK).
8801  * Returns the 64 bit clock counter snapshot.
8802  */
8803 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
8804 {
8805         uint64_t clock;
8806
8807         mutex_lock(&rdev->gpu_clock_mutex);
8808         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
8809         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
8810                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
8811         mutex_unlock(&rdev->gpu_clock_mutex);
8812         return clock;
8813 }
8814
8815 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
8816                               u32 cntl_reg, u32 status_reg)
8817 {
8818         int r, i;
8819         struct atom_clock_dividers dividers;
8820         uint32_t tmp;
8821
8822         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
8823                                            clock, false, &dividers);
8824         if (r)
8825                 return r;
8826
8827         tmp = RREG32_SMC(cntl_reg);
8828         tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
8829         tmp |= dividers.post_divider;
8830         WREG32_SMC(cntl_reg, tmp);
8831
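             /* wait for the clock change to complete (DCLK_STATUS set):
              * at most 100 * 10 ms = 1 second before returning -ETIMEDOUT.
              */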
8832         for (i = 0; i < 100; i++) {
8833                 if (RREG32_SMC(status_reg) & DCLK_STATUS)
8834                         break;
8835                 mdelay(10);
8836         }
8837         if (i == 100)
8838                 return -ETIMEDOUT;
8839
8840         return 0;
8841 }
8842
8843 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
8844 {
8845         int r = 0;
8846
8847         r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
8848         if (r)
8849                 return r;
8850
8851         r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
8852         return r;
8853 }
8854
8855 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
8856 {
8857         struct pci_dev *root = rdev->pdev->bus->self;
8858         int bridge_pos, gpu_pos;
8859         u32 speed_cntl, mask, current_data_rate;
8860         int ret, i;
8861         u16 tmp16;
8862
8863         if (radeon_pcie_gen2 == 0)
8864                 return;
8865
8866         if (rdev->flags & RADEON_IS_IGP)
8867                 return;
8868
8869         if (!(rdev->flags & RADEON_IS_PCIE))
8870                 return;
8871
8872         ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
8873         if (ret != 0)
8874                 return;
8875
8876         if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
8877                 return;
8878
8879         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8880         current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
8881                 LC_CURRENT_DATA_RATE_SHIFT;
8882         if (mask & DRM_PCIE_SPEED_80) {
8883                 if (current_data_rate == 2) {
8884                         DRM_INFO("PCIE gen 3 link speeds already enabled\n");
8885                         return;
8886                 }
8887                 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
8888         } else if (mask & DRM_PCIE_SPEED_50) {
8889                 if (current_data_rate == 1) {
8890                         DRM_INFO("PCIE gen 2 link speeds already enabled\n");
8891                         return;
8892                 }
8893                 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
8894         }
8895
8896         bridge_pos = pci_pcie_cap(root);
8897         if (!bridge_pos)
8898                 return;
8899
8900         gpu_pos = pci_pcie_cap(rdev->pdev);
8901         if (!gpu_pos)
8902                 return;
8903
8904         if (mask & DRM_PCIE_SPEED_80) {
8905                 /* re-try equalization if gen3 is not already enabled */
8906                 if (current_data_rate != 2) {
8907                         u16 bridge_cfg, gpu_cfg;
8908                         u16 bridge_cfg2, gpu_cfg2;
8909                         u32 max_lw, current_lw, tmp;
8910
8911                         pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
8912                         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
8913
8914                         tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
8915                         pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
8916
8917                         tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
8918                         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
8919
8920                         tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
8921                         max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
8922                         current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
8923
8924                         if (current_lw < max_lw) {
8925                                 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
8926                                 if (tmp & LC_RENEGOTIATION_SUPPORT) {
8927                                         tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
8928                                         tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
8929                                         tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
8930                                         WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
8931                                 }
8932                         }
8933
8934                         for (i = 0; i < 10; i++) {
8935                                 /* check status */
8936                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
8937                                 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
8938                                         break;
8939
8940                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
8941                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
8942
8943                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
8944                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
8945
8946                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8947                                 tmp |= LC_SET_QUIESCE;
8948                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8949
8950                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8951                                 tmp |= LC_REDO_EQ;
8952                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8953
8954                                 mdelay(100);
8955
8956                                 /* linkctl */
8957                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
8958                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
8959                                 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
8960                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
8961
8962                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
8963                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
8964                                 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
8965                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
8966
8967                                 /* linkctl2: restore bit 4 and bits 11:9 to their pre-equalization values on both ends */
8968                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
8969                                 tmp16 &= ~((1 << 4) | (7 << 9));
8970                                 tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
8971                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
8972
8973                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
8974                                 tmp16 &= ~((1 << 4) | (7 << 9));
8975                                 tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
8976                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
8977
8978                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8979                                 tmp &= ~LC_SET_QUIESCE;
8980                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8981                         }
8982                 }
8983         }
8984
8985         /* set the link speed */
8986         speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
8987         speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
8988         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
8989
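             /* program the target link speed in LNKCTL2
              * (1 = 2.5GT/s, 2 = 5.0GT/s, 3 = 8.0GT/s)
              */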
8990         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
8991         tmp16 &= ~0xf;
8992         if (mask & DRM_PCIE_SPEED_80)
8993                 tmp16 |= 3; /* gen3 */
8994         else if (mask & DRM_PCIE_SPEED_50)
8995                 tmp16 |= 2; /* gen2 */
8996         else
8997                 tmp16 |= 1; /* gen1 */
8998         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
8999
9000         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9001         speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9002         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9003
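             /* wait for the controller to clear the initiate bit, i.e. for the
              * speed change to complete (or time out)
              */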
9004         for (i = 0; i < rdev->usec_timeout; i++) {
9005                 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9006                 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9007                         break;
9008                 udelay(1);
9009         }
9010 }
9011
9012 static void cik_program_aspm(struct radeon_device *rdev)
9013 {
9014         u32 data, orig;
9015         bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9016         bool disable_clkreq = false;
9017
9018         if (radeon_aspm == 0)
9019                 return;
9020
9021         /* XXX double check IGPs */
9022         if (rdev->flags & RADEON_IS_IGP)
9023                 return;
9024
9025         if (!(rdev->flags & RADEON_IS_PCIE))
9026                 return;
9027
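         /* force the transmitted N_FTS (fast training sequence) count to 0x24 */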
9028         orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9029         data &= ~LC_XMIT_N_FTS_MASK;
9030         data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9031         if (orig != data)
9032                 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9033
9034         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9035         data |= LC_GO_TO_RECOVERY;
9036         if (orig != data)
9037                 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9038
9039         orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9040         data |= P_IGNORE_EDB_ERR;
9041         if (orig != data)
9042                 WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9043
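         /* set up L0s/L1 entry: clear both inactivity timers here, then
          * re-enable them below unless L0s or L1 is being disabled
          */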
9044         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9045         data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9046         data |= LC_PMI_TO_L1_DIS;
9047         if (!disable_l0s)
9048                 data |= LC_L0S_INACTIVITY(7);
9049
9050         if (!disable_l1) {
9051                 data |= LC_L1_INACTIVITY(7);
9052                 data &= ~LC_PMI_TO_L1_DIS;
9053                 if (orig != data)
9054                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9055
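                 /* let the PCIe PLLs power down while the link is in L1/TXS2 */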
9056                 if (!disable_plloff_in_l1) {
9057                         bool clk_req_support;
9058
9059                         orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9060                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9061                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9062                         if (orig != data)
9063                                 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9064
9065                         orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
9066                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9067                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9068                         if (orig != data)
9069                                 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
9070
9071                         orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
9072                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9073                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9074                         if (orig != data)
9075                                 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
9076
9077                         orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
9078                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9079                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9080                         if (orig != data)
9081                                 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
9082
9083                         orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9084                         data &= ~LC_DYN_LANES_PWR_STATE_MASK;
9085                         data |= LC_DYN_LANES_PWR_STATE(3);
9086                         if (orig != data)
9087                                 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
9088
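                             /* CLKREQ# based clock power management is only used if
                              * the root port advertises Clock PM in its link
                              * capabilities
                              */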
9089                         if (!disable_clkreq) {
9090                                 struct pci_dev *root = rdev->pdev->bus->self;
9091                                 u32 lnkcap;
9092
9093                                 clk_req_support = false;
9094                                 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
9095                                 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
9096                                         clk_req_support = true;
9097                         } else {
9098                                 clk_req_support = false;
9099                         }
9100
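                             /* with CLKREQ# usable, allow PLL power-down in L1/L2-L3
                              * and, presumably so the PCIe reference clock can be
                              * gated, move the thermal monitor, deep sleep, zclk and
                              * MPLL bypass clocks to alternate sources and stop
                              * forcing the BIF reference clock on
                              */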
9101                         if (clk_req_support) {
9102                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
9103                                 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
9104                                 if (orig != data)
9105                                         WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
9106
9107                                 orig = data = RREG32_SMC(THM_CLK_CNTL);
9108                                 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
9109                                 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
9110                                 if (orig != data)
9111                                         WREG32_SMC(THM_CLK_CNTL, data);
9112
9113                                 orig = data = RREG32_SMC(MISC_CLK_CTRL);
9114                                 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
9115                                 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
9116                                 if (orig != data)
9117                                         WREG32_SMC(MISC_CLK_CTRL, data);
9118
9119                                 orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
9120                                 data &= ~BCLK_AS_XCLK;
9121                                 if (orig != data)
9122                                         WREG32_SMC(CG_CLKPIN_CNTL, data);
9123
9124                                 orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
9125                                 data &= ~FORCE_BIF_REFCLK_EN;
9126                                 if (orig != data)
9127                                         WREG32_SMC(CG_CLKPIN_CNTL_2, data);
9128
9129                                 orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
9130                                 data &= ~MPLL_CLKOUT_SEL_MASK;
9131                                 data |= MPLL_CLKOUT_SEL(4);
9132                                 if (orig != data)
9133                                         WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
9134                         }
9135                 }
9136         } else {
9137                 if (orig != data)
9138                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9139         }
9140
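         /* enable light sleep for the PCIe slave, master and replay memories */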
9141         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
9142         data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
9143         if (orig != data)
9144                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
9145
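         /* if the N_FTS field reads back saturated and the link is reversed in
          * both directions, turn L0s entry back off by clearing its inactivity
          * timer
          */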
9146         if (!disable_l0s) {
9147                 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9148                 if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
9149                         data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9150                         if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
9151                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9152                                 data &= ~LC_L0S_INACTIVITY_MASK;
9153                                 if (orig != data)
9154                                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9155                         }
9156                 }
9157         }
9158 }