drm/radeon: rework UVD writeback & [rw]ptr handling
[firefly-linux-kernel-4.4.55.git] / drivers / gpu / drm / radeon / cik.c
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include "drmP.h"
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "cikd.h"
31 #include "atom.h"
32 #include "cik_blit_shaders.h"
33 #include "radeon_ucode.h"
34 #include "clearstate_ci.h"
35
36 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
37 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
38 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
39 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
40 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
41 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
42 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
43 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
44 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
45 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
46 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
47 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
48 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
49 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
50 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
51 MODULE_FIRMWARE("radeon/KABINI_me.bin");
52 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
53 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
54 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
55 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
56
57 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
58 extern void r600_ih_ring_fini(struct radeon_device *rdev);
59 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
60 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
61 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
62 extern void sumo_rlc_fini(struct radeon_device *rdev);
63 extern int sumo_rlc_init(struct radeon_device *rdev);
64 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
65 extern void si_rlc_reset(struct radeon_device *rdev);
66 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
67 static void cik_rlc_stop(struct radeon_device *rdev);
68 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
69 static void cik_program_aspm(struct radeon_device *rdev);
70 static void cik_init_pg(struct radeon_device *rdev);
71 static void cik_init_cg(struct radeon_device *rdev);
72 void cik_uvd_resume(struct radeon_device *rdev);
73
74 /* get temperature in millidegrees */
75 int ci_get_temp(struct radeon_device *rdev)
76 {
77         u32 temp;
78         int actual_temp = 0;
79
80         temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
81                 CTF_TEMP_SHIFT;
82
83         if (temp & 0x200)
84                 actual_temp = 255;
85         else
86                 actual_temp = temp & 0x1ff;
87
88         actual_temp = actual_temp * 1000;
89
90         return actual_temp;
91 }
92
93 /* get temperature in millidegrees */
94 int kv_get_temp(struct radeon_device *rdev)
95 {
96         u32 temp;
97         int actual_temp = 0;
98
99         temp = RREG32_SMC(0xC0300E0C);
100
101         if (temp)
102                 actual_temp = (temp / 8) - 49;
103         else
104                 actual_temp = 0;
105
106         actual_temp = actual_temp * 1000;
107
108         return actual_temp;
109 }
110
111 /*
112  * Indirect registers accessor
113  */
114 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
115 {
116         u32 r;
117
118         WREG32(PCIE_INDEX, reg);
119         (void)RREG32(PCIE_INDEX);
120         r = RREG32(PCIE_DATA);
121         return r;
122 }
123
/**
 * cik_pciep_wreg - write a PCIE port indirect register
 * @rdev: radeon device handle (used by the MMIO accessor macros)
 * @reg: indirect register offset to write
 * @v: value to write
 *
 * Selects @reg through PCIE_INDEX and writes @v to PCIE_DATA.  Each write
 * is followed by a read-back (posting read) so the index select lands
 * before the data write, and the data write is flushed to the device.
 * Statement order here is the contract; do not reorder.
 */
124 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
125 {
126         WREG32(PCIE_INDEX, reg);
	/* posting read: ensure the index select is complete */
127         (void)RREG32(PCIE_INDEX);
128         WREG32(PCIE_DATA, v);
	/* posting read: flush the data write out to the bus */
129         (void)RREG32(PCIE_DATA);
130 }
131
/*
 * RLC save/restore register list for Spectre (GFX7) parts.
 *
 * NOTE(review): entries appear to be pairs of
 *   ((SE/SH/instance broadcast select << 16) | (register byte offset >> 2))
 * followed by a default/save slot value of 0; the bare 0x3 and 0x5 entries
 * below look like section markers/counts in the RLC list format.  The
 * consumer of this table is not visible in this chunk -- confirm against
 * the RLC setup code before relying on this interpretation.
 */
132 static const u32 spectre_rlc_save_restore_register_list[] =
133 {
134         (0x0e00 << 16) | (0xc12c >> 2),
135         0x00000000,
136         (0x0e00 << 16) | (0xc140 >> 2),
137         0x00000000,
138         (0x0e00 << 16) | (0xc150 >> 2),
139         0x00000000,
140         (0x0e00 << 16) | (0xc15c >> 2),
141         0x00000000,
142         (0x0e00 << 16) | (0xc168 >> 2),
143         0x00000000,
144         (0x0e00 << 16) | (0xc170 >> 2),
145         0x00000000,
146         (0x0e00 << 16) | (0xc178 >> 2),
147         0x00000000,
148         (0x0e00 << 16) | (0xc204 >> 2),
149         0x00000000,
150         (0x0e00 << 16) | (0xc2b4 >> 2),
151         0x00000000,
152         (0x0e00 << 16) | (0xc2b8 >> 2),
153         0x00000000,
154         (0x0e00 << 16) | (0xc2bc >> 2),
155         0x00000000,
156         (0x0e00 << 16) | (0xc2c0 >> 2),
157         0x00000000,
158         (0x0e00 << 16) | (0x8228 >> 2),
159         0x00000000,
160         (0x0e00 << 16) | (0x829c >> 2),
161         0x00000000,
162         (0x0e00 << 16) | (0x869c >> 2),
163         0x00000000,
164         (0x0600 << 16) | (0x98f4 >> 2),
165         0x00000000,
166         (0x0e00 << 16) | (0x98f8 >> 2),
167         0x00000000,
168         (0x0e00 << 16) | (0x9900 >> 2),
169         0x00000000,
170         (0x0e00 << 16) | (0xc260 >> 2),
171         0x00000000,
172         (0x0e00 << 16) | (0x90e8 >> 2),
173         0x00000000,
174         (0x0e00 << 16) | (0x3c000 >> 2),
175         0x00000000,
176         (0x0e00 << 16) | (0x3c00c >> 2),
177         0x00000000,
178         (0x0e00 << 16) | (0x8c1c >> 2),
179         0x00000000,
180         (0x0e00 << 16) | (0x9700 >> 2),
181         0x00000000,
182         (0x0e00 << 16) | (0xcd20 >> 2),
183         0x00000000,
184         (0x4e00 << 16) | (0xcd20 >> 2),
185         0x00000000,
186         (0x5e00 << 16) | (0xcd20 >> 2),
187         0x00000000,
188         (0x6e00 << 16) | (0xcd20 >> 2),
189         0x00000000,
190         (0x7e00 << 16) | (0xcd20 >> 2),
191         0x00000000,
192         (0x8e00 << 16) | (0xcd20 >> 2),
193         0x00000000,
194         (0x9e00 << 16) | (0xcd20 >> 2),
195         0x00000000,
196         (0xae00 << 16) | (0xcd20 >> 2),
197         0x00000000,
198         (0xbe00 << 16) | (0xcd20 >> 2),
199         0x00000000,
200         (0x0e00 << 16) | (0x89bc >> 2),
201         0x00000000,
202         (0x0e00 << 16) | (0x8900 >> 2),
203         0x00000000,
	/* section marker -- meaning unverified, see header note */
204         0x3,
205         (0x0e00 << 16) | (0xc130 >> 2),
206         0x00000000,
207         (0x0e00 << 16) | (0xc134 >> 2),
208         0x00000000,
209         (0x0e00 << 16) | (0xc1fc >> 2),
210         0x00000000,
211         (0x0e00 << 16) | (0xc208 >> 2),
212         0x00000000,
213         (0x0e00 << 16) | (0xc264 >> 2),
214         0x00000000,
215         (0x0e00 << 16) | (0xc268 >> 2),
216         0x00000000,
217         (0x0e00 << 16) | (0xc26c >> 2),
218         0x00000000,
219         (0x0e00 << 16) | (0xc270 >> 2),
220         0x00000000,
221         (0x0e00 << 16) | (0xc274 >> 2),
222         0x00000000,
223         (0x0e00 << 16) | (0xc278 >> 2),
224         0x00000000,
225         (0x0e00 << 16) | (0xc27c >> 2),
226         0x00000000,
227         (0x0e00 << 16) | (0xc280 >> 2),
228         0x00000000,
229         (0x0e00 << 16) | (0xc284 >> 2),
230         0x00000000,
231         (0x0e00 << 16) | (0xc288 >> 2),
232         0x00000000,
233         (0x0e00 << 16) | (0xc28c >> 2),
234         0x00000000,
235         (0x0e00 << 16) | (0xc290 >> 2),
236         0x00000000,
237         (0x0e00 << 16) | (0xc294 >> 2),
238         0x00000000,
239         (0x0e00 << 16) | (0xc298 >> 2),
240         0x00000000,
241         (0x0e00 << 16) | (0xc29c >> 2),
242         0x00000000,
243         (0x0e00 << 16) | (0xc2a0 >> 2),
244         0x00000000,
245         (0x0e00 << 16) | (0xc2a4 >> 2),
246         0x00000000,
247         (0x0e00 << 16) | (0xc2a8 >> 2),
248         0x00000000,
249         (0x0e00 << 16) | (0xc2ac  >> 2),
250         0x00000000,
251         (0x0e00 << 16) | (0xc2b0 >> 2),
252         0x00000000,
253         (0x0e00 << 16) | (0x301d0 >> 2),
254         0x00000000,
255         (0x0e00 << 16) | (0x30238 >> 2),
256         0x00000000,
257         (0x0e00 << 16) | (0x30250 >> 2),
258         0x00000000,
259         (0x0e00 << 16) | (0x30254 >> 2),
260         0x00000000,
261         (0x0e00 << 16) | (0x30258 >> 2),
262         0x00000000,
263         (0x0e00 << 16) | (0x3025c >> 2),
264         0x00000000,
265         (0x4e00 << 16) | (0xc900 >> 2),
266         0x00000000,
267         (0x5e00 << 16) | (0xc900 >> 2),
268         0x00000000,
269         (0x6e00 << 16) | (0xc900 >> 2),
270         0x00000000,
271         (0x7e00 << 16) | (0xc900 >> 2),
272         0x00000000,
273         (0x8e00 << 16) | (0xc900 >> 2),
274         0x00000000,
275         (0x9e00 << 16) | (0xc900 >> 2),
276         0x00000000,
277         (0xae00 << 16) | (0xc900 >> 2),
278         0x00000000,
279         (0xbe00 << 16) | (0xc900 >> 2),
280         0x00000000,
281         (0x4e00 << 16) | (0xc904 >> 2),
282         0x00000000,
283         (0x5e00 << 16) | (0xc904 >> 2),
284         0x00000000,
285         (0x6e00 << 16) | (0xc904 >> 2),
286         0x00000000,
287         (0x7e00 << 16) | (0xc904 >> 2),
288         0x00000000,
289         (0x8e00 << 16) | (0xc904 >> 2),
290         0x00000000,
291         (0x9e00 << 16) | (0xc904 >> 2),
292         0x00000000,
293         (0xae00 << 16) | (0xc904 >> 2),
294         0x00000000,
295         (0xbe00 << 16) | (0xc904 >> 2),
296         0x00000000,
297         (0x4e00 << 16) | (0xc908 >> 2),
298         0x00000000,
299         (0x5e00 << 16) | (0xc908 >> 2),
300         0x00000000,
301         (0x6e00 << 16) | (0xc908 >> 2),
302         0x00000000,
303         (0x7e00 << 16) | (0xc908 >> 2),
304         0x00000000,
305         (0x8e00 << 16) | (0xc908 >> 2),
306         0x00000000,
307         (0x9e00 << 16) | (0xc908 >> 2),
308         0x00000000,
309         (0xae00 << 16) | (0xc908 >> 2),
310         0x00000000,
311         (0xbe00 << 16) | (0xc908 >> 2),
312         0x00000000,
313         (0x4e00 << 16) | (0xc90c >> 2),
314         0x00000000,
315         (0x5e00 << 16) | (0xc90c >> 2),
316         0x00000000,
317         (0x6e00 << 16) | (0xc90c >> 2),
318         0x00000000,
319         (0x7e00 << 16) | (0xc90c >> 2),
320         0x00000000,
321         (0x8e00 << 16) | (0xc90c >> 2),
322         0x00000000,
323         (0x9e00 << 16) | (0xc90c >> 2),
324         0x00000000,
325         (0xae00 << 16) | (0xc90c >> 2),
326         0x00000000,
327         (0xbe00 << 16) | (0xc90c >> 2),
328         0x00000000,
329         (0x4e00 << 16) | (0xc910 >> 2),
330         0x00000000,
331         (0x5e00 << 16) | (0xc910 >> 2),
332         0x00000000,
333         (0x6e00 << 16) | (0xc910 >> 2),
334         0x00000000,
335         (0x7e00 << 16) | (0xc910 >> 2),
336         0x00000000,
337         (0x8e00 << 16) | (0xc910 >> 2),
338         0x00000000,
339         (0x9e00 << 16) | (0xc910 >> 2),
340         0x00000000,
341         (0xae00 << 16) | (0xc910 >> 2),
342         0x00000000,
343         (0xbe00 << 16) | (0xc910 >> 2),
344         0x00000000,
345         (0x0e00 << 16) | (0xc99c >> 2),
346         0x00000000,
347         (0x0e00 << 16) | (0x9834 >> 2),
348         0x00000000,
349         (0x0000 << 16) | (0x30f00 >> 2),
350         0x00000000,
351         (0x0001 << 16) | (0x30f00 >> 2),
352         0x00000000,
353         (0x0000 << 16) | (0x30f04 >> 2),
354         0x00000000,
355         (0x0001 << 16) | (0x30f04 >> 2),
356         0x00000000,
357         (0x0000 << 16) | (0x30f08 >> 2),
358         0x00000000,
359         (0x0001 << 16) | (0x30f08 >> 2),
360         0x00000000,
361         (0x0000 << 16) | (0x30f0c >> 2),
362         0x00000000,
363         (0x0001 << 16) | (0x30f0c >> 2),
364         0x00000000,
365         (0x0600 << 16) | (0x9b7c >> 2),
366         0x00000000,
367         (0x0e00 << 16) | (0x8a14 >> 2),
368         0x00000000,
369         (0x0e00 << 16) | (0x8a18 >> 2),
370         0x00000000,
371         (0x0600 << 16) | (0x30a00 >> 2),
372         0x00000000,
373         (0x0e00 << 16) | (0x8bf0 >> 2),
374         0x00000000,
375         (0x0e00 << 16) | (0x8bcc >> 2),
376         0x00000000,
377         (0x0e00 << 16) | (0x8b24 >> 2),
378         0x00000000,
379         (0x0e00 << 16) | (0x30a04 >> 2),
380         0x00000000,
381         (0x0600 << 16) | (0x30a10 >> 2),
382         0x00000000,
383         (0x0600 << 16) | (0x30a14 >> 2),
384         0x00000000,
385         (0x0600 << 16) | (0x30a18 >> 2),
386         0x00000000,
387         (0x0600 << 16) | (0x30a2c >> 2),
388         0x00000000,
389         (0x0e00 << 16) | (0xc700 >> 2),
390         0x00000000,
391         (0x0e00 << 16) | (0xc704 >> 2),
392         0x00000000,
393         (0x0e00 << 16) | (0xc708 >> 2),
394         0x00000000,
395         (0x0e00 << 16) | (0xc768 >> 2),
396         0x00000000,
397         (0x0400 << 16) | (0xc770 >> 2),
398         0x00000000,
399         (0x0400 << 16) | (0xc774 >> 2),
400         0x00000000,
401         (0x0400 << 16) | (0xc778 >> 2),
402         0x00000000,
403         (0x0400 << 16) | (0xc77c >> 2),
404         0x00000000,
405         (0x0400 << 16) | (0xc780 >> 2),
406         0x00000000,
407         (0x0400 << 16) | (0xc784 >> 2),
408         0x00000000,
409         (0x0400 << 16) | (0xc788 >> 2),
410         0x00000000,
411         (0x0400 << 16) | (0xc78c >> 2),
412         0x00000000,
413         (0x0400 << 16) | (0xc798 >> 2),
414         0x00000000,
415         (0x0400 << 16) | (0xc79c >> 2),
416         0x00000000,
417         (0x0400 << 16) | (0xc7a0 >> 2),
418         0x00000000,
419         (0x0400 << 16) | (0xc7a4 >> 2),
420         0x00000000,
421         (0x0400 << 16) | (0xc7a8 >> 2),
422         0x00000000,
423         (0x0400 << 16) | (0xc7ac >> 2),
424         0x00000000,
425         (0x0400 << 16) | (0xc7b0 >> 2),
426         0x00000000,
427         (0x0400 << 16) | (0xc7b4 >> 2),
428         0x00000000,
429         (0x0e00 << 16) | (0x9100 >> 2),
430         0x00000000,
431         (0x0e00 << 16) | (0x3c010 >> 2),
432         0x00000000,
433         (0x0e00 << 16) | (0x92a8 >> 2),
434         0x00000000,
435         (0x0e00 << 16) | (0x92ac >> 2),
436         0x00000000,
437         (0x0e00 << 16) | (0x92b4 >> 2),
438         0x00000000,
439         (0x0e00 << 16) | (0x92b8 >> 2),
440         0x00000000,
441         (0x0e00 << 16) | (0x92bc >> 2),
442         0x00000000,
443         (0x0e00 << 16) | (0x92c0 >> 2),
444         0x00000000,
445         (0x0e00 << 16) | (0x92c4 >> 2),
446         0x00000000,
447         (0x0e00 << 16) | (0x92c8 >> 2),
448         0x00000000,
449         (0x0e00 << 16) | (0x92cc >> 2),
450         0x00000000,
451         (0x0e00 << 16) | (0x92d0 >> 2),
452         0x00000000,
453         (0x0e00 << 16) | (0x8c00 >> 2),
454         0x00000000,
455         (0x0e00 << 16) | (0x8c04 >> 2),
456         0x00000000,
457         (0x0e00 << 16) | (0x8c20 >> 2),
458         0x00000000,
459         (0x0e00 << 16) | (0x8c38 >> 2),
460         0x00000000,
461         (0x0e00 << 16) | (0x8c3c >> 2),
462         0x00000000,
463         (0x0e00 << 16) | (0xae00 >> 2),
464         0x00000000,
465         (0x0e00 << 16) | (0x9604 >> 2),
466         0x00000000,
467         (0x0e00 << 16) | (0xac08 >> 2),
468         0x00000000,
469         (0x0e00 << 16) | (0xac0c >> 2),
470         0x00000000,
471         (0x0e00 << 16) | (0xac10 >> 2),
472         0x00000000,
473         (0x0e00 << 16) | (0xac14 >> 2),
474         0x00000000,
475         (0x0e00 << 16) | (0xac58 >> 2),
476         0x00000000,
477         (0x0e00 << 16) | (0xac68 >> 2),
478         0x00000000,
479         (0x0e00 << 16) | (0xac6c >> 2),
480         0x00000000,
481         (0x0e00 << 16) | (0xac70 >> 2),
482         0x00000000,
483         (0x0e00 << 16) | (0xac74 >> 2),
484         0x00000000,
485         (0x0e00 << 16) | (0xac78 >> 2),
486         0x00000000,
487         (0x0e00 << 16) | (0xac7c >> 2),
488         0x00000000,
489         (0x0e00 << 16) | (0xac80 >> 2),
490         0x00000000,
491         (0x0e00 << 16) | (0xac84 >> 2),
492         0x00000000,
493         (0x0e00 << 16) | (0xac88 >> 2),
494         0x00000000,
495         (0x0e00 << 16) | (0xac8c >> 2),
496         0x00000000,
497         (0x0e00 << 16) | (0x970c >> 2),
498         0x00000000,
499         (0x0e00 << 16) | (0x9714 >> 2),
500         0x00000000,
501         (0x0e00 << 16) | (0x9718 >> 2),
502         0x00000000,
503         (0x0e00 << 16) | (0x971c >> 2),
504         0x00000000,
505         (0x0e00 << 16) | (0x31068 >> 2),
506         0x00000000,
507         (0x4e00 << 16) | (0x31068 >> 2),
508         0x00000000,
509         (0x5e00 << 16) | (0x31068 >> 2),
510         0x00000000,
511         (0x6e00 << 16) | (0x31068 >> 2),
512         0x00000000,
513         (0x7e00 << 16) | (0x31068 >> 2),
514         0x00000000,
515         (0x8e00 << 16) | (0x31068 >> 2),
516         0x00000000,
517         (0x9e00 << 16) | (0x31068 >> 2),
518         0x00000000,
519         (0xae00 << 16) | (0x31068 >> 2),
520         0x00000000,
521         (0xbe00 << 16) | (0x31068 >> 2),
522         0x00000000,
523         (0x0e00 << 16) | (0xcd10 >> 2),
524         0x00000000,
525         (0x0e00 << 16) | (0xcd14 >> 2),
526         0x00000000,
527         (0x0e00 << 16) | (0x88b0 >> 2),
528         0x00000000,
529         (0x0e00 << 16) | (0x88b4 >> 2),
530         0x00000000,
531         (0x0e00 << 16) | (0x88b8 >> 2),
532         0x00000000,
533         (0x0e00 << 16) | (0x88bc >> 2),
534         0x00000000,
535         (0x0400 << 16) | (0x89c0 >> 2),
536         0x00000000,
537         (0x0e00 << 16) | (0x88c4 >> 2),
538         0x00000000,
539         (0x0e00 << 16) | (0x88c8 >> 2),
540         0x00000000,
541         (0x0e00 << 16) | (0x88d0 >> 2),
542         0x00000000,
543         (0x0e00 << 16) | (0x88d4 >> 2),
544         0x00000000,
545         (0x0e00 << 16) | (0x88d8 >> 2),
546         0x00000000,
547         (0x0e00 << 16) | (0x8980 >> 2),
548         0x00000000,
549         (0x0e00 << 16) | (0x30938 >> 2),
550         0x00000000,
551         (0x0e00 << 16) | (0x3093c >> 2),
552         0x00000000,
553         (0x0e00 << 16) | (0x30940 >> 2),
554         0x00000000,
555         (0x0e00 << 16) | (0x89a0 >> 2),
556         0x00000000,
557         (0x0e00 << 16) | (0x30900 >> 2),
558         0x00000000,
559         (0x0e00 << 16) | (0x30904 >> 2),
560         0x00000000,
561         (0x0e00 << 16) | (0x89b4 >> 2),
562         0x00000000,
563         (0x0e00 << 16) | (0x3c210 >> 2),
564         0x00000000,
565         (0x0e00 << 16) | (0x3c214 >> 2),
566         0x00000000,
567         (0x0e00 << 16) | (0x3c218 >> 2),
568         0x00000000,
569         (0x0e00 << 16) | (0x8904 >> 2),
570         0x00000000,
	/* section marker -- the following entries have no value words */
571         0x5,
572         (0x0e00 << 16) | (0x8c28 >> 2),
573         (0x0e00 << 16) | (0x8c2c >> 2),
574         (0x0e00 << 16) | (0x8c30 >> 2),
575         (0x0e00 << 16) | (0x8c34 >> 2),
576         (0x0e00 << 16) | (0x9600 >> 2),
577 };
578
/*
 * RLC save/restore register list for Kalindi (GFX7 APU) parts.
 *
 * NOTE(review): same encoding as the Spectre list above -- pairs of
 *   ((SE/SH/instance broadcast select << 16) | (register byte offset >> 2))
 * followed by a 0 value word, with bare 0x3 / 0x5 entries acting as
 * section markers.  The consumer is not visible in this chunk; confirm
 * against the RLC setup code before relying on this interpretation.
 */
579 static const u32 kalindi_rlc_save_restore_register_list[] =
580 {
581         (0x0e00 << 16) | (0xc12c >> 2),
582         0x00000000,
583         (0x0e00 << 16) | (0xc140 >> 2),
584         0x00000000,
585         (0x0e00 << 16) | (0xc150 >> 2),
586         0x00000000,
587         (0x0e00 << 16) | (0xc15c >> 2),
588         0x00000000,
589         (0x0e00 << 16) | (0xc168 >> 2),
590         0x00000000,
591         (0x0e00 << 16) | (0xc170 >> 2),
592         0x00000000,
593         (0x0e00 << 16) | (0xc204 >> 2),
594         0x00000000,
595         (0x0e00 << 16) | (0xc2b4 >> 2),
596         0x00000000,
597         (0x0e00 << 16) | (0xc2b8 >> 2),
598         0x00000000,
599         (0x0e00 << 16) | (0xc2bc >> 2),
600         0x00000000,
601         (0x0e00 << 16) | (0xc2c0 >> 2),
602         0x00000000,
603         (0x0e00 << 16) | (0x8228 >> 2),
604         0x00000000,
605         (0x0e00 << 16) | (0x829c >> 2),
606         0x00000000,
607         (0x0e00 << 16) | (0x869c >> 2),
608         0x00000000,
609         (0x0600 << 16) | (0x98f4 >> 2),
610         0x00000000,
611         (0x0e00 << 16) | (0x98f8 >> 2),
612         0x00000000,
613         (0x0e00 << 16) | (0x9900 >> 2),
614         0x00000000,
615         (0x0e00 << 16) | (0xc260 >> 2),
616         0x00000000,
617         (0x0e00 << 16) | (0x90e8 >> 2),
618         0x00000000,
619         (0x0e00 << 16) | (0x3c000 >> 2),
620         0x00000000,
621         (0x0e00 << 16) | (0x3c00c >> 2),
622         0x00000000,
623         (0x0e00 << 16) | (0x8c1c >> 2),
624         0x00000000,
625         (0x0e00 << 16) | (0x9700 >> 2),
626         0x00000000,
627         (0x0e00 << 16) | (0xcd20 >> 2),
628         0x00000000,
629         (0x4e00 << 16) | (0xcd20 >> 2),
630         0x00000000,
631         (0x5e00 << 16) | (0xcd20 >> 2),
632         0x00000000,
633         (0x6e00 << 16) | (0xcd20 >> 2),
634         0x00000000,
635         (0x7e00 << 16) | (0xcd20 >> 2),
636         0x00000000,
637         (0x0e00 << 16) | (0x89bc >> 2),
638         0x00000000,
639         (0x0e00 << 16) | (0x8900 >> 2),
640         0x00000000,
	/* section marker -- meaning unverified, see header note */
641         0x3,
642         (0x0e00 << 16) | (0xc130 >> 2),
643         0x00000000,
644         (0x0e00 << 16) | (0xc134 >> 2),
645         0x00000000,
646         (0x0e00 << 16) | (0xc1fc >> 2),
647         0x00000000,
648         (0x0e00 << 16) | (0xc208 >> 2),
649         0x00000000,
650         (0x0e00 << 16) | (0xc264 >> 2),
651         0x00000000,
652         (0x0e00 << 16) | (0xc268 >> 2),
653         0x00000000,
654         (0x0e00 << 16) | (0xc26c >> 2),
655         0x00000000,
656         (0x0e00 << 16) | (0xc270 >> 2),
657         0x00000000,
658         (0x0e00 << 16) | (0xc274 >> 2),
659         0x00000000,
660         (0x0e00 << 16) | (0xc28c >> 2),
661         0x00000000,
662         (0x0e00 << 16) | (0xc290 >> 2),
663         0x00000000,
664         (0x0e00 << 16) | (0xc294 >> 2),
665         0x00000000,
666         (0x0e00 << 16) | (0xc298 >> 2),
667         0x00000000,
668         (0x0e00 << 16) | (0xc2a0 >> 2),
669         0x00000000,
670         (0x0e00 << 16) | (0xc2a4 >> 2),
671         0x00000000,
672         (0x0e00 << 16) | (0xc2a8 >> 2),
673         0x00000000,
674         (0x0e00 << 16) | (0xc2ac >> 2),
675         0x00000000,
676         (0x0e00 << 16) | (0x301d0 >> 2),
677         0x00000000,
678         (0x0e00 << 16) | (0x30238 >> 2),
679         0x00000000,
680         (0x0e00 << 16) | (0x30250 >> 2),
681         0x00000000,
682         (0x0e00 << 16) | (0x30254 >> 2),
683         0x00000000,
684         (0x0e00 << 16) | (0x30258 >> 2),
685         0x00000000,
686         (0x0e00 << 16) | (0x3025c >> 2),
687         0x00000000,
688         (0x4e00 << 16) | (0xc900 >> 2),
689         0x00000000,
690         (0x5e00 << 16) | (0xc900 >> 2),
691         0x00000000,
692         (0x6e00 << 16) | (0xc900 >> 2),
693         0x00000000,
694         (0x7e00 << 16) | (0xc900 >> 2),
695         0x00000000,
696         (0x4e00 << 16) | (0xc904 >> 2),
697         0x00000000,
698         (0x5e00 << 16) | (0xc904 >> 2),
699         0x00000000,
700         (0x6e00 << 16) | (0xc904 >> 2),
701         0x00000000,
702         (0x7e00 << 16) | (0xc904 >> 2),
703         0x00000000,
704         (0x4e00 << 16) | (0xc908 >> 2),
705         0x00000000,
706         (0x5e00 << 16) | (0xc908 >> 2),
707         0x00000000,
708         (0x6e00 << 16) | (0xc908 >> 2),
709         0x00000000,
710         (0x7e00 << 16) | (0xc908 >> 2),
711         0x00000000,
712         (0x4e00 << 16) | (0xc90c >> 2),
713         0x00000000,
714         (0x5e00 << 16) | (0xc90c >> 2),
715         0x00000000,
716         (0x6e00 << 16) | (0xc90c >> 2),
717         0x00000000,
718         (0x7e00 << 16) | (0xc90c >> 2),
719         0x00000000,
720         (0x4e00 << 16) | (0xc910 >> 2),
721         0x00000000,
722         (0x5e00 << 16) | (0xc910 >> 2),
723         0x00000000,
724         (0x6e00 << 16) | (0xc910 >> 2),
725         0x00000000,
726         (0x7e00 << 16) | (0xc910 >> 2),
727         0x00000000,
728         (0x0e00 << 16) | (0xc99c >> 2),
729         0x00000000,
730         (0x0e00 << 16) | (0x9834 >> 2),
731         0x00000000,
732         (0x0000 << 16) | (0x30f00 >> 2),
733         0x00000000,
734         (0x0000 << 16) | (0x30f04 >> 2),
735         0x00000000,
736         (0x0000 << 16) | (0x30f08 >> 2),
737         0x00000000,
738         (0x0000 << 16) | (0x30f0c >> 2),
739         0x00000000,
740         (0x0600 << 16) | (0x9b7c >> 2),
741         0x00000000,
742         (0x0e00 << 16) | (0x8a14 >> 2),
743         0x00000000,
744         (0x0e00 << 16) | (0x8a18 >> 2),
745         0x00000000,
746         (0x0600 << 16) | (0x30a00 >> 2),
747         0x00000000,
748         (0x0e00 << 16) | (0x8bf0 >> 2),
749         0x00000000,
750         (0x0e00 << 16) | (0x8bcc >> 2),
751         0x00000000,
752         (0x0e00 << 16) | (0x8b24 >> 2),
753         0x00000000,
754         (0x0e00 << 16) | (0x30a04 >> 2),
755         0x00000000,
756         (0x0600 << 16) | (0x30a10 >> 2),
757         0x00000000,
758         (0x0600 << 16) | (0x30a14 >> 2),
759         0x00000000,
760         (0x0600 << 16) | (0x30a18 >> 2),
761         0x00000000,
762         (0x0600 << 16) | (0x30a2c >> 2),
763         0x00000000,
764         (0x0e00 << 16) | (0xc700 >> 2),
765         0x00000000,
766         (0x0e00 << 16) | (0xc704 >> 2),
767         0x00000000,
768         (0x0e00 << 16) | (0xc708 >> 2),
769         0x00000000,
770         (0x0e00 << 16) | (0xc768 >> 2),
771         0x00000000,
772         (0x0400 << 16) | (0xc770 >> 2),
773         0x00000000,
774         (0x0400 << 16) | (0xc774 >> 2),
775         0x00000000,
776         (0x0400 << 16) | (0xc798 >> 2),
777         0x00000000,
778         (0x0400 << 16) | (0xc79c >> 2),
779         0x00000000,
780         (0x0e00 << 16) | (0x9100 >> 2),
781         0x00000000,
782         (0x0e00 << 16) | (0x3c010 >> 2),
783         0x00000000,
784         (0x0e00 << 16) | (0x8c00 >> 2),
785         0x00000000,
786         (0x0e00 << 16) | (0x8c04 >> 2),
787         0x00000000,
788         (0x0e00 << 16) | (0x8c20 >> 2),
789         0x00000000,
790         (0x0e00 << 16) | (0x8c38 >> 2),
791         0x00000000,
792         (0x0e00 << 16) | (0x8c3c >> 2),
793         0x00000000,
794         (0x0e00 << 16) | (0xae00 >> 2),
795         0x00000000,
796         (0x0e00 << 16) | (0x9604 >> 2),
797         0x00000000,
798         (0x0e00 << 16) | (0xac08 >> 2),
799         0x00000000,
800         (0x0e00 << 16) | (0xac0c >> 2),
801         0x00000000,
802         (0x0e00 << 16) | (0xac10 >> 2),
803         0x00000000,
804         (0x0e00 << 16) | (0xac14 >> 2),
805         0x00000000,
806         (0x0e00 << 16) | (0xac58 >> 2),
807         0x00000000,
808         (0x0e00 << 16) | (0xac68 >> 2),
809         0x00000000,
810         (0x0e00 << 16) | (0xac6c >> 2),
811         0x00000000,
812         (0x0e00 << 16) | (0xac70 >> 2),
813         0x00000000,
814         (0x0e00 << 16) | (0xac74 >> 2),
815         0x00000000,
816         (0x0e00 << 16) | (0xac78 >> 2),
817         0x00000000,
818         (0x0e00 << 16) | (0xac7c >> 2),
819         0x00000000,
820         (0x0e00 << 16) | (0xac80 >> 2),
821         0x00000000,
822         (0x0e00 << 16) | (0xac84 >> 2),
823         0x00000000,
824         (0x0e00 << 16) | (0xac88 >> 2),
825         0x00000000,
826         (0x0e00 << 16) | (0xac8c >> 2),
827         0x00000000,
828         (0x0e00 << 16) | (0x970c >> 2),
829         0x00000000,
830         (0x0e00 << 16) | (0x9714 >> 2),
831         0x00000000,
832         (0x0e00 << 16) | (0x9718 >> 2),
833         0x00000000,
834         (0x0e00 << 16) | (0x971c >> 2),
835         0x00000000,
836         (0x0e00 << 16) | (0x31068 >> 2),
837         0x00000000,
838         (0x4e00 << 16) | (0x31068 >> 2),
839         0x00000000,
840         (0x5e00 << 16) | (0x31068 >> 2),
841         0x00000000,
842         (0x6e00 << 16) | (0x31068 >> 2),
843         0x00000000,
844         (0x7e00 << 16) | (0x31068 >> 2),
845         0x00000000,
846         (0x0e00 << 16) | (0xcd10 >> 2),
847         0x00000000,
848         (0x0e00 << 16) | (0xcd14 >> 2),
849         0x00000000,
850         (0x0e00 << 16) | (0x88b0 >> 2),
851         0x00000000,
852         (0x0e00 << 16) | (0x88b4 >> 2),
853         0x00000000,
854         (0x0e00 << 16) | (0x88b8 >> 2),
855         0x00000000,
856         (0x0e00 << 16) | (0x88bc >> 2),
857         0x00000000,
858         (0x0400 << 16) | (0x89c0 >> 2),
859         0x00000000,
860         (0x0e00 << 16) | (0x88c4 >> 2),
861         0x00000000,
862         (0x0e00 << 16) | (0x88c8 >> 2),
863         0x00000000,
864         (0x0e00 << 16) | (0x88d0 >> 2),
865         0x00000000,
866         (0x0e00 << 16) | (0x88d4 >> 2),
867         0x00000000,
868         (0x0e00 << 16) | (0x88d8 >> 2),
869         0x00000000,
870         (0x0e00 << 16) | (0x8980 >> 2),
871         0x00000000,
872         (0x0e00 << 16) | (0x30938 >> 2),
873         0x00000000,
874         (0x0e00 << 16) | (0x3093c >> 2),
875         0x00000000,
876         (0x0e00 << 16) | (0x30940 >> 2),
877         0x00000000,
878         (0x0e00 << 16) | (0x89a0 >> 2),
879         0x00000000,
880         (0x0e00 << 16) | (0x30900 >> 2),
881         0x00000000,
882         (0x0e00 << 16) | (0x30904 >> 2),
883         0x00000000,
884         (0x0e00 << 16) | (0x89b4 >> 2),
885         0x00000000,
886         (0x0e00 << 16) | (0x3e1fc >> 2),
887         0x00000000,
888         (0x0e00 << 16) | (0x3c210 >> 2),
889         0x00000000,
890         (0x0e00 << 16) | (0x3c214 >> 2),
891         0x00000000,
892         (0x0e00 << 16) | (0x3c218 >> 2),
893         0x00000000,
894         (0x0e00 << 16) | (0x8904 >> 2),
895         0x00000000,
	/* section marker -- the following entries have no value words */
896         0x5,
897         (0x0e00 << 16) | (0x8c28 >> 2),
898         (0x0e00 << 16) | (0x8c2c >> 2),
899         (0x0e00 << 16) | (0x8c30 >> 2),
900         (0x0e00 << 16) | (0x8c34 >> 2),
901         (0x0e00 << 16) | (0x9600 >> 2),
902 };
903
/*
 * Golden SPM register settings for Bonaire.
 * NOTE(review): rows appear to be {offset, mask, value} triples applied by
 * the golden-register programming helper (consumer not visible in this
 * chunk) -- confirm before relying on this.
 */
904 static const u32 bonaire_golden_spm_registers[] =
905 {
906         0x30800, 0xe0ffffff, 0xe0000000
907 };
908
/*
 * Golden common register settings for Bonaire.
 * NOTE(review): rows appear to be {offset, mask, value} triples applied by
 * the golden-register programming helper (consumer not visible in this
 * chunk) -- confirm before relying on this.
 */
909 static const u32 bonaire_golden_common_registers[] =
910 {
911         0xc770, 0xffffffff, 0x00000800,
912         0xc774, 0xffffffff, 0x00000800,
913         0xc798, 0xffffffff, 0x00007fbf,
914         0xc79c, 0xffffffff, 0x00007faf
915 };
916
/*
 * Golden register settings for Bonaire.
 * NOTE(review): rows appear to be {offset, mask, value} triples applied by
 * the golden-register programming helper (consumer not visible in this
 * chunk) -- confirm before relying on this.
 */
917 static const u32 bonaire_golden_registers[] =
918 {
919         0x3354, 0x00000333, 0x00000333,
920         0x3350, 0x000c0fc0, 0x00040200,
921         0x9a10, 0x00010000, 0x00058208,
922         0x3c000, 0xffff1fff, 0x00140000,
923         0x3c200, 0xfdfc0fff, 0x00000100,
924         0x3c234, 0x40000000, 0x40000200,
925         0x9830, 0xffffffff, 0x00000000,
926         0x9834, 0xf00fffff, 0x00000400,
927         0x9838, 0x0002021c, 0x00020200,
928         0xc78, 0x00000080, 0x00000000,
929         0x5bb0, 0x000000f0, 0x00000070,
930         0x5bc0, 0xf0311fff, 0x80300000,
931         0x98f8, 0x73773777, 0x12010001,
932         0x350c, 0x00810000, 0x408af000,
933         0x7030, 0x31000111, 0x00000011,
934         0x2f48, 0x73773777, 0x12010001,
935         0x220c, 0x00007fb6, 0x0021a1b1,
936         0x2210, 0x00007fb6, 0x002021b1,
937         0x2180, 0x00007fb6, 0x00002191,
938         0x2218, 0x00007fb6, 0x002121b1,
939         0x221c, 0x00007fb6, 0x002021b1,
940         0x21dc, 0x00007fb6, 0x00002191,
941         0x21e0, 0x00007fb6, 0x00002191,
942         0x3628, 0x0000003f, 0x0000000a,
943         0x362c, 0x0000003f, 0x0000000a,
944         0x2ae4, 0x00073ffe, 0x000022a2,
945         0x240c, 0x000007ff, 0x00000000,
946         0x8a14, 0xf000003f, 0x00000007,
947         0x8bf0, 0x00002001, 0x00000001,
948         0x8b24, 0xffffffff, 0x00ffffff,
949         0x30a04, 0x0000ff0f, 0x00000000,
950         0x28a4c, 0x07ffffff, 0x06000000,
951         0x4d8, 0x00000fff, 0x00000100,
952         0x3e78, 0x00000001, 0x00000002,
953         0x9100, 0x03000000, 0x0362c688,
954         0x8c00, 0x000000ff, 0x00000001,
955         0xe40, 0x00001fff, 0x00001fff,
956         0x9060, 0x0000007f, 0x00000020,
957         0x9508, 0x00010000, 0x00010000,
958         0xac14, 0x000003ff, 0x000000f3,
959         0xac0c, 0xffffffff, 0x00001032
960 };
961
/* Bonaire clockgating init sequence (MGCG/CGCG per the naming):
 * {offset, mask, value} triplets consumed by radeon_program_register_sequence().
 */
static const u32 bonaire_mgcg_cgcg_init[] =
{
        0xc420, 0xffffffff, 0xfffffffc,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c2a0, 0xffffffff, 0x00000100,
        0x3c208, 0xffffffff, 0x00000100,
        0x3c2c0, 0xffffffff, 0xc0000100,
        0x3c2c8, 0xffffffff, 0xc0000100,
        0x3c2c4, 0xffffffff, 0xc0000100,
        0x55e4, 0xffffffff, 0x00600100,
        0x3c280, 0xffffffff, 0x00000100,
        0x3c214, 0xffffffff, 0x06000100,
        0x3c220, 0xffffffff, 0x00000100,
        0x3c218, 0xffffffff, 0x06000100,
        0x3c204, 0xffffffff, 0x00000100,
        0x3c2e0, 0xffffffff, 0x00000100,
        0x3c224, 0xffffffff, 0x00000100,
        0x3c200, 0xffffffff, 0x00000100,
        0x3c230, 0xffffffff, 0x00000100,
        0x3c234, 0xffffffff, 0x00000100,
        0x3c250, 0xffffffff, 0x00000100,
        0x3c254, 0xffffffff, 0x00000100,
        0x3c258, 0xffffffff, 0x00000100,
        0x3c25c, 0xffffffff, 0x00000100,
        0x3c260, 0xffffffff, 0x00000100,
        0x3c27c, 0xffffffff, 0x00000100,
        0x3c278, 0xffffffff, 0x00000100,
        0x3c210, 0xffffffff, 0x06000100,
        0x3c290, 0xffffffff, 0x00000100,
        0x3c274, 0xffffffff, 0x00000100,
        0x3c2b4, 0xffffffff, 0x00000100,
        0x3c2b0, 0xffffffff, 0x00000100,
        0x3c270, 0xffffffff, 0x00000100,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c020, 0xffffffff, 0x00010000,
        0x3c024, 0xffffffff, 0x00030002,
        0x3c028, 0xffffffff, 0x00040007,
        0x3c02c, 0xffffffff, 0x00060005,
        0x3c030, 0xffffffff, 0x00090008,
        0x3c034, 0xffffffff, 0x00010000,
        0x3c038, 0xffffffff, 0x00030002,
        0x3c03c, 0xffffffff, 0x00040007,
        0x3c040, 0xffffffff, 0x00060005,
        0x3c044, 0xffffffff, 0x00090008,
        0x3c048, 0xffffffff, 0x00010000,
        0x3c04c, 0xffffffff, 0x00030002,
        0x3c050, 0xffffffff, 0x00040007,
        0x3c054, 0xffffffff, 0x00060005,
        0x3c058, 0xffffffff, 0x00090008,
        0x3c05c, 0xffffffff, 0x00010000,
        0x3c060, 0xffffffff, 0x00030002,
        0x3c064, 0xffffffff, 0x00040007,
        0x3c068, 0xffffffff, 0x00060005,
        0x3c06c, 0xffffffff, 0x00090008,
        0x3c070, 0xffffffff, 0x00010000,
        0x3c074, 0xffffffff, 0x00030002,
        0x3c078, 0xffffffff, 0x00040007,
        0x3c07c, 0xffffffff, 0x00060005,
        0x3c080, 0xffffffff, 0x00090008,
        0x3c084, 0xffffffff, 0x00010000,
        0x3c088, 0xffffffff, 0x00030002,
        0x3c08c, 0xffffffff, 0x00040007,
        0x3c090, 0xffffffff, 0x00060005,
        0x3c094, 0xffffffff, 0x00090008,
        0x3c098, 0xffffffff, 0x00010000,
        0x3c09c, 0xffffffff, 0x00030002,
        0x3c0a0, 0xffffffff, 0x00040007,
        0x3c0a4, 0xffffffff, 0x00060005,
        0x3c0a8, 0xffffffff, 0x00090008,
        0x3c000, 0xffffffff, 0x96e00200,
        0x8708, 0xffffffff, 0x00900100,
        0xc424, 0xffffffff, 0x0020003f,
        0x38, 0xffffffff, 0x0140001c,
        0x3c, 0x000f0000, 0x000f0000,
        0x220, 0xffffffff, 0xC060000C,
        0x224, 0xc0000fff, 0x00000100,
        0xf90, 0xffffffff, 0x00000100,
        0xf98, 0x00000101, 0x00000000,
        0x20a8, 0xffffffff, 0x00000104,
        0x55e4, 0xff000fff, 0x00000100,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd00c, 0xff000ff0, 0x00000100,
        0xd80c, 0xff000ff0, 0x00000100
};
1047
/* Spectre (Kaveri) SPM golden settings: {offset, mask, value} triplets
 * consumed by radeon_program_register_sequence().
 */
static const u32 spectre_golden_spm_registers[] =
{
        0x30800, 0xe0ffffff, 0xe0000000
};
1052
/* Spectre (Kaveri) common golden settings: {offset, mask, value} triplets
 * consumed by radeon_program_register_sequence().
 */
static const u32 spectre_golden_common_registers[] =
{
        0xc770, 0xffffffff, 0x00000800,
        0xc774, 0xffffffff, 0x00000800,
        0xc798, 0xffffffff, 0x00007fbf,
        0xc79c, 0xffffffff, 0x00007faf
};
1060
/* Spectre (Kaveri) golden register settings: {offset, mask, value} triplets
 * consumed by radeon_program_register_sequence() from cik_init_golden_registers().
 */
static const u32 spectre_golden_registers[] =
{
        0x3c000, 0xffff1fff, 0x96940200,
        0x3c00c, 0xffff0001, 0xff000000,
        0x3c200, 0xfffc0fff, 0x00000100,
        0x6ed8, 0x00010101, 0x00010000,
        0x9834, 0xf00fffff, 0x00000400,
        0x9838, 0xfffffffc, 0x00020200,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0xf0311fff, 0x80300000,
        0x98f8, 0x73773777, 0x12010001,
        0x9b7c, 0x00ff0000, 0x00fc0000,
        0x2f48, 0x73773777, 0x12010001,
        0x8a14, 0xf000003f, 0x00000007,
        0x8b24, 0xffffffff, 0x00ffffff,
        0x28350, 0x3f3f3fff, 0x00000082,
        0x28355, 0x0000003f, 0x00000000,
        0x3e78, 0x00000001, 0x00000002,
        0x913c, 0xffff03df, 0x00000004,
        0xc768, 0x00000008, 0x00000008,
        0x8c00, 0x000008ff, 0x00000800,
        0x9508, 0x00010000, 0x00010000,
        0xac0c, 0xffffffff, 0x54763210,
        0x214f8, 0x01ff01ff, 0x00000002,
        0x21498, 0x007ff800, 0x00200000,
        0x2015c, 0xffffffff, 0x00000f40,
        0x30934, 0xffffffff, 0x00000001
};
1089
/* Spectre (Kaveri) clockgating init sequence (MGCG/CGCG per the naming):
 * {offset, mask, value} triplets consumed by radeon_program_register_sequence().
 */
static const u32 spectre_mgcg_cgcg_init[] =
{
        0xc420, 0xffffffff, 0xfffffffc,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c2a0, 0xffffffff, 0x00000100,
        0x3c208, 0xffffffff, 0x00000100,
        0x3c2c0, 0xffffffff, 0x00000100,
        0x3c2c8, 0xffffffff, 0x00000100,
        0x3c2c4, 0xffffffff, 0x00000100,
        0x55e4, 0xffffffff, 0x00600100,
        0x3c280, 0xffffffff, 0x00000100,
        0x3c214, 0xffffffff, 0x06000100,
        0x3c220, 0xffffffff, 0x00000100,
        0x3c218, 0xffffffff, 0x06000100,
        0x3c204, 0xffffffff, 0x00000100,
        0x3c2e0, 0xffffffff, 0x00000100,
        0x3c224, 0xffffffff, 0x00000100,
        0x3c200, 0xffffffff, 0x00000100,
        0x3c230, 0xffffffff, 0x00000100,
        0x3c234, 0xffffffff, 0x00000100,
        0x3c250, 0xffffffff, 0x00000100,
        0x3c254, 0xffffffff, 0x00000100,
        0x3c258, 0xffffffff, 0x00000100,
        0x3c25c, 0xffffffff, 0x00000100,
        0x3c260, 0xffffffff, 0x00000100,
        0x3c27c, 0xffffffff, 0x00000100,
        0x3c278, 0xffffffff, 0x00000100,
        0x3c210, 0xffffffff, 0x06000100,
        0x3c290, 0xffffffff, 0x00000100,
        0x3c274, 0xffffffff, 0x00000100,
        0x3c2b4, 0xffffffff, 0x00000100,
        0x3c2b0, 0xffffffff, 0x00000100,
        0x3c270, 0xffffffff, 0x00000100,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c020, 0xffffffff, 0x00010000,
        0x3c024, 0xffffffff, 0x00030002,
        0x3c028, 0xffffffff, 0x00040007,
        0x3c02c, 0xffffffff, 0x00060005,
        0x3c030, 0xffffffff, 0x00090008,
        0x3c034, 0xffffffff, 0x00010000,
        0x3c038, 0xffffffff, 0x00030002,
        0x3c03c, 0xffffffff, 0x00040007,
        0x3c040, 0xffffffff, 0x00060005,
        0x3c044, 0xffffffff, 0x00090008,
        0x3c048, 0xffffffff, 0x00010000,
        0x3c04c, 0xffffffff, 0x00030002,
        0x3c050, 0xffffffff, 0x00040007,
        0x3c054, 0xffffffff, 0x00060005,
        0x3c058, 0xffffffff, 0x00090008,
        0x3c05c, 0xffffffff, 0x00010000,
        0x3c060, 0xffffffff, 0x00030002,
        0x3c064, 0xffffffff, 0x00040007,
        0x3c068, 0xffffffff, 0x00060005,
        0x3c06c, 0xffffffff, 0x00090008,
        0x3c070, 0xffffffff, 0x00010000,
        0x3c074, 0xffffffff, 0x00030002,
        0x3c078, 0xffffffff, 0x00040007,
        0x3c07c, 0xffffffff, 0x00060005,
        0x3c080, 0xffffffff, 0x00090008,
        0x3c084, 0xffffffff, 0x00010000,
        0x3c088, 0xffffffff, 0x00030002,
        0x3c08c, 0xffffffff, 0x00040007,
        0x3c090, 0xffffffff, 0x00060005,
        0x3c094, 0xffffffff, 0x00090008,
        0x3c098, 0xffffffff, 0x00010000,
        0x3c09c, 0xffffffff, 0x00030002,
        0x3c0a0, 0xffffffff, 0x00040007,
        0x3c0a4, 0xffffffff, 0x00060005,
        0x3c0a8, 0xffffffff, 0x00090008,
        0x3c0ac, 0xffffffff, 0x00010000,
        0x3c0b0, 0xffffffff, 0x00030002,
        0x3c0b4, 0xffffffff, 0x00040007,
        0x3c0b8, 0xffffffff, 0x00060005,
        0x3c0bc, 0xffffffff, 0x00090008,
        0x3c000, 0xffffffff, 0x96e00200,
        0x8708, 0xffffffff, 0x00900100,
        0xc424, 0xffffffff, 0x0020003f,
        0x38, 0xffffffff, 0x0140001c,
        0x3c, 0x000f0000, 0x000f0000,
        0x220, 0xffffffff, 0xC060000C,
        0x224, 0xc0000fff, 0x00000100,
        0xf90, 0xffffffff, 0x00000100,
        0xf98, 0x00000101, 0x00000000,
        0x20a8, 0xffffffff, 0x00000104,
        0x55e4, 0xff000fff, 0x00000100,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd00c, 0xff000ff0, 0x00000100,
        0xd80c, 0xff000ff0, 0x00000100
};
1180
/* Kalindi (Kabini) SPM golden settings: {offset, mask, value} triplets
 * consumed by radeon_program_register_sequence().
 */
static const u32 kalindi_golden_spm_registers[] =
{
        0x30800, 0xe0ffffff, 0xe0000000
};
1185
/* Kalindi (Kabini) common golden settings: {offset, mask, value} triplets
 * consumed by radeon_program_register_sequence().
 */
static const u32 kalindi_golden_common_registers[] =
{
        0xc770, 0xffffffff, 0x00000800,
        0xc774, 0xffffffff, 0x00000800,
        0xc798, 0xffffffff, 0x00007fbf,
        0xc79c, 0xffffffff, 0x00007faf
};
1193
/* Kalindi (Kabini) golden register settings: {offset, mask, value} triplets
 * consumed by radeon_program_register_sequence() from cik_init_golden_registers().
 */
static const u32 kalindi_golden_registers[] =
{
        0x3c000, 0xffffdfff, 0x6e944040,
        0x55e4, 0xff607fff, 0xfc000100,
        0x3c220, 0xff000fff, 0x00000100,
        0x3c224, 0xff000fff, 0x00000100,
        0x3c200, 0xfffc0fff, 0x00000100,
        0x6ed8, 0x00010101, 0x00010000,
        0x9830, 0xffffffff, 0x00000000,
        0x9834, 0xf00fffff, 0x00000400,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0xf0311fff, 0x80300000,
        0x98f8, 0x73773777, 0x12010001,
        0x98fc, 0xffffffff, 0x00000010,
        0x9b7c, 0x00ff0000, 0x00fc0000,
        0x8030, 0x00001f0f, 0x0000100a,
        0x2f48, 0x73773777, 0x12010001,
        0x2408, 0x000fffff, 0x000c007f,
        0x8a14, 0xf000003f, 0x00000007,
        0x8b24, 0x3fff3fff, 0x00ffcfff,
        0x30a04, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x06000000,
        0x4d8, 0x00000fff, 0x00000100,
        0x3e78, 0x00000001, 0x00000002,
        0xc768, 0x00000008, 0x00000008,
        0x8c00, 0x000000ff, 0x00000003,
        0x214f8, 0x01ff01ff, 0x00000002,
        0x21498, 0x007ff800, 0x00200000,
        0x2015c, 0xffffffff, 0x00000f40,
        0x88c4, 0x001f3ae3, 0x00000082,
        0x88d4, 0x0000001f, 0x00000010,
        0x30934, 0xffffffff, 0x00000000
};
1227
/* Kalindi (Kabini) clockgating init sequence (MGCG/CGCG per the naming):
 * {offset, mask, value} triplets consumed by radeon_program_register_sequence().
 */
static const u32 kalindi_mgcg_cgcg_init[] =
{
        0xc420, 0xffffffff, 0xfffffffc,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c2a0, 0xffffffff, 0x00000100,
        0x3c208, 0xffffffff, 0x00000100,
        0x3c2c0, 0xffffffff, 0x00000100,
        0x3c2c8, 0xffffffff, 0x00000100,
        0x3c2c4, 0xffffffff, 0x00000100,
        0x55e4, 0xffffffff, 0x00600100,
        0x3c280, 0xffffffff, 0x00000100,
        0x3c214, 0xffffffff, 0x06000100,
        0x3c220, 0xffffffff, 0x00000100,
        0x3c218, 0xffffffff, 0x06000100,
        0x3c204, 0xffffffff, 0x00000100,
        0x3c2e0, 0xffffffff, 0x00000100,
        0x3c224, 0xffffffff, 0x00000100,
        0x3c200, 0xffffffff, 0x00000100,
        0x3c230, 0xffffffff, 0x00000100,
        0x3c234, 0xffffffff, 0x00000100,
        0x3c250, 0xffffffff, 0x00000100,
        0x3c254, 0xffffffff, 0x00000100,
        0x3c258, 0xffffffff, 0x00000100,
        0x3c25c, 0xffffffff, 0x00000100,
        0x3c260, 0xffffffff, 0x00000100,
        0x3c27c, 0xffffffff, 0x00000100,
        0x3c278, 0xffffffff, 0x00000100,
        0x3c210, 0xffffffff, 0x06000100,
        0x3c290, 0xffffffff, 0x00000100,
        0x3c274, 0xffffffff, 0x00000100,
        0x3c2b4, 0xffffffff, 0x00000100,
        0x3c2b0, 0xffffffff, 0x00000100,
        0x3c270, 0xffffffff, 0x00000100,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c020, 0xffffffff, 0x00010000,
        0x3c024, 0xffffffff, 0x00030002,
        0x3c028, 0xffffffff, 0x00040007,
        0x3c02c, 0xffffffff, 0x00060005,
        0x3c030, 0xffffffff, 0x00090008,
        0x3c034, 0xffffffff, 0x00010000,
        0x3c038, 0xffffffff, 0x00030002,
        0x3c03c, 0xffffffff, 0x00040007,
        0x3c040, 0xffffffff, 0x00060005,
        0x3c044, 0xffffffff, 0x00090008,
        0x3c000, 0xffffffff, 0x96e00200,
        0x8708, 0xffffffff, 0x00900100,
        0xc424, 0xffffffff, 0x0020003f,
        0x38, 0xffffffff, 0x0140001c,
        0x3c, 0x000f0000, 0x000f0000,
        0x220, 0xffffffff, 0xC060000C,
        0x224, 0xc0000fff, 0x00000100,
        0x20a8, 0xffffffff, 0x00000104,
        0x55e4, 0xff000fff, 0x00000100,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd00c, 0xff000ff0, 0x00000100,
        0xd80c, 0xff000ff0, 0x00000100
};
1286
1287 static void cik_init_golden_registers(struct radeon_device *rdev)
1288 {
1289         switch (rdev->family) {
1290         case CHIP_BONAIRE:
1291                 radeon_program_register_sequence(rdev,
1292                                                  bonaire_mgcg_cgcg_init,
1293                                                  (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1294                 radeon_program_register_sequence(rdev,
1295                                                  bonaire_golden_registers,
1296                                                  (const u32)ARRAY_SIZE(bonaire_golden_registers));
1297                 radeon_program_register_sequence(rdev,
1298                                                  bonaire_golden_common_registers,
1299                                                  (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1300                 radeon_program_register_sequence(rdev,
1301                                                  bonaire_golden_spm_registers,
1302                                                  (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1303                 break;
1304         case CHIP_KABINI:
1305                 radeon_program_register_sequence(rdev,
1306                                                  kalindi_mgcg_cgcg_init,
1307                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1308                 radeon_program_register_sequence(rdev,
1309                                                  kalindi_golden_registers,
1310                                                  (const u32)ARRAY_SIZE(kalindi_golden_registers));
1311                 radeon_program_register_sequence(rdev,
1312                                                  kalindi_golden_common_registers,
1313                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1314                 radeon_program_register_sequence(rdev,
1315                                                  kalindi_golden_spm_registers,
1316                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1317                 break;
1318         case CHIP_KAVERI:
1319                 radeon_program_register_sequence(rdev,
1320                                                  spectre_mgcg_cgcg_init,
1321                                                  (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1322                 radeon_program_register_sequence(rdev,
1323                                                  spectre_golden_registers,
1324                                                  (const u32)ARRAY_SIZE(spectre_golden_registers));
1325                 radeon_program_register_sequence(rdev,
1326                                                  spectre_golden_common_registers,
1327                                                  (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1328                 radeon_program_register_sequence(rdev,
1329                                                  spectre_golden_spm_registers,
1330                                                  (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1331                 break;
1332         default:
1333                 break;
1334         }
1335 }
1336
1337 /**
1338  * cik_get_xclk - get the xclk
1339  *
1340  * @rdev: radeon_device pointer
1341  *
1342  * Returns the reference clock used by the gfx engine
1343  * (CIK).
1344  */
1345 u32 cik_get_xclk(struct radeon_device *rdev)
1346 {
1347         u32 reference_clock = rdev->clock.spll.reference_freq;
1348
1349         if (rdev->flags & RADEON_IS_IGP) {
1350                 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1351                         return reference_clock / 2;
1352         } else {
1353                 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1354                         return reference_clock / 4;
1355         }
1356         return reference_clock;
1357 }
1358
1359 /**
1360  * cik_mm_rdoorbell - read a doorbell dword
1361  *
1362  * @rdev: radeon_device pointer
1363  * @offset: byte offset into the aperture
1364  *
1365  * Returns the value in the doorbell aperture at the
1366  * requested offset (CIK).
1367  */
1368 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
1369 {
1370         if (offset < rdev->doorbell.size) {
1371                 return readl(((void __iomem *)rdev->doorbell.ptr) + offset);
1372         } else {
1373                 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
1374                 return 0;
1375         }
1376 }
1377
1378 /**
1379  * cik_mm_wdoorbell - write a doorbell dword
1380  *
1381  * @rdev: radeon_device pointer
1382  * @offset: byte offset into the aperture
1383  * @v: value to write
1384  *
1385  * Writes @v to the doorbell aperture at the
1386  * requested offset (CIK).
1387  */
1388 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
1389 {
1390         if (offset < rdev->doorbell.size) {
1391                 writel(v, ((void __iomem *)rdev->doorbell.ptr) + offset);
1392         } else {
1393                 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
1394         }
1395 }
1396
#define BONAIRE_IO_MC_REGS_SIZE 36

/* MC io register setup for Bonaire: {index, data} pairs written through
 * MC_SEQ_IO_DEBUG_INDEX / MC_SEQ_IO_DEBUG_DATA by ci_mc_load_microcode()
 * before the MC ucode itself is loaded.
 */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
        {0x00000070, 0x04400000},
        {0x00000071, 0x80c01803},
        {0x00000072, 0x00004004},
        {0x00000073, 0x00000100},
        {0x00000074, 0x00ff0000},
        {0x00000075, 0x34000000},
        {0x00000076, 0x08000014},
        {0x00000077, 0x00cc08ec},
        {0x00000078, 0x00000400},
        {0x00000079, 0x00000000},
        {0x0000007a, 0x04090000},
        {0x0000007c, 0x00000000},
        {0x0000007e, 0x4408a8e8},
        {0x0000007f, 0x00000304},
        {0x00000080, 0x00000000},
        {0x00000082, 0x00000001},
        {0x00000083, 0x00000002},
        {0x00000084, 0xf3e4f400},
        {0x00000085, 0x052024e3},
        {0x00000087, 0x00000000},
        {0x00000088, 0x01000000},
        {0x0000008a, 0x1c0a0000},
        {0x0000008b, 0xff010000},
        {0x0000008d, 0xffffefff},
        {0x0000008e, 0xfff3efff},
        {0x0000008f, 0xfff3efbf},
        {0x00000092, 0xf7ffffff},
        {0x00000093, 0xffffff7f},
        {0x00000095, 0x00101101},
        {0x00000096, 0x00000fff},
        {0x00000097, 0x00116fff},
        {0x00000098, 0x60010000},
        {0x00000099, 0x10010000},
        {0x0000009a, 0x00006000},
        {0x0000009b, 0x00001000},
        {0x0000009f, 0x00b48000}
};
1438
1439 /**
1440  * cik_srbm_select - select specific register instances
1441  *
1442  * @rdev: radeon_device pointer
1443  * @me: selected ME (micro engine)
1444  * @pipe: pipe
1445  * @queue: queue
1446  * @vmid: VMID
1447  *
1448  * Switches the currently active registers instances.  Some
1449  * registers are instanced per VMID, others are instanced per
1450  * me/pipe/queue combination.
1451  */
1452 static void cik_srbm_select(struct radeon_device *rdev,
1453                             u32 me, u32 pipe, u32 queue, u32 vmid)
1454 {
1455         u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1456                              MEID(me & 0x3) |
1457                              VMID(vmid & 0xf) |
1458                              QUEUEID(queue & 0x7));
1459         WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1460 }
1461
1462 /* ucode loading */
1463 /**
1464  * ci_mc_load_microcode - load MC ucode into the hw
1465  *
1466  * @rdev: radeon_device pointer
1467  *
1468  * Load the GDDR MC ucode into the hw (CIK).
1469  * Returns 0 on success, error on failure.
1470  */
1471 static int ci_mc_load_microcode(struct radeon_device *rdev)
1472 {
1473         const __be32 *fw_data;
1474         u32 running, blackout = 0;
1475         u32 *io_mc_regs;
1476         int i, ucode_size, regs_size;
1477
1478         if (!rdev->mc_fw)
1479                 return -EINVAL;
1480
1481         switch (rdev->family) {
1482         case CHIP_BONAIRE:
1483         default:
1484                 io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1485                 ucode_size = CIK_MC_UCODE_SIZE;
1486                 regs_size = BONAIRE_IO_MC_REGS_SIZE;
1487                 break;
1488         }
1489
1490         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1491
1492         if (running == 0) {
1493                 if (running) {
1494                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1495                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1496                 }
1497
1498                 /* reset the engine and set to writable */
1499                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1500                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1501
1502                 /* load mc io regs */
1503                 for (i = 0; i < regs_size; i++) {
1504                         WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1505                         WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1506                 }
1507                 /* load the MC ucode */
1508                 fw_data = (const __be32 *)rdev->mc_fw->data;
1509                 for (i = 0; i < ucode_size; i++)
1510                         WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1511
1512                 /* put the engine back into the active state */
1513                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1514                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1515                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1516
1517                 /* wait for training to complete */
1518                 for (i = 0; i < rdev->usec_timeout; i++) {
1519                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1520                                 break;
1521                         udelay(1);
1522                 }
1523                 for (i = 0; i < rdev->usec_timeout; i++) {
1524                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1525                                 break;
1526                         udelay(1);
1527                 }
1528
1529                 if (running)
1530                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1531         }
1532
1533         return 0;
1534 }
1535
1536 /**
1537  * cik_init_microcode - load ucode images from disk
1538  *
1539  * @rdev: radeon_device pointer
1540  *
1541  * Use the firmware interface to load the ucode images into
1542  * the driver (not loaded into hw).
1543  * Returns 0 on success, error on failure.
1544  */
1545 static int cik_init_microcode(struct radeon_device *rdev)
1546 {
1547         const char *chip_name;
1548         size_t pfp_req_size, me_req_size, ce_req_size,
1549                 mec_req_size, rlc_req_size, mc_req_size,
1550                 sdma_req_size, smc_req_size;
1551         char fw_name[30];
1552         int err;
1553
1554         DRM_DEBUG("\n");
1555
1556         switch (rdev->family) {
1557         case CHIP_BONAIRE:
1558                 chip_name = "BONAIRE";
1559                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1560                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1561                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1562                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1563                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1564                 mc_req_size = CIK_MC_UCODE_SIZE * 4;
1565                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1566                 smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1567                 break;
1568         case CHIP_KAVERI:
1569                 chip_name = "KAVERI";
1570                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1571                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1572                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1573                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1574                 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1575                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1576                 break;
1577         case CHIP_KABINI:
1578                 chip_name = "KABINI";
1579                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1580                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1581                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1582                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1583                 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1584                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1585                 break;
1586         default: BUG();
1587         }
1588
1589         DRM_INFO("Loading %s Microcode\n", chip_name);
1590
1591         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1592         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1593         if (err)
1594                 goto out;
1595         if (rdev->pfp_fw->size != pfp_req_size) {
1596                 printk(KERN_ERR
1597                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1598                        rdev->pfp_fw->size, fw_name);
1599                 err = -EINVAL;
1600                 goto out;
1601         }
1602
1603         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1604         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1605         if (err)
1606                 goto out;
1607         if (rdev->me_fw->size != me_req_size) {
1608                 printk(KERN_ERR
1609                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1610                        rdev->me_fw->size, fw_name);
1611                 err = -EINVAL;
1612         }
1613
1614         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1615         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1616         if (err)
1617                 goto out;
1618         if (rdev->ce_fw->size != ce_req_size) {
1619                 printk(KERN_ERR
1620                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1621                        rdev->ce_fw->size, fw_name);
1622                 err = -EINVAL;
1623         }
1624
1625         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
1626         err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
1627         if (err)
1628                 goto out;
1629         if (rdev->mec_fw->size != mec_req_size) {
1630                 printk(KERN_ERR
1631                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1632                        rdev->mec_fw->size, fw_name);
1633                 err = -EINVAL;
1634         }
1635
1636         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1637         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1638         if (err)
1639                 goto out;
1640         if (rdev->rlc_fw->size != rlc_req_size) {
1641                 printk(KERN_ERR
1642                        "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
1643                        rdev->rlc_fw->size, fw_name);
1644                 err = -EINVAL;
1645         }
1646
1647         snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
1648         err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
1649         if (err)
1650                 goto out;
1651         if (rdev->sdma_fw->size != sdma_req_size) {
1652                 printk(KERN_ERR
1653                        "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
1654                        rdev->sdma_fw->size, fw_name);
1655                 err = -EINVAL;
1656         }
1657
1658         /* No SMC, MC ucode on APUs */
1659         if (!(rdev->flags & RADEON_IS_IGP)) {
1660                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1661                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1662                 if (err)
1663                         goto out;
1664                 if (rdev->mc_fw->size != mc_req_size) {
1665                         printk(KERN_ERR
1666                                "cik_mc: Bogus length %zu in firmware \"%s\"\n",
1667                                rdev->mc_fw->size, fw_name);
1668                         err = -EINVAL;
1669                 }
1670
1671                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1672                 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1673                 if (err) {
1674                         printk(KERN_ERR
1675                                "smc: error loading firmware \"%s\"\n",
1676                                fw_name);
1677                         release_firmware(rdev->smc_fw);
1678                         rdev->smc_fw = NULL;
1679                 } else if (rdev->smc_fw->size != smc_req_size) {
1680                         printk(KERN_ERR
1681                                "cik_smc: Bogus length %zu in firmware \"%s\"\n",
1682                                rdev->smc_fw->size, fw_name);
1683                         err = -EINVAL;
1684                 }
1685         }
1686
1687 out:
1688         if (err) {
1689                 if (err != -EINVAL)
1690                         printk(KERN_ERR
1691                                "cik_cp: Failed to load firmware \"%s\"\n",
1692                                fw_name);
1693                 release_firmware(rdev->pfp_fw);
1694                 rdev->pfp_fw = NULL;
1695                 release_firmware(rdev->me_fw);
1696                 rdev->me_fw = NULL;
1697                 release_firmware(rdev->ce_fw);
1698                 rdev->ce_fw = NULL;
1699                 release_firmware(rdev->rlc_fw);
1700                 rdev->rlc_fw = NULL;
1701                 release_firmware(rdev->mc_fw);
1702                 rdev->mc_fw = NULL;
1703                 release_firmware(rdev->smc_fw);
1704                 rdev->smc_fw = NULL;
1705         }
1706         return err;
1707 }
1708
1709 /*
1710  * Core functions
1711  */
1712 /**
1713  * cik_tiling_mode_table_init - init the hw tiling table
1714  *
1715  * @rdev: radeon_device pointer
1716  *
1717  * Starting with SI, the tiling setup is done globally in a
1718  * set of 32 tiling modes.  Rather than selecting each set of
1719  * parameters per surface as on older asics, we just select
1720  * which index in the tiling table we want to use, and the
1721  * surface uses those parameters (CIK).
1722  */
1723 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
1724 {
1725         const u32 num_tile_mode_states = 32;
1726         const u32 num_secondary_tile_mode_states = 16;
1727         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
1728         u32 num_pipe_configs;
1729         u32 num_rbs = rdev->config.cik.max_backends_per_se *
1730                 rdev->config.cik.max_shader_engines;
1731
1732         switch (rdev->config.cik.mem_row_size_in_kb) {
1733         case 1:
1734                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
1735                 break;
1736         case 2:
1737         default:
1738                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
1739                 break;
1740         case 4:
1741                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
1742                 break;
1743         }
1744
1745         num_pipe_configs = rdev->config.cik.max_tile_pipes;
1746         if (num_pipe_configs > 8)
1747                 num_pipe_configs = 8; /* ??? */
1748
1749         if (num_pipe_configs == 8) {
1750                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1751                         switch (reg_offset) {
1752                         case 0:
1753                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1754                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1755                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1756                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1757                                 break;
1758                         case 1:
1759                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1760                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1761                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1762                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1763                                 break;
1764                         case 2:
1765                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1766                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1767                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1768                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1769                                 break;
1770                         case 3:
1771                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1772                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1773                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1774                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1775                                 break;
1776                         case 4:
1777                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1778                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1779                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1780                                                  TILE_SPLIT(split_equal_to_row_size));
1781                                 break;
1782                         case 5:
1783                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1784                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1785                                 break;
1786                         case 6:
1787                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1788                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1789                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1790                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1791                                 break;
1792                         case 7:
1793                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1794                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1795                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1796                                                  TILE_SPLIT(split_equal_to_row_size));
1797                                 break;
1798                         case 8:
1799                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1800                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
1801                                 break;
1802                         case 9:
1803                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1804                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1805                                 break;
1806                         case 10:
1807                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1808                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1809                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1810                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1811                                 break;
1812                         case 11:
1813                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1814                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1815                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1816                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1817                                 break;
1818                         case 12:
1819                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1820                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1821                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1822                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1823                                 break;
1824                         case 13:
1825                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1826                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1827                                 break;
1828                         case 14:
1829                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1830                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1831                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1832                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1833                                 break;
1834                         case 16:
1835                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1836                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1837                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1838                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1839                                 break;
1840                         case 17:
1841                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1842                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1843                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1844                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1845                                 break;
1846                         case 27:
1847                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1848                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1849                                 break;
1850                         case 28:
1851                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1852                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1853                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1854                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1855                                 break;
1856                         case 29:
1857                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1858                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1859                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1860                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1861                                 break;
1862                         case 30:
1863                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1864                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1865                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1866                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1867                                 break;
1868                         default:
1869                                 gb_tile_moden = 0;
1870                                 break;
1871                         }
1872                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1873                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1874                 }
1875                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1876                         switch (reg_offset) {
1877                         case 0:
1878                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1879                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1880                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1881                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1882                                 break;
1883                         case 1:
1884                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1885                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1886                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1887                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1888                                 break;
1889                         case 2:
1890                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1891                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1892                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1893                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1894                                 break;
1895                         case 3:
1896                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1897                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1898                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1899                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1900                                 break;
1901                         case 4:
1902                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1903                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1904                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1905                                                  NUM_BANKS(ADDR_SURF_8_BANK));
1906                                 break;
1907                         case 5:
1908                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1909                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1910                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1911                                                  NUM_BANKS(ADDR_SURF_4_BANK));
1912                                 break;
1913                         case 6:
1914                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1915                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1916                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1917                                                  NUM_BANKS(ADDR_SURF_2_BANK));
1918                                 break;
1919                         case 8:
1920                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1921                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1922                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1923                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1924                                 break;
1925                         case 9:
1926                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1927                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1928                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1929                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1930                                 break;
1931                         case 10:
1932                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1933                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1934                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1935                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1936                                 break;
1937                         case 11:
1938                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1939                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1940                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1941                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1942                                 break;
1943                         case 12:
1944                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1945                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1946                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1947                                                  NUM_BANKS(ADDR_SURF_8_BANK));
1948                                 break;
1949                         case 13:
1950                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1951                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1952                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1953                                                  NUM_BANKS(ADDR_SURF_4_BANK));
1954                                 break;
1955                         case 14:
1956                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1957                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1958                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1959                                                  NUM_BANKS(ADDR_SURF_2_BANK));
1960                                 break;
1961                         default:
1962                                 gb_tile_moden = 0;
1963                                 break;
1964                         }
1965                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1966                 }
1967         } else if (num_pipe_configs == 4) {
1968                 if (num_rbs == 4) {
1969                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1970                                 switch (reg_offset) {
1971                                 case 0:
1972                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1973                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1974                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1975                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1976                                         break;
1977                                 case 1:
1978                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1979                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1980                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1981                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1982                                         break;
1983                                 case 2:
1984                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1985                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1986                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1987                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1988                                         break;
1989                                 case 3:
1990                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1991                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1992                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1993                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1994                                         break;
1995                                 case 4:
1996                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1997                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1998                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1999                                                          TILE_SPLIT(split_equal_to_row_size));
2000                                         break;
2001                                 case 5:
2002                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2003                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2004                                         break;
2005                                 case 6:
2006                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2007                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2008                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2009                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2010                                         break;
2011                                 case 7:
2012                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2013                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2014                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2015                                                          TILE_SPLIT(split_equal_to_row_size));
2016                                         break;
2017                                 case 8:
2018                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2019                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16));
2020                                         break;
2021                                 case 9:
2022                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2023                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2024                                         break;
2025                                 case 10:
2026                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2027                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2028                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2029                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2030                                         break;
2031                                 case 11:
2032                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2033                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2034                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2035                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2036                                         break;
2037                                 case 12:
2038                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2039                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2040                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2041                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2042                                         break;
2043                                 case 13:
2044                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2045                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2046                                         break;
2047                                 case 14:
2048                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2049                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2050                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2051                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2052                                         break;
2053                                 case 16:
2054                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2055                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2056                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2057                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2058                                         break;
2059                                 case 17:
2060                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2061                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2062                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2063                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2064                                         break;
2065                                 case 27:
2066                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2067                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2068                                         break;
2069                                 case 28:
2070                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2071                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2072                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2073                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2074                                         break;
2075                                 case 29:
2076                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2077                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2078                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2079                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2080                                         break;
2081                                 case 30:
2082                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2083                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2084                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2085                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2086                                         break;
2087                                 default:
2088                                         gb_tile_moden = 0;
2089                                         break;
2090                                 }
2091                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2092                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2093                         }
2094                 } else if (num_rbs < 4) {
2095                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2096                                 switch (reg_offset) {
2097                                 case 0:
2098                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2099                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2100                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2101                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2102                                         break;
2103                                 case 1:
2104                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2105                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2106                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2107                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2108                                         break;
2109                                 case 2:
2110                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2111                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2112                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2113                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2114                                         break;
2115                                 case 3:
2116                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2117                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2118                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2119                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2120                                         break;
2121                                 case 4:
2122                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2123                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2124                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2125                                                          TILE_SPLIT(split_equal_to_row_size));
2126                                         break;
2127                                 case 5:
2128                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2129                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2130                                         break;
2131                                 case 6:
2132                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2133                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2134                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2135                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2136                                         break;
2137                                 case 7:
2138                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2139                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2140                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2141                                                          TILE_SPLIT(split_equal_to_row_size));
2142                                         break;
2143                                 case 8:
2144                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2145                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16));
2146                                         break;
2147                                 case 9:
2148                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2149                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2150                                         break;
2151                                 case 10:
2152                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2153                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2154                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2155                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2156                                         break;
2157                                 case 11:
2158                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2159                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2160                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2161                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2162                                         break;
2163                                 case 12:
2164                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2165                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2166                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2167                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2168                                         break;
2169                                 case 13:
2170                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2171                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2172                                         break;
2173                                 case 14:
2174                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2175                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2176                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2177                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2178                                         break;
2179                                 case 16:
2180                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2181                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2182                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2183                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2184                                         break;
2185                                 case 17:
2186                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2187                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2188                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2189                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2190                                         break;
2191                                 case 27:
2192                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2193                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2194                                         break;
2195                                 case 28:
2196                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2197                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2198                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2199                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2200                                         break;
2201                                 case 29:
2202                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2203                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2204                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2205                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2206                                         break;
2207                                 case 30:
2208                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2209                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2210                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2211                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2212                                         break;
2213                                 default:
2214                                         gb_tile_moden = 0;
2215                                         break;
2216                                 }
2217                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2218                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2219                         }
2220                 }
2221                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2222                         switch (reg_offset) {
2223                         case 0:
2224                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2225                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2226                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2227                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2228                                 break;
2229                         case 1:
2230                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2231                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2232                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2233                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2234                                 break;
2235                         case 2:
2236                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2237                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2238                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2239                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2240                                 break;
2241                         case 3:
2242                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2243                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2244                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2245                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2246                                 break;
2247                         case 4:
2248                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2249                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2250                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2251                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2252                                 break;
2253                         case 5:
2254                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2255                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2256                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2257                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2258                                 break;
2259                         case 6:
2260                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2261                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2262                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2263                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2264                                 break;
2265                         case 8:
2266                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2267                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2268                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2269                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2270                                 break;
2271                         case 9:
2272                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2273                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2274                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2275                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2276                                 break;
2277                         case 10:
2278                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2279                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2280                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2281                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2282                                 break;
2283                         case 11:
2284                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2285                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2286                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2287                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2288                                 break;
2289                         case 12:
2290                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2291                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2292                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2293                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2294                                 break;
2295                         case 13:
2296                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2297                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2298                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2299                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2300                                 break;
2301                         case 14:
2302                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2303                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2304                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2305                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2306                                 break;
2307                         default:
2308                                 gb_tile_moden = 0;
2309                                 break;
2310                         }
2311                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2312                 }
2313         } else if (num_pipe_configs == 2) {
2314                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2315                         switch (reg_offset) {
2316                         case 0:
2317                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2318                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2319                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2320                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2321                                 break;
2322                         case 1:
2323                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2324                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2325                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2326                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2327                                 break;
2328                         case 2:
2329                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2330                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2331                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2332                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2333                                 break;
2334                         case 3:
2335                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2336                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2337                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2338                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2339                                 break;
2340                         case 4:
2341                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2342                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2343                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2344                                                  TILE_SPLIT(split_equal_to_row_size));
2345                                 break;
2346                         case 5:
2347                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2348                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2349                                 break;
2350                         case 6:
2351                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2352                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2353                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2354                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2355                                 break;
2356                         case 7:
2357                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2358                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2359                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2360                                                  TILE_SPLIT(split_equal_to_row_size));
2361                                 break;
2362                         case 8:
2363                                 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
2364                                 break;
2365                         case 9:
2366                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2367                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2368                                 break;
2369                         case 10:
2370                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2371                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2372                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2373                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2374                                 break;
2375                         case 11:
2376                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2377                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2378                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2379                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2380                                 break;
2381                         case 12:
2382                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2383                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2384                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2385                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2386                                 break;
2387                         case 13:
2388                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2389                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2390                                 break;
2391                         case 14:
2392                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2393                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2394                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2395                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2396                                 break;
2397                         case 16:
2398                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2399                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2400                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2401                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2402                                 break;
2403                         case 17:
2404                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2405                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2406                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2407                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2408                                 break;
2409                         case 27:
2410                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2411                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2412                                 break;
2413                         case 28:
2414                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2415                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2416                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2417                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2418                                 break;
2419                         case 29:
2420                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2421                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2422                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2423                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2424                                 break;
2425                         case 30:
2426                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2427                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2428                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2429                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2430                                 break;
2431                         default:
2432                                 gb_tile_moden = 0;
2433                                 break;
2434                         }
2435                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2436                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2437                 }
2438                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2439                         switch (reg_offset) {
2440                         case 0:
2441                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2442                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2443                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2444                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2445                                 break;
2446                         case 1:
2447                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2448                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2449                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2450                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2451                                 break;
2452                         case 2:
2453                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2454                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2455                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2456                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2457                                 break;
2458                         case 3:
2459                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2460                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2461                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2462                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2463                                 break;
2464                         case 4:
2465                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2466                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2467                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2468                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2469                                 break;
2470                         case 5:
2471                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2472                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2473                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2474                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2475                                 break;
2476                         case 6:
2477                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2478                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2479                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2480                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2481                                 break;
2482                         case 8:
2483                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2484                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2485                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2486                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2487                                 break;
2488                         case 9:
2489                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2490                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2491                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2492                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2493                                 break;
2494                         case 10:
2495                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2496                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2497                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2498                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2499                                 break;
2500                         case 11:
2501                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2502                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2503                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2504                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2505                                 break;
2506                         case 12:
2507                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2508                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2509                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2510                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2511                                 break;
2512                         case 13:
2513                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2514                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2515                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2516                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2517                                 break;
2518                         case 14:
2519                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2520                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2521                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2522                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2523                                 break;
2524                         default:
2525                                 gb_tile_moden = 0;
2526                                 break;
2527                         }
2528                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2529                 }
2530         } else
2531                 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
2532 }
2533
2534 /**
2535  * cik_select_se_sh - select which SE, SH to address
2536  *
2537  * @rdev: radeon_device pointer
2538  * @se_num: shader engine to address
2539  * @sh_num: sh block to address
2540  *
2541  * Select which SE, SH combinations to address. Certain
2542  * registers are instanced per SE or SH.  0xffffffff means
2543  * broadcast to all SEs or SHs (CIK).
2544  */
2545 static void cik_select_se_sh(struct radeon_device *rdev,
2546                              u32 se_num, u32 sh_num)
2547 {
2548         u32 data = INSTANCE_BROADCAST_WRITES;
2549
2550         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2551                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2552         else if (se_num == 0xffffffff)
2553                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2554         else if (sh_num == 0xffffffff)
2555                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2556         else
2557                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2558         WREG32(GRBM_GFX_INDEX, data);
2559 }
2560
/**
 * cik_create_bitmask - create a bitmask
 *
 * @bit_width: length of the mask
 *
 * create a variable length bit mask (CIK).
 * Returns the bitmask: the low @bit_width bits set.
 */
static u32 cik_create_bitmask(u32 bit_width)
{
	u32 mask = 0;

	/* shift in one set bit per requested mask bit; saturates
	 * at all-ones for widths >= 32, same as the obvious loop.
	 */
	while (bit_width--)
		mask = (mask << 1) | 1;

	return mask;
}
2579
2580 /**
2581  * cik_select_se_sh - select which SE, SH to address
2582  *
2583  * @rdev: radeon_device pointer
2584  * @max_rb_num: max RBs (render backends) for the asic
2585  * @se_num: number of SEs (shader engines) for the asic
2586  * @sh_per_se: number of SH blocks per SE for the asic
2587  *
2588  * Calculates the bitmask of disabled RBs (CIK).
2589  * Returns the disabled RB bitmask.
2590  */
static u32 cik_get_rb_disabled(struct radeon_device *rdev,
			      u32 max_rb_num, u32 se_num,
			      u32 sh_per_se)
{
	u32 data, mask;

	/* RBs fused off in hardware.  NOTE(review): bit 0 appears to gate
	 * whether the disable field is valid — confirm against the CIK
	 * register spec.
	 */
	data = RREG32(CC_RB_BACKEND_DISABLE);
	if (data & 1)
		data &= BACKEND_DISABLE_MASK;
	else
		data = 0;
	/* merge in RBs disabled by driver/user configuration */
	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);

	data >>= BACKEND_DISABLE_SHIFT;

	/* limit the result to the number of RBs per SH */
	mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);

	return data & mask;
}
2610
2611 /**
2612  * cik_setup_rb - setup the RBs on the asic
2613  *
2614  * @rdev: radeon_device pointer
2615  * @se_num: number of SEs (shader engines) for the asic
2616  * @sh_per_se: number of SH blocks per SE for the asic
2617  * @max_rb_num: max RBs (render backends) for the asic
2618  *
2619  * Configures per-SE/SH RB registers (CIK).
2620  */
static void cik_setup_rb(struct radeon_device *rdev,
			 u32 se_num, u32 sh_per_se,
			 u32 max_rb_num)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* Walk every SE/SH instance and collect the per-SH disabled-RB
	 * bits into one global bitmask.  cik_select_se_sh() steers the
	 * instanced register reads, so the select must precede each read.
	 */
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	/* restore broadcast so later register writes hit all instances */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* invert: enabled_rbs = RBs not marked disabled */
	mask = 1;
	for (i = 0; i < max_rb_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	/* Program the rasterizer RB mapping per SE, consuming two bits of
	 * enabled_rbs per SH.  The RB_MAP_* choice per 2-bit pattern follows
	 * the hardware-recommended raster config values.
	 */
	for (i = 0; i < se_num; i++) {
		cik_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 1:
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	/* leave the GRBM index in broadcast mode */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
2668
2669 /**
2670  * cik_gpu_init - setup the 3D engine
2671  *
2672  * @rdev: radeon_device pointer
2673  *
2674  * Configures the 3D engine and tiling configuration
2675  * registers so that the 3D engine is usable.
2676  */
static void cik_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/* per-family shader topology and scan-converter FIFO sizing */
	switch (rdev->family) {
	case CHIP_BONAIRE:
		rdev->config.cik.max_shader_engines = 2;
		rdev->config.cik.max_tile_pipes = 4;
		rdev->config.cik.max_cu_per_sh = 7;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 2;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 32;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_KAVERI:
		/* TODO */
		break;
	case CHIP_KABINI:
	default:
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 2;
		rdev->config.cik.max_cu_per_sh = 2;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 2;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		/* NOTE(review): Kabini reuses the Bonaire golden value here —
		 * confirm this is intentional for this kernel snapshot.
		 */
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

	/* enable CPU access to the framebuffer through BIF */
	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
	rdev->config.cik.mem_max_burst_length_bytes = 256;
	/* derive DRAM row size (in KB) from the number of columns */
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.cik.mem_row_size_in_kb > 4)
		rdev->config.cik.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.cik.shader_engine_tile_size = 32;
	rdev->config.cik.num_gpus = 1;
	rdev->config.cik.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.cik.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.cik.tile_config = 0;
	switch (rdev->config.cik.num_tile_pipes) {
	case 1:
		rdev->config.cik.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.cik.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.cik.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.cik.tile_config |= (3 << 0);
		break;
	}
	if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
		rdev->config.cik.tile_config |= 1 << 4;
	else
		rdev->config.cik.tile_config |= 0 << 4;
	rdev->config.cik.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.cik.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* the same address config is mirrored into every client that
	 * needs to decode tiled surfaces (HDP, DMIF, SDMA, UVD)
	 */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);

	cik_tiling_mode_table_init(rdev);

	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
		     rdev->config.cik.max_sh_per_se,
		     rdev->config.cik.max_backends_per_se);

	/* set HW defaults for 3D engine */
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	WREG32(SX_DEBUG_1, 0x20);

	WREG32(TA_CNTL_AUX, 0x00010000);

	tmp = RREG32(SPI_CONFIG_CNTL);
	tmp |= 0x03000000;
	WREG32(SPI_CONFIG_CNTL, tmp);

	WREG32(SQ_CONFIG, 1);

	WREG32(DB_DEBUG, 0);

	/* read-modify-write: keep hardware defaults outside the masked bits */
	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
	tmp |= 0x00000400;
	WREG32(DB_DEBUG2, tmp);

	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
	tmp |= 0x00020200;
	WREG32(DB_DEBUG3, tmp);

	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
	tmp |= 0x00018208;
	WREG32(CB_HW_CONTROL, tmp);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	/* program the scan converter FIFO sizes chosen per family above */
	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	/* read-back-and-write latches the power-on value */
	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);

	/* let the register writes settle before the CP is started */
	udelay(50);
}
2877
2878 /*
2879  * GPU scratch registers helpers function.
2880  */
2881 /**
2882  * cik_scratch_init - setup driver info for CP scratch regs
2883  *
2884  * @rdev: radeon_device pointer
2885  *
2886  * Set up the number and offset of the CP scratch registers.
 * NOTE: use of CP scratch registers is a legacy interface and
2888  * is not used by default on newer asics (r6xx+).  On newer asics,
2889  * memory buffers are used for fences rather than scratch regs.
2890  */
2891 static void cik_scratch_init(struct radeon_device *rdev)
2892 {
2893         int i;
2894
2895         rdev->scratch.num_reg = 7;
2896         rdev->scratch.reg_base = SCRATCH_REG0;
2897         for (i = 0; i < rdev->scratch.num_reg; i++) {
2898                 rdev->scratch.free[i] = true;
2899                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
2900         }
2901 }
2902
2903 /**
2904  * cik_ring_test - basic gfx ring test
2905  *
2906  * @rdev: radeon_device pointer
2907  * @ring: radeon_ring structure holding ring information
2908  *
2909  * Allocate a scratch register and write to it using the gfx ring (CIK).
2910  * Provides a basic gfx ring test to verify that the ring is working.
2911  * Used by cik_cp_gfx_resume();
2912  * Returns 0 on success, error on failure.
2913  */
int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* seed the scratch reg so we can tell when the CP has overwritten it */
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ring_lock(rdev, ring, 3);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	/* emit a packet asking the CP to write 0xDEADBEEF into the scratch reg */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
	radeon_ring_write(ring, 0xDEADBEEF);
	radeon_ring_unlock_commit(rdev, ring);

	/* poll (up to usec_timeout microseconds) for the CP to execute it */
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
	} else {
		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	return r;
}
2954
2955 /**
2956  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
2957  *
2958  * @rdev: radeon_device pointer
2959  * @fence: radeon fence object
2960  *
 * Emits a fence sequence number on the gfx ring and flushes
2962  * GPU caches.
2963  */
void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
			     struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	/* GPU address of the fence slot for this ring */
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	/* low address must be dword-aligned */
	radeon_ring_write(ring, addr & 0xfffffffc);
	/* DATA_SEL/INT_SEL pack the data-write and interrupt selects into the
	 * high-address dword of the packet
	 */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
	/* HDP flush */
	/* We should be using the new WAIT_REG_MEM special op packet here
	 * but it causes the CP to hang
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	/* writing HDP_MEM_COHERENCY_FLUSH_CNTL triggers the HDP flush */
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);
}
2991
2992 /**
2993  * cik_fence_compute_ring_emit - emit a fence on the compute ring
2994  *
2995  * @rdev: radeon_device pointer
2996  * @fence: radeon fence object
2997  *
 * Emits a fence sequence number on the compute ring and flushes
2999  * GPU caches.
3000  */
void cik_fence_compute_ring_emit(struct radeon_device *rdev,
				 struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	/* GPU address of the fence slot for this ring */
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* RELEASE_MEM - flush caches, send int */
	/* NOTE: unlike the gfx ring's EVENT_WRITE_EOP, RELEASE_MEM carries
	 * the selects before the address dwords
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
	/* low address must be dword-aligned */
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, upper_32_bits(addr));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
	/* HDP flush */
	/* We should be using the new WAIT_REG_MEM special op packet here
	 * but it causes the CP to hang
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	/* writing HDP_MEM_COHERENCY_FLUSH_CNTL triggers the HDP flush */
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);
}
3029
/**
 * cik_semaphore_ring_emit - emit a semaphore command on a ring
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 * @semaphore: radeon semaphore object
 * @emit_wait: true to emit a wait, false to emit a signal
 *
 * Emits a MEM_SEMAPHORE packet that either signals the semaphore
 * or waits for it, depending on @emit_wait (CIK).
 */
void cik_semaphore_ring_emit(struct radeon_device *rdev,
			     struct radeon_ring *ring,
			     struct radeon_semaphore *semaphore,
			     bool emit_wait)
{
	uint64_t addr = semaphore->gpu_addr;
	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;

	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
	radeon_ring_write(ring, addr & 0xffffffff);
	/* upper address bits share a dword with the wait/signal select */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
}
3042
3043 /*
3044  * IB stuff
3045  */
3046 /**
3047  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3048  *
3049  * @rdev: radeon_device pointer
3050  * @ib: radeon indirect buffer object
3051  *
 * Emits a DE (drawing engine) or CE (constant engine) IB
 * on the gfx ring.  IBs are usually generated by userspace
 * acceleration drivers and submitted to the kernel for
 * scheduling on the ring.  This function schedules the IB
3056  * on the gfx ring for execution by the GPU.
3057  */
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 header, control = INDIRECT_BUFFER_VALID;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* record where the rptr will be after this submission:
			 * 3 dwords emitted below + 4 for the IB packet itself
			 */
			next_rptr = ring->wptr + 3 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_UCONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* same, but shadowed to memory via WRITE_DATA:
			 * 5 dwords emitted below + 4 for the IB packet itself
			 */
			next_rptr = ring->wptr + 5 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	/* IB length in dwords plus the VM id it should execute under */
	control |= ib->length_dw |
		(ib->vm ? (ib->vm->id << 24) : 0);

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, control);
}
3101
3102 /**
3103  * cik_ib_test - basic gfx ring IB test
3104  *
3105  * @rdev: radeon_device pointer
3106  * @ring: radeon_ring structure holding ring information
3107  *
3108  * Allocate an IB and execute it on the gfx ring (CIK).
3109  * Provides a basic gfx ring test to verify that IBs are working.
3110  * Returns 0 on success, error on failure.
3111  */
3112 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3113 {
3114         struct radeon_ib ib;
3115         uint32_t scratch;
3116         uint32_t tmp = 0;
3117         unsigned i;
3118         int r;
3119
3120         r = radeon_scratch_get(rdev, &scratch);
3121         if (r) {
3122                 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3123                 return r;
3124         }
3125         WREG32(scratch, 0xCAFEDEAD);
3126         r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3127         if (r) {
3128                 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3129                 return r;
3130         }
3131         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3132         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3133         ib.ptr[2] = 0xDEADBEEF;
3134         ib.length_dw = 3;
3135         r = radeon_ib_schedule(rdev, &ib, NULL);
3136         if (r) {
3137                 radeon_scratch_free(rdev, scratch);
3138                 radeon_ib_free(rdev, &ib);
3139                 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3140                 return r;
3141         }
3142         r = radeon_fence_wait(ib.fence, false);
3143         if (r) {
3144                 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3145                 return r;
3146         }
3147         for (i = 0; i < rdev->usec_timeout; i++) {
3148                 tmp = RREG32(scratch);
3149                 if (tmp == 0xDEADBEEF)
3150                         break;
3151                 DRM_UDELAY(1);
3152         }
3153         if (i < rdev->usec_timeout) {
3154                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3155         } else {
3156                 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3157                           scratch, tmp);
3158                 r = -EINVAL;
3159         }
3160         radeon_scratch_free(rdev, scratch);
3161         radeon_ib_free(rdev, &ib);
3162         return r;
3163 }
3164
3165 /*
3166  * CP.
 * On CIK, gfx and compute now have independent command processors.
3168  *
3169  * GFX
3170  * Gfx consists of a single ring and can process both gfx jobs and
3171  * compute jobs.  The gfx CP consists of three microengines (ME):
3172  * PFP - Pre-Fetch Parser
3173  * ME - Micro Engine
3174  * CE - Constant Engine
3175  * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
3177  * used by the DE so that they can be loaded into cache in parallel
3178  * while the DE is processing state update packets.
3179  *
3180  * Compute
3181  * The compute CP consists of two microengines (ME):
3182  * MEC1 - Compute MicroEngine 1
3183  * MEC2 - Compute MicroEngine 2
3184  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3185  * The queues are exposed to userspace and are programmed directly
3186  * by the compute runtime.
3187  */
3188 /**
3189  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3190  *
3191  * @rdev: radeon_device pointer
3192  * @enable: enable or disable the MEs
3193  *
3194  * Halts or unhalts the gfx MEs.
3195  */
3196 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3197 {
3198         if (enable)
3199                 WREG32(CP_ME_CNTL, 0);
3200         else {
3201                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3202                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3203         }
3204         udelay(50);
3205 }
3206
3207 /**
3208  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3209  *
3210  * @rdev: radeon_device pointer
3211  *
3212  * Loads the gfx PFP, ME, and CE ucode.
3213  * Returns 0 for success, -EINVAL if the ucode is not available.
3214  */
static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i;

	/* all three gfx ucode images must have been fetched already */
	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
		return -EINVAL;

	/* the MEs must be halted while their ucode RAM is rewritten */
	cik_cp_gfx_enable(rdev, false);

	/* PFP: write address 0, then stream the big-endian words */
	fw_data = (const __be32 *)rdev->pfp_fw->data;
	WREG32(CP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_PFP_UCODE_ADDR, 0);

	/* CE */
	fw_data = (const __be32 *)rdev->ce_fw->data;
	WREG32(CP_CE_UCODE_ADDR, 0);
	for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_CE_UCODE_ADDR, 0);

	/* ME */
	fw_data = (const __be32 *)rdev->me_fw->data;
	WREG32(CP_ME_RAM_WADDR, 0);
	for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_ME_RAM_WADDR, 0);

	/* reset all the ucode address registers before the MEs restart */
	WREG32(CP_PFP_UCODE_ADDR, 0);
	WREG32(CP_CE_UCODE_ADDR, 0);
	WREG32(CP_ME_RAM_WADDR, 0);
	WREG32(CP_ME_RAM_RADDR, 0);
	return 0;
}
3252
3253 /**
3254  * cik_cp_gfx_start - start the gfx ring
3255  *
3256  * @rdev: radeon_device pointer
3257  *
3258  * Enables the ring and loads the clear state context and other
3259  * packets required to init the ring.
3260  * Returns 0 for success, error for failure.
3261  */
static int cik_cp_gfx_start(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r, i;

	/* init the CP */
	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
	WREG32(CP_ENDIAN_SWAP, 0);
	WREG32(CP_DEVICE_ID, 1);

	cik_cp_gfx_enable(rdev, true);

	/* reserve space for the clear state plus the 17 dwords of
	 * fixed packets emitted below
	 */
	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* init the CE partitions.  CE only used for gfx on CIK */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	radeon_ring_write(ring, 0xc000);
	radeon_ring_write(ring, 0xc000);

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	radeon_ring_write(ring, 0x80000000);
	radeon_ring_write(ring, 0x80000000);

	/* stream the golden context state generated for this family */
	for (i = 0; i < cik_default_size; i++)
		radeon_ring_write(ring, cik_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

	radeon_ring_unlock_commit(rdev, ring);

	return 0;
}
3313
3314 /**
3315  * cik_cp_gfx_fini - stop the gfx ring
3316  *
3317  * @rdev: radeon_device pointer
3318  *
3319  * Stop the gfx ring and tear down the driver ring
3320  * info.
3321  */
3322 static void cik_cp_gfx_fini(struct radeon_device *rdev)
3323 {
3324         cik_cp_gfx_enable(rdev, false);
3325         radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3326 }
3327
3328 /**
3329  * cik_cp_gfx_resume - setup the gfx ring buffer registers
3330  *
3331  * @rdev: radeon_device pointer
3332  *
3333  * Program the location and size of the gfx ring buffer
3334  * and test it to make sure it's working.
3335  * Returns 0 for success, error for failure.
3336  */
static int cik_cp_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr;
	int r;

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(CP_RB_VMID, 0);

	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = drm_order(ring->ring_size / 8);
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	/* RB_RPTR_WR_ENA temporarily allows the driver to reset the rptr */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	/* scratch register shadowing is no longer supported */
	WREG32(SCRATCH_UMSK, 0);

	if (!rdev->wb.enabled)
		tmp |= RB_NO_UPDATE;

	/* writing tmp back clears RB_RPTR_WR_ENA again */
	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	/* ring base is programmed in units of 256 bytes */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(CP_RB0_BASE, rb_addr);
	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

	ring->rptr = RREG32(CP_RB0_RPTR);

	/* start the ring */
	cik_cp_gfx_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		return r;
	}
	return 0;
}
3400
3401 u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
3402                               struct radeon_ring *ring)
3403 {
3404         u32 rptr;
3405
3406
3407
3408         if (rdev->wb.enabled) {
3409                 rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
3410         } else {
3411                 mutex_lock(&rdev->srbm_mutex);
3412                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3413                 rptr = RREG32(CP_HQD_PQ_RPTR);
3414                 cik_srbm_select(rdev, 0, 0, 0, 0);
3415                 mutex_unlock(&rdev->srbm_mutex);
3416         }
3417         rptr = (rptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
3418
3419         return rptr;
3420 }
3421
3422 u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
3423                               struct radeon_ring *ring)
3424 {
3425         u32 wptr;
3426
3427         if (rdev->wb.enabled) {
3428                 wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
3429         } else {
3430                 mutex_lock(&rdev->srbm_mutex);
3431                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3432                 wptr = RREG32(CP_HQD_PQ_WPTR);
3433                 cik_srbm_select(rdev, 0, 0, 0, 0);
3434                 mutex_unlock(&rdev->srbm_mutex);
3435         }
3436         wptr = (wptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
3437
3438         return wptr;
3439 }
3440
3441 void cik_compute_ring_set_wptr(struct radeon_device *rdev,
3442                                struct radeon_ring *ring)
3443 {
3444         u32 wptr = (ring->wptr << ring->ptr_reg_shift) & ring->ptr_reg_mask;
3445
3446         rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(wptr);
3447         WDOORBELL32(ring->doorbell_offset, wptr);
3448 }
3449
3450 /**
3451  * cik_cp_compute_enable - enable/disable the compute CP MEs
3452  *
3453  * @rdev: radeon_device pointer
3454  * @enable: enable or disable the MEs
3455  *
3456  * Halts or unhalts the compute MEs.
3457  */
3458 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
3459 {
3460         if (enable)
3461                 WREG32(CP_MEC_CNTL, 0);
3462         else
3463                 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
3464         udelay(50);
3465 }
3466
3467 /**
3468  * cik_cp_compute_load_microcode - load the compute CP ME ucode
3469  *
3470  * @rdev: radeon_device pointer
3471  *
3472  * Loads the compute MEC1&2 ucode.
3473  * Returns 0 for success, -EINVAL if the ucode is not available.
3474  */
3475 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
3476 {
3477         const __be32 *fw_data;
3478         int i;
3479
3480         if (!rdev->mec_fw)
3481                 return -EINVAL;
3482
3483         cik_cp_compute_enable(rdev, false);
3484
3485         /* MEC1 */
3486         fw_data = (const __be32 *)rdev->mec_fw->data;
3487         WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
3488         for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
3489                 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
3490         WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
3491
3492         if (rdev->family == CHIP_KAVERI) {
3493                 /* MEC2 */
3494                 fw_data = (const __be32 *)rdev->mec_fw->data;
3495                 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
3496                 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
3497                         WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
3498                 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
3499         }
3500
3501         return 0;
3502 }
3503
3504 /**
3505  * cik_cp_compute_start - start the compute queues
3506  *
3507  * @rdev: radeon_device pointer
3508  *
3509  * Enable the compute queues.
3510  * Returns 0 for success, error for failure.
3511  */
3512 static int cik_cp_compute_start(struct radeon_device *rdev)
3513 {
3514         cik_cp_compute_enable(rdev, true);
3515
3516         return 0;
3517 }
3518
3519 /**
3520  * cik_cp_compute_fini - stop the compute queues
3521  *
3522  * @rdev: radeon_device pointer
3523  *
3524  * Stop the compute queues and tear down the driver queue
3525  * info.
3526  */
3527 static void cik_cp_compute_fini(struct radeon_device *rdev)
3528 {
3529         int i, idx, r;
3530
3531         cik_cp_compute_enable(rdev, false);
3532
3533         for (i = 0; i < 2; i++) {
3534                 if (i == 0)
3535                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
3536                 else
3537                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
3538
3539                 if (rdev->ring[idx].mqd_obj) {
3540                         r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
3541                         if (unlikely(r != 0))
3542                                 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
3543
3544                         radeon_bo_unpin(rdev->ring[idx].mqd_obj);
3545                         radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3546
3547                         radeon_bo_unref(&rdev->ring[idx].mqd_obj);
3548                         rdev->ring[idx].mqd_obj = NULL;
3549                 }
3550         }
3551 }
3552
3553 static void cik_mec_fini(struct radeon_device *rdev)
3554 {
3555         int r;
3556
3557         if (rdev->mec.hpd_eop_obj) {
3558                 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
3559                 if (unlikely(r != 0))
3560                         dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
3561                 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
3562                 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
3563
3564                 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
3565                 rdev->mec.hpd_eop_obj = NULL;
3566         }
3567 }
3568
3569 #define MEC_HPD_SIZE 2048
3570
/* cik_mec_init - allocate and clear the per-pipe HPD EOP buffer for the MECs.
 * Returns 0 on success, negative error code on failure (buffer is torn
 * down via cik_mec_fini() on error).
 */
static int cik_mec_init(struct radeon_device *rdev)
{
	int r;
	u32 *hpd;

	/*
	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
	 */
	if (rdev->family == CHIP_KAVERI)
		rdev->mec.num_mec = 2;
	else
		rdev->mec.num_mec = 1;
	rdev->mec.num_pipe = 4;
	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;

	/* one MEC_HPD_SIZE*2 chunk per pipe (see EOP addr setup in
	 * cik_cp_compute_resume) */
	if (rdev->mec.hpd_eop_obj == NULL) {
		r = radeon_bo_create(rdev,
				     rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
				     PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_GTT, NULL,
				     &rdev->mec.hpd_eop_obj);
		if (r) {
			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
			return r;
		}
	}

	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
			  &rdev->mec.hpd_eop_gpu_addr);
	if (r) {
		/* NOTE(review): the bo is still reserved here yet
		 * cik_mec_fini() re-reserves it — confirm radeon_bo_reserve
		 * tolerates this on the error path */
		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}

	/* clear memory.  Not sure if this is required or not */
	memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);

	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

	return 0;
}
3626
/* CPU-side shadow of the per-queue HQD register block; mirrors the
 * CP_HQD_*/CP_MQD_* registers programmed in cik_cp_compute_resume() */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
3665
/* Memory Queue Descriptor (MQD) layout for Bonaire-class (CIK) compute
 * queues; one instance lives in a GTT bo per compute ring and is pointed
 * to by CP_MQD_BASE_ADDR.  Layout must match what the MEC firmware
 * expects — do not reorder fields. */
struct bonaire_mqd
{
	u32 header;		/* queue descriptor signature, set to 0xC0310800 */
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	struct hqd_registers queue_state;	/* HQD register shadow */
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};
3693
3694 /**
3695  * cik_cp_compute_resume - setup the compute queue registers
3696  *
3697  * @rdev: radeon_device pointer
3698  *
3699  * Program the compute queues and test them to make sure they
3700  * are working.
3701  * Returns 0 for success, error for failure.
3702  */
3703 static int cik_cp_compute_resume(struct radeon_device *rdev)
3704 {
3705         int r, i, idx;
3706         u32 tmp;
3707         bool use_doorbell = true;
3708         u64 hqd_gpu_addr;
3709         u64 mqd_gpu_addr;
3710         u64 eop_gpu_addr;
3711         u64 wb_gpu_addr;
3712         u32 *buf;
3713         struct bonaire_mqd *mqd;
3714
3715         r = cik_cp_compute_start(rdev);
3716         if (r)
3717                 return r;
3718
3719         /* fix up chicken bits */
3720         tmp = RREG32(CP_CPF_DEBUG);
3721         tmp |= (1 << 23);
3722         WREG32(CP_CPF_DEBUG, tmp);
3723
3724         /* init the pipes */
3725         mutex_lock(&rdev->srbm_mutex);
3726         for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
3727                 int me = (i < 4) ? 1 : 2;
3728                 int pipe = (i < 4) ? i : (i - 4);
3729
3730                 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
3731
3732                 cik_srbm_select(rdev, me, pipe, 0, 0);
3733
3734                 /* write the EOP addr */
3735                 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
3736                 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
3737
3738                 /* set the VMID assigned */
3739                 WREG32(CP_HPD_EOP_VMID, 0);
3740
3741                 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3742                 tmp = RREG32(CP_HPD_EOP_CONTROL);
3743                 tmp &= ~EOP_SIZE_MASK;
3744                 tmp |= drm_order(MEC_HPD_SIZE / 8);
3745                 WREG32(CP_HPD_EOP_CONTROL, tmp);
3746         }
3747         cik_srbm_select(rdev, 0, 0, 0, 0);
3748         mutex_unlock(&rdev->srbm_mutex);
3749
3750         /* init the queues.  Just two for now. */
3751         for (i = 0; i < 2; i++) {
3752                 if (i == 0)
3753                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
3754                 else
3755                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
3756
3757                 if (rdev->ring[idx].mqd_obj == NULL) {
3758                         r = radeon_bo_create(rdev,
3759                                              sizeof(struct bonaire_mqd),
3760                                              PAGE_SIZE, true,
3761                                              RADEON_GEM_DOMAIN_GTT, NULL,
3762                                              &rdev->ring[idx].mqd_obj);
3763                         if (r) {
3764                                 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
3765                                 return r;
3766                         }
3767                 }
3768
3769                 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
3770                 if (unlikely(r != 0)) {
3771                         cik_cp_compute_fini(rdev);
3772                         return r;
3773                 }
3774                 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
3775                                   &mqd_gpu_addr);
3776                 if (r) {
3777                         dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
3778                         cik_cp_compute_fini(rdev);
3779                         return r;
3780                 }
3781                 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
3782                 if (r) {
3783                         dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
3784                         cik_cp_compute_fini(rdev);
3785                         return r;
3786                 }
3787
3788                 /* doorbell offset */
3789                 rdev->ring[idx].doorbell_offset =
3790                         (rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0;
3791
3792                 /* init the mqd struct */
3793                 memset(buf, 0, sizeof(struct bonaire_mqd));
3794
3795                 mqd = (struct bonaire_mqd *)buf;
3796                 mqd->header = 0xC0310800;
3797                 mqd->static_thread_mgmt01[0] = 0xffffffff;
3798                 mqd->static_thread_mgmt01[1] = 0xffffffff;
3799                 mqd->static_thread_mgmt23[0] = 0xffffffff;
3800                 mqd->static_thread_mgmt23[1] = 0xffffffff;
3801
3802                 mutex_lock(&rdev->srbm_mutex);
3803                 cik_srbm_select(rdev, rdev->ring[idx].me,
3804                                 rdev->ring[idx].pipe,
3805                                 rdev->ring[idx].queue, 0);
3806
3807                 /* disable wptr polling */
3808                 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
3809                 tmp &= ~WPTR_POLL_EN;
3810                 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
3811
3812                 /* enable doorbell? */
3813                 mqd->queue_state.cp_hqd_pq_doorbell_control =
3814                         RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3815                 if (use_doorbell)
3816                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3817                 else
3818                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
3819                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3820                        mqd->queue_state.cp_hqd_pq_doorbell_control);
3821
3822                 /* disable the queue if it's active */
3823                 mqd->queue_state.cp_hqd_dequeue_request = 0;
3824                 mqd->queue_state.cp_hqd_pq_rptr = 0;
3825                 mqd->queue_state.cp_hqd_pq_wptr= 0;
3826                 if (RREG32(CP_HQD_ACTIVE) & 1) {
3827                         WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
3828                         for (i = 0; i < rdev->usec_timeout; i++) {
3829                                 if (!(RREG32(CP_HQD_ACTIVE) & 1))
3830                                         break;
3831                                 udelay(1);
3832                         }
3833                         WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
3834                         WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
3835                         WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3836                 }
3837
3838                 /* set the pointer to the MQD */
3839                 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
3840                 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
3841                 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
3842                 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
3843                 /* set MQD vmid to 0 */
3844                 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
3845                 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
3846                 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
3847
3848                 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3849                 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
3850                 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
3851                 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3852                 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
3853                 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
3854
3855                 /* set up the HQD, this is similar to CP_RB0_CNTL */
3856                 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
3857                 mqd->queue_state.cp_hqd_pq_control &=
3858                         ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
3859
3860                 mqd->queue_state.cp_hqd_pq_control |=
3861                         drm_order(rdev->ring[idx].ring_size / 8);
3862                 mqd->queue_state.cp_hqd_pq_control |=
3863                         (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8);
3864 #ifdef __BIG_ENDIAN
3865                 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
3866 #endif
3867                 mqd->queue_state.cp_hqd_pq_control &=
3868                         ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
3869                 mqd->queue_state.cp_hqd_pq_control |=
3870                         PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
3871                 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
3872
3873                 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
3874                 if (i == 0)
3875                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
3876                 else
3877                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
3878                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
3879                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3880                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
3881                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
3882                        mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
3883
3884                 /* set the wb address wether it's enabled or not */
3885                 if (i == 0)
3886                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
3887                 else
3888                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
3889                 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
3890                 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
3891                         upper_32_bits(wb_gpu_addr) & 0xffff;
3892                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
3893                        mqd->queue_state.cp_hqd_pq_rptr_report_addr);
3894                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3895                        mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
3896
3897                 /* enable the doorbell if requested */
3898                 if (use_doorbell) {
3899                         mqd->queue_state.cp_hqd_pq_doorbell_control =
3900                                 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3901                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
3902                         mqd->queue_state.cp_hqd_pq_doorbell_control |=
3903                                 DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4);
3904                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3905                         mqd->queue_state.cp_hqd_pq_doorbell_control &=
3906                                 ~(DOORBELL_SOURCE | DOORBELL_HIT);
3907
3908                 } else {
3909                         mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
3910                 }
3911                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3912                        mqd->queue_state.cp_hqd_pq_doorbell_control);
3913
3914                 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3915                 rdev->ring[idx].wptr = 0;
3916                 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
3917                 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3918                 rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
3919                 mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
3920
3921                 /* set the vmid for the queue */
3922                 mqd->queue_state.cp_hqd_vmid = 0;
3923                 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
3924
3925                 /* activate the queue */
3926                 mqd->queue_state.cp_hqd_active = 1;
3927                 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
3928
3929                 cik_srbm_select(rdev, 0, 0, 0, 0);
3930                 mutex_unlock(&rdev->srbm_mutex);
3931
3932                 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
3933                 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3934
3935                 rdev->ring[idx].ready = true;
3936                 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
3937                 if (r)
3938                         rdev->ring[idx].ready = false;
3939         }
3940
3941         return 0;
3942 }
3943
3944 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
3945 {
3946         cik_cp_gfx_enable(rdev, enable);
3947         cik_cp_compute_enable(rdev, enable);
3948 }
3949
/* cik_cp_load_microcode - load gfx then compute CP ucode.
 * Returns 0 on success, first error code otherwise. */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int r;

	r = cik_cp_gfx_load_microcode(rdev);
	if (r)
		return r;

	return cik_cp_compute_load_microcode(rdev);
}
3963
/* cik_cp_fini - tear down the gfx CP and the compute queues */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
3969
3970 static int cik_cp_resume(struct radeon_device *rdev)
3971 {
3972         int r;
3973
3974         /* Reset all cp blocks */
3975         WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
3976         RREG32(GRBM_SOFT_RESET);
3977         mdelay(15);
3978         WREG32(GRBM_SOFT_RESET, 0);
3979         RREG32(GRBM_SOFT_RESET);
3980
3981         r = cik_cp_load_microcode(rdev);
3982         if (r)
3983                 return r;
3984
3985         r = cik_cp_gfx_resume(rdev);
3986         if (r)
3987                 return r;
3988         r = cik_cp_compute_resume(rdev);
3989         if (r)
3990                 return r;
3991
3992         return 0;
3993 }
3994
3995 /*
3996  * sDMA - System DMA
3997  * Starting with CIK, the GPU has new asynchronous
3998  * DMA engines.  These engines are used for compute
3999  * and gfx.  There are two DMA engines (SDMA0, SDMA1)
4000  * and each one supports 1 ring buffer used for gfx
4001  * and 2 queues used for compute.
4002  *
4003  * The programming model is very similar to the CP
 * (ring buffer, IBs, etc.), but sDMA has its own
4005  * packet format that is different from the PM4 format
4006  * used by the CP. sDMA supports copying data, writing
4007  * embedded data, solid fills, and a number of other
4008  * things.  It also has support for tiling/detiling of
4009  * buffers.
4010  */
4011 /**
4012  * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine
4013  *
4014  * @rdev: radeon_device pointer
4015  * @ib: IB object to schedule
4016  *
4017  * Schedule an IB in the DMA ring (CIK).
4018  */
4019 void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
4020                               struct radeon_ib *ib)
4021 {
4022         struct radeon_ring *ring = &rdev->ring[ib->ring];
4023         u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf;
4024
4025         if (rdev->wb.enabled) {
4026                 u32 next_rptr = ring->wptr + 5;
4027                 while ((next_rptr & 7) != 4)
4028                         next_rptr++;
4029                 next_rptr += 4;
4030                 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
4031                 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
4032                 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
4033                 radeon_ring_write(ring, 1); /* number of DWs to follow */
4034                 radeon_ring_write(ring, next_rptr);
4035         }
4036
4037         /* IB packet must end on a 8 DW boundary */
4038         while ((ring->wptr & 7) != 4)
4039                 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
4040         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
4041         radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
4042         radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
4043         radeon_ring_write(ring, ib->length_dw);
4044
4045 }
4046
4047 /**
4048  * cik_sdma_fence_ring_emit - emit a fence on the DMA ring
4049  *
4050  * @rdev: radeon_device pointer
4051  * @fence: radeon fence object
4052  *
4053  * Add a DMA fence packet to the ring to write
4054  * the fence seq number and DMA trap packet to generate
4055  * an interrupt if needed (CIK).
4056  */
4057 void cik_sdma_fence_ring_emit(struct radeon_device *rdev,
4058                               struct radeon_fence *fence)
4059 {
4060         struct radeon_ring *ring = &rdev->ring[fence->ring];
4061         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
4062         u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
4063                           SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
4064         u32 ref_and_mask;
4065
4066         if (fence->ring == R600_RING_TYPE_DMA_INDEX)
4067                 ref_and_mask = SDMA0;
4068         else
4069                 ref_and_mask = SDMA1;
4070
4071         /* write the fence */
4072         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
4073         radeon_ring_write(ring, addr & 0xffffffff);
4074         radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
4075         radeon_ring_write(ring, fence->seq);
4076         /* generate an interrupt */
4077         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0));
4078         /* flush HDP */
4079         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
4080         radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
4081         radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
4082         radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
4083         radeon_ring_write(ring, ref_and_mask); /* MASK */
4084         radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
4085 }
4086
4087 /**
4088  * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring
4089  *
4090  * @rdev: radeon_device pointer
4091  * @ring: radeon_ring structure holding ring information
4092  * @semaphore: radeon semaphore object
4093  * @emit_wait: wait or signal semaphore
4094  *
4095  * Add a DMA semaphore packet to the ring wait on or signal
4096  * other rings (CIK).
4097  */
4098 void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
4099                                   struct radeon_ring *ring,
4100                                   struct radeon_semaphore *semaphore,
4101                                   bool emit_wait)
4102 {
4103         u64 addr = semaphore->gpu_addr;
4104         u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S;
4105
4106         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
4107         radeon_ring_write(ring, addr & 0xfffffff8);
4108         radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
4109 }
4110
4111 /**
4112  * cik_sdma_gfx_stop - stop the gfx async dma engines
4113  *
4114  * @rdev: radeon_device pointer
4115  *
4116  * Stop the gfx async dma ring buffers (CIK).
4117  */
4118 static void cik_sdma_gfx_stop(struct radeon_device *rdev)
4119 {
4120         u32 rb_cntl, reg_offset;
4121         int i;
4122
4123         radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
4124
4125         for (i = 0; i < 2; i++) {
4126                 if (i == 0)
4127                         reg_offset = SDMA0_REGISTER_OFFSET;
4128                 else
4129                         reg_offset = SDMA1_REGISTER_OFFSET;
4130                 rb_cntl = RREG32(SDMA0_GFX_RB_CNTL + reg_offset);
4131                 rb_cntl &= ~SDMA_RB_ENABLE;
4132                 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
4133                 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0);
4134         }
4135 }
4136
4137 /**
4138  * cik_sdma_rlc_stop - stop the compute async dma engines
4139  *
4140  * @rdev: radeon_device pointer
4141  *
4142  * Stop the compute async dma queues (CIK).
4143  */
4144 static void cik_sdma_rlc_stop(struct radeon_device *rdev)
4145 {
4146         /* XXX todo */
4147 }
4148
4149 /**
4150  * cik_sdma_enable - stop the async dma engines
4151  *
4152  * @rdev: radeon_device pointer
4153  * @enable: enable/disable the DMA MEs.
4154  *
4155  * Halt or unhalt the async dma engines (CIK).
4156  */
4157 static void cik_sdma_enable(struct radeon_device *rdev, bool enable)
4158 {
4159         u32 me_cntl, reg_offset;
4160         int i;
4161
4162         for (i = 0; i < 2; i++) {
4163                 if (i == 0)
4164                         reg_offset = SDMA0_REGISTER_OFFSET;
4165                 else
4166                         reg_offset = SDMA1_REGISTER_OFFSET;
4167                 me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset);
4168                 if (enable)
4169                         me_cntl &= ~SDMA_HALT;
4170                 else
4171                         me_cntl |= SDMA_HALT;
4172                 WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl);
4173         }
4174 }
4175
4176 /**
4177  * cik_sdma_gfx_resume - setup and start the async dma engines
4178  *
4179  * @rdev: radeon_device pointer
4180  *
4181  * Set up the gfx DMA ring buffers and enable them (CIK).
4182  * Returns 0 for success, error for failure.
4183  */
4184 static int cik_sdma_gfx_resume(struct radeon_device *rdev)
4185 {
4186         struct radeon_ring *ring;
4187         u32 rb_cntl, ib_cntl;
4188         u32 rb_bufsz;
4189         u32 reg_offset, wb_offset;
4190         int i, r;
4191
4192         for (i = 0; i < 2; i++) {
4193                 if (i == 0) {
4194                         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
4195                         reg_offset = SDMA0_REGISTER_OFFSET;
4196                         wb_offset = R600_WB_DMA_RPTR_OFFSET;
4197                 } else {
4198                         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
4199                         reg_offset = SDMA1_REGISTER_OFFSET;
4200                         wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
4201                 }
4202
4203                 WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
4204                 WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
4205
4206                 /* Set ring buffer size in dwords */
4207                 rb_bufsz = drm_order(ring->ring_size / 4);
4208                 rb_cntl = rb_bufsz << 1;
4209 #ifdef __BIG_ENDIAN
4210                 rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE;
4211 #endif
4212                 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
4213
4214                 /* Initialize the ring buffer's read and write pointers */
4215                 WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0);
4216                 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0);
4217
4218                 /* set the wb address whether it's enabled or not */
4219                 WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset,
4220                        upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
4221                 WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset,
4222                        ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
4223
4224                 if (rdev->wb.enabled)
4225                         rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE;
4226
4227                 WREG32(SDMA0_GFX_RB_BASE + reg_offset, ring->gpu_addr >> 8);
4228                 WREG32(SDMA0_GFX_RB_BASE_HI + reg_offset, ring->gpu_addr >> 40);
4229
4230                 ring->wptr = 0;
4231                 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2);
4232
4233                 ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2;
4234
4235                 /* enable DMA RB */
4236                 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE);
4237
4238                 ib_cntl = SDMA_IB_ENABLE;
4239 #ifdef __BIG_ENDIAN
4240                 ib_cntl |= SDMA_IB_SWAP_ENABLE;
4241 #endif
4242                 /* enable DMA IBs */
4243                 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, ib_cntl);
4244
4245                 ring->ready = true;
4246
4247                 r = radeon_ring_test(rdev, ring->idx, ring);
4248                 if (r) {
4249                         ring->ready = false;
4250                         return r;
4251                 }
4252         }
4253
4254         radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4255
4256         return 0;
4257 }
4258
4259 /**
4260  * cik_sdma_rlc_resume - setup and start the async dma engines
4261  *
4262  * @rdev: radeon_device pointer
4263  *
4264  * Set up the compute DMA queues and enable them (CIK).
4265  * Returns 0 for success, error for failure.
4266  */
4267 static int cik_sdma_rlc_resume(struct radeon_device *rdev)
4268 {
4269         /* XXX todo */
4270         return 0;
4271 }
4272
4273 /**
4274  * cik_sdma_load_microcode - load the sDMA ME ucode
4275  *
4276  * @rdev: radeon_device pointer
4277  *
4278  * Loads the sDMA0/1 ucode.
4279  * Returns 0 for success, -EINVAL if the ucode is not available.
4280  */
4281 static int cik_sdma_load_microcode(struct radeon_device *rdev)
4282 {
4283         const __be32 *fw_data;
4284         int i;
4285
4286         if (!rdev->sdma_fw)
4287                 return -EINVAL;
4288
4289         /* stop the gfx rings and rlc compute queues */
4290         cik_sdma_gfx_stop(rdev);
4291         cik_sdma_rlc_stop(rdev);
4292
4293         /* halt the MEs */
4294         cik_sdma_enable(rdev, false);
4295
4296         /* sdma0 */
4297         fw_data = (const __be32 *)rdev->sdma_fw->data;
4298         WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
4299         for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
4300                 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++));
4301         WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
4302
4303         /* sdma1 */
4304         fw_data = (const __be32 *)rdev->sdma_fw->data;
4305         WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
4306         for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
4307                 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++));
4308         WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
4309
4310         WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
4311         WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
4312         return 0;
4313 }
4314
4315 /**
4316  * cik_sdma_resume - setup and start the async dma engines
4317  *
4318  * @rdev: radeon_device pointer
4319  *
4320  * Set up the DMA engines and enable them (CIK).
4321  * Returns 0 for success, error for failure.
4322  */
4323 static int cik_sdma_resume(struct radeon_device *rdev)
4324 {
4325         int r;
4326
4327         /* Reset dma */
4328         WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1);
4329         RREG32(SRBM_SOFT_RESET);
4330         udelay(50);
4331         WREG32(SRBM_SOFT_RESET, 0);
4332         RREG32(SRBM_SOFT_RESET);
4333
4334         r = cik_sdma_load_microcode(rdev);
4335         if (r)
4336                 return r;
4337
4338         /* unhalt the MEs */
4339         cik_sdma_enable(rdev, true);
4340
4341         /* start the gfx rings and rlc compute queues */
4342         r = cik_sdma_gfx_resume(rdev);
4343         if (r)
4344                 return r;
4345         r = cik_sdma_rlc_resume(rdev);
4346         if (r)
4347                 return r;
4348
4349         return 0;
4350 }
4351
4352 /**
4353  * cik_sdma_fini - tear down the async dma engines
4354  *
4355  * @rdev: radeon_device pointer
4356  *
4357  * Stop the async dma engines and free the rings (CIK).
4358  */
4359 static void cik_sdma_fini(struct radeon_device *rdev)
4360 {
4361         /* stop the gfx rings and rlc compute queues */
4362         cik_sdma_gfx_stop(rdev);
4363         cik_sdma_rlc_stop(rdev);
4364         /* halt the MEs */
4365         cik_sdma_enable(rdev, false);
4366         radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
4367         radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
4368         /* XXX - compute dma queue tear down */
4369 }
4370
4371 /**
4372  * cik_copy_dma - copy pages using the DMA engine
4373  *
4374  * @rdev: radeon_device pointer
4375  * @src_offset: src GPU address
4376  * @dst_offset: dst GPU address
4377  * @num_gpu_pages: number of GPU pages to xfer
4378  * @fence: radeon fence object
4379  *
4380  * Copy GPU paging using the DMA engine (CIK).
4381  * Used by the radeon ttm implementation to move pages if
4382  * registered as the asic copy callback.
4383  */
4384 int cik_copy_dma(struct radeon_device *rdev,
4385                  uint64_t src_offset, uint64_t dst_offset,
4386                  unsigned num_gpu_pages,
4387                  struct radeon_fence **fence)
4388 {
4389         struct radeon_semaphore *sem = NULL;
4390         int ring_index = rdev->asic->copy.dma_ring_index;
4391         struct radeon_ring *ring = &rdev->ring[ring_index];
4392         u32 size_in_bytes, cur_size_in_bytes;
4393         int i, num_loops;
4394         int r = 0;
4395
4396         r = radeon_semaphore_create(rdev, &sem);
4397         if (r) {
4398                 DRM_ERROR("radeon: moving bo (%d).\n", r);
4399                 return r;
4400         }
4401
4402         size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
4403         num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
4404         r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14);
4405         if (r) {
4406                 DRM_ERROR("radeon: moving bo (%d).\n", r);
4407                 radeon_semaphore_free(rdev, &sem, NULL);
4408                 return r;
4409         }
4410
4411         if (radeon_fence_need_sync(*fence, ring->idx)) {
4412                 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
4413                                             ring->idx);
4414                 radeon_fence_note_sync(*fence, ring->idx);
4415         } else {
4416                 radeon_semaphore_free(rdev, &sem, NULL);
4417         }
4418
4419         for (i = 0; i < num_loops; i++) {
4420                 cur_size_in_bytes = size_in_bytes;
4421                 if (cur_size_in_bytes > 0x1fffff)
4422                         cur_size_in_bytes = 0x1fffff;
4423                 size_in_bytes -= cur_size_in_bytes;
4424                 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0));
4425                 radeon_ring_write(ring, cur_size_in_bytes);
4426                 radeon_ring_write(ring, 0); /* src/dst endian swap */
4427                 radeon_ring_write(ring, src_offset & 0xffffffff);
4428                 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xffffffff);
4429                 radeon_ring_write(ring, dst_offset & 0xfffffffc);
4430                 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xffffffff);
4431                 src_offset += cur_size_in_bytes;
4432                 dst_offset += cur_size_in_bytes;
4433         }
4434
4435         r = radeon_fence_emit(rdev, fence, ring->idx);
4436         if (r) {
4437                 radeon_ring_unlock_undo(rdev, ring);
4438                 return r;
4439         }
4440
4441         radeon_ring_unlock_commit(rdev, ring);
4442         radeon_semaphore_free(rdev, &sem, *fence);
4443
4444         return r;
4445 }
4446
4447 /**
4448  * cik_sdma_ring_test - simple async dma engine test
4449  *
4450  * @rdev: radeon_device pointer
4451  * @ring: radeon_ring structure holding ring information
4452  *
4453  * Test the DMA engine by writing using it to write an
4454  * value to memory. (CIK).
4455  * Returns 0 for success, error for failure.
4456  */
4457 int cik_sdma_ring_test(struct radeon_device *rdev,
4458                        struct radeon_ring *ring)
4459 {
4460         unsigned i;
4461         int r;
4462         void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
4463         u32 tmp;
4464
4465         if (!ptr) {
4466                 DRM_ERROR("invalid vram scratch pointer\n");
4467                 return -EINVAL;
4468         }
4469
4470         tmp = 0xCAFEDEAD;
4471         writel(tmp, ptr);
4472
4473         r = radeon_ring_lock(rdev, ring, 4);
4474         if (r) {
4475                 DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
4476                 return r;
4477         }
4478         radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
4479         radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
4480         radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff);
4481         radeon_ring_write(ring, 1); /* number of DWs to follow */
4482         radeon_ring_write(ring, 0xDEADBEEF);
4483         radeon_ring_unlock_commit(rdev, ring);
4484
4485         for (i = 0; i < rdev->usec_timeout; i++) {
4486                 tmp = readl(ptr);
4487                 if (tmp == 0xDEADBEEF)
4488                         break;
4489                 DRM_UDELAY(1);
4490         }
4491
4492         if (i < rdev->usec_timeout) {
4493                 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
4494         } else {
4495                 DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
4496                           ring->idx, tmp);
4497                 r = -EINVAL;
4498         }
4499         return r;
4500 }
4501
4502 /**
4503  * cik_sdma_ib_test - test an IB on the DMA engine
4504  *
4505  * @rdev: radeon_device pointer
4506  * @ring: radeon_ring structure holding ring information
4507  *
4508  * Test a simple IB in the DMA ring (CIK).
4509  * Returns 0 on success, error on failure.
4510  */
4511 int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
4512 {
4513         struct radeon_ib ib;
4514         unsigned i;
4515         int r;
4516         void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
4517         u32 tmp = 0;
4518
4519         if (!ptr) {
4520                 DRM_ERROR("invalid vram scratch pointer\n");
4521                 return -EINVAL;
4522         }
4523
4524         tmp = 0xCAFEDEAD;
4525         writel(tmp, ptr);
4526
4527         r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
4528         if (r) {
4529                 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
4530                 return r;
4531         }
4532
4533         ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
4534         ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
4535         ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff;
4536         ib.ptr[3] = 1;
4537         ib.ptr[4] = 0xDEADBEEF;
4538         ib.length_dw = 5;
4539
4540         r = radeon_ib_schedule(rdev, &ib, NULL);
4541         if (r) {
4542                 radeon_ib_free(rdev, &ib);
4543                 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
4544                 return r;
4545         }
4546         r = radeon_fence_wait(ib.fence, false);
4547         if (r) {
4548                 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
4549                 return r;
4550         }
4551         for (i = 0; i < rdev->usec_timeout; i++) {
4552                 tmp = readl(ptr);
4553                 if (tmp == 0xDEADBEEF)
4554                         break;
4555                 DRM_UDELAY(1);
4556         }
4557         if (i < rdev->usec_timeout) {
4558                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
4559         } else {
4560                 DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
4561                 r = -EINVAL;
4562         }
4563         radeon_ib_free(rdev, &ib);
4564         return r;
4565 }
4566
4567
/**
 * cik_print_gpu_status_regs - dump GPU status registers
 *
 * @rdev: radeon_device pointer
 *
 * Log the GRBM/SRBM, SDMA and CP status registers to aid
 * debugging of GPU hangs (CIK).
 */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
        dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
                RREG32(GRBM_STATUS));
        dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
                RREG32(GRBM_STATUS2));
        dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
                RREG32(GRBM_STATUS_SE0));
        dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
                RREG32(GRBM_STATUS_SE1));
        dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
                RREG32(GRBM_STATUS_SE2));
        dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
                RREG32(GRBM_STATUS_SE3));
        dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
                RREG32(SRBM_STATUS));
        dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
                RREG32(SRBM_STATUS2));
        dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
                RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
        dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
                 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
        dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
        dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
                 RREG32(CP_STALLED_STAT1));
        dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
                 RREG32(CP_STALLED_STAT2));
        dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
                 RREG32(CP_STALLED_STAT3));
        dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
                 RREG32(CP_CPF_BUSY_STAT));
        dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
                 RREG32(CP_CPF_STALLED_STAT1));
        dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
        dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
        dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
                 RREG32(CP_CPC_STALLED_STAT1));
        dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
4607
4608 /**
4609  * cik_gpu_check_soft_reset - check which blocks are busy
4610  *
4611  * @rdev: radeon_device pointer
4612  *
4613  * Check which blocks are busy and return the relevant reset
4614  * mask to be used by cik_gpu_soft_reset().
4615  * Returns a mask of the blocks to be reset.
4616  */
4617 static u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4618 {
4619         u32 reset_mask = 0;
4620         u32 tmp;
4621
4622         /* GRBM_STATUS */
4623         tmp = RREG32(GRBM_STATUS);
4624         if (tmp & (PA_BUSY | SC_BUSY |
4625                    BCI_BUSY | SX_BUSY |
4626                    TA_BUSY | VGT_BUSY |
4627                    DB_BUSY | CB_BUSY |
4628                    GDS_BUSY | SPI_BUSY |
4629                    IA_BUSY | IA_BUSY_NO_DMA))
4630                 reset_mask |= RADEON_RESET_GFX;
4631
4632         if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4633                 reset_mask |= RADEON_RESET_CP;
4634
4635         /* GRBM_STATUS2 */
4636         tmp = RREG32(GRBM_STATUS2);
4637         if (tmp & RLC_BUSY)
4638                 reset_mask |= RADEON_RESET_RLC;
4639
4640         /* SDMA0_STATUS_REG */
4641         tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4642         if (!(tmp & SDMA_IDLE))
4643                 reset_mask |= RADEON_RESET_DMA;
4644
4645         /* SDMA1_STATUS_REG */
4646         tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4647         if (!(tmp & SDMA_IDLE))
4648                 reset_mask |= RADEON_RESET_DMA1;
4649
4650         /* SRBM_STATUS2 */
4651         tmp = RREG32(SRBM_STATUS2);
4652         if (tmp & SDMA_BUSY)
4653                 reset_mask |= RADEON_RESET_DMA;
4654
4655         if (tmp & SDMA1_BUSY)
4656                 reset_mask |= RADEON_RESET_DMA1;
4657
4658         /* SRBM_STATUS */
4659         tmp = RREG32(SRBM_STATUS);
4660
4661         if (tmp & IH_BUSY)
4662                 reset_mask |= RADEON_RESET_IH;
4663
4664         if (tmp & SEM_BUSY)
4665                 reset_mask |= RADEON_RESET_SEM;
4666
4667         if (tmp & GRBM_RQ_PENDING)
4668                 reset_mask |= RADEON_RESET_GRBM;
4669
4670         if (tmp & VMC_BUSY)
4671                 reset_mask |= RADEON_RESET_VMC;
4672
4673         if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4674                    MCC_BUSY | MCD_BUSY))
4675                 reset_mask |= RADEON_RESET_MC;
4676
4677         if (evergreen_is_display_hung(rdev))
4678                 reset_mask |= RADEON_RESET_DISPLAY;
4679
4680         /* Skip MC reset as it's mostly likely not hung, just busy */
4681         if (reset_mask & RADEON_RESET_MC) {
4682                 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4683                 reset_mask &= ~RADEON_RESET_MC;
4684         }
4685
4686         return reset_mask;
4687 }
4688
4689 /**
4690  * cik_gpu_soft_reset - soft reset GPU
4691  *
4692  * @rdev: radeon_device pointer
4693  * @reset_mask: mask of which blocks to reset
4694  *
4695  * Soft reset the blocks specified in @reset_mask.
4696  */
4697 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4698 {
4699         struct evergreen_mc_save save;
4700         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4701         u32 tmp;
4702
4703         if (reset_mask == 0)
4704                 return;
4705
4706         dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4707
4708         cik_print_gpu_status_regs(rdev);
4709         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4710                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4711         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4712                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4713
4714         /* stop the rlc */
4715         cik_rlc_stop(rdev);
4716
4717         /* Disable GFX parsing/prefetching */
4718         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4719
4720         /* Disable MEC parsing/prefetching */
4721         WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4722
4723         if (reset_mask & RADEON_RESET_DMA) {
4724                 /* sdma0 */
4725                 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4726                 tmp |= SDMA_HALT;
4727                 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4728         }
4729         if (reset_mask & RADEON_RESET_DMA1) {
4730                 /* sdma1 */
4731                 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4732                 tmp |= SDMA_HALT;
4733                 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4734         }
4735
4736         evergreen_mc_stop(rdev, &save);
4737         if (evergreen_mc_wait_for_idle(rdev)) {
4738                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4739         }
4740
4741         if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4742                 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4743
4744         if (reset_mask & RADEON_RESET_CP) {
4745                 grbm_soft_reset |= SOFT_RESET_CP;
4746
4747                 srbm_soft_reset |= SOFT_RESET_GRBM;
4748         }
4749
4750         if (reset_mask & RADEON_RESET_DMA)
4751                 srbm_soft_reset |= SOFT_RESET_SDMA;
4752
4753         if (reset_mask & RADEON_RESET_DMA1)
4754                 srbm_soft_reset |= SOFT_RESET_SDMA1;
4755
4756         if (reset_mask & RADEON_RESET_DISPLAY)
4757                 srbm_soft_reset |= SOFT_RESET_DC;
4758
4759         if (reset_mask & RADEON_RESET_RLC)
4760                 grbm_soft_reset |= SOFT_RESET_RLC;
4761
4762         if (reset_mask & RADEON_RESET_SEM)
4763                 srbm_soft_reset |= SOFT_RESET_SEM;
4764
4765         if (reset_mask & RADEON_RESET_IH)
4766                 srbm_soft_reset |= SOFT_RESET_IH;
4767
4768         if (reset_mask & RADEON_RESET_GRBM)
4769                 srbm_soft_reset |= SOFT_RESET_GRBM;
4770
4771         if (reset_mask & RADEON_RESET_VMC)
4772                 srbm_soft_reset |= SOFT_RESET_VMC;
4773
4774         if (!(rdev->flags & RADEON_IS_IGP)) {
4775                 if (reset_mask & RADEON_RESET_MC)
4776                         srbm_soft_reset |= SOFT_RESET_MC;
4777         }
4778
4779         if (grbm_soft_reset) {
4780                 tmp = RREG32(GRBM_SOFT_RESET);
4781                 tmp |= grbm_soft_reset;
4782                 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4783                 WREG32(GRBM_SOFT_RESET, tmp);
4784                 tmp = RREG32(GRBM_SOFT_RESET);
4785
4786                 udelay(50);
4787
4788                 tmp &= ~grbm_soft_reset;
4789                 WREG32(GRBM_SOFT_RESET, tmp);
4790                 tmp = RREG32(GRBM_SOFT_RESET);
4791         }
4792
4793         if (srbm_soft_reset) {
4794                 tmp = RREG32(SRBM_SOFT_RESET);
4795                 tmp |= srbm_soft_reset;
4796                 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4797                 WREG32(SRBM_SOFT_RESET, tmp);
4798                 tmp = RREG32(SRBM_SOFT_RESET);
4799
4800                 udelay(50);
4801
4802                 tmp &= ~srbm_soft_reset;
4803                 WREG32(SRBM_SOFT_RESET, tmp);
4804                 tmp = RREG32(SRBM_SOFT_RESET);
4805         }
4806
4807         /* Wait a little for things to settle down */
4808         udelay(50);
4809
4810         evergreen_mc_resume(rdev, &save);
4811         udelay(50);
4812
4813         cik_print_gpu_status_regs(rdev);
4814 }
4815
4816 /**
4817  * cik_asic_reset - soft reset GPU
4818  *
4819  * @rdev: radeon_device pointer
4820  *
4821  * Look up which blocks are hung and attempt
4822  * to reset them.
4823  * Returns 0 for success.
4824  */
4825 int cik_asic_reset(struct radeon_device *rdev)
4826 {
4827         u32 reset_mask;
4828
4829         reset_mask = cik_gpu_check_soft_reset(rdev);
4830
4831         if (reset_mask)
4832                 r600_set_bios_scratch_engine_hung(rdev, true);
4833
4834         cik_gpu_soft_reset(rdev, reset_mask);
4835
4836         reset_mask = cik_gpu_check_soft_reset(rdev);
4837
4838         if (!reset_mask)
4839                 r600_set_bios_scratch_engine_hung(rdev, false);
4840
4841         return 0;
4842 }
4843
4844 /**
4845  * cik_gfx_is_lockup - check if the 3D engine is locked up
4846  *
4847  * @rdev: radeon_device pointer
4848  * @ring: radeon_ring structure holding ring information
4849  *
4850  * Check if the 3D engine is locked up (CIK).
4851  * Returns true if the engine is locked, false if not.
4852  */
4853 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4854 {
4855         u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4856
4857         if (!(reset_mask & (RADEON_RESET_GFX |
4858                             RADEON_RESET_COMPUTE |
4859                             RADEON_RESET_CP))) {
4860                 radeon_ring_lockup_update(ring);
4861                 return false;
4862         }
4863         /* force CP activities */
4864         radeon_ring_force_activity(rdev, ring);
4865         return radeon_ring_test_lockup(rdev, ring);
4866 }
4867
4868 /**
4869  * cik_sdma_is_lockup - Check if the DMA engine is locked up
4870  *
4871  * @rdev: radeon_device pointer
4872  * @ring: radeon_ring structure holding ring information
4873  *
4874  * Check if the async DMA engine is locked up (CIK).
4875  * Returns true if the engine appears to be locked up, false if not.
4876  */
4877 bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4878 {
4879         u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4880         u32 mask;
4881
4882         if (ring->idx == R600_RING_TYPE_DMA_INDEX)
4883                 mask = RADEON_RESET_DMA;
4884         else
4885                 mask = RADEON_RESET_DMA1;
4886
4887         if (!(reset_mask & mask)) {
4888                 radeon_ring_lockup_update(ring);
4889                 return false;
4890         }
4891         /* force ring activities */
4892         radeon_ring_force_activity(rdev, ring);
4893         return radeon_ring_test_lockup(rdev, ring);
4894 }
4895
4896 /* MC */
4897 /**
4898  * cik_mc_program - program the GPU memory controller
4899  *
4900  * @rdev: radeon_device pointer
4901  *
4902  * Set the location of vram, gart, and AGP in the GPU's
4903  * physical address space (CIK).
4904  */
4905 static void cik_mc_program(struct radeon_device *rdev)
4906 {
4907         struct evergreen_mc_save save;
4908         u32 tmp;
4909         int i, j;
4910
4911         /* Initialize HDP */
4912         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4913                 WREG32((0x2c14 + j), 0x00000000);
4914                 WREG32((0x2c18 + j), 0x00000000);
4915                 WREG32((0x2c1c + j), 0x00000000);
4916                 WREG32((0x2c20 + j), 0x00000000);
4917                 WREG32((0x2c24 + j), 0x00000000);
4918         }
4919         WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4920
4921         evergreen_mc_stop(rdev, &save);
4922         if (radeon_mc_wait_for_idle(rdev)) {
4923                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4924         }
4925         /* Lockout access through VGA aperture*/
4926         WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4927         /* Update configuration */
4928         WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4929                rdev->mc.vram_start >> 12);
4930         WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4931                rdev->mc.vram_end >> 12);
4932         WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4933                rdev->vram_scratch.gpu_addr >> 12);
4934         tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4935         tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4936         WREG32(MC_VM_FB_LOCATION, tmp);
4937         /* XXX double check these! */
4938         WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4939         WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4940         WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4941         WREG32(MC_VM_AGP_BASE, 0);
4942         WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4943         WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4944         if (radeon_mc_wait_for_idle(rdev)) {
4945                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4946         }
4947         evergreen_mc_resume(rdev, &save);
4948         /* we need to own VRAM, so turn off the VGA renderer here
4949          * to stop it overwriting our objects */
4950         rv515_vga_render_disable(rdev);
4951 }
4952
4953 /**
4954  * cik_mc_init - initialize the memory controller driver params
4955  *
4956  * @rdev: radeon_device pointer
4957  *
4958  * Look up the amount of vram, vram width, and decide how to place
4959  * vram and gart within the GPU's physical address space (CIK).
4960  * Returns 0 for success.
4961  */
4962 static int cik_mc_init(struct radeon_device *rdev)
4963 {
4964         u32 tmp;
4965         int chansize, numchan;
4966
4967         /* Get VRAM informations */
4968         rdev->mc.vram_is_ddr = true;
4969         tmp = RREG32(MC_ARB_RAMCFG);
4970         if (tmp & CHANSIZE_MASK) {
4971                 chansize = 64;
4972         } else {
4973                 chansize = 32;
4974         }
4975         tmp = RREG32(MC_SHARED_CHMAP);
4976         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4977         case 0:
4978         default:
4979                 numchan = 1;
4980                 break;
4981         case 1:
4982                 numchan = 2;
4983                 break;
4984         case 2:
4985                 numchan = 4;
4986                 break;
4987         case 3:
4988                 numchan = 8;
4989                 break;
4990         case 4:
4991                 numchan = 3;
4992                 break;
4993         case 5:
4994                 numchan = 6;
4995                 break;
4996         case 6:
4997                 numchan = 10;
4998                 break;
4999         case 7:
5000                 numchan = 12;
5001                 break;
5002         case 8:
5003                 numchan = 16;
5004                 break;
5005         }
5006         rdev->mc.vram_width = numchan * chansize;
5007         /* Could aper size report 0 ? */
5008         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5009         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5010         /* size in MB on si */
5011         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
5012         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
5013         rdev->mc.visible_vram_size = rdev->mc.aper_size;
5014         si_vram_gtt_location(rdev, &rdev->mc);
5015         radeon_update_bandwidth_info(rdev);
5016
5017         return 0;
5018 }
5019
5020 /*
5021  * GART
5022  * VMID 0 is the physical GPU addresses as used by the kernel.
5023  * VMIDs 1-15 are used for userspace clients and are handled
5024  * by the radeon vm/hsa code.
5025  */
5026 /**
5027  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5028  *
5029  * @rdev: radeon_device pointer
5030  *
5031  * Flush the TLB for the VMID 0 page table (CIK).
5032  */
void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);

	/* bits 0-15 are the VM contexts0-15; only bit 0 is set here,
	 * i.e. only VMID 0 (the kernel's GART context) is invalidated
	 */
	WREG32(VM_INVALIDATE_REQUEST, 0x1);
}
5041
5042 /**
5043  * cik_pcie_gart_enable - gart enable
5044  *
5045  * @rdev: radeon_device pointer
5046  *
5047  * This sets up the TLBs, programs the page tables for VMID0,
5048  * sets up the hw for VMIDs 1-15 which are allocated on
5049  * demand, and sets up the global locations for the LDS, GDS,
5050  * and GPUVM for FSA64 clients (CIK).
5051  * Returns 0 for success, errors for failure.
5052  */
static int cik_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	/* the page table lives in a VRAM BO; bail if it was never created */
	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	radeon_gart_restore(rdev);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       /* NOTE(review): (0xA << 7) is an unnamed bitfield -- confirm
	        * its meaning against the register spec
	        */
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	/* setup context0: the kernel's flat mapping over the GTT aperture */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	/* context0 faults are redirected to the dummy page */
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	/* NOTE(review): raw offsets 0x15D4-0x15DC have no symbolic names
	 * here; they are cleared during setup -- confirm purpose against
	 * the register spec
	 */
	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-15 */
	/* FIXME start with 4G, once using 2 level pt switch to full
	 * vm size space
	 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
	/* point every user context at the kernel page table for now;
	 * contexts 0-7 and 8-15 live in two separate register banks
	 */
	for (i = 1; i < 16; i++) {
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->gart.table_addr >> 12);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->gart.table_addr >> 12);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	/* TC cache setup ??? */
	WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
	WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
	WREG32(TC_CFG_L1_STORE_POLICY, 0);

	WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
	WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
	WREG32(TC_CFG_L2_STORE_POLICY0, 0);
	WREG32(TC_CFG_L2_STORE_POLICY1, 0);
	WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);

	WREG32(TC_CFG_L1_VOLATILE, 0);
	WREG32(TC_CFG_L2_VOLATILE, 0);

	/* Kaveri APUs must not bypass the VM (clear BYPASS_VM) */
	if (rdev->family == CHIP_KAVERI) {
		u32 tmp = RREG32(CHUB_CONTROL);
		tmp &= ~BYPASS_VM;
		WREG32(CHUB_CONTROL, tmp);
	}

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&rdev->srbm_mutex);
	/* program the per-VMID SH_MEM/SDMA regs for all 16 contexts */
	for (i = 0; i < 16; i++) {
		cik_srbm_select(rdev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(SH_MEM_CONFIG, 0);
		WREG32(SH_MEM_APE1_BASE, 1);
		WREG32(SH_MEM_APE1_LIMIT, 0);
		WREG32(SH_MEM_BASES, 0);
		/* SDMA GFX */
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
		/* XXX SDMA RLC - todo */
	}
	/* restore SRBM selection back to VMID 0 */
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);

	cik_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
5177
5178 /**
5179  * cik_pcie_gart_disable - gart disable
5180  *
5181  * @rdev: radeon_device pointer
5182  *
5183  * This disables all VM page table (CIK).
5184  */
static void cik_pcie_gart_disable(struct radeon_device *rdev)
{
	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control: L1 TLB left disabled (no ENABLE_L1_TLB bit) */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache: caching disabled (no ENABLE_L2_CACHE bit) */
	WREG32(VM_L2_CNTL,
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	/* release the pinned page-table BO */
	radeon_gart_table_vram_unpin(rdev);
}
5205
5206 /**
5207  * cik_pcie_gart_fini - vm fini callback
5208  *
5209  * @rdev: radeon_device pointer
5210  *
5211  * Tears down the driver GART/VM setup (CIK).
5212  */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
	/* order matters: disable the hw GART first, then free the table BO
	 * and the gart bookkeeping
	 */
	cik_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
5219
5220 /* vm parser */
5221 /**
5222  * cik_ib_parse - vm ib_parse callback
5223  *
5224  * @rdev: radeon_device pointer
5225  * @ib: indirect buffer pointer
5226  *
5227  * CIK uses hw IB checking so this is a nop (CIK).
5228  */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	/* CIK relies on hardware IB validation, so there is nothing to
	 * check here; always report success.
	 */
	return 0;
}
5233
5234 /*
5235  * vm
5236  * VMID 0 is the physical GPU addresses as used by the kernel.
5237  * VMIDs 1-15 are used for userspace clients and are handled
5238  * by the radeon vm/hsa code.
5239  */
5240 /**
5241  * cik_vm_init - cik vm init callback
5242  *
5243  * @rdev: radeon_device pointer
5244  *
5245  * Inits cik specific vm parameters (number of VMs, base of vram for
5246  * VMIDs 1-15) (CIK).
5247  * Returns 0 for success.
5248  */
5249 int cik_vm_init(struct radeon_device *rdev)
5250 {
5251         /* number of VMs */
5252         rdev->vm_manager.nvm = 16;
5253         /* base offset of vram pages */
5254         if (rdev->flags & RADEON_IS_IGP) {
5255                 u64 tmp = RREG32(MC_VM_FB_OFFSET);
5256                 tmp <<= 22;
5257                 rdev->vm_manager.vram_base_offset = tmp;
5258         } else
5259                 rdev->vm_manager.vram_base_offset = 0;
5260
5261         return 0;
5262 }
5263
5264 /**
5265  * cik_vm_fini - cik vm fini callback
5266  *
5267  * @rdev: radeon_device pointer
5268  *
5269  * Tear down any asic specific VM setup (CIK).
5270  */
void cik_vm_fini(struct radeon_device *rdev)
{
	/* nothing to tear down: cik_vm_init() only caches values in
	 * rdev->vm_manager and allocates no resources
	 */
}
5274
5275 /**
5276  * cik_vm_decode_fault - print human readable fault info
5277  *
5278  * @rdev: radeon_device pointer
5279  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5280  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5281  *
5282  * Print human readable fault information (CIK).
5283  */
5284 static void cik_vm_decode_fault(struct radeon_device *rdev,
5285                                 u32 status, u32 addr, u32 mc_client)
5286 {
5287         u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5288         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5289         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5290         char *block = (char *)&mc_client;
5291
5292         printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
5293                protections, vmid, addr,
5294                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5295                block, mc_id);
5296 }
5297
5298 /**
5299  * cik_vm_flush - cik vm flush using the CP
5300  *
5301  * @rdev: radeon_device pointer
5302  *
5303  * Update the page table base and flush the VM TLB
5304  * using the CP (CIK).
5305  */
void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	/* point the context's page directory at this VM's tables;
	 * contexts 0-7 and 8-15 live in two separate register banks
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	if (vm->id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* update SH_MEM_* regs */
	/* first select this VM's SRBM bank via SRBM_GFX_CNTL */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(vm->id));

	/* one WRITE_DATA covers the four consecutive SH_MEM_* registers */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SH_MEM_BASES >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */

	/* switch the SRBM bank back to VMID 0 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(0));

	/* HDP flush */
	/* We should be using the WAIT_REG_MEM packet here like in
	 * cik_fence_ring_emit(), but it causes the CP to hang in this
	 * context...
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);

	/* bits 0-15 are the VM contexts0-15; invalidate only this VM's */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm->id);

	/* compute doesn't have PFP */
	if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		radeon_ring_write(ring, 0x0);
	}
}
5379
5380 /**
5381  * cik_vm_set_page - update the page tables using sDMA
5382  *
5383  * @rdev: radeon_device pointer
5384  * @ib: indirect buffer to fill with commands
5385  * @pe: addr of the page entry
5386  * @addr: dst addr to write into pe
5387  * @count: number of page entries to update
5388  * @incr: increase next addr by incr bytes
5389  * @flags: access flags
5390  *
5391  * Update the page tables using CP or sDMA (CIK).
5392  */
void cik_vm_set_page(struct radeon_device *rdev,
		     struct radeon_ib *ib,
		     uint64_t pe,
		     uint64_t addr, unsigned count,
		     uint32_t incr, uint32_t flags)
{
	/* translate generic RADEON_VM_PAGE_* flags into hw PTE bits */
	uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
	uint64_t value;
	unsigned ndw;

	if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
		/* CP */
		while (count) {
			/* 2 header dwords + 2 dwords per entry, capped at the
			 * WRITE_DATA packet's maximum; the loop below emits
			 * (ndw - 2) / 2 entries per packet
			 */
			ndw = 2 + count * 2;
			if (ndw > 0x3FFE)
				ndw = 0x3FFE;

			ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
			ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
						    WRITE_DATA_DST_SEL(1));
			ib->ptr[ib->length_dw++] = pe;
			ib->ptr[ib->length_dw++] = upper_32_bits(pe);
			for (; ndw > 2; ndw -= 2, --count, pe += 8) {
				if (flags & RADEON_VM_PAGE_SYSTEM) {
					/* system pages are mapped through the GART */
					value = radeon_vm_map_gart(rdev, addr);
					value &= 0xFFFFFFFFFFFFF000ULL;
				} else if (flags & RADEON_VM_PAGE_VALID) {
					value = addr;
				} else {
					/* invalid entry */
					value = 0;
				}
				addr += incr;
				value |= r600_flags;
				ib->ptr[ib->length_dw++] = value;
				ib->ptr[ib->length_dw++] = upper_32_bits(value);
			}
		}
	} else {
		/* DMA */
		if (flags & RADEON_VM_PAGE_SYSTEM) {
			while (count) {
				/* 2 dwords per entry, capped at the packet max */
				ndw = count * 2;
				if (ndw > 0xFFFFE)
					ndw = 0xFFFFE;

				/* for non-physically contiguous pages (system) */
				ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
				ib->ptr[ib->length_dw++] = pe;
				ib->ptr[ib->length_dw++] = upper_32_bits(pe);
				ib->ptr[ib->length_dw++] = ndw;
				for (; ndw > 0; ndw -= 2, --count, pe += 8) {
					if (flags & RADEON_VM_PAGE_SYSTEM) {
						value = radeon_vm_map_gart(rdev, addr);
						value &= 0xFFFFFFFFFFFFF000ULL;
					} else if (flags & RADEON_VM_PAGE_VALID) {
						value = addr;
					} else {
						value = 0;
					}
					addr += incr;
					value |= r600_flags;
					ib->ptr[ib->length_dw++] = value;
					ib->ptr[ib->length_dw++] = upper_32_bits(value);
				}
			}
		} else {
			while (count) {
				ndw = count;
				if (ndw > 0x7FFFF)
					ndw = 0x7FFFF;

				if (flags & RADEON_VM_PAGE_VALID)
					value = addr;
				else
					value = 0;
				/* for physically contiguous pages (vram):
				 * the engine generates the PTEs itself
				 */
				ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
				ib->ptr[ib->length_dw++] = pe; /* dst addr */
				ib->ptr[ib->length_dw++] = upper_32_bits(pe);
				ib->ptr[ib->length_dw++] = r600_flags; /* mask */
				ib->ptr[ib->length_dw++] = 0;
				ib->ptr[ib->length_dw++] = value; /* value */
				ib->ptr[ib->length_dw++] = upper_32_bits(value);
				ib->ptr[ib->length_dw++] = incr; /* increment size */
				ib->ptr[ib->length_dw++] = 0;
				ib->ptr[ib->length_dw++] = ndw; /* number of entries */
				pe += ndw * 8;
				addr += ndw * incr;
				count -= ndw;
			}
		}
		/* pad the SDMA IB to a multiple of 8 dwords with NOPs */
		while (ib->length_dw & 0x7)
			ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
	}
}
5488
5489 /**
5490  * cik_dma_vm_flush - cik vm flush using sDMA
5491  *
5492  * @rdev: radeon_device pointer
5493  *
5494  * Update the page table base and flush the VM TLB
5495  * using sDMA (CIK).
5496  */
void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];
	u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
			  SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
	u32 ref_and_mask;

	if (vm == NULL)
		return;

	/* pick the HDP-flush-done bit for whichever SDMA engine this ring is */
	if (ridx == R600_RING_TYPE_DMA_INDEX)
		ref_and_mask = SDMA0;
	else
		ref_and_mask = SDMA1;

	/* set this context's page directory base; contexts 0-7 and 8-15
	 * live in two separate register banks
	 */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	if (vm->id < 8) {
		radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
	} else {
		radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* update SH_MEM_* regs */
	/* select this VM's SRBM bank before writing the per-VMID regs */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, VMID(vm->id));

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SH_MEM_BASES >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SH_MEM_CONFIG >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SH_MEM_APE1_BASE >> 2);
	radeon_ring_write(ring, 1);

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SH_MEM_APE1_LIMIT >> 2);
	radeon_ring_write(ring, 0);

	/* switch the SRBM bank back to VMID 0 */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, VMID(0));

	/* flush HDP: poll GPU_HDP_FLUSH_DONE until this engine's bit matches */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
	radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
	radeon_ring_write(ring, ref_and_mask); /* MASK */
	radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */

	/* flush TLB for this VM's context only (bit per VMID) */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 1 << vm->id);
}
5558
5559 /*
5560  * RLC
5561  * The RLC is a multi-purpose microengine that handles a
5562  * variety of functions, the most important of which is
5563  * the interrupt controller.
5564  */
5565 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5566                                           bool enable)
5567 {
5568         u32 tmp = RREG32(CP_INT_CNTL_RING0);
5569
5570         if (enable)
5571                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5572         else
5573                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5574         WREG32(CP_INT_CNTL_RING0, tmp);
5575 }
5576
5577 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5578 {
5579         u32 tmp;
5580
5581         tmp = RREG32(RLC_LB_CNTL);
5582         if (enable)
5583                 tmp |= LOAD_BALANCE_ENABLE;
5584         else
5585                 tmp &= ~LOAD_BALANCE_ENABLE;
5586         WREG32(RLC_LB_CNTL, tmp);
5587 }
5588
static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
{
	u32 i, j, k;
	u32 mask;

	/* per SE/SH: wait (bounded by usec_timeout) for the CU master
	 * serdes to report idle
	 */
	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			for (k = 0; k < rdev->usec_timeout; k++) {
				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* restore broadcast selection (all SEs/SHs) */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* then wait for the non-CU masters (SE/GC/TC0/TC1) to go idle too */
	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
	for (k = 0; k < rdev->usec_timeout; k++) {
		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
5613
5614 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5615 {
5616         u32 tmp;
5617
5618         tmp = RREG32(RLC_CNTL);
5619         if (tmp != rlc)
5620                 WREG32(RLC_CNTL, rlc);
5621 }
5622
static u32 cik_halt_rlc(struct radeon_device *rdev)
{
	u32 data, orig;

	orig = data = RREG32(RLC_CNTL);

	if (data & RLC_ENABLE) {
		u32 i;

		/* clear the enable bit, then wait (bounded by usec_timeout)
		 * for the RLC GPM block to go idle
		 */
		data &= ~RLC_ENABLE;
		WREG32(RLC_CNTL, data);

		for (i = 0; i < rdev->usec_timeout; i++) {
			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
				break;
			udelay(1);
		}

		cik_wait_for_rlc_serdes(rdev);
	}

	/* return the original RLC_CNTL so the caller can restore it later
	 * (see cik_update_rlc)
	 */
	return orig;
}
5646
void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
{
	u32 tmp, i, mask;

	/* post the enter-safe-mode request to the RLC via RLC_GPR_REG2 */
	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
	WREG32(RLC_GPR_REG2, tmp);

	/* wait for both power and clock status bits to assert */
	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
			break;
		udelay(1);
	}

	/* wait for the RLC to ack the request (REQ bit cleared) */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
			break;
		udelay(1);
	}
}
5667
5668 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5669 {
5670         u32 tmp;
5671
5672         tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5673         WREG32(RLC_GPR_REG2, tmp);
5674 }
5675
5676 /**
5677  * cik_rlc_stop - stop the RLC ME
5678  *
5679  * @rdev: radeon_device pointer
5680  *
5681  * Halt the RLC ME (MicroEngine) (CIK).
5682  */
static void cik_rlc_stop(struct radeon_device *rdev)
{
	/* clear RLC_CNTL (including the enable bit) to halt the engine */
	WREG32(RLC_CNTL, 0);

	cik_enable_gui_idle_interrupt(rdev, false);

	/* wait for all serdes masters to report idle before returning */
	cik_wait_for_rlc_serdes(rdev);
}
5691
5692 /**
5693  * cik_rlc_start - start the RLC ME
5694  *
5695  * @rdev: radeon_device pointer
5696  *
5697  * Unhalt the RLC ME (MicroEngine) (CIK).
5698  */
static void cik_rlc_start(struct radeon_device *rdev)
{
	/* set the enable bit to unhalt the RLC microengine */
	WREG32(RLC_CNTL, RLC_ENABLE);

	cik_enable_gui_idle_interrupt(rdev, true);

	/* brief settle delay after enabling */
	udelay(50);
}
5707
5708 /**
5709  * cik_rlc_resume - setup the RLC hw
5710  *
5711  * @rdev: radeon_device pointer
5712  *
5713  * Initialize the RLC registers, load the ucode,
5714  * and start the RLC (CIK).
5715  * Returns 0 for success, -EINVAL if the ucode is not available.
5716  */
5717 static int cik_rlc_resume(struct radeon_device *rdev)
5718 {
5719         u32 i, size, tmp;
5720         const __be32 *fw_data;
5721
5722         if (!rdev->rlc_fw)
5723                 return -EINVAL;
5724
5725         switch (rdev->family) {
5726         case CHIP_BONAIRE:
5727         default:
5728                 size = BONAIRE_RLC_UCODE_SIZE;
5729                 break;
5730         case CHIP_KAVERI:
5731                 size = KV_RLC_UCODE_SIZE;
5732                 break;
5733         case CHIP_KABINI:
5734                 size = KB_RLC_UCODE_SIZE;
5735                 break;
5736         }
5737
5738         cik_rlc_stop(rdev);
5739
5740         /* disable CG */
5741         tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5742         WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5743
5744         si_rlc_reset(rdev);
5745
5746         cik_init_pg(rdev);
5747
5748         cik_init_cg(rdev);
5749
5750         WREG32(RLC_LB_CNTR_INIT, 0);
5751         WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5752
5753         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5754         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5755         WREG32(RLC_LB_PARAMS, 0x00600408);
5756         WREG32(RLC_LB_CNTL, 0x80000004);
5757
5758         WREG32(RLC_MC_CNTL, 0);
5759         WREG32(RLC_UCODE_CNTL, 0);
5760
5761         fw_data = (const __be32 *)rdev->rlc_fw->data;
5762                 WREG32(RLC_GPM_UCODE_ADDR, 0);
5763         for (i = 0; i < size; i++)
5764                 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5765         WREG32(RLC_GPM_UCODE_ADDR, 0);
5766
5767         /* XXX - find out what chips support lbpw */
5768         cik_enable_lbpw(rdev, false);
5769
5770         if (rdev->family == CHIP_BONAIRE)
5771                 WREG32(RLC_DRIVER_DMA_STATUS, 0);
5772
5773         cik_rlc_start(rdev);
5774
5775         return 0;
5776 }
5777
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp, tmp2;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	cik_enable_gui_idle_interrupt(rdev, enable);

	if (enable) {
		/* halt the RLC while reprogramming the serdes, restore after */
		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
		WREG32(RLC_SERDES_WR_CTRL, tmp2);

		cik_update_rlc(rdev, tmp);

		data |= CGCG_EN | CGLS_EN;
	} else {
		/* NOTE(review): the back-to-back reads look like a
		 * settle/flush delay before clearing the CG bits --
		 * confirm intent
		 */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	/* only write back if the value actually changed */
	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);

}
5811
/* Enable/disable GFX medium-grain clock gating (MGCG) and the related
 * CP/RLC memory light-sleep bits.  Like CGCG, the RLC must be halted
 * while the SERDES broadcast registers are written.
 */
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable) {
		/* enable CP memory light sleep */
		orig = data = RREG32(CP_MEM_SLP_CNTL);
		data |= CP_MEM_LS_EN;
		if (orig != data)
			WREG32(CP_MEM_SLP_CNTL, data);

		/* clear bit 1 of the MGCG override */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data &= 0xfffffffd;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* halt the RLC before touching the SERDES registers */
		tmp = cik_halt_rlc(rdev);

		/* broadcast to all SEs/SHs */
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
		WREG32(RLC_SERDES_WR_CTRL, data);

		cik_update_rlc(rdev, tmp);

		/* program the CGTS shader-monitor mode and clear overrides */
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data &= ~SM_MODE_MASK;
		data |= SM_MODE(0x2);
		data |= SM_MODE_ENABLE;
		data &= ~CGTS_OVERRIDE;
		data &= ~CGTS_LS_OVERRIDE;
		data &= ~ON_MONITOR_ADD_MASK;
		data |= ON_MONITOR_ADD_EN;
		data |= ON_MONITOR_ADD(0x96);
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);
	} else {
		/* set bit 1 of the MGCG override */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000002;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* take RLC memory out of light sleep */
		data = RREG32(RLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_LS_EN) {
			data &= ~RLC_MEM_LS_EN;
			WREG32(RLC_MEM_SLP_CNTL, data);
		}

		/* take CP memory out of light sleep */
		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}

		/* force the CGTS overrides on */
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
		WREG32(RLC_SERDES_WR_CTRL, data);

		cik_update_rlc(rdev, tmp);
	}
}
5882
/* Memory-controller registers that carry the per-block clock-gating
 * (MC_CG_ENABLE) and light-sleep (MC_LS_ENABLE) bits; iterated by
 * cik_enable_mc_mgcg() and cik_enable_mc_ls().
 */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
5895
5896 static void cik_enable_mc_ls(struct radeon_device *rdev,
5897                              bool enable)
5898 {
5899         int i;
5900         u32 orig, data;
5901
5902         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5903                 orig = data = RREG32(mc_cg_registers[i]);
5904                 if (enable)
5905                         data |= MC_LS_ENABLE;
5906                 else
5907                         data &= ~MC_LS_ENABLE;
5908                 if (data != orig)
5909                         WREG32(mc_cg_registers[i], data);
5910         }
5911 }
5912
5913 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
5914                                bool enable)
5915 {
5916         int i;
5917         u32 orig, data;
5918
5919         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5920                 orig = data = RREG32(mc_cg_registers[i]);
5921                 if (enable)
5922                         data |= MC_CG_ENABLE;
5923                 else
5924                         data &= ~MC_CG_ENABLE;
5925                 if (data != orig)
5926                         WREG32(mc_cg_registers[i], data);
5927         }
5928 }
5929
5930 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
5931                                  bool enable)
5932 {
5933         u32 orig, data;
5934
5935         if (enable) {
5936                 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
5937                 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
5938         } else {
5939                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
5940                 data |= 0xff000000;
5941                 if (data != orig)
5942                         WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
5943
5944                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
5945                 data |= 0xff000000;
5946                 if (data != orig)
5947                         WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
5948         }
5949 }
5950
5951 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
5952                                  bool enable)
5953 {
5954         u32 orig, data;
5955
5956         if (enable) {
5957                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5958                 data |= 0x100;
5959                 if (orig != data)
5960                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5961
5962                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5963                 data |= 0x100;
5964                 if (orig != data)
5965                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
5966         } else {
5967                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5968                 data &= ~0x100;
5969                 if (orig != data)
5970                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5971
5972                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5973                 data &= ~0x100;
5974                 if (orig != data)
5975                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
5976         }
5977 }
5978
5979 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
5980                                 bool enable)
5981 {
5982         u32 orig, data;
5983
5984         if (enable) {
5985                 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5986                 data = 0xfff;
5987                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
5988
5989                 orig = data = RREG32(UVD_CGC_CTRL);
5990                 data |= DCM;
5991                 if (orig != data)
5992                         WREG32(UVD_CGC_CTRL, data);
5993         } else {
5994                 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5995                 data &= ~0xfff;
5996                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
5997
5998                 orig = data = RREG32(UVD_CGC_CTRL);
5999                 data &= ~DCM;
6000                 if (orig != data)
6001                         WREG32(UVD_CGC_CTRL, data);
6002         }
6003 }
6004
6005 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6006                                 bool enable)
6007 {
6008         u32 orig, data;
6009
6010         orig = data = RREG32(HDP_HOST_PATH_CNTL);
6011
6012         if (enable)
6013                 data &= ~CLOCK_GATING_DIS;
6014         else
6015                 data |= CLOCK_GATING_DIS;
6016
6017         if (orig != data)
6018                 WREG32(HDP_HOST_PATH_CNTL, data);
6019 }
6020
6021 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6022                               bool enable)
6023 {
6024         u32 orig, data;
6025
6026         orig = data = RREG32(HDP_MEM_POWER_LS);
6027
6028         if (enable)
6029                 data |= HDP_LS_ENABLE;
6030         else
6031                 data &= ~HDP_LS_ENABLE;
6032
6033         if (orig != data)
6034                 WREG32(HDP_MEM_POWER_LS, data);
6035 }
6036
/**
 * cik_update_cg - enable/disable clock gating for the requested blocks
 *
 * @rdev: radeon_device pointer
 * @block: bitmask of RADEON_CG_BLOCK_* blocks to update
 * @enable: enable or disable gating
 *
 * Dispatches to the per-block helpers.  For GFX, MGCG is enabled
 * before CGCG but disabled after it.
 */
void cik_update_cg(struct radeon_device *rdev,
		   u32 block, bool enable)
{
	if (block & RADEON_CG_BLOCK_GFX) {
		/* order matters! */
		if (enable) {
			cik_enable_mgcg(rdev, true);
			cik_enable_cgcg(rdev, true);
		} else {
			cik_enable_cgcg(rdev, false);
			cik_enable_mgcg(rdev, false);
		}
	}

	if (block & RADEON_CG_BLOCK_MC) {
		/* MC gating is only programmed on dGPUs, not APUs */
		if (!(rdev->flags & RADEON_IS_IGP)) {
			cik_enable_mc_mgcg(rdev, enable);
			cik_enable_mc_ls(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		cik_enable_sdma_mgcg(rdev, enable);
		cik_enable_sdma_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		if (rdev->has_uvd)
			cik_enable_uvd_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		cik_enable_hdp_mgcg(rdev, enable);
		cik_enable_hdp_ls(rdev, enable);
	}
}
6073
/* One-time clock-gating init: GFX gating is left disabled for now
 * (see the XXX), UVD internal CG is set up on parts that have UVD,
 * then MC/SDMA/UVD/HDP gating is enabled. */
static void cik_init_cg(struct radeon_device *rdev)
{

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false); /* XXX true */

	if (rdev->has_uvd)
		si_init_uvd_internal_cg(rdev);

	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), true);
}
6087
6088 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6089                                           bool enable)
6090 {
6091         u32 data, orig;
6092
6093         orig = data = RREG32(RLC_PG_CNTL);
6094         if (enable)
6095                 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6096         else
6097                 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6098         if (orig != data)
6099                 WREG32(RLC_PG_CNTL, data);
6100 }
6101
6102 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6103                                           bool enable)
6104 {
6105         u32 data, orig;
6106
6107         orig = data = RREG32(RLC_PG_CNTL);
6108         if (enable)
6109                 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6110         else
6111                 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6112         if (orig != data)
6113                 WREG32(RLC_PG_CNTL, data);
6114 }
6115
6116 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6117 {
6118         u32 data, orig;
6119
6120         orig = data = RREG32(RLC_PG_CNTL);
6121         if (enable)
6122                 data &= ~DISABLE_CP_PG;
6123         else
6124                 data |= DISABLE_CP_PG;
6125         if (orig != data)
6126                 WREG32(RLC_PG_CNTL, data);
6127 }
6128
6129 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6130 {
6131         u32 data, orig;
6132
6133         orig = data = RREG32(RLC_PG_CNTL);
6134         if (enable)
6135                 data &= ~DISABLE_GDS_PG;
6136         else
6137                 data |= DISABLE_GDS_PG;
6138         if (orig != data)
6139                 WREG32(RLC_PG_CNTL, data);
6140 }
6141
/* Size and offsets (in dwords) of the PG table section inside the CP
 * firmware images. */
#define CP_ME_TABLE_SIZE    96
#define CP_ME_TABLE_OFFSET  2048
#define CP_MEC_TABLE_OFFSET 4096

/**
 * cik_init_cp_pg_table - populate the RLC CP power-gating table
 *
 * @rdev: radeon_device pointer
 *
 * Copies the PG table section out of each CP firmware image (CE, PFP,
 * ME, MEC — plus a fifth engine on Kaveri) into the mapped
 * rlc.cp_table_ptr buffer, byte-swapping from the big-endian firmware.
 * No-op if the table buffer has not been mapped.
 */
void cik_init_cp_pg_table(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset;

	if (rdev->family == CHIP_KAVERI)
		max_me = 5;

	if (rdev->rlc.cp_table_ptr == NULL)
		return;

	/* write the cp table buffer */
	dst_ptr = rdev->rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (me == 0) {
			fw_data = (const __be32 *)rdev->ce_fw->data;
			table_offset = CP_ME_TABLE_OFFSET;
		} else if (me == 1) {
			fw_data = (const __be32 *)rdev->pfp_fw->data;
			table_offset = CP_ME_TABLE_OFFSET;
		} else if (me == 2) {
			fw_data = (const __be32 *)rdev->me_fw->data;
			table_offset = CP_ME_TABLE_OFFSET;
		} else {
			/* me == 3 (and 4 on Kaveri): MEC firmware */
			fw_data = (const __be32 *)rdev->mec_fw->data;
			table_offset = CP_MEC_TABLE_OFFSET;
		}

		for (i = 0; i < CP_ME_TABLE_SIZE; i ++) {
			dst_ptr[bo_offset + i] = be32_to_cpu(fw_data[table_offset + i]);
		}
		bo_offset += CP_ME_TABLE_SIZE;
	}
}
6183
/* Enable/disable GFX coarse-grain power gating together with the RLC
 * auto-powergating trigger. */
static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
				bool enable)
{
	u32 data, orig;

	if (enable) {
		orig = data = RREG32(RLC_PG_CNTL);
		data |= GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data |= AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);
	} else {
		orig = data = RREG32(RLC_PG_CNTL);
		data &= ~GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data &= ~AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);

		/* result unused; presumably the read forces the disable to
		 * post before returning — TODO confirm */
		data = RREG32(DB_RENDER_CONTROL);
	}
}
6213
/* Return a bitmap of the active CUs in the given shader engine/array;
 * a set bit means the CU is usable.  The two config registers flag
 * *inactive* CUs, hence the inversion at the end. */
static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
{
	u32 mask = 0, tmp, tmp1;
	int i;

	/* point GRBM at the requested SE/SH, then restore broadcast mode */
	cik_select_se_sh(rdev, se, sh);
	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* keep only the upper half of the HW-fuse register */
	tmp &= 0xffff0000;

	/* merge user-disabled CUs, shift the bits down to position 0 */
	tmp |= tmp1;
	tmp >>= 16;

	/* build a mask with max_cu_per_sh low bits set */
	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
		mask <<= 1;
		mask |= 1;
	}

	return (~tmp) & mask;
}
6236
6237 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6238 {
6239         u32 i, j, k, active_cu_number = 0;
6240         u32 mask, counter, cu_bitmap;
6241         u32 tmp = 0;
6242
6243         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6244                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6245                         mask = 1;
6246                         cu_bitmap = 0;
6247                         counter = 0;
6248                         for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6249                                 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6250                                         if (counter < 2)
6251                                                 cu_bitmap |= mask;
6252                                         counter ++;
6253                                 }
6254                                 mask <<= 1;
6255                         }
6256
6257                         active_cu_number += counter;
6258                         tmp |= (cu_bitmap << (i * 16 + j * 8));
6259                 }
6260         }
6261
6262         WREG32(RLC_PG_AO_CU_MASK, tmp);
6263
6264         tmp = RREG32(RLC_MAX_PG_CU);
6265         tmp &= ~MAX_PU_CU_MASK;
6266         tmp |= MAX_PU_CU(active_cu_number);
6267         WREG32(RLC_MAX_PG_CU, tmp);
6268 }
6269
6270 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6271                                        bool enable)
6272 {
6273         u32 data, orig;
6274
6275         orig = data = RREG32(RLC_PG_CNTL);
6276         if (enable)
6277                 data |= STATIC_PER_CU_PG_ENABLE;
6278         else
6279                 data &= ~STATIC_PER_CU_PG_ENABLE;
6280         if (orig != data)
6281                 WREG32(RLC_PG_CNTL, data);
6282 }
6283
6284 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6285                                         bool enable)
6286 {
6287         u32 data, orig;
6288
6289         orig = data = RREG32(RLC_PG_CNTL);
6290         if (enable)
6291                 data |= DYN_PER_CU_PG_ENABLE;
6292         else
6293                 data &= ~DYN_PER_CU_PG_ENABLE;
6294         if (orig != data)
6295                 WREG32(RLC_PG_CNTL, data);
6296 }
6297
/* Offsets (in dwords) into RLC GPM scratch where the save/restore list
 * and the clear-state descriptor are written. */
#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D

/* One-time setup of GFX coarse-grain power gating: writes the
 * clear-state descriptor and the save/restore register list into RLC
 * scratch, points the RLC at the save/restore and CP table buffers,
 * and programs the various PG delay/threshold values. */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 data, orig;
	u32 i;

	if (rdev->rlc.cs_data) {
		/* clear-state descriptor: hi addr, lo addr, size */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_gpu_addr);
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
	} else {
		/* no clear-state buffer: zero out the descriptor */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		for (i = 0; i < 3; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, 0);
	}
	if (rdev->rlc.reg_list) {
		/* upload the save/restore register list */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
		for (i = 0; i < rdev->rlc.reg_list_size; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
	}

	orig = data = RREG32(RLC_PG_CNTL);
	data |= GFX_PG_SRC;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);

	/* buffers are 256-byte aligned, hence the >> 8 */
	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

	data = RREG32(CP_RB_WPTR_POLL_CNTL);
	data &= ~IDLE_POLL_COUNT_MASK;
	data |= IDLE_POLL_COUNT(0x60);
	WREG32(CP_RB_WPTR_POLL_CNTL, data);

	data = 0x10101010;
	WREG32(RLC_PG_DELAY, data);

	data = RREG32(RLC_PG_DELAY_2);
	data &= ~0xff;
	data |= 0x3;
	WREG32(RLC_PG_DELAY_2, data);

	data = RREG32(RLC_AUTO_PG_CTRL);
	data &= ~GRBM_REG_SGIT_MASK;
	data |= GRBM_REG_SGIT(0x700);
	WREG32(RLC_AUTO_PG_CTRL, data);

}
6349
6350 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
6351 {
6352         bool has_pg = false;
6353         bool has_dyn_mgpg = false;
6354         bool has_static_mgpg = false;
6355
6356         /* only APUs have PG */
6357         if (rdev->flags & RADEON_IS_IGP) {
6358                 has_pg = true;
6359                 has_static_mgpg = true;
6360                 if (rdev->family == CHIP_KAVERI)
6361                         has_dyn_mgpg = true;
6362         }
6363
6364         if (has_pg) {
6365                 cik_enable_gfx_cgpg(rdev, enable);
6366                 if (enable) {
6367                         cik_enable_gfx_static_mgpg(rdev, has_static_mgpg);
6368                         cik_enable_gfx_dynamic_mgpg(rdev, has_dyn_mgpg);
6369                 } else {
6370                         cik_enable_gfx_static_mgpg(rdev, false);
6371                         cik_enable_gfx_dynamic_mgpg(rdev, false);
6372                 }
6373         }
6374
6375 }
6376
/**
 * cik_init_pg - one-time power-gating setup
 *
 * @rdev: radeon_device pointer
 *
 * Only APUs support PG, and it is currently disabled entirely
 * (has_pg is never set true), so this is a no-op for now.
 */
void cik_init_pg(struct radeon_device *rdev)
{
	bool has_pg = false;

	/* only APUs have PG */
	if (rdev->flags & RADEON_IS_IGP) {
		/* XXX disable this for now */
		/* has_pg = true; */
	}

	if (has_pg) {
		cik_enable_sck_slowdown_on_pu(rdev, true);
		cik_enable_sck_slowdown_on_pd(rdev, true);
		cik_init_gfx_cgpg(rdev);
		cik_enable_cp_pg(rdev, true);
		cik_enable_gds_pg(rdev, true);
		cik_init_ao_cu_mask(rdev);
		cik_update_gfx_pg(rdev, true);
	}
}
6397
6398 /*
6399  * Interrupts
6400  * Starting with r6xx, interrupts are handled via a ring buffer.
6401  * Ring buffers are areas of GPU accessible memory that the GPU
6402  * writes interrupt vectors into and the host reads vectors out of.
6403  * There is a rptr (read pointer) that determines where the
6404  * host is currently reading, and a wptr (write pointer)
6405  * which determines where the GPU has written.  When the
6406  * pointers are equal, the ring is idle.  When the GPU
6407  * writes vectors to the ring buffer, it increments the
6408  * wptr.  When there is an interrupt, the host then starts
6409  * fetching commands and processing them until the pointers are
6410  * equal again at which point it updates the rptr.
6411  */
6412
6413 /**
6414  * cik_enable_interrupts - Enable the interrupt ring buffer
6415  *
6416  * @rdev: radeon_device pointer
6417  *
6418  * Enable the interrupt ring buffer (CIK).
6419  */
6420 static void cik_enable_interrupts(struct radeon_device *rdev)
6421 {
6422         u32 ih_cntl = RREG32(IH_CNTL);
6423         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6424
6425         ih_cntl |= ENABLE_INTR;
6426         ih_rb_cntl |= IH_RB_ENABLE;
6427         WREG32(IH_CNTL, ih_cntl);
6428         WREG32(IH_RB_CNTL, ih_rb_cntl);
6429         rdev->ih.enabled = true;
6430 }
6431
6432 /**
6433  * cik_disable_interrupts - Disable the interrupt ring buffer
6434  *
6435  * @rdev: radeon_device pointer
6436  *
6437  * Disable the interrupt ring buffer (CIK).
6438  */
6439 static void cik_disable_interrupts(struct radeon_device *rdev)
6440 {
6441         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6442         u32 ih_cntl = RREG32(IH_CNTL);
6443
6444         ih_rb_cntl &= ~IH_RB_ENABLE;
6445         ih_cntl &= ~ENABLE_INTR;
6446         WREG32(IH_RB_CNTL, ih_rb_cntl);
6447         WREG32(IH_CNTL, ih_cntl);
6448         /* set rptr, wptr to 0 */
6449         WREG32(IH_RB_RPTR, 0);
6450         WREG32(IH_RB_WPTR, 0);
6451         rdev->ih.enabled = false;
6452         rdev->ih.rptr = 0;
6453 }
6454
/**
 * cik_disable_interrupt_state - Disable all interrupt sources
 *
 * @rdev: radeon_device pointer
 *
 * Clear all interrupt enable bits used by the driver (CIK).
 */
static void cik_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* gfx ring */
	WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	/* sdma: clear only the trap enable, preserve the rest */
	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* compute queues */
	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
	/* grbm */
	WREG32(GRBM_INT_CNTL, 0);
	/* vline/vblank, etc. — only touch CRTCs the asic actually has */
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* dac hotplug */
	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

	/* digital hotplug: keep the configured polarity, drop everything else */
	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD1_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD2_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD3_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD4_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD5_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD6_INT_CONTROL, tmp);

}
6514
/**
 * cik_irq_init - init and enable the interrupt ring
 *
 * @rdev: radeon_device pointer
 *
 * Allocate a ring buffer for the interrupt controller,
 * enable the RLC, disable interrupts, enable the IH
 * ring buffer and enable it (CIK).
 * Called at device load and resume.
 * Returns 0 for success, errors for failure.
 */
static int cik_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	cik_disable_interrupts(rdev);

	/* init rlc */
	ret = cik_rlc_resume(rdev);
	if (ret) {
		/* undo the ring allocation on failure */
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* XXX this should actually be a bus address, not an MC address. same on older asics */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size field is log2 of the size in dwords */
	rb_bufsz = drm_order(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	cik_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	cik_enable_interrupts(rdev);

	return ret;
}
6596
6597 /**
6598  * cik_irq_set - enable/disable interrupt sources
6599  *
6600  * @rdev: radeon_device pointer
6601  *
6602  * Enable interrupt sources on the GPU (vblanks, hpd,
6603  * etc.) (CIK).
6604  * Returns 0 for success, errors for failure.
6605  */
6606 int cik_irq_set(struct radeon_device *rdev)
6607 {
6608         u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE |
6609                 PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
6610         u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
6611         u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
6612         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6613         u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
6614         u32 grbm_int_cntl = 0;
6615         u32 dma_cntl, dma_cntl1;
6616         u32 thermal_int;
6617
6618         if (!rdev->irq.installed) {
6619                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6620                 return -EINVAL;
6621         }
6622         /* don't enable anything if the ih is disabled */
6623         if (!rdev->ih.enabled) {
6624                 cik_disable_interrupts(rdev);
6625                 /* force the active interrupt state to all disabled */
6626                 cik_disable_interrupt_state(rdev);
6627                 return 0;
6628         }
6629
6630         hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
6631         hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
6632         hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
6633         hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
6634         hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
6635         hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
6636
6637         dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6638         dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6639
6640         cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6641         cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6642         cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6643         cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6644         cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6645         cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6646         cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6647         cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6648
6649         if (rdev->flags & RADEON_IS_IGP)
6650                 thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
6651                         ~(THERM_INTH_MASK | THERM_INTL_MASK);
6652         else
6653                 thermal_int = RREG32_SMC(CG_THERMAL_INT) &
6654                         ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6655
6656         /* enable CP interrupts on all rings */
6657         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6658                 DRM_DEBUG("cik_irq_set: sw int gfx\n");
6659                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6660         }
6661         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6662                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6663                 DRM_DEBUG("si_irq_set: sw int cp1\n");
6664                 if (ring->me == 1) {
6665                         switch (ring->pipe) {
6666                         case 0:
6667                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6668                                 break;
6669                         case 1:
6670                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6671                                 break;
6672                         case 2:
6673                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6674                                 break;
6675                         case 3:
6676                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6677                                 break;
6678                         default:
6679                                 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6680                                 break;
6681                         }
6682                 } else if (ring->me == 2) {
6683                         switch (ring->pipe) {
6684                         case 0:
6685                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6686                                 break;
6687                         case 1:
6688                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6689                                 break;
6690                         case 2:
6691                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6692                                 break;
6693                         case 3:
6694                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6695                                 break;
6696                         default:
6697                                 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6698                                 break;
6699                         }
6700                 } else {
6701                         DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
6702                 }
6703         }
6704         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6705                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6706                 DRM_DEBUG("si_irq_set: sw int cp2\n");
6707                 if (ring->me == 1) {
6708                         switch (ring->pipe) {
6709                         case 0:
6710                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6711                                 break;
6712                         case 1:
6713                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6714                                 break;
6715                         case 2:
6716                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6717                                 break;
6718                         case 3:
6719                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6720                                 break;
6721                         default:
6722                                 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6723                                 break;
6724                         }
6725                 } else if (ring->me == 2) {
6726                         switch (ring->pipe) {
6727                         case 0:
6728                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6729                                 break;
6730                         case 1:
6731                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6732                                 break;
6733                         case 2:
6734                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6735                                 break;
6736                         case 3:
6737                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6738                                 break;
6739                         default:
6740                                 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6741                                 break;
6742                         }
6743                 } else {
6744                         DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
6745                 }
6746         }
6747
6748         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6749                 DRM_DEBUG("cik_irq_set: sw int dma\n");
6750                 dma_cntl |= TRAP_ENABLE;
6751         }
6752
6753         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6754                 DRM_DEBUG("cik_irq_set: sw int dma1\n");
6755                 dma_cntl1 |= TRAP_ENABLE;
6756         }
6757
6758         if (rdev->irq.crtc_vblank_int[0] ||
6759             atomic_read(&rdev->irq.pflip[0])) {
6760                 DRM_DEBUG("cik_irq_set: vblank 0\n");
6761                 crtc1 |= VBLANK_INTERRUPT_MASK;
6762         }
6763         if (rdev->irq.crtc_vblank_int[1] ||
6764             atomic_read(&rdev->irq.pflip[1])) {
6765                 DRM_DEBUG("cik_irq_set: vblank 1\n");
6766                 crtc2 |= VBLANK_INTERRUPT_MASK;
6767         }
6768         if (rdev->irq.crtc_vblank_int[2] ||
6769             atomic_read(&rdev->irq.pflip[2])) {
6770                 DRM_DEBUG("cik_irq_set: vblank 2\n");
6771                 crtc3 |= VBLANK_INTERRUPT_MASK;
6772         }
6773         if (rdev->irq.crtc_vblank_int[3] ||
6774             atomic_read(&rdev->irq.pflip[3])) {
6775                 DRM_DEBUG("cik_irq_set: vblank 3\n");
6776                 crtc4 |= VBLANK_INTERRUPT_MASK;
6777         }
6778         if (rdev->irq.crtc_vblank_int[4] ||
6779             atomic_read(&rdev->irq.pflip[4])) {
6780                 DRM_DEBUG("cik_irq_set: vblank 4\n");
6781                 crtc5 |= VBLANK_INTERRUPT_MASK;
6782         }
6783         if (rdev->irq.crtc_vblank_int[5] ||
6784             atomic_read(&rdev->irq.pflip[5])) {
6785                 DRM_DEBUG("cik_irq_set: vblank 5\n");
6786                 crtc6 |= VBLANK_INTERRUPT_MASK;
6787         }
6788         if (rdev->irq.hpd[0]) {
6789                 DRM_DEBUG("cik_irq_set: hpd 1\n");
6790                 hpd1 |= DC_HPDx_INT_EN;
6791         }
6792         if (rdev->irq.hpd[1]) {
6793                 DRM_DEBUG("cik_irq_set: hpd 2\n");
6794                 hpd2 |= DC_HPDx_INT_EN;
6795         }
6796         if (rdev->irq.hpd[2]) {
6797                 DRM_DEBUG("cik_irq_set: hpd 3\n");
6798                 hpd3 |= DC_HPDx_INT_EN;
6799         }
6800         if (rdev->irq.hpd[3]) {
6801                 DRM_DEBUG("cik_irq_set: hpd 4\n");
6802                 hpd4 |= DC_HPDx_INT_EN;
6803         }
6804         if (rdev->irq.hpd[4]) {
6805                 DRM_DEBUG("cik_irq_set: hpd 5\n");
6806                 hpd5 |= DC_HPDx_INT_EN;
6807         }
6808         if (rdev->irq.hpd[5]) {
6809                 DRM_DEBUG("cik_irq_set: hpd 6\n");
6810                 hpd6 |= DC_HPDx_INT_EN;
6811         }
6812
6813         if (rdev->irq.dpm_thermal) {
6814                 DRM_DEBUG("dpm thermal\n");
6815                 if (rdev->flags & RADEON_IS_IGP)
6816                         thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
6817                 else
6818                         thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
6819         }
6820
6821         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6822
6823         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
6824         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
6825
6826         WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
6827         WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
6828         WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
6829         WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
6830         WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
6831         WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
6832         WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
6833         WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
6834
6835         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6836
6837         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
6838         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
6839         if (rdev->num_crtc >= 4) {
6840                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
6841                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
6842         }
6843         if (rdev->num_crtc >= 6) {
6844                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
6845                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
6846         }
6847
6848         WREG32(DC_HPD1_INT_CONTROL, hpd1);
6849         WREG32(DC_HPD2_INT_CONTROL, hpd2);
6850         WREG32(DC_HPD3_INT_CONTROL, hpd3);
6851         WREG32(DC_HPD4_INT_CONTROL, hpd4);
6852         WREG32(DC_HPD5_INT_CONTROL, hpd5);
6853         WREG32(DC_HPD6_INT_CONTROL, hpd6);
6854
6855         if (rdev->flags & RADEON_IS_IGP)
6856                 WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
6857         else
6858                 WREG32_SMC(CG_THERMAL_INT, thermal_int);
6859
6860         return 0;
6861 }
6862
6863 /**
6864  * cik_irq_ack - ack interrupt sources
6865  *
6866  * @rdev: radeon_device pointer
6867  *
6868  * Ack interrupt sources on the GPU (vblanks, hpd,
6869  * etc.) (CIK).  Certain interrupts sources are sw
6870  * generated and do not require an explicit ack.
6871  */
6872 static inline void cik_irq_ack(struct radeon_device *rdev)
6873 {
6874         u32 tmp;
6875
6876         rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
6877         rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
6878         rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
6879         rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
6880         rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
6881         rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
6882         rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
6883
6884         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
6885                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
6886         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
6887                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
6888         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
6889                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6890         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6891                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
6892
6893         if (rdev->num_crtc >= 4) {
6894                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6895                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6896                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6897                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6898                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6899                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6900                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6901                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6902         }
6903
6904         if (rdev->num_crtc >= 6) {
6905                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6906                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6907                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6908                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6909                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6910                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6911                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6912                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6913         }
6914
6915         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
6916                 tmp = RREG32(DC_HPD1_INT_CONTROL);
6917                 tmp |= DC_HPDx_INT_ACK;
6918                 WREG32(DC_HPD1_INT_CONTROL, tmp);
6919         }
6920         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
6921                 tmp = RREG32(DC_HPD2_INT_CONTROL);
6922                 tmp |= DC_HPDx_INT_ACK;
6923                 WREG32(DC_HPD2_INT_CONTROL, tmp);
6924         }
6925         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6926                 tmp = RREG32(DC_HPD3_INT_CONTROL);
6927                 tmp |= DC_HPDx_INT_ACK;
6928                 WREG32(DC_HPD3_INT_CONTROL, tmp);
6929         }
6930         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6931                 tmp = RREG32(DC_HPD4_INT_CONTROL);
6932                 tmp |= DC_HPDx_INT_ACK;
6933                 WREG32(DC_HPD4_INT_CONTROL, tmp);
6934         }
6935         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6936                 tmp = RREG32(DC_HPD5_INT_CONTROL);
6937                 tmp |= DC_HPDx_INT_ACK;
6938                 WREG32(DC_HPD5_INT_CONTROL, tmp);
6939         }
6940         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6941                 tmp = RREG32(DC_HPD5_INT_CONTROL);
6942                 tmp |= DC_HPDx_INT_ACK;
6943                 WREG32(DC_HPD6_INT_CONTROL, tmp);
6944         }
6945 }
6946
/**
 * cik_irq_disable - disable interrupts
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw (CIK).
 * Turns off the IH ring, acks anything still pending,
 * and forces all interrupt enable bits to disabled.
 */
static void cik_irq_disable(struct radeon_device *rdev)
{
	cik_disable_interrupts(rdev);
	/* give in-flight interrupts time to land, then acknowledge them */
	mdelay(1);
	cik_irq_ack(rdev);
	cik_disable_interrupt_state(rdev);
}
6962
/**
 * cik_irq_suspend - disable interrupts for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts and stop the RLC (CIK).
 * Used for suspend.
 */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	cik_irq_disable(rdev);
	/* stop the RLC so nothing interrupt-related runs while suspended */
	cik_rlc_stop(rdev);
}
6976
/**
 * cik_irq_fini - tear down interrupt support
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw and free the IH ring
 * buffer (CIK).
 * Used for driver unload.
 */
static void cik_irq_fini(struct radeon_device *rdev)
{
	cik_irq_suspend(rdev);
	/* safe to free the IH ring only after interrupts are fully quiesced */
	r600_ih_ring_fini(rdev);
}
6991
6992 /**
6993  * cik_get_ih_wptr - get the IH ring buffer wptr
6994  *
6995  * @rdev: radeon_device pointer
6996  *
6997  * Get the IH ring buffer wptr from either the register
6998  * or the writeback memory buffer (CIK).  Also check for
6999  * ring buffer overflow and deal with it.
7000  * Used by cik_irq_process().
7001  * Returns the value of the wptr.
7002  */
7003 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7004 {
7005         u32 wptr, tmp;
7006
7007         if (rdev->wb.enabled)
7008                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7009         else
7010                 wptr = RREG32(IH_RB_WPTR);
7011
7012         if (wptr & RB_OVERFLOW) {
7013                 /* When a ring buffer overflow happen start parsing interrupt
7014                  * from the last not overwritten vector (wptr + 16). Hopefully
7015                  * this should allow us to catchup.
7016                  */
7017                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
7018                         wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
7019                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7020                 tmp = RREG32(IH_RB_CNTL);
7021                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
7022                 WREG32(IH_RB_CNTL, tmp);
7023         }
7024         return (wptr & rdev->ih.ptr_mask);
7025 }
7026
7027 /*        CIK IV Ring
7028  * Each IV ring entry is 128 bits:
7029  * [7:0]    - interrupt source id
7030  * [31:8]   - reserved
7031  * [59:32]  - interrupt source data
7032  * [63:60]  - reserved
7033  * [71:64]  - RINGID
7034  *            CP:
7035  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7036  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7037  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7038  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7039  *            PIPE_ID - ME0 0=3D
7040  *                    - ME1&2 compute dispatcher (4 pipes each)
7041  *            SDMA:
7042  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7043  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7044  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7045  * [79:72]  - VMID
7046  * [95:80]  - PASID
7047  * [127:96] - reserved
7048  */
7049 /**
7050  * cik_irq_process - interrupt handler
7051  *
7052  * @rdev: radeon_device pointer
7053  *
 * Interrupt handler (CIK).  Walk the IH ring,
7055  * ack interrupts and schedule work to handle
7056  * interrupt events.
7057  * Returns irq process return code.
7058  */
7059 int cik_irq_process(struct radeon_device *rdev)
7060 {
7061         struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7062         struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7063         u32 wptr;
7064         u32 rptr;
7065         u32 src_id, src_data, ring_id;
7066         u8 me_id, pipe_id, queue_id;
7067         u32 ring_index;
7068         bool queue_hotplug = false;
7069         bool queue_reset = false;
7070         u32 addr, status, mc_client;
7071         bool queue_thermal = false;
7072
7073         if (!rdev->ih.enabled || rdev->shutdown)
7074                 return IRQ_NONE;
7075
7076         wptr = cik_get_ih_wptr(rdev);
7077
7078 restart_ih:
7079         /* is somebody else already processing irqs? */
7080         if (atomic_xchg(&rdev->ih.lock, 1))
7081                 return IRQ_NONE;
7082
7083         rptr = rdev->ih.rptr;
7084         DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7085
7086         /* Order reading of wptr vs. reading of IH ring data */
7087         rmb();
7088
7089         /* display interrupts */
7090         cik_irq_ack(rdev);
7091
7092         while (rptr != wptr) {
7093                 /* wptr/rptr are in bytes! */
7094                 ring_index = rptr / 4;
7095                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7096                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7097                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7098
7099                 switch (src_id) {
7100                 case 1: /* D1 vblank/vline */
7101                         switch (src_data) {
7102                         case 0: /* D1 vblank */
7103                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
7104                                         if (rdev->irq.crtc_vblank_int[0]) {
7105                                                 drm_handle_vblank(rdev->ddev, 0);
7106                                                 rdev->pm.vblank_sync = true;
7107                                                 wake_up(&rdev->irq.vblank_queue);
7108                                         }
7109                                         if (atomic_read(&rdev->irq.pflip[0]))
7110                                                 radeon_crtc_handle_flip(rdev, 0);
7111                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7112                                         DRM_DEBUG("IH: D1 vblank\n");
7113                                 }
7114                                 break;
7115                         case 1: /* D1 vline */
7116                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
7117                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7118                                         DRM_DEBUG("IH: D1 vline\n");
7119                                 }
7120                                 break;
7121                         default:
7122                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7123                                 break;
7124                         }
7125                         break;
7126                 case 2: /* D2 vblank/vline */
7127                         switch (src_data) {
7128                         case 0: /* D2 vblank */
7129                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
7130                                         if (rdev->irq.crtc_vblank_int[1]) {
7131                                                 drm_handle_vblank(rdev->ddev, 1);
7132                                                 rdev->pm.vblank_sync = true;
7133                                                 wake_up(&rdev->irq.vblank_queue);
7134                                         }
7135                                         if (atomic_read(&rdev->irq.pflip[1]))
7136                                                 radeon_crtc_handle_flip(rdev, 1);
7137                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7138                                         DRM_DEBUG("IH: D2 vblank\n");
7139                                 }
7140                                 break;
7141                         case 1: /* D2 vline */
7142                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
7143                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7144                                         DRM_DEBUG("IH: D2 vline\n");
7145                                 }
7146                                 break;
7147                         default:
7148                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7149                                 break;
7150                         }
7151                         break;
7152                 case 3: /* D3 vblank/vline */
7153                         switch (src_data) {
7154                         case 0: /* D3 vblank */
7155                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
7156                                         if (rdev->irq.crtc_vblank_int[2]) {
7157                                                 drm_handle_vblank(rdev->ddev, 2);
7158                                                 rdev->pm.vblank_sync = true;
7159                                                 wake_up(&rdev->irq.vblank_queue);
7160                                         }
7161                                         if (atomic_read(&rdev->irq.pflip[2]))
7162                                                 radeon_crtc_handle_flip(rdev, 2);
7163                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7164                                         DRM_DEBUG("IH: D3 vblank\n");
7165                                 }
7166                                 break;
7167                         case 1: /* D3 vline */
7168                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
7169                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7170                                         DRM_DEBUG("IH: D3 vline\n");
7171                                 }
7172                                 break;
7173                         default:
7174                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7175                                 break;
7176                         }
7177                         break;
7178                 case 4: /* D4 vblank/vline */
7179                         switch (src_data) {
7180                         case 0: /* D4 vblank */
7181                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
7182                                         if (rdev->irq.crtc_vblank_int[3]) {
7183                                                 drm_handle_vblank(rdev->ddev, 3);
7184                                                 rdev->pm.vblank_sync = true;
7185                                                 wake_up(&rdev->irq.vblank_queue);
7186                                         }
7187                                         if (atomic_read(&rdev->irq.pflip[3]))
7188                                                 radeon_crtc_handle_flip(rdev, 3);
7189                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7190                                         DRM_DEBUG("IH: D4 vblank\n");
7191                                 }
7192                                 break;
7193                         case 1: /* D4 vline */
7194                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
7195                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7196                                         DRM_DEBUG("IH: D4 vline\n");
7197                                 }
7198                                 break;
7199                         default:
7200                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7201                                 break;
7202                         }
7203                         break;
7204                 case 5: /* D5 vblank/vline */
7205                         switch (src_data) {
7206                         case 0: /* D5 vblank */
7207                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
7208                                         if (rdev->irq.crtc_vblank_int[4]) {
7209                                                 drm_handle_vblank(rdev->ddev, 4);
7210                                                 rdev->pm.vblank_sync = true;
7211                                                 wake_up(&rdev->irq.vblank_queue);
7212                                         }
7213                                         if (atomic_read(&rdev->irq.pflip[4]))
7214                                                 radeon_crtc_handle_flip(rdev, 4);
7215                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7216                                         DRM_DEBUG("IH: D5 vblank\n");
7217                                 }
7218                                 break;
7219                         case 1: /* D5 vline */
7220                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
7221                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7222                                         DRM_DEBUG("IH: D5 vline\n");
7223                                 }
7224                                 break;
7225                         default:
7226                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7227                                 break;
7228                         }
7229                         break;
7230                 case 6: /* D6 vblank/vline */
7231                         switch (src_data) {
7232                         case 0: /* D6 vblank */
7233                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
7234                                         if (rdev->irq.crtc_vblank_int[5]) {
7235                                                 drm_handle_vblank(rdev->ddev, 5);
7236                                                 rdev->pm.vblank_sync = true;
7237                                                 wake_up(&rdev->irq.vblank_queue);
7238                                         }
7239                                         if (atomic_read(&rdev->irq.pflip[5]))
7240                                                 radeon_crtc_handle_flip(rdev, 5);
7241                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7242                                         DRM_DEBUG("IH: D6 vblank\n");
7243                                 }
7244                                 break;
7245                         case 1: /* D6 vline */
7246                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
7247                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7248                                         DRM_DEBUG("IH: D6 vline\n");
7249                                 }
7250                                 break;
7251                         default:
7252                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7253                                 break;
7254                         }
7255                         break;
7256                 case 42: /* HPD hotplug */
7257                         switch (src_data) {
7258                         case 0:
7259                                 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7260                                         rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7261                                         queue_hotplug = true;
7262                                         DRM_DEBUG("IH: HPD1\n");
7263                                 }
7264                                 break;
7265                         case 1:
7266                                 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7267                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7268                                         queue_hotplug = true;
7269                                         DRM_DEBUG("IH: HPD2\n");
7270                                 }
7271                                 break;
7272                         case 2:
7273                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7274                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7275                                         queue_hotplug = true;
7276                                         DRM_DEBUG("IH: HPD3\n");
7277                                 }
7278                                 break;
7279                         case 3:
7280                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7281                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7282                                         queue_hotplug = true;
7283                                         DRM_DEBUG("IH: HPD4\n");
7284                                 }
7285                                 break;
7286                         case 4:
7287                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7288                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7289                                         queue_hotplug = true;
7290                                         DRM_DEBUG("IH: HPD5\n");
7291                                 }
7292                                 break;
7293                         case 5:
7294                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7295                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7296                                         queue_hotplug = true;
7297                                         DRM_DEBUG("IH: HPD6\n");
7298                                 }
7299                                 break;
7300                         default:
7301                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7302                                 break;
7303                         }
7304                         break;
7305                 case 146:
7306                 case 147:
7307                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7308                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7309                         mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7310                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7311                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7312                                 addr);
7313                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7314                                 status);
7315                         cik_vm_decode_fault(rdev, status, addr, mc_client);
7316                         /* reset addr and status */
7317                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7318                         break;
7319                 case 176: /* GFX RB CP_INT */
7320                 case 177: /* GFX IB CP_INT */
7321                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7322                         break;
7323                 case 181: /* CP EOP event */
7324                         DRM_DEBUG("IH: CP EOP\n");
7325                         /* XXX check the bitfield order! */
7326                         me_id = (ring_id & 0x60) >> 5;
7327                         pipe_id = (ring_id & 0x18) >> 3;
7328                         queue_id = (ring_id & 0x7) >> 0;
7329                         switch (me_id) {
7330                         case 0:
7331                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7332                                 break;
7333                         case 1:
7334                         case 2:
7335                                 if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
7336                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7337                                 if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
7338                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7339                                 break;
7340                         }
7341                         break;
7342                 case 184: /* CP Privileged reg access */
7343                         DRM_ERROR("Illegal register access in command stream\n");
7344                         /* XXX check the bitfield order! */
7345                         me_id = (ring_id & 0x60) >> 5;
7346                         pipe_id = (ring_id & 0x18) >> 3;
7347                         queue_id = (ring_id & 0x7) >> 0;
7348                         switch (me_id) {
7349                         case 0:
7350                                 /* This results in a full GPU reset, but all we need to do is soft
7351                                  * reset the CP for gfx
7352                                  */
7353                                 queue_reset = true;
7354                                 break;
7355                         case 1:
7356                                 /* XXX compute */
7357                                 queue_reset = true;
7358                                 break;
7359                         case 2:
7360                                 /* XXX compute */
7361                                 queue_reset = true;
7362                                 break;
7363                         }
7364                         break;
7365                 case 185: /* CP Privileged inst */
7366                         DRM_ERROR("Illegal instruction in command stream\n");
7367                         /* XXX check the bitfield order! */
7368                         me_id = (ring_id & 0x60) >> 5;
7369                         pipe_id = (ring_id & 0x18) >> 3;
7370                         queue_id = (ring_id & 0x7) >> 0;
7371                         switch (me_id) {
7372                         case 0:
7373                                 /* This results in a full GPU reset, but all we need to do is soft
7374                                  * reset the CP for gfx
7375                                  */
7376                                 queue_reset = true;
7377                                 break;
7378                         case 1:
7379                                 /* XXX compute */
7380                                 queue_reset = true;
7381                                 break;
7382                         case 2:
7383                                 /* XXX compute */
7384                                 queue_reset = true;
7385                                 break;
7386                         }
7387                         break;
7388                 case 224: /* SDMA trap event */
7389                         /* XXX check the bitfield order! */
7390                         me_id = (ring_id & 0x3) >> 0;
7391                         queue_id = (ring_id & 0xc) >> 2;
7392                         DRM_DEBUG("IH: SDMA trap\n");
7393                         switch (me_id) {
7394                         case 0:
7395                                 switch (queue_id) {
7396                                 case 0:
7397                                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
7398                                         break;
7399                                 case 1:
7400                                         /* XXX compute */
7401                                         break;
7402                                 case 2:
7403                                         /* XXX compute */
7404                                         break;
7405                                 }
7406                                 break;
7407                         case 1:
7408                                 switch (queue_id) {
7409                                 case 0:
7410                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7411                                         break;
7412                                 case 1:
7413                                         /* XXX compute */
7414                                         break;
7415                                 case 2:
7416                                         /* XXX compute */
7417                                         break;
7418                                 }
7419                                 break;
7420                         }
7421                         break;
7422                 case 230: /* thermal low to high */
7423                         DRM_DEBUG("IH: thermal low to high\n");
7424                         rdev->pm.dpm.thermal.high_to_low = false;
7425                         queue_thermal = true;
7426                         break;
7427                 case 231: /* thermal high to low */
7428                         DRM_DEBUG("IH: thermal high to low\n");
7429                         rdev->pm.dpm.thermal.high_to_low = true;
7430                         queue_thermal = true;
7431                         break;
7432                 case 233: /* GUI IDLE */
7433                         DRM_DEBUG("IH: GUI idle\n");
7434                         break;
7435                 case 241: /* SDMA Privileged inst */
7436                 case 247: /* SDMA Privileged inst */
7437                         DRM_ERROR("Illegal instruction in SDMA command stream\n");
7438                         /* XXX check the bitfield order! */
7439                         me_id = (ring_id & 0x3) >> 0;
7440                         queue_id = (ring_id & 0xc) >> 2;
7441                         switch (me_id) {
7442                         case 0:
7443                                 switch (queue_id) {
7444                                 case 0:
7445                                         queue_reset = true;
7446                                         break;
7447                                 case 1:
7448                                         /* XXX compute */
7449                                         queue_reset = true;
7450                                         break;
7451                                 case 2:
7452                                         /* XXX compute */
7453                                         queue_reset = true;
7454                                         break;
7455                                 }
7456                                 break;
7457                         case 1:
7458                                 switch (queue_id) {
7459                                 case 0:
7460                                         queue_reset = true;
7461                                         break;
7462                                 case 1:
7463                                         /* XXX compute */
7464                                         queue_reset = true;
7465                                         break;
7466                                 case 2:
7467                                         /* XXX compute */
7468                                         queue_reset = true;
7469                                         break;
7470                                 }
7471                                 break;
7472                         }
7473                         break;
7474                 default:
7475                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7476                         break;
7477                 }
7478
7479                 /* wptr/rptr are in bytes! */
7480                 rptr += 16;
7481                 rptr &= rdev->ih.ptr_mask;
7482         }
7483         if (queue_hotplug)
7484                 schedule_work(&rdev->hotplug_work);
7485         if (queue_reset)
7486                 schedule_work(&rdev->reset_work);
7487         if (queue_thermal)
7488                 schedule_work(&rdev->pm.dpm.thermal.work);
7489         rdev->ih.rptr = rptr;
7490         WREG32(IH_RB_RPTR, rdev->ih.rptr);
7491         atomic_set(&rdev->ih.lock, 0);
7492
7493         /* make sure wptr hasn't changed while processing */
7494         wptr = cik_get_ih_wptr(rdev);
7495         if (wptr != rptr)
7496                 goto restart_ih;
7497
7498         return IRQ_HANDLED;
7499 }
7500
7501 /*
7502  * startup/shutdown callbacks
7503  */
7504 /**
7505  * cik_startup - program the asic to a functional state
7506  *
7507  * @rdev: radeon_device pointer
7508  *
7509  * Programs the asic to a functional state (CIK).
7510  * Called by cik_init() and cik_resume().
7511  * Returns 0 for success, error for failure.
7512  */
static int cik_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* enable pcie gen2/3 link */
	cik_pcie_gen3_enable(rdev);
	/* enable aspm */
	cik_program_aspm(rdev);

	/* program the MC before anything that lives in VRAM is touched */
	cik_mc_program(rdev);

	/* load firmware on demand; dGPUs additionally require MC ucode */
	if (rdev->flags & RADEON_IS_IGP) {
		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
			r = cik_init_microcode(rdev);
			if (r) {
				DRM_ERROR("Failed to load firmware!\n");
				return r;
			}
		}
	} else {
		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
		    !rdev->mc_fw) {
			r = cik_init_microcode(rdev);
			if (r) {
				DRM_ERROR("Failed to load firmware!\n");
				return r;
			}
		}

		r = ci_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	r = cik_pcie_gart_enable(rdev);
	if (r)
		return r;
	cik_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->flags & RADEON_IS_IGP) {
		/* KV (Spectre) and KB (Kalindi) need different save/restore lists */
		if (rdev->family == CHIP_KAVERI) {
			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
		} else {
			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
		}
	}
	rdev->rlc.cs_data = ci_cs_data;
	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* allocate mec buffers */
	r = cik_mec_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* start fence processing for each ring before the rings come up */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD bring-up is best-effort: on any failure the UVD ring is
	 * simply disabled (ring_size = 0) instead of failing startup.
	 */
	r = radeon_uvd_resume(rdev);
	if (!r) {
		cik_uvd_resume(rdev);
		r = radeon_fence_driver_start_ring(rdev,
						   R600_RING_TYPE_UVD_INDEX);
		if (r)
			dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
	}
	if (r)
		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = cik_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	cik_irq_set(rdev);

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     CP_RB0_RPTR, CP_RB0_WPTR,
			     0, 0xfffff, RADEON_CP_PACKET2);
	if (r)
		return r;

	/* set up the compute queues */
	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
			     0, 0xfffff, PACKET3(PACKET3_NOP, 0x3FFF));
	if (r)
		return r;
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 0; /* first queue */
	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
			     0, 0xffffffff, PACKET3(PACKET3_NOP, 0x3FFF));
	if (r)
		return r;
	/* dGPU only have 1 MEC */
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 1; /* second queue */
	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
			     SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
			     2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
			     SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
			     2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	r = cik_cp_resume(rdev);
	if (r)
		return r;

	r = cik_sdma_resume(rdev);
	if (r)
		return r;

	/* ring_size == 0 means UVD bring-up failed above; skip it */
	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
	if (ring->ring_size) {
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
				     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
				     0, 0xfffff, RADEON_CP_PACKET2);
		if (!r)
			r = r600_uvd_init(rdev, true);
		if (r)
			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
	}

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	return 0;
}
7731
7732 /**
7733  * cik_resume - resume the asic to a functional state
7734  *
7735  * @rdev: radeon_device pointer
7736  *
7737  * Programs the asic to a functional state (CIK).
7738  * Called at resume.
7739  * Returns 0 for success, error for failure.
7740  */
7741 int cik_resume(struct radeon_device *rdev)
7742 {
7743         int r;
7744
7745         /* post card */
7746         atom_asic_init(rdev->mode_info.atom_context);
7747
7748         /* init golden registers */
7749         cik_init_golden_registers(rdev);
7750
7751         rdev->accel_working = true;
7752         r = cik_startup(rdev);
7753         if (r) {
7754                 DRM_ERROR("cik startup failed on resume\n");
7755                 rdev->accel_working = false;
7756                 return r;
7757         }
7758
7759         return r;
7760
7761 }
7762
7763 /**
7764  * cik_suspend - suspend the asic
7765  *
7766  * @rdev: radeon_device pointer
7767  *
7768  * Bring the chip into a state suitable for suspend (CIK).
7769  * Called at suspend.
7770  * Returns 0 for success.
7771  */
int cik_suspend(struct radeon_device *rdev)
{
	/* Teardown order matters: stop the consumers (VM, CP, SDMA, UVD)
	 * before disabling the infrastructure they rely on (IRQs,
	 * writeback, GART).
	 */
	radeon_vm_manager_fini(rdev);
	cik_cp_enable(rdev, false);	/* halt the gfx/compute CP */
	cik_sdma_enable(rdev, false);	/* halt both SDMA engines */
	r600_uvd_stop(rdev);
	radeon_uvd_suspend(rdev);
	cik_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	cik_pcie_gart_disable(rdev);
	return 0;
}
7784
7785 /* Plan is to move initialization in that function and use
7786  * helper function so that radeon_device_init pretty much
7787  * do nothing more than calling asic specific function. This
7788  * should also allow to remove a bunch of callback function
7789  * like vram_info.
7790  */
7791 /**
7792  * cik_init - asic specific driver and hw init
7793  *
7794  * @rdev: radeon_device pointer
7795  *
7796  * Setup asic specific driver variables and program the hw
7797  * to a functional state (CIK).
7798  * Called at driver startup.
7799  * Returns 0 for success, errors for failure.
7800  */
7801 int cik_init(struct radeon_device *rdev)
7802 {
7803         struct radeon_ring *ring;
7804         int r;
7805
7806         /* Read BIOS */
7807         if (!radeon_get_bios(rdev)) {
7808                 if (ASIC_IS_AVIVO(rdev))
7809                         return -EINVAL;
7810         }
7811         /* Must be an ATOMBIOS */
7812         if (!rdev->is_atom_bios) {
7813                 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
7814                 return -EINVAL;
7815         }
7816         r = radeon_atombios_init(rdev);
7817         if (r)
7818                 return r;
7819
7820         /* Post card if necessary */
7821         if (!radeon_card_posted(rdev)) {
7822                 if (!rdev->bios) {
7823                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
7824                         return -EINVAL;
7825                 }
7826                 DRM_INFO("GPU not posted. posting now...\n");
7827                 atom_asic_init(rdev->mode_info.atom_context);
7828         }
7829         /* init golden registers */
7830         cik_init_golden_registers(rdev);
7831         /* Initialize scratch registers */
7832         cik_scratch_init(rdev);
7833         /* Initialize surface registers */
7834         radeon_surface_init(rdev);
7835         /* Initialize clocks */
7836         radeon_get_clock_info(rdev->ddev);
7837
7838         /* Fence driver */
7839         r = radeon_fence_driver_init(rdev);
7840         if (r)
7841                 return r;
7842
7843         /* initialize memory controller */
7844         r = cik_mc_init(rdev);
7845         if (r)
7846                 return r;
7847         /* Memory manager */
7848         r = radeon_bo_init(rdev);
7849         if (r)
7850                 return r;
7851
7852         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7853         ring->ring_obj = NULL;
7854         r600_ring_init(rdev, ring, 1024 * 1024);
7855
7856         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7857         ring->ring_obj = NULL;
7858         r600_ring_init(rdev, ring, 1024 * 1024);
7859         r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
7860         if (r)
7861                 return r;
7862
7863         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7864         ring->ring_obj = NULL;
7865         r600_ring_init(rdev, ring, 1024 * 1024);
7866         r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
7867         if (r)
7868                 return r;
7869
7870         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7871         ring->ring_obj = NULL;
7872         r600_ring_init(rdev, ring, 256 * 1024);
7873
7874         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7875         ring->ring_obj = NULL;
7876         r600_ring_init(rdev, ring, 256 * 1024);
7877
7878         r = radeon_uvd_init(rdev);
7879         if (!r) {
7880                 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7881                 ring->ring_obj = NULL;
7882                 r600_ring_init(rdev, ring, 4096);
7883         }
7884
7885         rdev->ih.ring_obj = NULL;
7886         r600_ih_ring_init(rdev, 64 * 1024);
7887
7888         r = r600_pcie_gart_init(rdev);
7889         if (r)
7890                 return r;
7891
7892         rdev->accel_working = true;
7893         r = cik_startup(rdev);
7894         if (r) {
7895                 dev_err(rdev->dev, "disabling GPU acceleration\n");
7896                 cik_cp_fini(rdev);
7897                 cik_sdma_fini(rdev);
7898                 cik_irq_fini(rdev);
7899                 sumo_rlc_fini(rdev);
7900                 cik_mec_fini(rdev);
7901                 radeon_wb_fini(rdev);
7902                 radeon_ib_pool_fini(rdev);
7903                 radeon_vm_manager_fini(rdev);
7904                 radeon_irq_kms_fini(rdev);
7905                 cik_pcie_gart_fini(rdev);
7906                 rdev->accel_working = false;
7907         }
7908
7909         /* Don't start up if the MC ucode is missing.
7910          * The default clocks and voltages before the MC ucode
7911          * is loaded are not suffient for advanced operations.
7912          */
7913         if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
7914                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
7915                 return -EINVAL;
7916         }
7917
7918         return 0;
7919 }
7920
7921 /**
7922  * cik_fini - asic specific driver and hw fini
7923  *
7924  * @rdev: radeon_device pointer
7925  *
7926  * Tear down the asic specific driver variables and program the hw
7927  * to an idle state (CIK).
7928  * Called at driver unload.
7929  */
void cik_fini(struct radeon_device *rdev)
{
	/* tear down in roughly the reverse order of cik_init()/cik_startup() */
	cik_cp_fini(rdev);
	cik_sdma_fini(rdev);
	cik_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	cik_mec_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	r600_uvd_stop(rdev);
	radeon_uvd_fini(rdev);
	cik_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	rdev->bios = NULL;	/* guard against stale pointer after unload */
}
7952
7953 /* display watermark setup */
7954 /**
7955  * dce8_line_buffer_adjust - Set up the line buffer
7956  *
7957  * @rdev: radeon_device pointer
7958  * @radeon_crtc: the selected display controller
7959  * @mode: the current display mode on the selected display
7960  * controller
7961  *
 * Set up the line buffer allocation for
7963  * the selected display controller (CIK).
7964  * Returns the line buffer size in pixels.
7965  */
7966 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
7967                                    struct radeon_crtc *radeon_crtc,
7968                                    struct drm_display_mode *mode)
7969 {
7970         u32 tmp;
7971
7972         /*
7973          * Line Buffer Setup
7974          * There are 6 line buffers, one for each display controllers.
7975          * There are 3 partitions per LB. Select the number of partitions
7976          * to enable based on the display width.  For display widths larger
7977          * than 4096, you need use to use 2 display controllers and combine
7978          * them using the stereo blender.
7979          */
7980         if (radeon_crtc->base.enabled && mode) {
7981                 if (mode->crtc_hdisplay < 1920)
7982                         tmp = 1;
7983                 else if (mode->crtc_hdisplay < 2560)
7984                         tmp = 2;
7985                 else if (mode->crtc_hdisplay < 4096)
7986                         tmp = 0;
7987                 else {
7988                         DRM_DEBUG_KMS("Mode too big for LB!\n");
7989                         tmp = 0;
7990                 }
7991         } else
7992                 tmp = 1;
7993
7994         WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
7995                LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
7996
7997         if (radeon_crtc->base.enabled && mode) {
7998                 switch (tmp) {
7999                 case 0:
8000                 default:
8001                         return 4096 * 2;
8002                 case 1:
8003                         return 1920 * 2;
8004                 case 2:
8005                         return 2560 * 2;
8006                 }
8007         }
8008
8009         /* controller not enabled, so no lb used */
8010         return 0;
8011 }
8012
8013 /**
8014  * cik_get_number_of_dram_channels - get the number of dram channels
8015  *
8016  * @rdev: radeon_device pointer
8017  *
8018  * Look up the number of video ram channels (CIK).
8019  * Used for display watermark bandwidth calculations
8020  * Returns the number of dram channels
8021  */
8022 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8023 {
8024         u32 tmp = RREG32(MC_SHARED_CHMAP);
8025
8026         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8027         case 0:
8028         default:
8029                 return 1;
8030         case 1:
8031                 return 2;
8032         case 2:
8033                 return 4;
8034         case 3:
8035                 return 8;
8036         case 4:
8037                 return 3;
8038         case 5:
8039                 return 6;
8040         case 6:
8041                 return 10;
8042         case 7:
8043                 return 12;
8044         case 8:
8045                 return 16;
8046         }
8047 }
8048
/* inputs for the DCE8 display watermark calculations */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
8064
8065 /**
8066  * dce8_dram_bandwidth - get the dram bandwidth
8067  *
8068  * @wm: watermark calculation data
8069  *
8070  * Calculate the raw dram bandwidth (CIK).
8071  * Used for display watermark bandwidth calculations
8072  * Returns the dram bandwidth in MBytes/s
8073  */
8074 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8075 {
8076         /* Calculate raw DRAM Bandwidth */
8077         fixed20_12 dram_efficiency; /* 0.7 */
8078         fixed20_12 yclk, dram_channels, bandwidth;
8079         fixed20_12 a;
8080
8081         a.full = dfixed_const(1000);
8082         yclk.full = dfixed_const(wm->yclk);
8083         yclk.full = dfixed_div(yclk, a);
8084         dram_channels.full = dfixed_const(wm->dram_channels * 4);
8085         a.full = dfixed_const(10);
8086         dram_efficiency.full = dfixed_const(7);
8087         dram_efficiency.full = dfixed_div(dram_efficiency, a);
8088         bandwidth.full = dfixed_mul(dram_channels, yclk);
8089         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8090
8091         return dfixed_trunc(bandwidth);
8092 }
8093
8094 /**
8095  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8096  *
8097  * @wm: watermark calculation data
8098  *
8099  * Calculate the dram bandwidth used for display (CIK).
8100  * Used for display watermark bandwidth calculations
8101  * Returns the dram bandwidth for display in MBytes/s
8102  */
8103 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8104 {
8105         /* Calculate DRAM Bandwidth and the part allocated to display. */
8106         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8107         fixed20_12 yclk, dram_channels, bandwidth;
8108         fixed20_12 a;
8109
8110         a.full = dfixed_const(1000);
8111         yclk.full = dfixed_const(wm->yclk);
8112         yclk.full = dfixed_div(yclk, a);
8113         dram_channels.full = dfixed_const(wm->dram_channels * 4);
8114         a.full = dfixed_const(10);
8115         disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
8116         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8117         bandwidth.full = dfixed_mul(dram_channels, yclk);
8118         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8119
8120         return dfixed_trunc(bandwidth);
8121 }
8122
8123 /**
8124  * dce8_data_return_bandwidth - get the data return bandwidth
8125  *
8126  * @wm: watermark calculation data
8127  *
8128  * Calculate the data return bandwidth used for display (CIK).
8129  * Used for display watermark bandwidth calculations
8130  * Returns the data return bandwidth in MBytes/s
8131  */
8132 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
8133 {
8134         /* Calculate the display Data return Bandwidth */
8135         fixed20_12 return_efficiency; /* 0.8 */
8136         fixed20_12 sclk, bandwidth;
8137         fixed20_12 a;
8138
8139         a.full = dfixed_const(1000);
8140         sclk.full = dfixed_const(wm->sclk);
8141         sclk.full = dfixed_div(sclk, a);
8142         a.full = dfixed_const(10);
8143         return_efficiency.full = dfixed_const(8);
8144         return_efficiency.full = dfixed_div(return_efficiency, a);
8145         a.full = dfixed_const(32);
8146         bandwidth.full = dfixed_mul(a, sclk);
8147         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
8148
8149         return dfixed_trunc(bandwidth);
8150 }
8151
8152 /**
8153  * dce8_dmif_request_bandwidth - get the dmif bandwidth
8154  *
8155  * @wm: watermark calculation data
8156  *
8157  * Calculate the dmif bandwidth used for display (CIK).
8158  * Used for display watermark bandwidth calculations
8159  * Returns the dmif bandwidth in MBytes/s
8160  */
8161 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
8162 {
8163         /* Calculate the DMIF Request Bandwidth */
8164         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
8165         fixed20_12 disp_clk, bandwidth;
8166         fixed20_12 a, b;
8167
8168         a.full = dfixed_const(1000);
8169         disp_clk.full = dfixed_const(wm->disp_clk);
8170         disp_clk.full = dfixed_div(disp_clk, a);
8171         a.full = dfixed_const(32);
8172         b.full = dfixed_mul(a, disp_clk);
8173
8174         a.full = dfixed_const(10);
8175         disp_clk_request_efficiency.full = dfixed_const(8);
8176         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
8177
8178         bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
8179
8180         return dfixed_trunc(bandwidth);
8181 }
8182
8183 /**
8184  * dce8_available_bandwidth - get the min available bandwidth
8185  *
8186  * @wm: watermark calculation data
8187  *
8188  * Calculate the min available bandwidth used for display (CIK).
8189  * Used for display watermark bandwidth calculations
8190  * Returns the min available bandwidth in MBytes/s
8191  */
8192 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
8193 {
8194         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
8195         u32 dram_bandwidth = dce8_dram_bandwidth(wm);
8196         u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
8197         u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
8198
8199         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
8200 }
8201
8202 /**
8203  * dce8_average_bandwidth - get the average available bandwidth
8204  *
8205  * @wm: watermark calculation data
8206  *
8207  * Calculate the average available bandwidth used for display (CIK).
8208  * Used for display watermark bandwidth calculations
8209  * Returns the average available bandwidth in MBytes/s
8210  */
8211 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
8212 {
8213         /* Calculate the display mode Average Bandwidth
8214          * DisplayMode should contain the source and destination dimensions,
8215          * timing, etc.
8216          */
8217         fixed20_12 bpp;
8218         fixed20_12 line_time;
8219         fixed20_12 src_width;
8220         fixed20_12 bandwidth;
8221         fixed20_12 a;
8222
8223         a.full = dfixed_const(1000);
8224         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
8225         line_time.full = dfixed_div(line_time, a);
8226         bpp.full = dfixed_const(wm->bytes_per_pixel);
8227         src_width.full = dfixed_const(wm->src_width);
8228         bandwidth.full = dfixed_mul(src_width, bpp);
8229         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
8230         bandwidth.full = dfixed_div(bandwidth, line_time);
8231
8232         return dfixed_trunc(bandwidth);
8233 }
8234
8235 /**
8236  * dce8_latency_watermark - get the latency watermark
8237  *
8238  * @wm: watermark calculation data
8239  *
8240  * Calculate the latency watermark (CIK).
8241  * Used for display watermark bandwidth calculations
8242  * Returns the latency watermark in ns
8243  */
static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce8_available_bandwidth(wm);
	/* ns to return one 512-byte x 8 chunk at the available bandwidth */
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	/* ns to return a cursor line pair (128 * 4 bytes) */
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	/* worst-case wait caused by the other heads' outstanding requests */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	/* no active heads: nothing to wait for (also avoids div by zero below) */
	if (wm->num_heads == 0)
		return 0;

	/* heavy downscaling, many scaler taps or interlacing can require
	 * up to 4 source lines per destination line, otherwise 2
	 */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* a = this head's share of the available bandwidth */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	/* b = dmif_size / ((mc_latency + 512) / disp_clk), i.e. the rate at
	 * which the dmif buffer covers the fetch latency
	 */
	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	/* b = display consumption rate: disp_clk (MHz) * bytes_per_pixel */
	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	/* line buffer fill rate is limited by both of the above */
	lb_fill_bw = min(tmp, dfixed_trunc(b));

	/* time to fill one destination line's worth of source pixels
	 * at the line buffer fill rate
	 */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* pad the latency if a line cannot be filled within the active time */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
8306
8307 /**
8308  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
8309  * average and available dram bandwidth
8310  *
8311  * @wm: watermark calculation data
8312  *
8313  * Check if the display average bandwidth fits in the display
8314  * dram bandwidth (CIK).
8315  * Used for display watermark bandwidth calculations
8316  * Returns true if the display fits, false if not.
8317  */
8318 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8319 {
8320         if (dce8_average_bandwidth(wm) <=
8321             (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
8322                 return true;
8323         else
8324                 return false;
8325 }
8326
8327 /**
8328  * dce8_average_bandwidth_vs_available_bandwidth - check
8329  * average and available bandwidth
8330  *
8331  * @wm: watermark calculation data
8332  *
8333  * Check if the display average bandwidth fits in the display
8334  * available bandwidth (CIK).
8335  * Used for display watermark bandwidth calculations
8336  * Returns true if the display fits, false if not.
8337  */
8338 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
8339 {
8340         if (dce8_average_bandwidth(wm) <=
8341             (dce8_available_bandwidth(wm) / wm->num_heads))
8342                 return true;
8343         else
8344                 return false;
8345 }
8346
8347 /**
8348  * dce8_check_latency_hiding - check latency hiding
8349  *
8350  * @wm: watermark calculation data
8351  *
8352  * Check latency hiding (CIK).
8353  * Used for display watermark bandwidth calculations
8354  * Returns true if the display fits, false if not.
8355  */
8356 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
8357 {
8358         u32 lb_partitions = wm->lb_size / wm->src_width;
8359         u32 line_time = wm->active_time + wm->blank_time;
8360         u32 latency_tolerant_lines;
8361         u32 latency_hiding;
8362         fixed20_12 a;
8363
8364         a.full = dfixed_const(1);
8365         if (wm->vsc.full > a.full)
8366                 latency_tolerant_lines = 1;
8367         else {
8368                 if (lb_partitions <= (wm->vtaps + 1))
8369                         latency_tolerant_lines = 1;
8370                 else
8371                         latency_tolerant_lines = 2;
8372         }
8373
8374         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
8375
8376         if (dce8_latency_watermark(wm) <= latency_hiding)
8377                 return true;
8378         else
8379                 return false;
8380 }
8381
8382 /**
8383  * dce8_program_watermarks - program display watermarks
8384  *
8385  * @rdev: radeon_device pointer
8386  * @radeon_crtc: the selected display controller
8387  * @lb_size: line buffer size
8388  * @num_heads: number of display controllers in use
8389  *
8390  * Calculate and program the display watermarks for the
8391  * selected display controller (CIK).
8392  */
static void dce8_program_watermarks(struct radeon_device *rdev,
				    struct radeon_crtc *radeon_crtc,
				    u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce8_wm_params wm_low, wm_high;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 tmp, wm_mask;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		/* pixel period in ns (mode->clock is in kHz); line time is
		 * capped at 16 bits since it goes into a register field
		 */
		pixel_period = 1000000 / (u32)mode->clock;
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);

		/* watermark for high clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
		    rdev->pm.dpm_enabled) {
			/* NOTE(review): assumes the bool arg selects low (true)
			 * vs high (false) clocks — confirm against the dpm code
			 */
			wm_high.yclk =
				radeon_dpm_get_mclk(rdev, false) * 10;
			wm_high.sclk =
				radeon_dpm_get_sclk(rdev, false) * 10;
		} else {
			wm_high.yclk = rdev->pm.current_mclk * 10;
			wm_high.sclk = rdev->pm.current_sclk * 10;
		}

		wm_high.disp_clk = mode->clock;
		wm_high.src_width = mode->crtc_hdisplay;
		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
		wm_high.blank_time = line_time - wm_high.active_time;
		wm_high.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_high.interlaced = true;
		wm_high.vsc = radeon_crtc->vsc;
		/* scaling active -> 2 vertical taps */
		wm_high.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_high.vtaps = 2;
		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_high.lb_size = lb_size;
		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm_high.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
		    !dce8_check_latency_hiding(&wm_high) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}

		/* watermark for low clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
		    rdev->pm.dpm_enabled) {
			wm_low.yclk =
				radeon_dpm_get_mclk(rdev, true) * 10;
			wm_low.sclk =
				radeon_dpm_get_sclk(rdev, true) * 10;
		} else {
			wm_low.yclk = rdev->pm.current_mclk * 10;
			wm_low.sclk = rdev->pm.current_sclk * 10;
		}

		wm_low.disp_clk = mode->clock;
		wm_low.src_width = mode->crtc_hdisplay;
		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
		wm_low.blank_time = line_time - wm_low.active_time;
		wm_low.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_low.interlaced = true;
		wm_low.vsc = radeon_crtc->vsc;
		wm_low.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_low.vtaps = 2;
		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_low.lb_size = lb_size;
		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm_low.num_heads = num_heads;

		/* set for low clocks */
		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
		    !dce8_check_latency_hiding(&wm_low) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}
	}

	/* the watermark registers are programmed through a mask/select
	 * window: pick wm A, write it, pick wm B, write it, then restore
	 * the original selection
	 */
	/* select wm A */
	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp = wm_mask;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);

	/* save values for DPM */
	radeon_crtc->line_time = line_time;
	radeon_crtc->wm_high = latency_watermark_a;
	radeon_crtc->wm_low = latency_watermark_b;
}
8514
8515 /**
8516  * dce8_bandwidth_update - program display watermarks
8517  *
8518  * @rdev: radeon_device pointer
8519  *
8520  * Calculate and program the display watermarks and line
8521  * buffer allocation (CIK).
8522  */
8523 void dce8_bandwidth_update(struct radeon_device *rdev)
8524 {
8525         struct drm_display_mode *mode = NULL;
8526         u32 num_heads = 0, lb_size;
8527         int i;
8528
8529         radeon_update_display_priority(rdev);
8530
8531         for (i = 0; i < rdev->num_crtc; i++) {
8532                 if (rdev->mode_info.crtcs[i]->base.enabled)
8533                         num_heads++;
8534         }
8535         for (i = 0; i < rdev->num_crtc; i++) {
8536                 mode = &rdev->mode_info.crtcs[i]->base.mode;
8537                 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
8538                 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
8539         }
8540 }
8541
8542 /**
8543  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
8544  *
8545  * @rdev: radeon_device pointer
8546  *
 * Fetches a GPU clock counter snapshot (CIK).
8548  * Returns the 64 bit clock counter snapshot.
8549  */
8550 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
8551 {
8552         uint64_t clock;
8553
8554         mutex_lock(&rdev->gpu_clock_mutex);
8555         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
8556         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
8557                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
8558         mutex_unlock(&rdev->gpu_clock_mutex);
8559         return clock;
8560 }
8561
/* Program a single UVD clock (vclk or dclk): look up the divider via the
 * ATOM BIOS, write it to the given control register and wait for the
 * status register to signal the clock is ready.
 * Returns 0 on success, -ETIMEDOUT if the status bit never sets, or the
 * atom error code.
 */
static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
			      u32 cntl_reg, u32 status_reg)
{
	int r, i;
	struct atom_clock_dividers dividers;
	uint32_t tmp;

	/* ask the ATOM BIOS for the divider matching the requested clock */
	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
					   clock, false, &dividers);
	if (r)
		return r;

	/* program the post divider, leaving the other control bits alone */
	tmp = RREG32_SMC(cntl_reg);
	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
	tmp |= dividers.post_divider;
	WREG32_SMC(cntl_reg, tmp);

	/* wait up to 1s (100 * 10ms) for the status bit to set */
	for (i = 0; i < 100; i++) {
		if (RREG32_SMC(status_reg) & DCLK_STATUS)
			break;
		mdelay(10);
	}
	if (i == 100)
		return -ETIMEDOUT;

	return 0;
}
8589
8590 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
8591 {
8592         int r = 0;
8593
8594         r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
8595         if (r)
8596                 return r;
8597
8598         r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
8599         return r;
8600 }
8601
/* Program the UVD VCPU memory controller with the locations of the
 * firmware image, stack and heap inside the UVD BO.
 */
void cik_uvd_resume(struct radeon_device *rdev)
{
	uint64_t addr;
	uint32_t size;

	/* program the VCPU memory controller bits 0-27 */
	/* all offsets/sizes below are in 8-byte units (hence the >> 3) */
	addr = rdev->uvd.gpu_addr >> 3;
	size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 4) >> 3;
	WREG32(UVD_VCPU_CACHE_OFFSET0, addr);
	WREG32(UVD_VCPU_CACHE_SIZE0, size);

	/* stack region directly follows the firmware image */
	addr += size;
	size = RADEON_UVD_STACK_SIZE >> 3;
	WREG32(UVD_VCPU_CACHE_OFFSET1, addr);
	WREG32(UVD_VCPU_CACHE_SIZE1, size);

	/* heap region directly follows the stack */
	addr += size;
	size = RADEON_UVD_HEAP_SIZE >> 3;
	WREG32(UVD_VCPU_CACHE_OFFSET2, addr);
	WREG32(UVD_VCPU_CACHE_SIZE2, size);

	/* bits 28-31 */
	addr = (rdev->uvd.gpu_addr >> 28) & 0xF;
	WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0));

	/* bits 32-39 */
	/* NOTE(review): 0x1 << 31 shifts into the sign bit of int;
	 * 0x1U << 31 would be cleaner */
	addr = (rdev->uvd.gpu_addr >> 32) & 0xFF;
	WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));

}
8632
/* Attempt to retrain the PCIE link to gen2/gen3 speeds if both the GPU
 * and the upstream bridge support it.  Disabled with radeon.pcie_gen2=0.
 */
static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	/* module parameter override */
	if (radeon_pcie_gen2 == 0)
		return;

	/* IGPs have no discrete PCIE link to train */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	/* nothing to do if neither gen2 nor gen3 is supported */
	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	/* bail out early if the link already runs at the target speed */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* need the PCIE capability offsets of both link partners */
	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save the link control settings of both ends and
			 * enable hw autonomous width disable while training
			 */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			/* renegotiate to the maximum detected link width if
			 * the link currently runs narrower and the hw
			 * supports renegotiation
			 */
			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* up to 10 equalization retries */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				/* quiesce the link, then redo equalization */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl */
				/* restore the saved HAWD bit on both ends */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2 */
				/* restore the saved LNKCTL2 fields (bit 4,
				 * bits 9-11) on both ends
				 */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* program the target link speed in LNKCTL2 (bits 3:0) */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	/* kick off the speed change and wait for the hw to ack it */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
8789
8790 static void cik_program_aspm(struct radeon_device *rdev)
8791 {
8792         u32 data, orig;
8793         bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
8794         bool disable_clkreq = false;
8795
8796         if (radeon_aspm == 0)
8797                 return;
8798
8799         /* XXX double check IGPs */
8800         if (rdev->flags & RADEON_IS_IGP)
8801                 return;
8802
8803         if (!(rdev->flags & RADEON_IS_PCIE))
8804                 return;
8805
8806         orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
8807         data &= ~LC_XMIT_N_FTS_MASK;
8808         data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
8809         if (orig != data)
8810                 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
8811
8812         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
8813         data |= LC_GO_TO_RECOVERY;
8814         if (orig != data)
8815                 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
8816
8817         orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
8818         data |= P_IGNORE_EDB_ERR;
8819         if (orig != data)
8820                 WREG32_PCIE_PORT(PCIE_P_CNTL, data);
8821
8822         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
8823         data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
8824         data |= LC_PMI_TO_L1_DIS;
8825         if (!disable_l0s)
8826                 data |= LC_L0S_INACTIVITY(7);
8827
8828         if (!disable_l1) {
8829                 data |= LC_L1_INACTIVITY(7);
8830                 data &= ~LC_PMI_TO_L1_DIS;
8831                 if (orig != data)
8832                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
8833
8834                 if (!disable_plloff_in_l1) {
8835                         bool clk_req_support;
8836
8837                         orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
8838                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
8839                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
8840                         if (orig != data)
8841                                 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
8842
8843                         orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
8844                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
8845                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
8846                         if (orig != data)
8847                                 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
8848
8849                         orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
8850                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
8851                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
8852                         if (orig != data)
8853                                 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
8854
8855                         orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
8856                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
8857                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
8858                         if (orig != data)
8859                                 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
8860
8861                         orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
8862                         data &= ~LC_DYN_LANES_PWR_STATE_MASK;
8863                         data |= LC_DYN_LANES_PWR_STATE(3);
8864                         if (orig != data)
8865                                 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
8866
8867                         if (!disable_clkreq) {
8868                                 struct pci_dev *root = rdev->pdev->bus->self;
8869                                 u32 lnkcap;
8870
8871                                 clk_req_support = false;
8872                                 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
8873                                 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
8874                                         clk_req_support = true;
8875                         } else {
8876                                 clk_req_support = false;
8877                         }
8878
8879                         if (clk_req_support) {
8880                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
8881                                 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
8882                                 if (orig != data)
8883                                         WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
8884
8885                                 orig = data = RREG32_SMC(THM_CLK_CNTL);
8886                                 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
8887                                 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
8888                                 if (orig != data)
8889                                         WREG32_SMC(THM_CLK_CNTL, data);
8890
8891                                 orig = data = RREG32_SMC(MISC_CLK_CTRL);
8892                                 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
8893                                 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
8894                                 if (orig != data)
8895                                         WREG32_SMC(MISC_CLK_CTRL, data);
8896
8897                                 orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
8898                                 data &= ~BCLK_AS_XCLK;
8899                                 if (orig != data)
8900                                         WREG32_SMC(CG_CLKPIN_CNTL, data);
8901
8902                                 orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
8903                                 data &= ~FORCE_BIF_REFCLK_EN;
8904                                 if (orig != data)
8905                                         WREG32_SMC(CG_CLKPIN_CNTL_2, data);
8906
8907                                 orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
8908                                 data &= ~MPLL_CLKOUT_SEL_MASK;
8909                                 data |= MPLL_CLKOUT_SEL(4);
8910                                 if (orig != data)
8911                                         WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
8912                         }
8913                 }
8914         } else {
8915                 if (orig != data)
8916                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
8917         }
8918
8919         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
8920         data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
8921         if (orig != data)
8922                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
8923
8924         if (!disable_l0s) {
8925                 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
8926                 if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
8927                         data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
8928                         if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
8929                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
8930                                 data &= ~LC_L0S_INACTIVITY_MASK;
8931                                 if (orig != data)
8932                                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
8933                         }
8934                 }
8935         }
8936 }