4ff59c8f508f6c32d2b6f1b2caaf1d4c8200556a
[firefly-linux-kernel-4.4.55.git] / drivers / gpu / drm / radeon / si.c
1 /*
2  * Copyright 2011 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include <drm/drmP.h>
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include <drm/radeon_drm.h>
31 #include "sid.h"
32 #include "atom.h"
33 #include "si_blit_shaders.h"
34 #include "clearstate_si.h"
35 #include "radeon_ucode.h"
36
37
/*
 * Firmware blobs required by the Southern Islands ASICs driven here.
 * Per chip: pfp/me/ce (command-processor microcode), mc (memory
 * controller), rlc (run-list controller) and smc (power management).
 * MODULE_FIRMWARE() only records the names in module metadata so
 * initramfs/packaging tools can bundle them; the actual loading is
 * done elsewhere in this file (not visible in this chunk).
 */
MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
MODULE_FIRMWARE("radeon/TAHITI_me.bin");
MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
MODULE_FIRMWARE("radeon/TAHITI_smc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");
MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
MODULE_FIRMWARE("radeon/VERDE_me.bin");
MODULE_FIRMWARE("radeon/VERDE_ce.bin");
MODULE_FIRMWARE("radeon/VERDE_mc.bin");
MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
MODULE_FIRMWARE("radeon/VERDE_smc.bin");
MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
MODULE_FIRMWARE("radeon/OLAND_me.bin");
MODULE_FIRMWARE("radeon/OLAND_ce.bin");
MODULE_FIRMWARE("radeon/OLAND_mc.bin");
MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
MODULE_FIRMWARE("radeon/OLAND_smc.bin");
MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
MODULE_FIRMWARE("radeon/HAINAN_me.bin");
MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
MODULE_FIRMWARE("radeon/HAINAN_smc.bin");
68
/* Forward declarations for SI-local helpers defined later in this file. */
static void si_pcie_gen3_enable(struct radeon_device *rdev);
static void si_program_aspm(struct radeon_device *rdev);
/*
 * Cross-ASIC helpers reused by the SI code path.  NOTE(review): judging
 * by the name prefixes these are presumably defined in the sumo_*,
 * r600.c and evergreen.c translation units — confirm before relying on it.
 */
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
81
82 static const u32 verde_rlc_save_restore_register_list[] =
83 {
84         (0x8000 << 16) | (0x98f4 >> 2),
85         0x00000000,
86         (0x8040 << 16) | (0x98f4 >> 2),
87         0x00000000,
88         (0x8000 << 16) | (0xe80 >> 2),
89         0x00000000,
90         (0x8040 << 16) | (0xe80 >> 2),
91         0x00000000,
92         (0x8000 << 16) | (0x89bc >> 2),
93         0x00000000,
94         (0x8040 << 16) | (0x89bc >> 2),
95         0x00000000,
96         (0x8000 << 16) | (0x8c1c >> 2),
97         0x00000000,
98         (0x8040 << 16) | (0x8c1c >> 2),
99         0x00000000,
100         (0x9c00 << 16) | (0x98f0 >> 2),
101         0x00000000,
102         (0x9c00 << 16) | (0xe7c >> 2),
103         0x00000000,
104         (0x8000 << 16) | (0x9148 >> 2),
105         0x00000000,
106         (0x8040 << 16) | (0x9148 >> 2),
107         0x00000000,
108         (0x9c00 << 16) | (0x9150 >> 2),
109         0x00000000,
110         (0x9c00 << 16) | (0x897c >> 2),
111         0x00000000,
112         (0x9c00 << 16) | (0x8d8c >> 2),
113         0x00000000,
114         (0x9c00 << 16) | (0xac54 >> 2),
115         0X00000000,
116         0x3,
117         (0x9c00 << 16) | (0x98f8 >> 2),
118         0x00000000,
119         (0x9c00 << 16) | (0x9910 >> 2),
120         0x00000000,
121         (0x9c00 << 16) | (0x9914 >> 2),
122         0x00000000,
123         (0x9c00 << 16) | (0x9918 >> 2),
124         0x00000000,
125         (0x9c00 << 16) | (0x991c >> 2),
126         0x00000000,
127         (0x9c00 << 16) | (0x9920 >> 2),
128         0x00000000,
129         (0x9c00 << 16) | (0x9924 >> 2),
130         0x00000000,
131         (0x9c00 << 16) | (0x9928 >> 2),
132         0x00000000,
133         (0x9c00 << 16) | (0x992c >> 2),
134         0x00000000,
135         (0x9c00 << 16) | (0x9930 >> 2),
136         0x00000000,
137         (0x9c00 << 16) | (0x9934 >> 2),
138         0x00000000,
139         (0x9c00 << 16) | (0x9938 >> 2),
140         0x00000000,
141         (0x9c00 << 16) | (0x993c >> 2),
142         0x00000000,
143         (0x9c00 << 16) | (0x9940 >> 2),
144         0x00000000,
145         (0x9c00 << 16) | (0x9944 >> 2),
146         0x00000000,
147         (0x9c00 << 16) | (0x9948 >> 2),
148         0x00000000,
149         (0x9c00 << 16) | (0x994c >> 2),
150         0x00000000,
151         (0x9c00 << 16) | (0x9950 >> 2),
152         0x00000000,
153         (0x9c00 << 16) | (0x9954 >> 2),
154         0x00000000,
155         (0x9c00 << 16) | (0x9958 >> 2),
156         0x00000000,
157         (0x9c00 << 16) | (0x995c >> 2),
158         0x00000000,
159         (0x9c00 << 16) | (0x9960 >> 2),
160         0x00000000,
161         (0x9c00 << 16) | (0x9964 >> 2),
162         0x00000000,
163         (0x9c00 << 16) | (0x9968 >> 2),
164         0x00000000,
165         (0x9c00 << 16) | (0x996c >> 2),
166         0x00000000,
167         (0x9c00 << 16) | (0x9970 >> 2),
168         0x00000000,
169         (0x9c00 << 16) | (0x9974 >> 2),
170         0x00000000,
171         (0x9c00 << 16) | (0x9978 >> 2),
172         0x00000000,
173         (0x9c00 << 16) | (0x997c >> 2),
174         0x00000000,
175         (0x9c00 << 16) | (0x9980 >> 2),
176         0x00000000,
177         (0x9c00 << 16) | (0x9984 >> 2),
178         0x00000000,
179         (0x9c00 << 16) | (0x9988 >> 2),
180         0x00000000,
181         (0x9c00 << 16) | (0x998c >> 2),
182         0x00000000,
183         (0x9c00 << 16) | (0x8c00 >> 2),
184         0x00000000,
185         (0x9c00 << 16) | (0x8c14 >> 2),
186         0x00000000,
187         (0x9c00 << 16) | (0x8c04 >> 2),
188         0x00000000,
189         (0x9c00 << 16) | (0x8c08 >> 2),
190         0x00000000,
191         (0x8000 << 16) | (0x9b7c >> 2),
192         0x00000000,
193         (0x8040 << 16) | (0x9b7c >> 2),
194         0x00000000,
195         (0x8000 << 16) | (0xe84 >> 2),
196         0x00000000,
197         (0x8040 << 16) | (0xe84 >> 2),
198         0x00000000,
199         (0x8000 << 16) | (0x89c0 >> 2),
200         0x00000000,
201         (0x8040 << 16) | (0x89c0 >> 2),
202         0x00000000,
203         (0x8000 << 16) | (0x914c >> 2),
204         0x00000000,
205         (0x8040 << 16) | (0x914c >> 2),
206         0x00000000,
207         (0x8000 << 16) | (0x8c20 >> 2),
208         0x00000000,
209         (0x8040 << 16) | (0x8c20 >> 2),
210         0x00000000,
211         (0x8000 << 16) | (0x9354 >> 2),
212         0x00000000,
213         (0x8040 << 16) | (0x9354 >> 2),
214         0x00000000,
215         (0x9c00 << 16) | (0x9060 >> 2),
216         0x00000000,
217         (0x9c00 << 16) | (0x9364 >> 2),
218         0x00000000,
219         (0x9c00 << 16) | (0x9100 >> 2),
220         0x00000000,
221         (0x9c00 << 16) | (0x913c >> 2),
222         0x00000000,
223         (0x8000 << 16) | (0x90e0 >> 2),
224         0x00000000,
225         (0x8000 << 16) | (0x90e4 >> 2),
226         0x00000000,
227         (0x8000 << 16) | (0x90e8 >> 2),
228         0x00000000,
229         (0x8040 << 16) | (0x90e0 >> 2),
230         0x00000000,
231         (0x8040 << 16) | (0x90e4 >> 2),
232         0x00000000,
233         (0x8040 << 16) | (0x90e8 >> 2),
234         0x00000000,
235         (0x9c00 << 16) | (0x8bcc >> 2),
236         0x00000000,
237         (0x9c00 << 16) | (0x8b24 >> 2),
238         0x00000000,
239         (0x9c00 << 16) | (0x88c4 >> 2),
240         0x00000000,
241         (0x9c00 << 16) | (0x8e50 >> 2),
242         0x00000000,
243         (0x9c00 << 16) | (0x8c0c >> 2),
244         0x00000000,
245         (0x9c00 << 16) | (0x8e58 >> 2),
246         0x00000000,
247         (0x9c00 << 16) | (0x8e5c >> 2),
248         0x00000000,
249         (0x9c00 << 16) | (0x9508 >> 2),
250         0x00000000,
251         (0x9c00 << 16) | (0x950c >> 2),
252         0x00000000,
253         (0x9c00 << 16) | (0x9494 >> 2),
254         0x00000000,
255         (0x9c00 << 16) | (0xac0c >> 2),
256         0x00000000,
257         (0x9c00 << 16) | (0xac10 >> 2),
258         0x00000000,
259         (0x9c00 << 16) | (0xac14 >> 2),
260         0x00000000,
261         (0x9c00 << 16) | (0xae00 >> 2),
262         0x00000000,
263         (0x9c00 << 16) | (0xac08 >> 2),
264         0x00000000,
265         (0x9c00 << 16) | (0x88d4 >> 2),
266         0x00000000,
267         (0x9c00 << 16) | (0x88c8 >> 2),
268         0x00000000,
269         (0x9c00 << 16) | (0x88cc >> 2),
270         0x00000000,
271         (0x9c00 << 16) | (0x89b0 >> 2),
272         0x00000000,
273         (0x9c00 << 16) | (0x8b10 >> 2),
274         0x00000000,
275         (0x9c00 << 16) | (0x8a14 >> 2),
276         0x00000000,
277         (0x9c00 << 16) | (0x9830 >> 2),
278         0x00000000,
279         (0x9c00 << 16) | (0x9834 >> 2),
280         0x00000000,
281         (0x9c00 << 16) | (0x9838 >> 2),
282         0x00000000,
283         (0x9c00 << 16) | (0x9a10 >> 2),
284         0x00000000,
285         (0x8000 << 16) | (0x9870 >> 2),
286         0x00000000,
287         (0x8000 << 16) | (0x9874 >> 2),
288         0x00000000,
289         (0x8001 << 16) | (0x9870 >> 2),
290         0x00000000,
291         (0x8001 << 16) | (0x9874 >> 2),
292         0x00000000,
293         (0x8040 << 16) | (0x9870 >> 2),
294         0x00000000,
295         (0x8040 << 16) | (0x9874 >> 2),
296         0x00000000,
297         (0x8041 << 16) | (0x9870 >> 2),
298         0x00000000,
299         (0x8041 << 16) | (0x9874 >> 2),
300         0x00000000,
301         0x00000000
302 };
303
/*
 * Tahiti RLC "golden" settings: {register offset, mask, value} triples.
 * NOTE(review): format matches the masked read-modify-write sequences
 * used for golden-register programming — confirm at the consumer.
 */
static const u32 tahiti_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4,
	0xf4a8, 0xffffffff, 0x00000000
};
313
/*
 * Tahiti "golden" register settings: {register offset, mask, value}
 * triples applied at init.  Consumed outside this chunk — do not
 * reorder or edit values without the hardware register reference.
 */
static const u32 tahiti_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x277c, 0x00000003, 0x000007ff,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x00000200, 0x000002fb,
	0xac10, 0xffffffff, 0x0000543b,
	0xac0c, 0xffffffff, 0xa9210876,
	0x88d0, 0xffffffff, 0x000fff40,
	0x88d4, 0x0000001f, 0x00000010,
	0x1410, 0x20000000, 0x20fffed8,
	0x15c0, 0x000c0fc0, 0x000c0400
};
348
/* Second Tahiti golden-register table, same {offset, mask, value} format. */
static const u32 tahiti_golden_registers2[] =
{
	0xc64, 0x00000001, 0x00000001
};
353
/* Pitcairn RLC golden settings: {register offset, mask, value} triples. */
static const u32 pitcairn_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601004,
	0xc47c, 0xffffffff, 0x10102020,
	0xc488, 0xffffffff, 0x01000020,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000a4
};
362
/* Pitcairn golden register settings: {register offset, mask, value} triples. */
static const u32 pitcairn_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f7,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x32761054,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
393
/* Verde RLC golden settings: {register offset, mask, value} triples. */
static const u32 verde_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x033f1005,
	0xc47c, 0xffffffff, 0x10808020,
	0xc488, 0xffffffff, 0x00800008,
	0xc314, 0xffffffff, 0x00001000,
	0xc30c, 0xffffffff, 0x80010014
};
402
/*
 * Verde golden register settings: {register offset, mask, value} triples.
 * NOTE(review): several entries appear verbatim two or three times
 * (0xd030, 0x2ae4, 0x240c, ...).  Preserved exactly as-is — the repeats
 * may be deliberate (e.g. per-instance writes); confirm before deduping.
 */
static const u32 verde_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x240c, 0x000007ff, 0x00000000,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8a14, 0xf000001f, 0x00000007,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9100, 0x07ffffff, 0x03000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x00000003,
	0xac14, 0x000003ff, 0x00000003,
	0xac14, 0x000003ff, 0x00000003,
	0xac10, 0xffffffff, 0x00000000,
	0xac10, 0xffffffff, 0x00000000,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00001032,
	0xac0c, 0xffffffff, 0x00001032,
	0xac0c, 0xffffffff, 0x00001032,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
458
/* Oland RLC golden settings: {register offset, mask, value} triples. */
static const u32 oland_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4
};
467
/* Oland golden register settings: {register offset, mask, value} triples. */
static const u32 oland_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00003210,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
498
/* Hainan golden register settings: {register offset, mask, value} triples. */
static const u32 hainan_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xd0c0, 0xff000fff, 0x00000100,
	0xd030, 0x000300c0, 0x00800040,
	0xd8c0, 0xff000fff, 0x00000100,
	0xd830, 0x000300c0, 0x00800040,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x00000000,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x03e00000, 0x03600000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f1,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00003210,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
527
/* Second Hainan golden-register table, same {offset, mask, value} format. */
static const u32 hainan_golden_registers2[] =
{
	0x98f8, 0xffffffff, 0x02010001
};
532
/*
 * Tahiti medium-grain / coarse-grain clock gating (MGCG/CGCG) init
 * sequence: {register offset, mask, value} triples.  Consumed outside
 * this chunk; ordering is significant (note 0x802c is written twice,
 * bracketing the block of 0x00000100 enables).
 */
static const u32 tahiti_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x91d8, 0xffffffff, 0x00070006,
	0x91dc, 0xffffffff, 0x00090008,
	0x91e0, 0xffffffff, 0x0000000c,
	0x91e4, 0xffffffff, 0x000b000a,
	0x91e8, 0xffffffff, 0x000e000d,
	0x91ec, 0xffffffff, 0x00080007,
	0x91f0, 0xffffffff, 0x000a0009,
	0x91f4, 0xffffffff, 0x0000000d,
	0x91f8, 0xffffffff, 0x000c000b,
	0x91fc, 0xffffffff, 0x000f000e,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9264, 0xffffffff, 0x000e000d,
	0x9268, 0xffffffff, 0x0010000f,
	0x926c, 0xffffffff, 0x00000013,
	0x9270, 0xffffffff, 0x00120011,
	0x9274, 0xffffffff, 0x00150014,
	0x9278, 0xffffffff, 0x000f000e,
	0x927c, 0xffffffff, 0x00110010,
	0x9280, 0xffffffff, 0x00000014,
	0x9284, 0xffffffff, 0x00130012,
	0x9288, 0xffffffff, 0x00160015,
	0x928c, 0xffffffff, 0x0010000f,
	0x9290, 0xffffffff, 0x00120011,
	0x9294, 0xffffffff, 0x00000015,
	0x9298, 0xffffffff, 0x00140013,
	0x929c, 0xffffffff, 0x00170016,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
662
/*
 * Pitcairn MGCG/CGCG clock-gating init sequence: {register offset,
 * mask, value} triples.  Shares most entries with the Tahiti table but
 * has a shorter 0x91xx range and omits the 0x264c/0x2648 pair.
 */
static const u32 pitcairn_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
760
761 static const u32 verde_mgcg_cgcg_init[] =
762 {
763         0xc400, 0xffffffff, 0xfffffffc,
764         0x802c, 0xffffffff, 0xe0000000,
765         0x9a60, 0xffffffff, 0x00000100,
766         0x92a4, 0xffffffff, 0x00000100,
767         0xc164, 0xffffffff, 0x00000100,
768         0x9774, 0xffffffff, 0x00000100,
769         0x8984, 0xffffffff, 0x06000100,
770         0x8a18, 0xffffffff, 0x00000100,
771         0x92a0, 0xffffffff, 0x00000100,
772         0xc380, 0xffffffff, 0x00000100,
773         0x8b28, 0xffffffff, 0x00000100,
774         0x9144, 0xffffffff, 0x00000100,
775         0x8d88, 0xffffffff, 0x00000100,
776         0x8d8c, 0xffffffff, 0x00000100,
777         0x9030, 0xffffffff, 0x00000100,
778         0x9034, 0xffffffff, 0x00000100,
779         0x9038, 0xffffffff, 0x00000100,
780         0x903c, 0xffffffff, 0x00000100,
781         0xad80, 0xffffffff, 0x00000100,
782         0xac54, 0xffffffff, 0x00000100,
783         0x897c, 0xffffffff, 0x06000100,
784         0x9868, 0xffffffff, 0x00000100,
785         0x9510, 0xffffffff, 0x00000100,
786         0xaf04, 0xffffffff, 0x00000100,
787         0xae04, 0xffffffff, 0x00000100,
788         0x949c, 0xffffffff, 0x00000100,
789         0x802c, 0xffffffff, 0xe0000000,
790         0x9160, 0xffffffff, 0x00010000,
791         0x9164, 0xffffffff, 0x00030002,
792         0x9168, 0xffffffff, 0x00040007,
793         0x916c, 0xffffffff, 0x00060005,
794         0x9170, 0xffffffff, 0x00090008,
795         0x9174, 0xffffffff, 0x00020001,
796         0x9178, 0xffffffff, 0x00040003,
797         0x917c, 0xffffffff, 0x00000007,
798         0x9180, 0xffffffff, 0x00060005,
799         0x9184, 0xffffffff, 0x00090008,
800         0x9188, 0xffffffff, 0x00030002,
801         0x918c, 0xffffffff, 0x00050004,
802         0x9190, 0xffffffff, 0x00000008,
803         0x9194, 0xffffffff, 0x00070006,
804         0x9198, 0xffffffff, 0x000a0009,
805         0x919c, 0xffffffff, 0x00040003,
806         0x91a0, 0xffffffff, 0x00060005,
807         0x91a4, 0xffffffff, 0x00000009,
808         0x91a8, 0xffffffff, 0x00080007,
809         0x91ac, 0xffffffff, 0x000b000a,
810         0x91b0, 0xffffffff, 0x00050004,
811         0x91b4, 0xffffffff, 0x00070006,
812         0x91b8, 0xffffffff, 0x0008000b,
813         0x91bc, 0xffffffff, 0x000a0009,
814         0x91c0, 0xffffffff, 0x000d000c,
815         0x9200, 0xffffffff, 0x00090008,
816         0x9204, 0xffffffff, 0x000b000a,
817         0x9208, 0xffffffff, 0x000c000f,
818         0x920c, 0xffffffff, 0x000e000d,
819         0x9210, 0xffffffff, 0x00110010,
820         0x9214, 0xffffffff, 0x000a0009,
821         0x9218, 0xffffffff, 0x000c000b,
822         0x921c, 0xffffffff, 0x0000000f,
823         0x9220, 0xffffffff, 0x000e000d,
824         0x9224, 0xffffffff, 0x00110010,
825         0x9228, 0xffffffff, 0x000b000a,
826         0x922c, 0xffffffff, 0x000d000c,
827         0x9230, 0xffffffff, 0x00000010,
828         0x9234, 0xffffffff, 0x000f000e,
829         0x9238, 0xffffffff, 0x00120011,
830         0x923c, 0xffffffff, 0x000c000b,
831         0x9240, 0xffffffff, 0x000e000d,
832         0x9244, 0xffffffff, 0x00000011,
833         0x9248, 0xffffffff, 0x0010000f,
834         0x924c, 0xffffffff, 0x00130012,
835         0x9250, 0xffffffff, 0x000d000c,
836         0x9254, 0xffffffff, 0x000f000e,
837         0x9258, 0xffffffff, 0x00100013,
838         0x925c, 0xffffffff, 0x00120011,
839         0x9260, 0xffffffff, 0x00150014,
840         0x9150, 0xffffffff, 0x96940200,
841         0x8708, 0xffffffff, 0x00900100,
842         0xc478, 0xffffffff, 0x00000080,
843         0xc404, 0xffffffff, 0x0020003f,
844         0x30, 0xffffffff, 0x0000001c,
845         0x34, 0x000f0000, 0x000f0000,
846         0x160c, 0xffffffff, 0x00000100,
847         0x1024, 0xffffffff, 0x00000100,
848         0x102c, 0x00000101, 0x00000000,
849         0x20a8, 0xffffffff, 0x00000104,
850         0x264c, 0x000c0000, 0x000c0000,
851         0x2648, 0x000c0000, 0x000c0000,
852         0x55e4, 0xff000fff, 0x00000100,
853         0x55e8, 0x00000001, 0x00000001,
854         0x2f50, 0x00000001, 0x00000001,
855         0x30cc, 0xc0000fff, 0x00000104,
856         0xc1e4, 0x00000001, 0x00000001,
857         0xd0c0, 0xfffffff0, 0x00000100,
858         0xd8c0, 0xfffffff0, 0x00000100
859 };
860
/*
 * Oland MGCG/CGCG (medium/coarse grain clock gating) init sequence.
 * Flat triples — presumably { register offset, mask, value }; consumed by
 * radeon_program_register_sequence() from si_init_golden_registers().
 */
static const u32 oland_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
940
/*
 * Hainan MGCG/CGCG (medium/coarse grain clock gating) init sequence.
 * Flat triples — presumably { register offset, mask, value }; consumed by
 * radeon_program_register_sequence() from si_init_golden_registers().
 */
static const u32 hainan_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
1017
1018 static u32 verde_pg_init[] =
1019 {
1020         0x353c, 0xffffffff, 0x40000,
1021         0x3538, 0xffffffff, 0x200010ff,
1022         0x353c, 0xffffffff, 0x0,
1023         0x353c, 0xffffffff, 0x0,
1024         0x353c, 0xffffffff, 0x0,
1025         0x353c, 0xffffffff, 0x0,
1026         0x353c, 0xffffffff, 0x0,
1027         0x353c, 0xffffffff, 0x7007,
1028         0x3538, 0xffffffff, 0x300010ff,
1029         0x353c, 0xffffffff, 0x0,
1030         0x353c, 0xffffffff, 0x0,
1031         0x353c, 0xffffffff, 0x0,
1032         0x353c, 0xffffffff, 0x0,
1033         0x353c, 0xffffffff, 0x0,
1034         0x353c, 0xffffffff, 0x400000,
1035         0x3538, 0xffffffff, 0x100010ff,
1036         0x353c, 0xffffffff, 0x0,
1037         0x353c, 0xffffffff, 0x0,
1038         0x353c, 0xffffffff, 0x0,
1039         0x353c, 0xffffffff, 0x0,
1040         0x353c, 0xffffffff, 0x0,
1041         0x353c, 0xffffffff, 0x120200,
1042         0x3538, 0xffffffff, 0x500010ff,
1043         0x353c, 0xffffffff, 0x0,
1044         0x353c, 0xffffffff, 0x0,
1045         0x353c, 0xffffffff, 0x0,
1046         0x353c, 0xffffffff, 0x0,
1047         0x353c, 0xffffffff, 0x0,
1048         0x353c, 0xffffffff, 0x1e1e16,
1049         0x3538, 0xffffffff, 0x600010ff,
1050         0x353c, 0xffffffff, 0x0,
1051         0x353c, 0xffffffff, 0x0,
1052         0x353c, 0xffffffff, 0x0,
1053         0x353c, 0xffffffff, 0x0,
1054         0x353c, 0xffffffff, 0x0,
1055         0x353c, 0xffffffff, 0x171f1e,
1056         0x3538, 0xffffffff, 0x700010ff,
1057         0x353c, 0xffffffff, 0x0,
1058         0x353c, 0xffffffff, 0x0,
1059         0x353c, 0xffffffff, 0x0,
1060         0x353c, 0xffffffff, 0x0,
1061         0x353c, 0xffffffff, 0x0,
1062         0x353c, 0xffffffff, 0x0,
1063         0x3538, 0xffffffff, 0x9ff,
1064         0x3500, 0xffffffff, 0x0,
1065         0x3504, 0xffffffff, 0x10000800,
1066         0x3504, 0xffffffff, 0xf,
1067         0x3504, 0xffffffff, 0xf,
1068         0x3500, 0xffffffff, 0x4,
1069         0x3504, 0xffffffff, 0x1000051e,
1070         0x3504, 0xffffffff, 0xffff,
1071         0x3504, 0xffffffff, 0xffff,
1072         0x3500, 0xffffffff, 0x8,
1073         0x3504, 0xffffffff, 0x80500,
1074         0x3500, 0xffffffff, 0x12,
1075         0x3504, 0xffffffff, 0x9050c,
1076         0x3500, 0xffffffff, 0x1d,
1077         0x3504, 0xffffffff, 0xb052c,
1078         0x3500, 0xffffffff, 0x2a,
1079         0x3504, 0xffffffff, 0x1053e,
1080         0x3500, 0xffffffff, 0x2d,
1081         0x3504, 0xffffffff, 0x10546,
1082         0x3500, 0xffffffff, 0x30,
1083         0x3504, 0xffffffff, 0xa054e,
1084         0x3500, 0xffffffff, 0x3c,
1085         0x3504, 0xffffffff, 0x1055f,
1086         0x3500, 0xffffffff, 0x3f,
1087         0x3504, 0xffffffff, 0x10567,
1088         0x3500, 0xffffffff, 0x42,
1089         0x3504, 0xffffffff, 0x1056f,
1090         0x3500, 0xffffffff, 0x45,
1091         0x3504, 0xffffffff, 0x10572,
1092         0x3500, 0xffffffff, 0x48,
1093         0x3504, 0xffffffff, 0x20575,
1094         0x3500, 0xffffffff, 0x4c,
1095         0x3504, 0xffffffff, 0x190801,
1096         0x3500, 0xffffffff, 0x67,
1097         0x3504, 0xffffffff, 0x1082a,
1098         0x3500, 0xffffffff, 0x6a,
1099         0x3504, 0xffffffff, 0x1b082d,
1100         0x3500, 0xffffffff, 0x87,
1101         0x3504, 0xffffffff, 0x310851,
1102         0x3500, 0xffffffff, 0xba,
1103         0x3504, 0xffffffff, 0x891,
1104         0x3500, 0xffffffff, 0xbc,
1105         0x3504, 0xffffffff, 0x893,
1106         0x3500, 0xffffffff, 0xbe,
1107         0x3504, 0xffffffff, 0x20895,
1108         0x3500, 0xffffffff, 0xc2,
1109         0x3504, 0xffffffff, 0x20899,
1110         0x3500, 0xffffffff, 0xc6,
1111         0x3504, 0xffffffff, 0x2089d,
1112         0x3500, 0xffffffff, 0xca,
1113         0x3504, 0xffffffff, 0x8a1,
1114         0x3500, 0xffffffff, 0xcc,
1115         0x3504, 0xffffffff, 0x8a3,
1116         0x3500, 0xffffffff, 0xce,
1117         0x3504, 0xffffffff, 0x308a5,
1118         0x3500, 0xffffffff, 0xd3,
1119         0x3504, 0xffffffff, 0x6d08cd,
1120         0x3500, 0xffffffff, 0x142,
1121         0x3504, 0xffffffff, 0x2000095a,
1122         0x3504, 0xffffffff, 0x1,
1123         0x3500, 0xffffffff, 0x144,
1124         0x3504, 0xffffffff, 0x301f095b,
1125         0x3500, 0xffffffff, 0x165,
1126         0x3504, 0xffffffff, 0xc094d,
1127         0x3500, 0xffffffff, 0x173,
1128         0x3504, 0xffffffff, 0xf096d,
1129         0x3500, 0xffffffff, 0x184,
1130         0x3504, 0xffffffff, 0x15097f,
1131         0x3500, 0xffffffff, 0x19b,
1132         0x3504, 0xffffffff, 0xc0998,
1133         0x3500, 0xffffffff, 0x1a9,
1134         0x3504, 0xffffffff, 0x409a7,
1135         0x3500, 0xffffffff, 0x1af,
1136         0x3504, 0xffffffff, 0xcdc,
1137         0x3500, 0xffffffff, 0x1b1,
1138         0x3504, 0xffffffff, 0x800,
1139         0x3508, 0xffffffff, 0x6c9b2000,
1140         0x3510, 0xfc00, 0x2000,
1141         0x3544, 0xffffffff, 0xfc0,
1142         0x28d4, 0x00000100, 0x100
1143 };
1144
1145 static void si_init_golden_registers(struct radeon_device *rdev)
1146 {
1147         switch (rdev->family) {
1148         case CHIP_TAHITI:
1149                 radeon_program_register_sequence(rdev,
1150                                                  tahiti_golden_registers,
1151                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers));
1152                 radeon_program_register_sequence(rdev,
1153                                                  tahiti_golden_rlc_registers,
1154                                                  (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
1155                 radeon_program_register_sequence(rdev,
1156                                                  tahiti_mgcg_cgcg_init,
1157                                                  (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
1158                 radeon_program_register_sequence(rdev,
1159                                                  tahiti_golden_registers2,
1160                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers2));
1161                 break;
1162         case CHIP_PITCAIRN:
1163                 radeon_program_register_sequence(rdev,
1164                                                  pitcairn_golden_registers,
1165                                                  (const u32)ARRAY_SIZE(pitcairn_golden_registers));
1166                 radeon_program_register_sequence(rdev,
1167                                                  pitcairn_golden_rlc_registers,
1168                                                  (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
1169                 radeon_program_register_sequence(rdev,
1170                                                  pitcairn_mgcg_cgcg_init,
1171                                                  (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
1172                 break;
1173         case CHIP_VERDE:
1174                 radeon_program_register_sequence(rdev,
1175                                                  verde_golden_registers,
1176                                                  (const u32)ARRAY_SIZE(verde_golden_registers));
1177                 radeon_program_register_sequence(rdev,
1178                                                  verde_golden_rlc_registers,
1179                                                  (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
1180                 radeon_program_register_sequence(rdev,
1181                                                  verde_mgcg_cgcg_init,
1182                                                  (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
1183                 radeon_program_register_sequence(rdev,
1184                                                  verde_pg_init,
1185                                                  (const u32)ARRAY_SIZE(verde_pg_init));
1186                 break;
1187         case CHIP_OLAND:
1188                 radeon_program_register_sequence(rdev,
1189                                                  oland_golden_registers,
1190                                                  (const u32)ARRAY_SIZE(oland_golden_registers));
1191                 radeon_program_register_sequence(rdev,
1192                                                  oland_golden_rlc_registers,
1193                                                  (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
1194                 radeon_program_register_sequence(rdev,
1195                                                  oland_mgcg_cgcg_init,
1196                                                  (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
1197                 break;
1198         case CHIP_HAINAN:
1199                 radeon_program_register_sequence(rdev,
1200                                                  hainan_golden_registers,
1201                                                  (const u32)ARRAY_SIZE(hainan_golden_registers));
1202                 radeon_program_register_sequence(rdev,
1203                                                  hainan_golden_registers2,
1204                                                  (const u32)ARRAY_SIZE(hainan_golden_registers2));
1205                 radeon_program_register_sequence(rdev,
1206                                                  hainan_mgcg_cgcg_init,
1207                                                  (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
1208                 break;
1209         default:
1210                 break;
1211         }
1212 }
1213
1214 #define PCIE_BUS_CLK                10000
1215 #define TCLK                        (PCIE_BUS_CLK / 10)
1216
1217 /**
1218  * si_get_xclk - get the xclk
1219  *
1220  * @rdev: radeon_device pointer
1221  *
1222  * Returns the reference clock used by the gfx engine
1223  * (SI).
1224  */
1225 u32 si_get_xclk(struct radeon_device *rdev)
1226 {
1227         u32 reference_clock = rdev->clock.spll.reference_freq;
1228         u32 tmp;
1229
1230         tmp = RREG32(CG_CLKPIN_CNTL_2);
1231         if (tmp & MUX_TCLK_TO_XCLK)
1232                 return TCLK;
1233
1234         tmp = RREG32(CG_CLKPIN_CNTL);
1235         if (tmp & XTALIN_DIVIDE)
1236                 return reference_clock / 4;
1237
1238         return reference_clock;
1239 }
1240
1241 /* get temperature in millidegrees */
1242 int si_get_temp(struct radeon_device *rdev)
1243 {
1244         u32 temp;
1245         int actual_temp = 0;
1246
1247         temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
1248                 CTF_TEMP_SHIFT;
1249
1250         if (temp & 0x200)
1251                 actual_temp = 255;
1252         else
1253                 actual_temp = temp & 0x1ff;
1254
1255         actual_temp = (actual_temp * 1000);
1256
1257         return actual_temp;
1258 }
1259
#define TAHITI_IO_MC_REGS_SIZE 36

/*
 * Tahiti MC (memory controller) IO register fixups: pairs of
 * { MC_SEQ_IO_DEBUG index, data } written via MC_SEQ_IO_DEBUG_INDEX/DATA
 * by si_mc_load_microcode() before the MC ucode is streamed in.
 */
static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a77400}
};
1300
/*
 * Pitcairn MC IO register fixups ({ MC_SEQ_IO_DEBUG index, data } pairs,
 * applied by si_mc_load_microcode()); differs from Tahiti only in the
 * final 0x9f entry.
 */
static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a47400}
};
1339
/*
 * Verde MC IO register fixups ({ MC_SEQ_IO_DEBUG index, data } pairs,
 * applied by si_mc_load_microcode()); differs from Tahiti only in the
 * final 0x9f entry.
 */
static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a37400}
};
1378
/*
 * Oland MC IO register fixups ({ MC_SEQ_IO_DEBUG index, data } pairs,
 * applied by si_mc_load_microcode()); differs from Tahiti only in the
 * final 0x9f entry.
 */
static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a17730}
};
1417
/*
 * Hainan MC IO register fixups ({ MC_SEQ_IO_DEBUG index, data } pairs,
 * applied by si_mc_load_microcode()); differs from Tahiti only in the
 * final 0x9f entry.
 */
static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a07730}
};
1456
1457 /* ucode loading */
1458 static int si_mc_load_microcode(struct radeon_device *rdev)
1459 {
1460         const __be32 *fw_data;
1461         u32 running, blackout = 0;
1462         u32 *io_mc_regs;
1463         int i, ucode_size, regs_size;
1464
1465         if (!rdev->mc_fw)
1466                 return -EINVAL;
1467
1468         switch (rdev->family) {
1469         case CHIP_TAHITI:
1470                 io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1471                 ucode_size = SI_MC_UCODE_SIZE;
1472                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1473                 break;
1474         case CHIP_PITCAIRN:
1475                 io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1476                 ucode_size = SI_MC_UCODE_SIZE;
1477                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1478                 break;
1479         case CHIP_VERDE:
1480         default:
1481                 io_mc_regs = (u32 *)&verde_io_mc_regs;
1482                 ucode_size = SI_MC_UCODE_SIZE;
1483                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1484                 break;
1485         case CHIP_OLAND:
1486                 io_mc_regs = (u32 *)&oland_io_mc_regs;
1487                 ucode_size = OLAND_MC_UCODE_SIZE;
1488                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1489                 break;
1490         case CHIP_HAINAN:
1491                 io_mc_regs = (u32 *)&hainan_io_mc_regs;
1492                 ucode_size = OLAND_MC_UCODE_SIZE;
1493                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1494                 break;
1495         }
1496
1497         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1498
1499         if (running == 0) {
1500                 if (running) {
1501                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1502                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1503                 }
1504
1505                 /* reset the engine and set to writable */
1506                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1507                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1508
1509                 /* load mc io regs */
1510                 for (i = 0; i < regs_size; i++) {
1511                         WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1512                         WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1513                 }
1514                 /* load the MC ucode */
1515                 fw_data = (const __be32 *)rdev->mc_fw->data;
1516                 for (i = 0; i < ucode_size; i++)
1517                         WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1518
1519                 /* put the engine back into the active state */
1520                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1521                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1522                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1523
1524                 /* wait for training to complete */
1525                 for (i = 0; i < rdev->usec_timeout; i++) {
1526                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1527                                 break;
1528                         udelay(1);
1529                 }
1530                 for (i = 0; i < rdev->usec_timeout; i++) {
1531                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1532                                 break;
1533                         udelay(1);
1534                 }
1535
1536                 if (running)
1537                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1538         }
1539
1540         return 0;
1541 }
1542
1543 static int si_init_microcode(struct radeon_device *rdev)
1544 {
1545         const char *chip_name;
1546         const char *rlc_chip_name;
1547         size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1548         size_t smc_req_size;
1549         char fw_name[30];
1550         int err;
1551
1552         DRM_DEBUG("\n");
1553
1554         switch (rdev->family) {
1555         case CHIP_TAHITI:
1556                 chip_name = "TAHITI";
1557                 rlc_chip_name = "TAHITI";
1558                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1559                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1560                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1561                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1562                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1563                 smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
1564                 break;
1565         case CHIP_PITCAIRN:
1566                 chip_name = "PITCAIRN";
1567                 rlc_chip_name = "PITCAIRN";
1568                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1569                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1570                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1571                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1572                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1573                 smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
1574                 break;
1575         case CHIP_VERDE:
1576                 chip_name = "VERDE";
1577                 rlc_chip_name = "VERDE";
1578                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1579                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1580                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1581                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1582                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1583                 smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
1584                 break;
1585         case CHIP_OLAND:
1586                 chip_name = "OLAND";
1587                 rlc_chip_name = "OLAND";
1588                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1589                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1590                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1591                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1592                 mc_req_size = OLAND_MC_UCODE_SIZE * 4;
1593                 smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
1594                 break;
1595         case CHIP_HAINAN:
1596                 chip_name = "HAINAN";
1597                 rlc_chip_name = "HAINAN";
1598                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1599                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1600                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1601                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1602                 mc_req_size = OLAND_MC_UCODE_SIZE * 4;
1603                 smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
1604                 break;
1605         default: BUG();
1606         }
1607
1608         DRM_INFO("Loading %s Microcode\n", chip_name);
1609
1610         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1611         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1612         if (err)
1613                 goto out;
1614         if (rdev->pfp_fw->size != pfp_req_size) {
1615                 printk(KERN_ERR
1616                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1617                        rdev->pfp_fw->size, fw_name);
1618                 err = -EINVAL;
1619                 goto out;
1620         }
1621
1622         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1623         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1624         if (err)
1625                 goto out;
1626         if (rdev->me_fw->size != me_req_size) {
1627                 printk(KERN_ERR
1628                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1629                        rdev->me_fw->size, fw_name);
1630                 err = -EINVAL;
1631         }
1632
1633         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1634         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1635         if (err)
1636                 goto out;
1637         if (rdev->ce_fw->size != ce_req_size) {
1638                 printk(KERN_ERR
1639                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1640                        rdev->ce_fw->size, fw_name);
1641                 err = -EINVAL;
1642         }
1643
1644         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", rlc_chip_name);
1645         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1646         if (err)
1647                 goto out;
1648         if (rdev->rlc_fw->size != rlc_req_size) {
1649                 printk(KERN_ERR
1650                        "si_rlc: Bogus length %zu in firmware \"%s\"\n",
1651                        rdev->rlc_fw->size, fw_name);
1652                 err = -EINVAL;
1653         }
1654
1655         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1656         err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1657         if (err)
1658                 goto out;
1659         if (rdev->mc_fw->size != mc_req_size) {
1660                 printk(KERN_ERR
1661                        "si_mc: Bogus length %zu in firmware \"%s\"\n",
1662                        rdev->mc_fw->size, fw_name);
1663                 err = -EINVAL;
1664         }
1665
1666         snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1667         err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1668         if (err) {
1669                 printk(KERN_ERR
1670                        "smc: error loading firmware \"%s\"\n",
1671                        fw_name);
1672                 release_firmware(rdev->smc_fw);
1673                 rdev->smc_fw = NULL;
1674         } else if (rdev->smc_fw->size != smc_req_size) {
1675                 printk(KERN_ERR
1676                        "si_smc: Bogus length %zu in firmware \"%s\"\n",
1677                        rdev->smc_fw->size, fw_name);
1678                 err = -EINVAL;
1679         }
1680
1681 out:
1682         if (err) {
1683                 if (err != -EINVAL)
1684                         printk(KERN_ERR
1685                                "si_cp: Failed to load firmware \"%s\"\n",
1686                                fw_name);
1687                 release_firmware(rdev->pfp_fw);
1688                 rdev->pfp_fw = NULL;
1689                 release_firmware(rdev->me_fw);
1690                 rdev->me_fw = NULL;
1691                 release_firmware(rdev->ce_fw);
1692                 rdev->ce_fw = NULL;
1693                 release_firmware(rdev->rlc_fw);
1694                 rdev->rlc_fw = NULL;
1695                 release_firmware(rdev->mc_fw);
1696                 rdev->mc_fw = NULL;
1697                 release_firmware(rdev->smc_fw);
1698                 rdev->smc_fw = NULL;
1699         }
1700         return err;
1701 }
1702
1703 /* watermark setup */
1704 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1705                                    struct radeon_crtc *radeon_crtc,
1706                                    struct drm_display_mode *mode,
1707                                    struct drm_display_mode *other_mode)
1708 {
1709         u32 tmp;
1710         /*
1711          * Line Buffer Setup
1712          * There are 3 line buffers, each one shared by 2 display controllers.
1713          * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1714          * the display controllers.  The paritioning is done via one of four
1715          * preset allocations specified in bits 21:20:
1716          *  0 - half lb
1717          *  2 - whole lb, other crtc must be disabled
1718          */
1719         /* this can get tricky if we have two large displays on a paired group
1720          * of crtcs.  Ideally for multiple large displays we'd assign them to
1721          * non-linked crtcs for maximum line buffer allocation.
1722          */
1723         if (radeon_crtc->base.enabled && mode) {
1724                 if (other_mode)
1725                         tmp = 0; /* 1/2 */
1726                 else
1727                         tmp = 2; /* whole */
1728         } else
1729                 tmp = 0;
1730
1731         WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
1732                DC_LB_MEMORY_CONFIG(tmp));
1733
1734         if (radeon_crtc->base.enabled && mode) {
1735                 switch (tmp) {
1736                 case 0:
1737                 default:
1738                         return 4096 * 2;
1739                 case 2:
1740                         return 8192 * 2;
1741                 }
1742         }
1743
1744         /* controller not enabled, so no lb used */
1745         return 0;
1746 }
1747
1748 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
1749 {
1750         u32 tmp = RREG32(MC_SHARED_CHMAP);
1751
1752         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
1753         case 0:
1754         default:
1755                 return 1;
1756         case 1:
1757                 return 2;
1758         case 2:
1759                 return 4;
1760         case 3:
1761                 return 8;
1762         case 4:
1763                 return 3;
1764         case 5:
1765                 return 6;
1766         case 6:
1767                 return 10;
1768         case 7:
1769                 return 12;
1770         case 8:
1771                 return 16;
1772         }
1773 }
1774
/* Watermark calculation parameters for a single display head (DCE6).
 * Filled in by dce6_program_watermarks() and consumed by the
 * dce6_*_bandwidth()/watermark helpers below.
 */
struct dce6_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
1790
1791 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
1792 {
1793         /* Calculate raw DRAM Bandwidth */
1794         fixed20_12 dram_efficiency; /* 0.7 */
1795         fixed20_12 yclk, dram_channels, bandwidth;
1796         fixed20_12 a;
1797
1798         a.full = dfixed_const(1000);
1799         yclk.full = dfixed_const(wm->yclk);
1800         yclk.full = dfixed_div(yclk, a);
1801         dram_channels.full = dfixed_const(wm->dram_channels * 4);
1802         a.full = dfixed_const(10);
1803         dram_efficiency.full = dfixed_const(7);
1804         dram_efficiency.full = dfixed_div(dram_efficiency, a);
1805         bandwidth.full = dfixed_mul(dram_channels, yclk);
1806         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
1807
1808         return dfixed_trunc(bandwidth);
1809 }
1810
1811 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
1812 {
1813         /* Calculate DRAM Bandwidth and the part allocated to display. */
1814         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
1815         fixed20_12 yclk, dram_channels, bandwidth;
1816         fixed20_12 a;
1817
1818         a.full = dfixed_const(1000);
1819         yclk.full = dfixed_const(wm->yclk);
1820         yclk.full = dfixed_div(yclk, a);
1821         dram_channels.full = dfixed_const(wm->dram_channels * 4);
1822         a.full = dfixed_const(10);
1823         disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
1824         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
1825         bandwidth.full = dfixed_mul(dram_channels, yclk);
1826         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
1827
1828         return dfixed_trunc(bandwidth);
1829 }
1830
1831 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
1832 {
1833         /* Calculate the display Data return Bandwidth */
1834         fixed20_12 return_efficiency; /* 0.8 */
1835         fixed20_12 sclk, bandwidth;
1836         fixed20_12 a;
1837
1838         a.full = dfixed_const(1000);
1839         sclk.full = dfixed_const(wm->sclk);
1840         sclk.full = dfixed_div(sclk, a);
1841         a.full = dfixed_const(10);
1842         return_efficiency.full = dfixed_const(8);
1843         return_efficiency.full = dfixed_div(return_efficiency, a);
1844         a.full = dfixed_const(32);
1845         bandwidth.full = dfixed_mul(a, sclk);
1846         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
1847
1848         return dfixed_trunc(bandwidth);
1849 }
1850
/* DMIF (display memory interface) requests are a fixed 32 bytes on DCE6. */
static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
{
	return 32;
}
1855
1856 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
1857 {
1858         /* Calculate the DMIF Request Bandwidth */
1859         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
1860         fixed20_12 disp_clk, sclk, bandwidth;
1861         fixed20_12 a, b1, b2;
1862         u32 min_bandwidth;
1863
1864         a.full = dfixed_const(1000);
1865         disp_clk.full = dfixed_const(wm->disp_clk);
1866         disp_clk.full = dfixed_div(disp_clk, a);
1867         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
1868         b1.full = dfixed_mul(a, disp_clk);
1869
1870         a.full = dfixed_const(1000);
1871         sclk.full = dfixed_const(wm->sclk);
1872         sclk.full = dfixed_div(sclk, a);
1873         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
1874         b2.full = dfixed_mul(a, sclk);
1875
1876         a.full = dfixed_const(10);
1877         disp_clk_request_efficiency.full = dfixed_const(8);
1878         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
1879
1880         min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
1881
1882         a.full = dfixed_const(min_bandwidth);
1883         bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
1884
1885         return dfixed_trunc(bandwidth);
1886 }
1887
1888 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
1889 {
1890         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
1891         u32 dram_bandwidth = dce6_dram_bandwidth(wm);
1892         u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
1893         u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
1894
1895         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
1896 }
1897
1898 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
1899 {
1900         /* Calculate the display mode Average Bandwidth
1901          * DisplayMode should contain the source and destination dimensions,
1902          * timing, etc.
1903          */
1904         fixed20_12 bpp;
1905         fixed20_12 line_time;
1906         fixed20_12 src_width;
1907         fixed20_12 bandwidth;
1908         fixed20_12 a;
1909
1910         a.full = dfixed_const(1000);
1911         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
1912         line_time.full = dfixed_div(line_time, a);
1913         bpp.full = dfixed_const(wm->bytes_per_pixel);
1914         src_width.full = dfixed_const(wm->src_width);
1915         bandwidth.full = dfixed_mul(src_width, bpp);
1916         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
1917         bandwidth.full = dfixed_div(bandwidth, line_time);
1918
1919         return dfixed_trunc(bandwidth);
1920 }
1921
/* Compute the latency watermark in ns for this head: the worst-case time
 * the display can wait for data, plus any line-fill overrun.
 */
static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce6_available_bandwidth(wm);
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	/* no active heads: nothing to hide latency for */
	if (wm->num_heads == 0)
		return 0;

	/* heavy downscaling, many scaler taps, or interlace need 4 source
	 * lines per destination line; otherwise 2 are enough
	 */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* this head's share of the available bandwidth */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	/* dmif buffer drain limit: dmif_size over the time to cover
	 * mc_latency + 512 display clocks
	 */
	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	/* line buffer fill is also bounded by disp_clk * bytes_per_pixel */
	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	lb_fill_bw = min(tmp, dfixed_trunc(b));

	/* time to fill the source lines needed for one destination line */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* if the line fills within the active period the latency alone is
	 * the watermark; otherwise add the overrun
	 */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
1984
1985 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
1986 {
1987         if (dce6_average_bandwidth(wm) <=
1988             (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
1989                 return true;
1990         else
1991                 return false;
1992 };
1993
1994 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
1995 {
1996         if (dce6_average_bandwidth(wm) <=
1997             (dce6_available_bandwidth(wm) / wm->num_heads))
1998                 return true;
1999         else
2000                 return false;
2001 };
2002
2003 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2004 {
2005         u32 lb_partitions = wm->lb_size / wm->src_width;
2006         u32 line_time = wm->active_time + wm->blank_time;
2007         u32 latency_tolerant_lines;
2008         u32 latency_hiding;
2009         fixed20_12 a;
2010
2011         a.full = dfixed_const(1);
2012         if (wm->vsc.full > a.full)
2013                 latency_tolerant_lines = 1;
2014         else {
2015                 if (lb_partitions <= (wm->vtaps + 1))
2016                         latency_tolerant_lines = 1;
2017                 else
2018                         latency_tolerant_lines = 2;
2019         }
2020
2021         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2022
2023         if (dce6_latency_watermark(wm) <= latency_hiding)
2024                 return true;
2025         else
2026                 return false;
2027 }
2028
/**
 * dce6_program_watermarks - program display watermarks for one crtc
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: the crtc to program
 * @lb_size: line buffer size allocated to this crtc
 * @num_heads: number of active display heads
 *
 * Builds dce6_wm_params for both the high and low clock levels,
 * computes the latency watermarks and priority marks, and programs
 * them into the DPG arbitration/latency and PRIORITY_A/B registers
 * for this crtc.  The resulting values are also cached on the crtc
 * for later use by DPM.
 */
static void dce6_program_watermarks(struct radeon_device *rdev,
					 struct radeon_crtc *radeon_crtc,
					 u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce6_wm_params wm_low, wm_high;
	u32 dram_channels;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 priority_a_mark = 0, priority_b_mark = 0;
	u32 priority_a_cnt = PRIORITY_OFF;
	u32 priority_b_cnt = PRIORITY_OFF;
	u32 tmp, arb_control3;
	fixed20_12 a, b, c;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		pixel_period = 1000000 / (u32)mode->clock;
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
		priority_a_cnt = 0;
		priority_b_cnt = 0;

		if (rdev->family == CHIP_ARUBA)
			dram_channels = evergreen_get_number_of_dram_channels(rdev);
		else
			dram_channels = si_get_number_of_dram_channels(rdev);

		/* watermark for high clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			wm_high.yclk =
				radeon_dpm_get_mclk(rdev, false) * 10;
			wm_high.sclk =
				radeon_dpm_get_sclk(rdev, false) * 10;
		} else {
			wm_high.yclk = rdev->pm.current_mclk * 10;
			wm_high.sclk = rdev->pm.current_sclk * 10;
		}

		wm_high.disp_clk = mode->clock;
		wm_high.src_width = mode->crtc_hdisplay;
		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
		wm_high.blank_time = line_time - wm_high.active_time;
		wm_high.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_high.interlaced = true;
		wm_high.vsc = radeon_crtc->vsc;
		wm_high.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_high.vtaps = 2;
		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_high.lb_size = lb_size;
		wm_high.dram_channels = dram_channels;
		wm_high.num_heads = num_heads;

		/* watermark for low clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			wm_low.yclk =
				radeon_dpm_get_mclk(rdev, true) * 10;
			wm_low.sclk =
				radeon_dpm_get_sclk(rdev, true) * 10;
		} else {
			wm_low.yclk = rdev->pm.current_mclk * 10;
			wm_low.sclk = rdev->pm.current_sclk * 10;
		}

		wm_low.disp_clk = mode->clock;
		wm_low.src_width = mode->crtc_hdisplay;
		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
		wm_low.blank_time = line_time - wm_low.active_time;
		wm_low.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_low.interlaced = true;
		wm_low.vsc = radeon_crtc->vsc;
		wm_low.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_low.vtaps = 2;
		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_low.lb_size = lb_size;
		wm_low.dram_channels = dram_channels;
		wm_low.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
		/* set for low clocks */
		latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
		    !dce6_check_latency_hiding(&wm_high) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
		    !dce6_check_latency_hiding(&wm_low) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}

		/* priority mark A: watermark_a * pixel clock * hsc / 16 */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_a);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_a_mark = dfixed_trunc(c);
		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;

		/* priority mark B: same computation with watermark_b */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_b);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_b_mark = dfixed_trunc(c);
		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
	}

	/* select wm A */
	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp = arb_control3;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);

	/* write the priority marks */
	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);

	/* save values for DPM */
	radeon_crtc->line_time = line_time;
	radeon_crtc->wm_high = latency_watermark_a;
	radeon_crtc->wm_low = latency_watermark_b;
}
2188
2189 void dce6_bandwidth_update(struct radeon_device *rdev)
2190 {
2191         struct drm_display_mode *mode0 = NULL;
2192         struct drm_display_mode *mode1 = NULL;
2193         u32 num_heads = 0, lb_size;
2194         int i;
2195
2196         radeon_update_display_priority(rdev);
2197
2198         for (i = 0; i < rdev->num_crtc; i++) {
2199                 if (rdev->mode_info.crtcs[i]->base.enabled)
2200                         num_heads++;
2201         }
2202         for (i = 0; i < rdev->num_crtc; i += 2) {
2203                 mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2204                 mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2205                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2206                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2207                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2208                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2209         }
2210 }
2211
2212 /*
2213  * Core functions
2214  */
2215 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2216 {
2217         const u32 num_tile_mode_states = 32;
2218         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2219
2220         switch (rdev->config.si.mem_row_size_in_kb) {
2221         case 1:
2222                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2223                 break;
2224         case 2:
2225         default:
2226                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2227                 break;
2228         case 4:
2229                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2230                 break;
2231         }
2232
2233         if ((rdev->family == CHIP_TAHITI) ||
2234             (rdev->family == CHIP_PITCAIRN)) {
2235                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2236                         switch (reg_offset) {
2237                         case 0:  /* non-AA compressed depth or any compressed stencil */
2238                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2239                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2240                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2241                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2242                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2243                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2244                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2245                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2246                                 break;
2247                         case 1:  /* 2xAA/4xAA compressed depth only */
2248                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2249                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2250                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2251                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2252                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2253                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2254                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2255                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2256                                 break;
2257                         case 2:  /* 8xAA compressed depth only */
2258                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2259                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2260                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2261                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2262                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2263                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2264                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2265                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2266                                 break;
2267                         case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2268                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2269                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2270                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2271                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2272                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2273                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2274                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2275                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2276                                 break;
2277                         case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2278                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2279                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2280                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2281                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2282                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2283                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2284                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2285                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2286                                 break;
2287                         case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2288                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2289                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2290                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2291                                                  TILE_SPLIT(split_equal_to_row_size) |
2292                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2293                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2294                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2295                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2296                                 break;
2297                         case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2298                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2299                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2300                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2301                                                  TILE_SPLIT(split_equal_to_row_size) |
2302                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2303                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2304                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2305                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2306                                 break;
2307                         case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2308                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2309                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2310                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2311                                                  TILE_SPLIT(split_equal_to_row_size) |
2312                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2313                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2314                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2315                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2316                                 break;
2317                         case 8:  /* 1D and 1D Array Surfaces */
2318                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2319                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2320                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2321                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2322                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2323                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2324                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2325                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2326                                 break;
2327                         case 9:  /* Displayable maps. */
2328                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2329                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2330                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2331                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2332                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2333                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2334                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2335                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2336                                 break;
2337                         case 10:  /* Display 8bpp. */
2338                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2339                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2340                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2341                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2342                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2343                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2344                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2345                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2346                                 break;
2347                         case 11:  /* Display 16bpp. */
2348                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2349                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2350                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2351                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2352                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2353                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2354                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2355                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2356                                 break;
2357                         case 12:  /* Display 32bpp. */
2358                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2359                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2360                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2361                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2362                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2363                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2364                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2365                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2366                                 break;
2367                         case 13:  /* Thin. */
2368                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2369                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2370                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2371                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2372                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2373                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2374                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2375                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2376                                 break;
2377                         case 14:  /* Thin 8 bpp. */
2378                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2379                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2380                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2381                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2382                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2383                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2384                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2385                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2386                                 break;
2387                         case 15:  /* Thin 16 bpp. */
2388                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2389                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2390                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2391                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2392                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2393                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2394                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2395                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2396                                 break;
2397                         case 16:  /* Thin 32 bpp. */
2398                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2399                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2400                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2401                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2402                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2403                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2404                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2405                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2406                                 break;
2407                         case 17:  /* Thin 64 bpp. */
2408                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2409                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2410                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2411                                                  TILE_SPLIT(split_equal_to_row_size) |
2412                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2413                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2414                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2415                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2416                                 break;
2417                         case 21:  /* 8 bpp PRT. */
2418                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2419                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2420                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2421                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2422                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2423                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2424                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2425                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2426                                 break;
2427                         case 22:  /* 16 bpp PRT */
2428                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2429                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2430                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2431                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2432                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2433                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2434                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2435                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2436                                 break;
2437                         case 23:  /* 32 bpp PRT */
2438                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2439                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2440                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2441                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2442                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2443                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2444                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2445                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2446                                 break;
2447                         case 24:  /* 64 bpp PRT */
2448                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2449                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2450                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2451                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2452                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2453                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2454                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2455                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2456                                 break;
2457                         case 25:  /* 128 bpp PRT */
2458                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2459                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2460                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2461                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2462                                                  NUM_BANKS(ADDR_SURF_8_BANK) |
2463                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2464                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2465                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2466                                 break;
2467                         default:
2468                                 gb_tile_moden = 0;
2469                                 break;
2470                         }
2471                         rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2472                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2473                 }
2474         } else if ((rdev->family == CHIP_VERDE) ||
2475                    (rdev->family == CHIP_OLAND) ||
2476                    (rdev->family == CHIP_HAINAN)) {
2477                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2478                         switch (reg_offset) {
2479                         case 0:  /* non-AA compressed depth or any compressed stencil */
2480                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2481                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2482                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2483                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2484                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2485                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2486                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2487                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2488                                 break;
2489                         case 1:  /* 2xAA/4xAA compressed depth only */
2490                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2491                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2492                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2493                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2494                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2495                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2496                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2497                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2498                                 break;
2499                         case 2:  /* 8xAA compressed depth only */
2500                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2501                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2502                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2503                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2504                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2505                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2506                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2507                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2508                                 break;
2509                         case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2510                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2511                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2512                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2513                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2514                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2515                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2516                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2517                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2518                                 break;
2519                         case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2520                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2521                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2522                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2523                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2524                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2525                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2526                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2527                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2528                                 break;
2529                         case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2530                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2531                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2532                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2533                                                  TILE_SPLIT(split_equal_to_row_size) |
2534                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2535                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2536                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2537                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2538                                 break;
2539                         case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2540                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2541                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2542                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2543                                                  TILE_SPLIT(split_equal_to_row_size) |
2544                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2545                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2546                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2547                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2548                                 break;
2549                         case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2550                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2551                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2552                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2553                                                  TILE_SPLIT(split_equal_to_row_size) |
2554                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2555                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2556                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2557                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2558                                 break;
2559                         case 8:  /* 1D and 1D Array Surfaces */
2560                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2561                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2562                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2563                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2564                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2565                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2566                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2567                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2568                                 break;
2569                         case 9:  /* Displayable maps. */
2570                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2571                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2572                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2573                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2574                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2575                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2576                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2577                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2578                                 break;
2579                         case 10:  /* Display 8bpp. */
2580                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2581                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2582                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2583                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2584                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2585                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2586                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2587                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2588                                 break;
2589                         case 11:  /* Display 16bpp. */
2590                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2591                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2592                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2593                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2594                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2595                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2596                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2597                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2598                                 break;
2599                         case 12:  /* Display 32bpp. */
2600                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2601                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2602                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2603                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2604                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2605                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2606                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2607                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2608                                 break;
2609                         case 13:  /* Thin. */
2610                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2611                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2612                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2613                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2614                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2615                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2616                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2617                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2618                                 break;
2619                         case 14:  /* Thin 8 bpp. */
2620                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2621                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2622                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2623                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2624                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2625                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2626                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2627                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2628                                 break;
2629                         case 15:  /* Thin 16 bpp. */
2630                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2631                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2632                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2633                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2634                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2635                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2636                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2637                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2638                                 break;
2639                         case 16:  /* Thin 32 bpp. */
2640                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2641                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2642                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2643                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2644                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2645                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2646                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2647                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2648                                 break;
2649                         case 17:  /* Thin 64 bpp. */
2650                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2651                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2652                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2653                                                  TILE_SPLIT(split_equal_to_row_size) |
2654                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2655                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2656                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2657                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2658                                 break;
2659                         case 21:  /* 8 bpp PRT. */
2660                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2661                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2662                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2663                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2664                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2665                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2666                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2667                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2668                                 break;
2669                         case 22:  /* 16 bpp PRT */
2670                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2671                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2672                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2673                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2674                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2675                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2676                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2677                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2678                                 break;
2679                         case 23:  /* 32 bpp PRT */
2680                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2681                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2682                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2683                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2684                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2685                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2686                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2687                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2688                                 break;
2689                         case 24:  /* 64 bpp PRT */
2690                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2691                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2692                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2693                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2694                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2695                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2696                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2697                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2698                                 break;
2699                         case 25:  /* 128 bpp PRT */
2700                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2701                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2702                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2703                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2704                                                  NUM_BANKS(ADDR_SURF_8_BANK) |
2705                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2706                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2707                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2708                                 break;
2709                         default:
2710                                 gb_tile_moden = 0;
2711                                 break;
2712                         }
2713                         rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2714                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2715                 }
2716         } else
2717                 DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2718 }
2719
2720 static void si_select_se_sh(struct radeon_device *rdev,
2721                             u32 se_num, u32 sh_num)
2722 {
2723         u32 data = INSTANCE_BROADCAST_WRITES;
2724
2725         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2726                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2727         else if (se_num == 0xffffffff)
2728                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2729         else if (sh_num == 0xffffffff)
2730                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2731         else
2732                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2733         WREG32(GRBM_GFX_INDEX, data);
2734 }
2735
2736 static u32 si_create_bitmask(u32 bit_width)
2737 {
2738         u32 i, mask = 0;
2739
2740         for (i = 0; i < bit_width; i++) {
2741                 mask <<= 1;
2742                 mask |= 1;
2743         }
2744         return mask;
2745 }
2746
/* Return a bitmask of the enabled compute units in the currently
 * selected shader array (select with si_select_se_sh() first).
 * Combines the hardware (CC_*) and driver/user (GC_USER_*)
 * inactive-CU masks, inverts them and truncates to cu_per_sh bits.
 */
static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
{
	u32 data, mask;

	data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
	/* bit 0 presumably marks the hw inactive-CU mask as valid —
	 * only honour INACTIVE_CUS when it is set (TODO confirm vs.
	 * the SI register spec) */
	if (data & 1)
		data &= INACTIVE_CUS_MASK;
	else
		data = 0;
	/* merge in the user-configured inactive CUs */
	data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);

	data >>= INACTIVE_CUS_SHIFT;

	mask = si_create_bitmask(cu_per_sh);

	/* inactive bits inverted -> active bits, limited to cu_per_sh CUs */
	return ~data & mask;
}
2764
2765 static void si_setup_spi(struct radeon_device *rdev,
2766                          u32 se_num, u32 sh_per_se,
2767                          u32 cu_per_sh)
2768 {
2769         int i, j, k;
2770         u32 data, mask, active_cu;
2771
2772         for (i = 0; i < se_num; i++) {
2773                 for (j = 0; j < sh_per_se; j++) {
2774                         si_select_se_sh(rdev, i, j);
2775                         data = RREG32(SPI_STATIC_THREAD_MGMT_3);
2776                         active_cu = si_get_cu_enabled(rdev, cu_per_sh);
2777
2778                         mask = 1;
2779                         for (k = 0; k < 16; k++) {
2780                                 mask <<= k;
2781                                 if (active_cu & mask) {
2782                                         data &= ~mask;
2783                                         WREG32(SPI_STATIC_THREAD_MGMT_3, data);
2784                                         break;
2785                                 }
2786                         }
2787                 }
2788         }
2789         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2790 }
2791
/* Return a bitmask of the disabled render backends (RBs) for the
 * currently selected shader array.  Merges the hardware
 * (CC_RB_BACKEND_DISABLE) and user (GC_USER_RB_BACKEND_DISABLE)
 * disable masks, truncated to the number of RBs per shader array.
 */
static u32 si_get_rb_disabled(struct radeon_device *rdev,
			      u32 max_rb_num, u32 se_num,
			      u32 sh_per_se)
{
	u32 data, mask;

	data = RREG32(CC_RB_BACKEND_DISABLE);
	/* bit 0 presumably marks the hw disable mask as valid — only
	 * honour BACKEND_DISABLE when it is set (TODO confirm vs. the
	 * SI register spec) */
	if (data & 1)
		data &= BACKEND_DISABLE_MASK;
	else
		data = 0;
	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);

	data >>= BACKEND_DISABLE_SHIFT;

	/* limit to the RBs belonging to one shader array */
	mask = si_create_bitmask(max_rb_num / se_num / sh_per_se);

	return data & mask;
}
2811
/* Set up the render backends: collect the per-SE/SH disabled-RB bits
 * into one packed bitmap, derive the enabled-RB bitmap from it, and
 * program PA_SC_RASTER_CONFIG for each shader engine accordingly.
 * Leaves GRBM_GFX_INDEX in broadcast mode on return.
 */
static void si_setup_rb(struct radeon_device *rdev,
			u32 se_num, u32 sh_per_se,
			u32 max_rb_num)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* gather disabled-RB bits from every SE/SH into a packed bitmap,
	 * TAHITI_RB_BITMAP_WIDTH_PER_SH bits per shader array */
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			si_select_se_sh(rdev, i, j);
			data = si_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* invert: every RB not marked disabled is enabled */
	mask = 1;
	for (i = 0; i < max_rb_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	/* program the raster config per SE; two bits of enabled_rbs are
	 * consumed per shader array to pick an RB mapping */
	for (i = 0; i < se_num; i++) {
		si_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 1: /* only the first RB of the pair enabled */
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2: /* only the second RB of the pair enabled */
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3: /* both enabled (also the fallback) */
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
2859
/* One-time core graphics setup for SI ASICs: fill rdev->config.si
 * with the per-family limits, program the address/tiling
 * configuration registers, initialize the render backends and SPI CU
 * masks, and apply the HW defaults for the 3D engine.
 */
static void si_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = 0;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 sx_debug_1;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/* per-family shader/backend limits and golden GB_ADDR_CONFIG */
	switch (rdev->family) {
	case CHIP_TAHITI:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 12;
		rdev->config.si.max_cu_per_sh = 8;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 12;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_PITCAIRN:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 8;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 8;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_VERDE:
	default:
		/* unknown families intentionally fall back to VERDE values */
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_OLAND:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 6;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 2;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_HAINAN:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 1;
		rdev->config.si.max_texture_channel_caches = 2;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

	evergreen_fix_pci_max_read_req_size(rdev);

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	/* NOTE(review): mc_shared_chmap is read here but never used below */
	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
	rdev->config.si.mem_max_burst_length_bytes = 256;
	/* derive DRAM row size (in KB) from the MC column count, capped at 4 */
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.si.mem_row_size_in_kb > 4)
		rdev->config.si.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.si.shader_engine_tile_size = 32;
	rdev->config.si.num_gpus = 1;
	rdev->config.si.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.si.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.si.tile_config = 0;
	switch (rdev->config.si.num_tile_pipes) {
	case 1:
		rdev->config.si.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.si.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.si.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.si.tile_config |= (3 << 0);
		break;
	}
	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
	case 0: /* four banks */
		rdev->config.si.tile_config |= 0 << 4;
		break;
	case 1: /* eight banks */
		rdev->config.si.tile_config |= 1 << 4;
		break;
	case 2: /* sixteen banks */
	default:
		rdev->config.si.tile_config |= 2 << 4;
		break;
	}
	rdev->config.si.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.si.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* broadcast the address config to every client that needs it */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
	if (rdev->has_uvd) {
		WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
	}

	si_tiling_mode_table_init(rdev);

	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
		    rdev->config.si.max_sh_per_se,
		    rdev->config.si.max_backends_per_se);

	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
		     rdev->config.si.max_sh_per_se,
		     rdev->config.si.max_cu_per_sh);


	/* set HW defaults for 3D engine */
	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
				     ROQ_IB2_START(0x2b)));
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	/* read-modify-write with no modification; presumably forces the
	 * register to latch its reset value — TODO confirm intent */
	sx_debug_1 = RREG32(SX_DEBUG_1);
	WREG32(SX_DEBUG_1, sx_debug_1);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	/* disable the CB performance counters */
	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
	WREG32(CB_PERFCOUNTER3_SELECT1, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));

	/* let the register writes settle */
	udelay(50);
}
3116
/*
 * GPU scratch register helper functions.
 */
3120 static void si_scratch_init(struct radeon_device *rdev)
3121 {
3122         int i;
3123
3124         rdev->scratch.num_reg = 7;
3125         rdev->scratch.reg_base = SCRATCH_REG0;
3126         for (i = 0; i < rdev->scratch.num_reg; i++) {
3127                 rdev->scratch.free[i] = true;
3128                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3129         }
3130 }
3131
/**
 * si_fence_ring_emit - emit a fence on the gfx ring (SI)
 * @rdev: radeon_device pointer
 * @fence: fence to emit
 *
 * Emits a cache flush over GART followed by an EVENT_WRITE_EOP packet
 * that writes the fence sequence number to the fence GPU address and
 * raises an interrupt when the pipeline reaches it.
 */
void si_fence_ring_emit(struct radeon_device *rdev,
			struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* flush read cache over gart */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
			  PACKET3_TC_ACTION_ENA |
			  PACKET3_SH_KCACHE_ACTION_ENA |
			  PACKET3_SH_ICACHE_ACTION_ENA);
	radeon_ring_write(ring, 0xFFFFFFFF);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 10); /* poll interval */
	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
	radeon_ring_write(ring, addr & 0xffffffff);
	/* high address bits plus data-select (write seq) and int-select */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3158
/*
 * Indirect buffer (IB) submission.
 */
/**
 * si_ring_ib_execute - emit an indirect buffer on a CP ring (SI)
 * @rdev: radeon_device pointer
 * @ib: the IB to schedule
 *
 * Writes an INDIRECT_BUFFER (or INDIRECT_BUFFER_CONST) packet pointing
 * at the IB, optionally recording where the read pointer will end up,
 * and for normal IBs flushes the read caches over GART afterwards.
 */
void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 header;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* 3 dwords for this write + 4 for the IB packet +
			 * 8 for the cache flush below = final rptr */
			next_rptr = ring->wptr + 3 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_CONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* 5-dword WRITE_DATA instead of the 3-dword
			 * SET_CONFIG_REG, hence the different offset */
			next_rptr = ring->wptr + 5 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, (1 << 8));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	/* length plus the VM id this IB executes under (0 = no VM) */
	radeon_ring_write(ring, ib->length_dw |
			  (ib->vm ? (ib->vm->id << 24) : 0));

	if (!ib->is_const_ib) {
		/* flush read cache over gart for this vmid */
		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
		radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
				  PACKET3_TC_ACTION_ENA |
				  PACKET3_SH_KCACHE_ACTION_ENA |
				  PACKET3_SH_ICACHE_ACTION_ENA);
		radeon_ring_write(ring, 0xFFFFFFFF);
		radeon_ring_write(ring, 0);
		radeon_ring_write(ring, 10); /* poll interval */
	}
}
3218
3219 /*
3220  * CP.
3221  */
3222 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3223 {
3224         if (enable)
3225                 WREG32(CP_ME_CNTL, 0);
3226         else {
3227                 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3228                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3229                 WREG32(SCRATCH_UMSK, 0);
3230                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3231                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3232                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3233         }
3234         udelay(50);
3235 }
3236
3237 static int si_cp_load_microcode(struct radeon_device *rdev)
3238 {
3239         const __be32 *fw_data;
3240         int i;
3241
3242         if (!rdev->me_fw || !rdev->pfp_fw)
3243                 return -EINVAL;
3244
3245         si_cp_enable(rdev, false);
3246
3247         /* PFP */
3248         fw_data = (const __be32 *)rdev->pfp_fw->data;
3249         WREG32(CP_PFP_UCODE_ADDR, 0);
3250         for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3251                 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3252         WREG32(CP_PFP_UCODE_ADDR, 0);
3253
3254         /* CE */
3255         fw_data = (const __be32 *)rdev->ce_fw->data;
3256         WREG32(CP_CE_UCODE_ADDR, 0);
3257         for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3258                 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3259         WREG32(CP_CE_UCODE_ADDR, 0);
3260
3261         /* ME */
3262         fw_data = (const __be32 *)rdev->me_fw->data;
3263         WREG32(CP_ME_RAM_WADDR, 0);
3264         for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3265                 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3266         WREG32(CP_ME_RAM_WADDR, 0);
3267
3268         WREG32(CP_PFP_UCODE_ADDR, 0);
3269         WREG32(CP_CE_UCODE_ADDR, 0);
3270         WREG32(CP_ME_RAM_WADDR, 0);
3271         WREG32(CP_ME_RAM_RADDR, 0);
3272         return 0;
3273 }
3274
3275 static int si_cp_start(struct radeon_device *rdev)
3276 {
3277         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3278         int r, i;
3279
3280         r = radeon_ring_lock(rdev, ring, 7 + 4);
3281         if (r) {
3282                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3283                 return r;
3284         }
3285         /* init the CP */
3286         radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3287         radeon_ring_write(ring, 0x1);
3288         radeon_ring_write(ring, 0x0);
3289         radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3290         radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3291         radeon_ring_write(ring, 0);
3292         radeon_ring_write(ring, 0);
3293
3294         /* init the CE partitions */
3295         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3296         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3297         radeon_ring_write(ring, 0xc000);
3298         radeon_ring_write(ring, 0xe000);
3299         radeon_ring_unlock_commit(rdev, ring);
3300
3301         si_cp_enable(rdev, true);
3302
3303         r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3304         if (r) {
3305                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3306                 return r;
3307         }
3308
3309         /* setup clear context state */
3310         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3311         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3312
3313         for (i = 0; i < si_default_size; i++)
3314                 radeon_ring_write(ring, si_default_state[i]);
3315
3316         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3317         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3318
3319         /* set clear context state */
3320         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3321         radeon_ring_write(ring, 0);
3322
3323         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3324         radeon_ring_write(ring, 0x00000316);
3325         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3326         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3327
3328         radeon_ring_unlock_commit(rdev, ring);
3329
3330         for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3331                 ring = &rdev->ring[i];
3332                 r = radeon_ring_lock(rdev, ring, 2);
3333
3334                 /* clear the compute context state */
3335                 radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3336                 radeon_ring_write(ring, 0);
3337
3338                 radeon_ring_unlock_commit(rdev, ring);
3339         }
3340
3341         return 0;
3342 }
3343
3344 static void si_cp_fini(struct radeon_device *rdev)
3345 {
3346         struct radeon_ring *ring;
3347         si_cp_enable(rdev, false);
3348
3349         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3350         radeon_ring_fini(rdev, ring);
3351         radeon_scratch_free(rdev, ring->rptr_save_reg);
3352
3353         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3354         radeon_ring_fini(rdev, ring);
3355         radeon_scratch_free(rdev, ring->rptr_save_reg);
3356
3357         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3358         radeon_ring_fini(rdev, ring);
3359         radeon_scratch_free(rdev, ring->rptr_save_reg);
3360 }
3361
/*
 * si_cp_resume - soft-reset the CP and bring up all three ring buffers
 *
 * Soft-resets the CP block (and the dependent PA/VGT/SPI/SX blocks),
 * then programs the ring-buffer registers (size, read/write pointers,
 * writeback rptr addresses, base address) for the gfx ring (RB0) and
 * the two compute rings (RB1/RB2), starts the CP via si_cp_start(),
 * and ring-tests each ring.
 * Returns 0 on success; a gfx ring-test failure is fatal (all rings
 * marked not ready), a compute ring-test failure only marks that ring.
 */
static int si_cp_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	int r;

	/* Reset cp; if cp is reset, then PA, SH, VGT also need to be reset */
	WREG32(GRBM_SOFT_RESET, (SOFT_RESET_CP |
				 SOFT_RESET_PA |
				 SOFT_RESET_VGT |
				 SOFT_RESET_SPI |
				 SOFT_RESET_SX));
	RREG32(GRBM_SOFT_RESET); /* read back to post the write */
	mdelay(15);
	WREG32(GRBM_SOFT_RESET, 0);
	RREG32(GRBM_SOFT_RESET);

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	WREG32(CP_DEBUG, 0);
	/* scratch-register shadow base (256-byte aligned) for CP writeback */
	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = drm_order(ring->ring_size / 8);
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	if (rdev->wb.enabled)
		WREG32(SCRATCH_UMSK, 0xff);
	else {
		/* writeback disabled: don't let the CP update rptr in memory */
		tmp |= RB_NO_UPDATE;
		WREG32(SCRATCH_UMSK, 0);
	}

	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB0_RPTR);

	/* ring1  - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	rb_bufsz = drm_order(ring->ring_size / 8);
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB1_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB1_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB1_CNTL, tmp);

	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB1_RPTR);

	/* ring2 - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	rb_bufsz = drm_order(ring->ring_size / 8);
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB2_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB2_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB2_CNTL, tmp);

	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB2_RPTR);

	/* start the rings */
	si_cp_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		/* gfx ring failure is fatal for all three rings */
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
		return r;
	}
	/* compute ring failures are non-fatal: just mark the ring */
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
	}
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	}

	return 0;
}
3497
3498 static u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
3499 {
3500         u32 reset_mask = 0;
3501         u32 tmp;
3502
3503         /* GRBM_STATUS */
3504         tmp = RREG32(GRBM_STATUS);
3505         if (tmp & (PA_BUSY | SC_BUSY |
3506                    BCI_BUSY | SX_BUSY |
3507                    TA_BUSY | VGT_BUSY |
3508                    DB_BUSY | CB_BUSY |
3509                    GDS_BUSY | SPI_BUSY |
3510                    IA_BUSY | IA_BUSY_NO_DMA))
3511                 reset_mask |= RADEON_RESET_GFX;
3512
3513         if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
3514                    CP_BUSY | CP_COHERENCY_BUSY))
3515                 reset_mask |= RADEON_RESET_CP;
3516
3517         if (tmp & GRBM_EE_BUSY)
3518                 reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;
3519
3520         /* GRBM_STATUS2 */
3521         tmp = RREG32(GRBM_STATUS2);
3522         if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
3523                 reset_mask |= RADEON_RESET_RLC;
3524
3525         /* DMA_STATUS_REG 0 */
3526         tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
3527         if (!(tmp & DMA_IDLE))
3528                 reset_mask |= RADEON_RESET_DMA;
3529
3530         /* DMA_STATUS_REG 1 */
3531         tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
3532         if (!(tmp & DMA_IDLE))
3533                 reset_mask |= RADEON_RESET_DMA1;
3534
3535         /* SRBM_STATUS2 */
3536         tmp = RREG32(SRBM_STATUS2);
3537         if (tmp & DMA_BUSY)
3538                 reset_mask |= RADEON_RESET_DMA;
3539
3540         if (tmp & DMA1_BUSY)
3541                 reset_mask |= RADEON_RESET_DMA1;
3542
3543         /* SRBM_STATUS */
3544         tmp = RREG32(SRBM_STATUS);
3545
3546         if (tmp & IH_BUSY)
3547                 reset_mask |= RADEON_RESET_IH;
3548
3549         if (tmp & SEM_BUSY)
3550                 reset_mask |= RADEON_RESET_SEM;
3551
3552         if (tmp & GRBM_RQ_PENDING)
3553                 reset_mask |= RADEON_RESET_GRBM;
3554
3555         if (tmp & VMC_BUSY)
3556                 reset_mask |= RADEON_RESET_VMC;
3557
3558         if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3559                    MCC_BUSY | MCD_BUSY))
3560                 reset_mask |= RADEON_RESET_MC;
3561
3562         if (evergreen_is_display_hung(rdev))
3563                 reset_mask |= RADEON_RESET_DISPLAY;
3564
3565         /* VM_L2_STATUS */
3566         tmp = RREG32(VM_L2_STATUS);
3567         if (tmp & L2_BUSY)
3568                 reset_mask |= RADEON_RESET_VMC;
3569
3570         /* Skip MC reset as it's mostly likely not hung, just busy */
3571         if (reset_mask & RADEON_RESET_MC) {
3572                 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3573                 reset_mask &= ~RADEON_RESET_MC;
3574         }
3575
3576         return reset_mask;
3577 }
3578
/*
 * si_gpu_soft_reset - soft-reset the GPU blocks named in reset_mask
 *
 * Logs current status, halts the CP and any DMA engines that will be
 * reset, stops the MC, then pulses the per-block bits in
 * GRBM_SOFT_RESET / SRBM_SOFT_RESET before resuming the MC.
 * The exact ordering (halt engines -> stop MC -> pulse reset -> resume
 * MC) is required by the hardware; do not reorder.
 */
static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	/* nothing hung, nothing to do */
	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	evergreen_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* dma0: stop the ring buffer before resetting the engine */
		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* dma1 */
		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	}

	udelay(50);

	/* quiesce the memory controller while the blocks are reset */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the RADEON_RESET_* mask into GRBM/SRBM reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
		grbm_soft_reset = SOFT_RESET_CB |
			SOFT_RESET_DB |
			SOFT_RESET_GDS |
			SOFT_RESET_PA |
			SOFT_RESET_SC |
			SOFT_RESET_BCI |
			SOFT_RESET_SPI |
			SOFT_RESET_SX |
			SOFT_RESET_TC |
			SOFT_RESET_TA |
			SOFT_RESET_VGT |
			SOFT_RESET_IA;
	}

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_DMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_DMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	if (reset_mask & RADEON_RESET_MC)
		srbm_soft_reset |= SOFT_RESET_MC;

	/* pulse the GRBM reset bits: set, settle, clear; the reads after
	 * each write post/flush the write */
	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	/* pulse the SRBM reset bits the same way */
	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	evergreen_print_gpu_status_regs(rdev);
}
3703
3704 int si_asic_reset(struct radeon_device *rdev)
3705 {
3706         u32 reset_mask;
3707
3708         reset_mask = si_gpu_check_soft_reset(rdev);
3709
3710         if (reset_mask)
3711                 r600_set_bios_scratch_engine_hung(rdev, true);
3712
3713         si_gpu_soft_reset(rdev, reset_mask);
3714
3715         reset_mask = si_gpu_check_soft_reset(rdev);
3716
3717         if (!reset_mask)
3718                 r600_set_bios_scratch_engine_hung(rdev, false);
3719
3720         return 0;
3721 }
3722
3723 /**
3724  * si_gfx_is_lockup - Check if the GFX engine is locked up
3725  *
3726  * @rdev: radeon_device pointer
3727  * @ring: radeon_ring structure holding ring information
3728  *
3729  * Check if the GFX engine is locked up.
3730  * Returns true if the engine appears to be locked up, false if not.
3731  */
3732 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3733 {
3734         u32 reset_mask = si_gpu_check_soft_reset(rdev);
3735
3736         if (!(reset_mask & (RADEON_RESET_GFX |
3737                             RADEON_RESET_COMPUTE |
3738                             RADEON_RESET_CP))) {
3739                 radeon_ring_lockup_update(ring);
3740                 return false;
3741         }
3742         /* force CP activities */
3743         radeon_ring_force_activity(rdev, ring);
3744         return radeon_ring_test_lockup(rdev, ring);
3745 }
3746
3747 /**
3748  * si_dma_is_lockup - Check if the DMA engine is locked up
3749  *
3750  * @rdev: radeon_device pointer
3751  * @ring: radeon_ring structure holding ring information
3752  *
3753  * Check if the async DMA engine is locked up.
3754  * Returns true if the engine appears to be locked up, false if not.
3755  */
3756 bool si_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3757 {
3758         u32 reset_mask = si_gpu_check_soft_reset(rdev);
3759         u32 mask;
3760
3761         if (ring->idx == R600_RING_TYPE_DMA_INDEX)
3762                 mask = RADEON_RESET_DMA;
3763         else
3764                 mask = RADEON_RESET_DMA1;
3765
3766         if (!(reset_mask & mask)) {
3767                 radeon_ring_lockup_update(ring);
3768                 return false;
3769         }
3770         /* force ring activities */
3771         radeon_ring_force_activity(rdev, ring);
3772         return radeon_ring_test_lockup(rdev, ring);
3773 }
3774
3775 /* MC */
/*
 * si_mc_program - program the memory controller aperture registers
 *
 * Clears the HDP tiling registers, stops the MC, locks out VGA access,
 * programs the system/FB aperture and AGP registers, then resumes the
 * MC and disables the VGA renderer.  Ordering (stop MC -> reprogram ->
 * resume MC) is required by the hardware.
 */
static void si_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	if (!ASIC_IS_NODCE(rdev))
		/* Lockout access through VGA aperture*/
		WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB location: top 16 bits = end, bottom 16 bits = start, in 16MB units */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* no AGP on SI: program an empty (base > top) AGP aperture */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	if (!ASIC_IS_NODCE(rdev)) {
		/* we need to own VRAM, so turn off the VGA renderer here
		 * to stop it overwriting our objects */
		rv515_vga_render_disable(rdev);
	}
}
3826
3827 void si_vram_gtt_location(struct radeon_device *rdev,
3828                           struct radeon_mc *mc)
3829 {
3830         if (mc->mc_vram_size > 0xFFC0000000ULL) {
3831                 /* leave room for at least 1024M GTT */
3832                 dev_warn(rdev->dev, "limiting VRAM\n");
3833                 mc->real_vram_size = 0xFFC0000000ULL;
3834                 mc->mc_vram_size = 0xFFC0000000ULL;
3835         }
3836         radeon_vram_location(rdev, &rdev->mc, 0);
3837         rdev->mc.gtt_base_align = 0;
3838         radeon_gtt_location(rdev, mc);
3839 }
3840
3841 static int si_mc_init(struct radeon_device *rdev)
3842 {
3843         u32 tmp;
3844         int chansize, numchan;
3845
3846         /* Get VRAM informations */
3847         rdev->mc.vram_is_ddr = true;
3848         tmp = RREG32(MC_ARB_RAMCFG);
3849         if (tmp & CHANSIZE_OVERRIDE) {
3850                 chansize = 16;
3851         } else if (tmp & CHANSIZE_MASK) {
3852                 chansize = 64;
3853         } else {
3854                 chansize = 32;
3855         }
3856         tmp = RREG32(MC_SHARED_CHMAP);
3857         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
3858         case 0:
3859         default:
3860                 numchan = 1;
3861                 break;
3862         case 1:
3863                 numchan = 2;
3864                 break;
3865         case 2:
3866                 numchan = 4;
3867                 break;
3868         case 3:
3869                 numchan = 8;
3870                 break;
3871         case 4:
3872                 numchan = 3;
3873                 break;
3874         case 5:
3875                 numchan = 6;
3876                 break;
3877         case 6:
3878                 numchan = 10;
3879                 break;
3880         case 7:
3881                 numchan = 12;
3882                 break;
3883         case 8:
3884                 numchan = 16;
3885                 break;
3886         }
3887         rdev->mc.vram_width = numchan * chansize;
3888         /* Could aper size report 0 ? */
3889         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
3890         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
3891         /* size in MB on si */
3892         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
3893         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
3894         rdev->mc.visible_vram_size = rdev->mc.aper_size;
3895         si_vram_gtt_location(rdev, &rdev->mc);
3896         radeon_update_bandwidth_info(rdev);
3897
3898         return 0;
3899 }
3900
3901 /*
3902  * GART
3903  */
/*
 * si_pcie_gart_tlb_flush - flush the HDP cache and VM context0 TLB
 *
 * Must be called after GART page-table updates so the hardware sees
 * the new mappings.
 */
void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	WREG32(VM_INVALIDATE_REQUEST, 1);
}
3912
/*
 * si_pcie_gart_enable - set up and enable the PCIE GART
 *
 * Pins the GART table in VRAM, configures the L1 TLB and L2 cache,
 * programs VM context0 as the system GART context, points the page
 * tables for contexts 1-15 at the GART table as placeholders, enables
 * contexts 1-15 with fault reporting, and flushes the TLB.
 * Returns 0 on success, negative error code on failure.
 */
static int si_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	radeon_gart_restore(rdev);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	/* setup context0: covers the GTT aperture, faults go to the
	 * dummy page */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-15 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
	/* Assign the pt base to something valid for now; the pts used for
	 * the VMs are determined by the application and setup and assigned
	 * on the fly in the vm part of radeon_gart.c
	 */
	for (i = 1; i < 16; i++) {
		/* context 1-7 and 8-15 base registers live in two
		 * separate register banks */
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->gart.table_addr >> 12);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->gart.table_addr >> 12);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	si_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
3997
/*
 * si_pcie_gart_disable - disable VM contexts and unpin the GART table
 *
 * Turns off all VM contexts and the L1 TLB / L2 cache translation so
 * the GART table can be safely unpinned.
 */
static void si_pcie_gart_disable(struct radeon_device *rdev)
{
	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	radeon_gart_table_vram_unpin(rdev);
}
4016
/*
 * si_pcie_gart_fini - full GART teardown
 *
 * Disables the GART hardware, frees the table VRAM, and finalizes the
 * GART state.
 */
static void si_pcie_gart_fini(struct radeon_device *rdev)
{
	si_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
4023
4024 /* vm parser */
4025 static bool si_vm_reg_valid(u32 reg)
4026 {
4027         /* context regs are fine */
4028         if (reg >= 0x28000)
4029                 return true;
4030
4031         /* check config regs */
4032         switch (reg) {
4033         case GRBM_GFX_INDEX:
4034         case CP_STRMOUT_CNTL:
4035         case VGT_VTX_VECT_EJECT_REG:
4036         case VGT_CACHE_INVALIDATION:
4037         case VGT_ESGS_RING_SIZE:
4038         case VGT_GSVS_RING_SIZE:
4039         case VGT_GS_VERTEX_REUSE:
4040         case VGT_PRIMITIVE_TYPE:
4041         case VGT_INDEX_TYPE:
4042         case VGT_NUM_INDICES:
4043         case VGT_NUM_INSTANCES:
4044         case VGT_TF_RING_SIZE:
4045         case VGT_HS_OFFCHIP_PARAM:
4046         case VGT_TF_MEMORY_BASE:
4047         case PA_CL_ENHANCE:
4048         case PA_SU_LINE_STIPPLE_VALUE:
4049         case PA_SC_LINE_STIPPLE_STATE:
4050         case PA_SC_ENHANCE:
4051         case SQC_CACHES:
4052         case SPI_STATIC_THREAD_MGMT_1:
4053         case SPI_STATIC_THREAD_MGMT_2:
4054         case SPI_STATIC_THREAD_MGMT_3:
4055         case SPI_PS_MAX_WAVE_ID:
4056         case SPI_CONFIG_CNTL:
4057         case SPI_CONFIG_CNTL_1:
4058         case TA_CNTL_AUX:
4059                 return true;
4060         default:
4061                 DRM_ERROR("Invalid register 0x%x in CS\n", reg);
4062                 return false;
4063         }
4064 }
4065
4066 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
4067                                   u32 *ib, struct radeon_cs_packet *pkt)
4068 {
4069         switch (pkt->opcode) {
4070         case PACKET3_NOP:
4071         case PACKET3_SET_BASE:
4072         case PACKET3_SET_CE_DE_COUNTERS:
4073         case PACKET3_LOAD_CONST_RAM:
4074         case PACKET3_WRITE_CONST_RAM:
4075         case PACKET3_WRITE_CONST_RAM_OFFSET:
4076         case PACKET3_DUMP_CONST_RAM:
4077         case PACKET3_INCREMENT_CE_COUNTER:
4078         case PACKET3_WAIT_ON_DE_COUNTER:
4079         case PACKET3_CE_WRITE:
4080                 break;
4081         default:
4082                 DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
4083                 return -EINVAL;
4084         }
4085         return 0;
4086 }
4087
/**
 * si_vm_packet3_gfx_check - validate a PM4 type-3 packet for a GFX ring VM IB
 *
 * @rdev: radeon_device pointer (unused; kept for symmetry with the other checkers)
 * @ib: the IB dwords
 * @pkt: decoded packet header (idx, opcode, count)
 *
 * VM IBs come from userspace, so any packet that can write a register
 * must have every target register vetted with si_vm_reg_valid().
 * Packets that cannot touch registers pass through unchecked.
 * Returns 0 if the packet is safe, -EINVAL otherwise.
 */
static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
				   u32 *ib, struct radeon_cs_packet *pkt)
{
	u32 idx = pkt->idx + 1;		/* first dword of the packet body */
	u32 idx_value = ib[idx];
	u32 start_reg, end_reg, reg, i;
	u32 command, info;

	switch (pkt->opcode) {
	/* these opcodes cannot write registers - always allowed */
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_INDEX_BUFFER_SIZE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_DRAW_INDIRECT:
	case PACKET3_DRAW_INDEX_INDIRECT:
	case PACKET3_INDEX_BASE:
	case PACKET3_DRAW_INDEX_2:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_INDEX_TYPE:
	case PACKET3_DRAW_INDIRECT_MULTI:
	case PACKET3_DRAW_INDEX_AUTO:
	case PACKET3_DRAW_INDEX_IMMD:
	case PACKET3_NUM_INSTANCES:
	case PACKET3_DRAW_INDEX_MULTI_AUTO:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_DRAW_INDEX_OFFSET_2:
	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
	case PACKET3_MPEG_INDEX:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* dst-sel field == 0: destination is a register */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;	/* dword offset -> byte offset */
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		/* dst-sel field == 0: destination is a register */
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			if (idx_value & 0x10000) {
				/* WR_ONE_ADDR: all dwords hit the same register */
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				/* count includes 2 setup dwords before the payload */
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		/* write-space bit set: write target is a register */
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		/* dst-is-register bit */
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_SET_CONFIG_REG:
		/* the whole register range must stay inside the config window */
		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
		end_reg = 4 * pkt->count + start_reg - 4;
		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
			return -EINVAL;
		}
		for (i = 0; i < pkt->count; i++) {
			reg = start_reg + (4 * i);
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_CP_DMA:
		command = ib[idx + 4];
		info = ib[idx + 1];
		if (command & PACKET3_CP_DMA_CMD_SAS) {
			/* src address space is register */
			if (((info & 0x60000000) >> 29) == 0) {
				start_reg = idx_value << 2;
				if (command & PACKET3_CP_DMA_CMD_SAIC) {
					/* no src increment: a single register */
					reg = start_reg;
					if (!si_vm_reg_valid(reg)) {
						DRM_ERROR("CP DMA Bad SRC register\n");
						return -EINVAL;
					}
				} else {
					/* low 21 bits of command = transfer count */
					for (i = 0; i < (command & 0x1fffff); i++) {
						reg = start_reg + (4 * i);
						if (!si_vm_reg_valid(reg)) {
							DRM_ERROR("CP DMA Bad SRC register\n");
							return -EINVAL;
						}
					}
				}
			}
		}
		if (command & PACKET3_CP_DMA_CMD_DAS) {
			/* dst address space is register */
			if (((info & 0x00300000) >> 20) == 0) {
				start_reg = ib[idx + 2];
				if (command & PACKET3_CP_DMA_CMD_DAIC) {
					/* no dst increment: a single register */
					reg = start_reg;
					if (!si_vm_reg_valid(reg)) {
						DRM_ERROR("CP DMA Bad DST register\n");
						return -EINVAL;
					}
				} else {
					for (i = 0; i < (command & 0x1fffff); i++) {
						reg = start_reg + (4 * i);
						if (!si_vm_reg_valid(reg)) {
							DRM_ERROR("CP DMA Bad DST register\n");
							return -EINVAL;
						}
					}
				}
			}
		}
		break;
	default:
		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4246
/**
 * si_vm_packet3_compute_check - validate a PM4 type-3 packet for a compute VM IB
 *
 * @rdev: radeon_device pointer (unused; kept for symmetry with the other checkers)
 * @ib: the IB dwords
 * @pkt: decoded packet header (idx, opcode, count)
 *
 * Same idea as si_vm_packet3_gfx_check() but with the smaller opcode
 * set permitted on the compute (CP1/CP2) rings: packets that can write
 * a register get each target checked with si_vm_reg_valid().
 * Returns 0 if the packet is safe, -EINVAL otherwise.
 */
static int si_vm_packet3_compute_check(struct radeon_device *rdev,
				       u32 *ib, struct radeon_cs_packet *pkt)
{
	u32 idx = pkt->idx + 1;		/* first dword of the packet body */
	u32 idx_value = ib[idx];
	u32 start_reg, reg, i;

	switch (pkt->opcode) {
	/* these opcodes cannot write registers - always allowed */
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* dst-sel field == 0: destination is a register */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;	/* dword offset -> byte offset */
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		/* dst-sel field == 0: destination is a register */
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			if (idx_value & 0x10000) {
				/* WR_ONE_ADDR: all dwords hit the same register */
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				/* count includes 2 setup dwords before the payload */
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		/* write-space bit set: write target is a register */
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		/* dst-is-register bit */
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	default:
		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4328
/**
 * si_ib_parse - validate a VM IB before submission
 *
 * @rdev: radeon_device pointer
 * @ib: the indirect buffer to validate
 *
 * Walks the IB packet by packet and dispatches each type-3 packet to
 * the CE, GFX or compute checker depending on which ring the IB is
 * destined for.  Type-0 packets (raw register writes) are never
 * allowed from a VM IB.  Returns 0 if the whole IB is acceptable,
 * -EINVAL on the first offending packet.
 */
int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	int ret = 0;
	u32 idx = 0;
	struct radeon_cs_packet pkt;

	do {
		pkt.idx = idx;
		pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
		pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
		pkt.one_reg_wr = 0;
		switch (pkt.type) {
		case RADEON_PACKET_TYPE0:
			dev_err(rdev->dev, "Packet0 not allowed!\n");
			ret = -EINVAL;
			break;
		case RADEON_PACKET_TYPE2:
			/* type-2 is a single-dword filler packet */
			idx += 1;
			break;
		case RADEON_PACKET_TYPE3:
			pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
			if (ib->is_const_ib)
				ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
			else {
				switch (ib->ring) {
				case RADEON_RING_TYPE_GFX_INDEX:
					ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
					break;
				case CAYMAN_RING_TYPE_CP1_INDEX:
				case CAYMAN_RING_TYPE_CP2_INDEX:
					ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
					break;
				default:
					dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
					ret = -EINVAL;
					break;
				}
			}
			/* skip header plus count+1 payload dwords */
			idx += pkt.count + 2;
			break;
		default:
			dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
			ret = -EINVAL;
			break;
		}
		if (ret)
			break;
	} while (idx < ib->length_dw);

	return ret;
}
4380
4381 /*
4382  * vm
4383  */
4384 int si_vm_init(struct radeon_device *rdev)
4385 {
4386         /* number of VMs */
4387         rdev->vm_manager.nvm = 16;
4388         /* base offset of vram pages */
4389         rdev->vm_manager.vram_base_offset = 0;
4390
4391         return 0;
4392 }
4393
/* si_vm_fini - VM manager teardown; nothing to do on SI */
void si_vm_fini(struct radeon_device *rdev)
{
}
4397
/**
 * si_vm_decode_fault - print human readable fault info
 *
 * @rdev: radeon_device pointer
 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
 *
 * Decodes the faulting memory-client id into a block name and prints
 * human readable fault information (SI).  Tahiti uses a different MC
 * client id mapping than the other SI parts, hence the two tables.
 */
static void si_vm_decode_fault(struct radeon_device *rdev,
			       u32 status, u32 addr)
{
	/* unpack the fault status fields */
	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
	char *block;

	if (rdev->family == CHIP_TAHITI) {
		/* Tahiti MC client id -> block name */
		switch (mc_id) {
		case 160:
		case 144:
		case 96:
		case 80:
		case 224:
		case 208:
		case 32:
		case 16:
			block = "CB";
			break;
		case 161:
		case 145:
		case 97:
		case 81:
		case 225:
		case 209:
		case 33:
		case 17:
			block = "CB_FMASK";
			break;
		case 162:
		case 146:
		case 98:
		case 82:
		case 226:
		case 210:
		case 34:
		case 18:
			block = "CB_CMASK";
			break;
		case 163:
		case 147:
		case 99:
		case 83:
		case 227:
		case 211:
		case 35:
		case 19:
			block = "CB_IMMED";
			break;
		case 164:
		case 148:
		case 100:
		case 84:
		case 228:
		case 212:
		case 36:
		case 20:
			block = "DB";
			break;
		case 165:
		case 149:
		case 101:
		case 85:
		case 229:
		case 213:
		case 37:
		case 21:
			block = "DB_HTILE";
			break;
		case 167:
		case 151:
		case 103:
		case 87:
		case 231:
		case 215:
		case 39:
		case 23:
			block = "DB_STEN";
			break;
		case 72:
		case 68:
		case 64:
		case 8:
		case 4:
		case 0:
		case 136:
		case 132:
		case 128:
		case 200:
		case 196:
		case 192:
			block = "TC";
			break;
		case 112:
		case 48:
			block = "CP";
			break;
		case 49:
		case 177:
		case 50:
		case 178:
			block = "SH";
			break;
		case 53:
		case 190:
			block = "VGT";
			break;
		case 117:
			block = "IH";
			break;
		case 51:
		case 115:
			block = "RLC";
			break;
		case 119:
		case 183:
			block = "DMA0";
			break;
		case 61:
			block = "DMA1";
			break;
		case 248:
		case 120:
			block = "HDP";
			break;
		default:
			block = "unknown";
			break;
		}
	} else {
		/* Pitcairn/Verde/Oland/Hainan MC client id -> block name */
		switch (mc_id) {
		case 32:
		case 16:
		case 96:
		case 80:
		case 160:
		case 144:
		case 224:
		case 208:
			block = "CB";
			break;
		case 33:
		case 17:
		case 97:
		case 81:
		case 161:
		case 145:
		case 225:
		case 209:
			block = "CB_FMASK";
			break;
		case 34:
		case 18:
		case 98:
		case 82:
		case 162:
		case 146:
		case 226:
		case 210:
			block = "CB_CMASK";
			break;
		case 35:
		case 19:
		case 99:
		case 83:
		case 163:
		case 147:
		case 227:
		case 211:
			block = "CB_IMMED";
			break;
		case 36:
		case 20:
		case 100:
		case 84:
		case 164:
		case 148:
		case 228:
		case 212:
			block = "DB";
			break;
		case 37:
		case 21:
		case 101:
		case 85:
		case 165:
		case 149:
		case 229:
		case 213:
			block = "DB_HTILE";
			break;
		case 39:
		case 23:
		case 103:
		case 87:
		case 167:
		case 151:
		case 231:
		case 215:
			block = "DB_STEN";
			break;
		case 72:
		case 68:
		case 8:
		case 4:
		case 136:
		case 132:
		case 200:
		case 196:
			block = "TC";
			break;
		case 112:
		case 48:
			block = "CP";
			break;
		case 49:
		case 177:
		case 50:
		case 178:
			block = "SH";
			break;
		case 53:
			block = "VGT";
			break;
		case 117:
			block = "IH";
			break;
		case 51:
		case 115:
			block = "RLC";
			break;
		case 119:
		case 183:
			block = "DMA0";
			break;
		case 61:
			block = "DMA1";
			break;
		case 248:
		case 120:
			block = "HDP";
			break;
		default:
			block = "unknown";
			break;
		}
	}

	printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
	       protections, vmid, addr,
	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
	       block, mc_id);
}
4661
/**
 * si_vm_set_page - update the page tables using the CP or DMA engine
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: access flags
 *
 * Update the page tables using the CP (SI).  The engine is chosen by
 * the asic's configured page-table ring: the GFX ring uses WRITE_DATA
 * packets, otherwise the DMA engine is used with either a scattered
 * WRITE packet (system pages) or a PTE_PDE generate packet
 * (contiguous vram pages).
 */
void si_vm_set_page(struct radeon_device *rdev,
		    struct radeon_ib *ib,
		    uint64_t pe,
		    uint64_t addr, unsigned count,
		    uint32_t incr, uint32_t flags)
{
	uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
	uint64_t value;
	unsigned ndw;

	if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
		/* CP path: WRITE_DATA, 2 dwords per PTE plus 2 header dwords */
		while (count) {
			ndw = 2 + count * 2;
			if (ndw > 0x3FFE)	/* max payload per packet */
				ndw = 0x3FFE;

			ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
			ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
					WRITE_DATA_DST_SEL(1));
			ib->ptr[ib->length_dw++] = pe;
			ib->ptr[ib->length_dw++] = upper_32_bits(pe);
			for (; ndw > 2; ndw -= 2, --count, pe += 8) {
				if (flags & RADEON_VM_PAGE_SYSTEM) {
					/* system page: translate through the GART */
					value = radeon_vm_map_gart(rdev, addr);
					value &= 0xFFFFFFFFFFFFF000ULL;
				} else if (flags & RADEON_VM_PAGE_VALID) {
					value = addr;
				} else {
					value = 0;
				}
				addr += incr;
				value |= r600_flags;
				ib->ptr[ib->length_dw++] = value;
				ib->ptr[ib->length_dw++] = upper_32_bits(value);
			}
		}
	} else {
		/* DMA */
		if (flags & RADEON_VM_PAGE_SYSTEM) {
			while (count) {
				ndw = count * 2;
				if (ndw > 0xFFFFE)	/* max payload per packet */
					ndw = 0xFFFFE;

				/* for non-physically contiguous pages (system) */
				ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw);
				ib->ptr[ib->length_dw++] = pe;
				ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
				for (; ndw > 0; ndw -= 2, --count, pe += 8) {
					/* NOTE(review): the SYSTEM test below is redundant
					 * (this branch already implies it) but kept as-is */
					if (flags & RADEON_VM_PAGE_SYSTEM) {
						value = radeon_vm_map_gart(rdev, addr);
						value &= 0xFFFFFFFFFFFFF000ULL;
					} else if (flags & RADEON_VM_PAGE_VALID) {
						value = addr;
					} else {
						value = 0;
					}
					addr += incr;
					value |= r600_flags;
					ib->ptr[ib->length_dw++] = value;
					ib->ptr[ib->length_dw++] = upper_32_bits(value);
				}
			}
		} else {
			while (count) {
				ndw = count * 2;
				if (ndw > 0xFFFFE)	/* max payload per packet */
					ndw = 0xFFFFE;

				if (flags & RADEON_VM_PAGE_VALID)
					value = addr;
				else
					value = 0;
				/* for physically contiguous pages (vram) */
				ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
				ib->ptr[ib->length_dw++] = pe; /* dst addr */
				ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
				ib->ptr[ib->length_dw++] = r600_flags; /* mask */
				ib->ptr[ib->length_dw++] = 0;
				ib->ptr[ib->length_dw++] = value; /* value */
				ib->ptr[ib->length_dw++] = upper_32_bits(value);
				ib->ptr[ib->length_dw++] = incr; /* increment size */
				ib->ptr[ib->length_dw++] = 0;
				pe += ndw * 4;
				addr += (ndw / 2) * incr;
				count -= ndw / 2;
			}
		}
		/* pad the DMA IB to a multiple of 8 dwords with NOPs */
		while (ib->length_dw & 0x7)
			ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0);
	}
}
4767
/**
 * si_vm_flush - update a VM's page directory base and invalidate its TLB
 *
 * @rdev: radeon_device pointer
 * @ridx: ring index to emit on
 * @vm: the VM to flush (no-op when NULL)
 *
 * Emits WRITE_DATA packets on the CP ring to point the VM context at
 * its page directory, flush the HDP cache and request a TLB
 * invalidate, then syncs PFP to ME.
 */
void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	/* write new base address */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));

	/* contexts 0-7 and 8-15 live in two separate register blocks */
	if (vm->id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm->id);

	/* sync PFP to ME, otherwise we might get invalid PFP reads */
	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
	radeon_ring_write(ring, 0x0);
}
4810
/**
 * si_dma_vm_flush - update a VM's page directory base and invalidate its
 * TLB via the DMA ring
 *
 * @rdev: radeon_device pointer
 * @ridx: DMA ring index to emit on
 * @vm: the VM to flush (no-op when NULL)
 *
 * DMA-ring counterpart of si_vm_flush(), using SRBM_WRITE packets.
 */
void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	/* write new page directory base address */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
	/* contexts 0-7 and 8-15 live in two separate register blocks */
	if (vm->id < 8) {
		radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
	} else {
		radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2));
	}
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
	radeon_ring_write(ring, 1);

	/* bits 0-15 are the VM contexts0-15 (si_vm_init sets nvm = 16) */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
	radeon_ring_write(ring, 1 << vm->id);
}
4836
4837 /*
4838  *  Power and clock gating
4839  */
4840 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
4841 {
4842         int i;
4843
4844         for (i = 0; i < rdev->usec_timeout; i++) {
4845                 if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
4846                         break;
4847                 udelay(1);
4848         }
4849
4850         for (i = 0; i < rdev->usec_timeout; i++) {
4851                 if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
4852                         break;
4853                 udelay(1);
4854         }
4855 }
4856
4857 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
4858                                          bool enable)
4859 {
4860         u32 tmp = RREG32(CP_INT_CNTL_RING0);
4861         u32 mask;
4862         int i;
4863
4864         if (enable)
4865                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4866         else
4867                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4868         WREG32(CP_INT_CNTL_RING0, tmp);
4869
4870         if (!enable) {
4871                 /* read a gfx register */
4872                 tmp = RREG32(DB_DEPTH_INFO);
4873
4874                 mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
4875                 for (i = 0; i < rdev->usec_timeout; i++) {
4876                         if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
4877                                 break;
4878                         udelay(1);
4879                 }
4880         }
4881 }
4882
4883 static void si_set_uvd_dcm(struct radeon_device *rdev,
4884                            bool sw_mode)
4885 {
4886         u32 tmp, tmp2;
4887
4888         tmp = RREG32(UVD_CGC_CTRL);
4889         tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
4890         tmp |= DCM | CG_DT(1) | CLK_OD(4);
4891
4892         if (sw_mode) {
4893                 tmp &= ~0x7ffff800;
4894                 tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
4895         } else {
4896                 tmp |= 0x7ffff800;
4897                 tmp2 = 0;
4898         }
4899
4900         WREG32(UVD_CGC_CTRL, tmp);
4901         WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
4902 }
4903
4904 void si_init_uvd_internal_cg(struct radeon_device *rdev)
4905 {
4906         bool hw_mode = true;
4907
4908         if (hw_mode) {
4909                 si_set_uvd_dcm(rdev, false);
4910         } else {
4911                 u32 tmp = RREG32(UVD_CGC_CTRL);
4912                 tmp &= ~DCM;
4913                 WREG32(UVD_CGC_CTRL, tmp);
4914         }
4915 }
4916
4917 static u32 si_halt_rlc(struct radeon_device *rdev)
4918 {
4919         u32 data, orig;
4920
4921         orig = data = RREG32(RLC_CNTL);
4922
4923         if (data & RLC_ENABLE) {
4924                 data &= ~RLC_ENABLE;
4925                 WREG32(RLC_CNTL, data);
4926
4927                 si_wait_for_rlc_serdes(rdev);
4928         }
4929
4930         return orig;
4931 }
4932
4933 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
4934 {
4935         u32 tmp;
4936
4937         tmp = RREG32(RLC_CNTL);
4938         if (tmp != rlc)
4939                 WREG32(RLC_CNTL, rlc);
4940 }
4941
4942 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
4943 {
4944         u32 data, orig;
4945
4946         orig = data = RREG32(DMA_PG);
4947         if (enable)
4948                 data |= PG_CNTL_ENABLE;
4949         else
4950                 data &= ~PG_CNTL_ENABLE;
4951         if (orig != data)
4952                 WREG32(DMA_PG, data);
4953 }
4954
4955 static void si_init_dma_pg(struct radeon_device *rdev)
4956 {
4957         u32 tmp;
4958
4959         WREG32(DMA_PGFSM_WRITE,  0x00002000);
4960         WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
4961
4962         for (tmp = 0; tmp < 5; tmp++)
4963                 WREG32(DMA_PGFSM_WRITE, 0);
4964 }
4965
4966 static void si_enable_gfx_cgpg(struct radeon_device *rdev,
4967                                bool enable)
4968 {
4969         u32 tmp;
4970
4971         if (enable) {
4972                 tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
4973                 WREG32(RLC_TTOP_D, tmp);
4974
4975                 tmp = RREG32(RLC_PG_CNTL);
4976                 tmp |= GFX_PG_ENABLE;
4977                 WREG32(RLC_PG_CNTL, tmp);
4978
4979                 tmp = RREG32(RLC_AUTO_PG_CTRL);
4980                 tmp |= AUTO_PG_EN;
4981                 WREG32(RLC_AUTO_PG_CTRL, tmp);
4982         } else {
4983                 tmp = RREG32(RLC_AUTO_PG_CTRL);
4984                 tmp &= ~AUTO_PG_EN;
4985                 WREG32(RLC_AUTO_PG_CTRL, tmp);
4986
4987                 tmp = RREG32(DB_RENDER_CONTROL);
4988         }
4989 }
4990
4991 static void si_init_gfx_cgpg(struct radeon_device *rdev)
4992 {
4993         u32 tmp;
4994
4995         WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
4996
4997         tmp = RREG32(RLC_PG_CNTL);
4998         tmp |= GFX_PG_SRC;
4999         WREG32(RLC_PG_CNTL, tmp);
5000
5001         WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5002
5003         tmp = RREG32(RLC_AUTO_PG_CTRL);
5004
5005         tmp &= ~GRBM_REG_SGIT_MASK;
5006         tmp |= GRBM_REG_SGIT(0x700);
5007         tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
5008         WREG32(RLC_AUTO_PG_CTRL, tmp);
5009 }
5010
5011 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5012 {
5013         u32 mask = 0, tmp, tmp1;
5014         int i;
5015
5016         si_select_se_sh(rdev, se, sh);
5017         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5018         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5019         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5020
5021         tmp &= 0xffff0000;
5022
5023         tmp |= tmp1;
5024         tmp >>= 16;
5025
5026         for (i = 0; i < rdev->config.si.max_cu_per_sh; i ++) {
5027                 mask <<= 1;
5028                 mask |= 1;
5029         }
5030
5031         return (~tmp) & mask;
5032 }
5033
5034 static void si_init_ao_cu_mask(struct radeon_device *rdev)
5035 {
5036         u32 i, j, k, active_cu_number = 0;
5037         u32 mask, counter, cu_bitmap;
5038         u32 tmp = 0;
5039
5040         for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
5041                 for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
5042                         mask = 1;
5043                         cu_bitmap = 0;
5044                         counter  = 0;
5045                         for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
5046                                 if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
5047                                         if (counter < 2)
5048                                                 cu_bitmap |= mask;
5049                                         counter++;
5050                                 }
5051                                 mask <<= 1;
5052                         }
5053
5054                         active_cu_number += counter;
5055                         tmp |= (cu_bitmap << (i * 16 + j * 8));
5056                 }
5057         }
5058
5059         WREG32(RLC_PG_AO_CU_MASK, tmp);
5060
5061         tmp = RREG32(RLC_MAX_PG_CU);
5062         tmp &= ~MAX_PU_CU_MASK;
5063         tmp |= MAX_PU_CU(active_cu_number);
5064         WREG32(RLC_MAX_PG_CU, tmp);
5065 }
5066
/* Enable/disable coarse-grain clock gating (CGCG) and coarse-grain
 * light sleep (CGLS) via RLC_CGCG_CGLS_CTRL.
 */
static void si_enable_cgcg(struct radeon_device *rdev,
			   bool enable)
{
	u32 data, orig, tmp;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	si_enable_gui_idle_interrupt(rdev, enable);

	if (enable) {
		WREG32(RLC_GCPM_GENERAL_3, 0x00000080);

		/* stop the RLC while the serdes registers are programmed;
		 * the prior RLC_CNTL value is returned for restoring later */
		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);

		si_wait_for_rlc_serdes(rdev);

		/* restore the RLC_CNTL value saved by si_halt_rlc() */
		si_update_rlc(rdev, tmp);

		WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);

		data |= CGCG_EN | CGLS_EN;
	} else {
		/* four back-to-back reads; values are discarded
		 * (presumably to flush/settle state — hardware quirk) */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	/* only write back if the control value actually changed */
	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);
}
5104
/* Enable/disable medium-grain clock gating (MGCG) and the CP memory
 * light-sleep feature, then reprogram the RLC serdes accordingly.
 */
static void si_enable_mgcg(struct radeon_device *rdev,
			   bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable) {
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data = 0x96940200;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		/* enable CP memory light sleep */
		orig = data = RREG32(CP_MEM_SLP_CNTL);
		data |= CP_MEM_LS_EN;
		if (orig != data)
			WREG32(CP_MEM_SLP_CNTL, data);

		/* clear the low override bits to allow MGCG */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data &= 0xffffffc0;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* halt the RLC while reprogramming the serdes */
		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);

		/* restore the RLC_CNTL value saved by si_halt_rlc() */
		si_update_rlc(rdev, tmp);
	} else {
		/* set override bits to block MGCG */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* disable CP memory light sleep if it was on */
		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= LS_OVERRIDE | OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		/* halt the RLC while reprogramming the serdes */
		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);

		/* restore the RLC_CNTL value saved by si_halt_rlc() */
		si_update_rlc(rdev, tmp);
	}
}
5158
5159 static void si_enable_uvd_mgcg(struct radeon_device *rdev,
5160                                bool enable)
5161 {
5162         u32 orig, data, tmp;
5163
5164         if (enable) {
5165                 tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5166                 tmp |= 0x3fff;
5167                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5168
5169                 orig = data = RREG32(UVD_CGC_CTRL);
5170                 data |= DCM;
5171                 if (orig != data)
5172                         WREG32(UVD_CGC_CTRL, data);
5173
5174                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
5175                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
5176         } else {
5177                 tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5178                 tmp &= ~0x3fff;
5179                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5180
5181                 orig = data = RREG32(UVD_CGC_CTRL);
5182                 data &= ~DCM;
5183                 if (orig != data)
5184                         WREG32(UVD_CGC_CTRL, data);
5185
5186                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
5187                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
5188         }
5189 }
5190
/* Memory-controller/VM clock-gating registers toggled as a group
 * by si_enable_mc_ls(). */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
5203
5204 static void si_enable_mc_ls(struct radeon_device *rdev,
5205                             bool enable)
5206 {
5207         int i;
5208         u32 orig, data;
5209
5210         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5211                 orig = data = RREG32(mc_cg_registers[i]);
5212                 if (enable)
5213                         data |= MC_LS_ENABLE;
5214                 else
5215                         data &= ~MC_LS_ENABLE;
5216                 if (data != orig)
5217                         WREG32(mc_cg_registers[i], data);
5218         }
5219 }
5220
5221
/* Bring up clock gating: MGCG on, CGCG off, and UVD MGCG when UVD
 * is present. */
static void si_init_cg(struct radeon_device *rdev)
{
	si_enable_mgcg(rdev, true);
	si_enable_cgcg(rdev, false);
	/* disable MC LS on Tahiti */
	if (rdev->family == CHIP_TAHITI)
		si_enable_mc_ls(rdev, false);
	if (rdev->has_uvd) {
		si_enable_uvd_mgcg(rdev, true);
		si_init_uvd_internal_cg(rdev);
	}
}
5234
/* Tear down clock gating in reverse of si_init_cg(): UVD MGCG first
 * (if present), then CGCG and MGCG. */
static void si_fini_cg(struct radeon_device *rdev)
{
	if (rdev->has_uvd)
		si_enable_uvd_mgcg(rdev, false);
	si_enable_cgcg(rdev, false);
	si_enable_mgcg(rdev, false);
}
5242
/* Initialize power gating. PG is currently compiled out (see the
 * #if 0 below), so only the RLC save/restore and clear-state base
 * addresses are programmed.
 */
static void si_init_pg(struct radeon_device *rdev)
{
	bool has_pg = false;
#if 0
	/* only cape verde supports PG */
	if (rdev->family == CHIP_VERDE)
		has_pg = true;
#endif
	if (has_pg) {
		si_init_ao_cu_mask(rdev);
		si_init_dma_pg(rdev);
		si_enable_dma_pg(rdev, true);
		si_init_gfx_cgpg(rdev);
		si_enable_gfx_cgpg(rdev, true);
	} else {
		/* still program the RLC buffer addresses (256-byte aligned) */
		WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
		WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
	}
}
5262
5263 static void si_fini_pg(struct radeon_device *rdev)
5264 {
5265         bool has_pg = false;
5266
5267         /* only cape verde supports PG */
5268         if (rdev->family == CHIP_VERDE)
5269                 has_pg = true;
5270
5271         if (has_pg) {
5272                 si_enable_dma_pg(rdev, false);
5273                 si_enable_gfx_cgpg(rdev, false);
5274         }
5275 }
5276
5277 /*
5278  * RLC
5279  */
5280 void si_rlc_reset(struct radeon_device *rdev)
5281 {
5282         u32 tmp = RREG32(GRBM_SOFT_RESET);
5283
5284         tmp |= SOFT_RESET_RLC;
5285         WREG32(GRBM_SOFT_RESET, tmp);
5286         udelay(50);
5287         tmp &= ~SOFT_RESET_RLC;
5288         WREG32(GRBM_SOFT_RESET, tmp);
5289         udelay(50);
5290 }
5291
/* Stop the RLC: disable it, turn off the gui-idle interrupt, and
 * wait for the serdes masters to go idle. */
static void si_rlc_stop(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, 0);

	si_enable_gui_idle_interrupt(rdev, false);

	si_wait_for_rlc_serdes(rdev);
}
5300
/* Start the RLC and re-enable the gui-idle interrupt, then give the
 * hardware a short settle delay. */
static void si_rlc_start(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, RLC_ENABLE);

	si_enable_gui_idle_interrupt(rdev, true);

	udelay(50);
}
5309
5310 static bool si_lbpw_supported(struct radeon_device *rdev)
5311 {
5312         u32 tmp;
5313
5314         /* Enable LBPW only for DDR3 */
5315         tmp = RREG32(MC_SEQ_MISC0);
5316         if ((tmp & 0xF0000000) == 0xB0000000)
5317                 return true;
5318         return false;
5319 }
5320
5321 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5322 {
5323         u32 tmp;
5324
5325         tmp = RREG32(RLC_LB_CNTL);
5326         if (enable)
5327                 tmp |= LOAD_BALANCE_ENABLE;
5328         else
5329                 tmp &= ~LOAD_BALANCE_ENABLE;
5330         WREG32(RLC_LB_CNTL, tmp);
5331
5332         if (!enable) {
5333                 si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5334                 WREG32(SPI_LB_CU_MASK, 0x00ff);
5335         }
5336 }
5337
/* Reset the RLC, load its microcode from the requested firmware blob,
 * and start it. Returns -EINVAL if the RLC firmware was never loaded,
 * 0 on success.
 */
static int si_rlc_resume(struct radeon_device *rdev)
{
	u32 i;
	const __be32 *fw_data;

	if (!rdev->rlc_fw)
		return -EINVAL;

	si_rlc_stop(rdev);

	si_rlc_reset(rdev);

	si_init_pg(rdev);

	si_init_cg(rdev);

	/* clear RLC list/load-balance state */
	WREG32(RLC_RL_BASE, 0);
	WREG32(RLC_RL_SIZE, 0);
	WREG32(RLC_LB_CNTL, 0);
	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
	WREG32(RLC_LB_CNTR_INIT, 0);
	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	/* firmware words are big-endian; write them one at a time via
	 * the UCODE_ADDR/UCODE_DATA pair */
	fw_data = (const __be32 *)rdev->rlc_fw->data;
	for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
		WREG32(RLC_UCODE_ADDR, i);
		WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
	}
	WREG32(RLC_UCODE_ADDR, 0);

	si_enable_lbpw(rdev, si_lbpw_supported(rdev));

	si_rlc_start(rdev);

	return 0;
}
5377
5378 static void si_enable_interrupts(struct radeon_device *rdev)
5379 {
5380         u32 ih_cntl = RREG32(IH_CNTL);
5381         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5382
5383         ih_cntl |= ENABLE_INTR;
5384         ih_rb_cntl |= IH_RB_ENABLE;
5385         WREG32(IH_CNTL, ih_cntl);
5386         WREG32(IH_RB_CNTL, ih_rb_cntl);
5387         rdev->ih.enabled = true;
5388 }
5389
5390 static void si_disable_interrupts(struct radeon_device *rdev)
5391 {
5392         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5393         u32 ih_cntl = RREG32(IH_CNTL);
5394
5395         ih_rb_cntl &= ~IH_RB_ENABLE;
5396         ih_cntl &= ~ENABLE_INTR;
5397         WREG32(IH_RB_CNTL, ih_rb_cntl);
5398         WREG32(IH_CNTL, ih_cntl);
5399         /* set rptr, wptr to 0 */
5400         WREG32(IH_RB_RPTR, 0);
5401         WREG32(IH_RB_WPTR, 0);
5402         rdev->ih.enabled = false;
5403         rdev->ih.rptr = 0;
5404 }
5405
/* Clear all interrupt-enable state: CP rings, both DMA engines, GRBM,
 * per-CRTC vblank and pageflip masks, and (on chips with display) the
 * HPD interrupt enables. HPD polarity bits are preserved.
 */
static void si_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING1, 0);
	WREG32(CP_INT_CNTL_RING2, 0);
	/* disable the trap interrupt on both DMA engines */
	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
	WREG32(GRBM_INT_CNTL, 0);
	/* clear the per-CRTC interrupt masks for however many CRTCs exist */
	if (rdev->num_crtc >= 2) {
		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* clear the per-CRTC pageflip (grph) interrupt controls */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	if (!ASIC_IS_NODCE(rdev)) {
		WREG32(DACA_AUTODETECT_INT_CONTROL, 0);

		/* clear HPD interrupt enables but keep the polarity bits */
		tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
}
5461
/* Initialize the interrupt handler (IH): allocate the IH ring, bring
 * up the RLC, program the IH ring buffer and writeback address, and
 * enable interrupt delivery. Returns 0 on success or a negative error
 * code from ring allocation / RLC resume.
 */
static int si_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	si_disable_interrupts(rdev);

	/* init rlc */
	ret = si_rlc_resume(rdev);
	if (ret) {
		/* RLC failed to come up; release the IH ring we allocated */
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* set dummy read address to ring address */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size is encoded as log2 of the dword count */
	rb_bufsz = drm_order(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	si_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	si_enable_interrupts(rdev);

	return ret;
}
5532
/* Program the interrupt enable state from the driver's current irq
 * bookkeeping (rdev->irq): CP rings, DMA engines, vblank/pflip per
 * CRTC, HPD per connector, and the thermal interrupt. Gathers all
 * desired values first, then commits them with register writes.
 * Returns 0 on success, -EINVAL if no irq handler is installed.
 */
int si_irq_set(struct radeon_device *rdev)
{
	u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE;
	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
	u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0;
	u32 grbm_int_cntl = 0;
	u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0;
	u32 dma_cntl, dma_cntl1;
	u32 thermal_int = 0;

	if (!rdev->irq.installed) {
		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
		return -EINVAL;
	}
	/* don't enable anything if the ih is disabled */
	if (!rdev->ih.enabled) {
		si_disable_interrupts(rdev);
		/* force the active interrupt state to all disabled */
		si_disable_interrupt_state(rdev);
		return 0;
	}

	/* start from the current HPD control values with the enable bit cleared */
	if (!ASIC_IS_NODCE(rdev)) {
		hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
	}

	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;

	thermal_int = RREG32(CG_THERMAL_INT) &
		~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);

	/* enable CP interrupts on all rings */
	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int gfx\n");
		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp1\n");
		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp2\n");
		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma\n");
		dma_cntl |= TRAP_ENABLE;
	}

	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma1\n");
		dma_cntl1 |= TRAP_ENABLE;
	}
	/* vblank interrupts: enabled if either the vblank irq or a
	 * pending pageflip wants them on that CRTC */
	if (rdev->irq.crtc_vblank_int[0] ||
	    atomic_read(&rdev->irq.pflip[0])) {
		DRM_DEBUG("si_irq_set: vblank 0\n");
		crtc1 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[1] ||
	    atomic_read(&rdev->irq.pflip[1])) {
		DRM_DEBUG("si_irq_set: vblank 1\n");
		crtc2 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[2] ||
	    atomic_read(&rdev->irq.pflip[2])) {
		DRM_DEBUG("si_irq_set: vblank 2\n");
		crtc3 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[3] ||
	    atomic_read(&rdev->irq.pflip[3])) {
		DRM_DEBUG("si_irq_set: vblank 3\n");
		crtc4 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[4] ||
	    atomic_read(&rdev->irq.pflip[4])) {
		DRM_DEBUG("si_irq_set: vblank 4\n");
		crtc5 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[5] ||
	    atomic_read(&rdev->irq.pflip[5])) {
		DRM_DEBUG("si_irq_set: vblank 5\n");
		crtc6 |= VBLANK_INT_MASK;
	}
	/* hotplug-detect interrupts per connector */
	if (rdev->irq.hpd[0]) {
		DRM_DEBUG("si_irq_set: hpd 1\n");
		hpd1 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[1]) {
		DRM_DEBUG("si_irq_set: hpd 2\n");
		hpd2 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[2]) {
		DRM_DEBUG("si_irq_set: hpd 3\n");
		hpd3 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[3]) {
		DRM_DEBUG("si_irq_set: hpd 4\n");
		hpd4 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[4]) {
		DRM_DEBUG("si_irq_set: hpd 5\n");
		hpd5 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[5]) {
		DRM_DEBUG("si_irq_set: hpd 6\n");
		hpd6 |= DC_HPDx_INT_EN;
	}

	/* commit the gathered values to the hardware */
	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);

	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);

	WREG32(GRBM_INT_CNTL, grbm_int_cntl);

	if (rdev->irq.dpm_thermal) {
		DRM_DEBUG("dpm thermal\n");
		thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
	}

	if (rdev->num_crtc >= 2) {
		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
	}

	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, grph1);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, grph2);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, grph3);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, grph4);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, grph5);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, grph6);
	}

	if (!ASIC_IS_NODCE(rdev)) {
		WREG32(DC_HPD1_INT_CONTROL, hpd1);
		WREG32(DC_HPD2_INT_CONTROL, hpd2);
		WREG32(DC_HPD3_INT_CONTROL, hpd3);
		WREG32(DC_HPD4_INT_CONTROL, hpd4);
		WREG32(DC_HPD5_INT_CONTROL, hpd5);
		WREG32(DC_HPD6_INT_CONTROL, hpd6);
	}

	WREG32(CG_THERMAL_INT, thermal_int);

	return 0;
}
5701
5702 static inline void si_irq_ack(struct radeon_device *rdev)
5703 {
5704         u32 tmp;
5705
5706         if (ASIC_IS_NODCE(rdev))
5707                 return;
5708
5709         rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
5710         rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
5711         rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
5712         rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
5713         rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
5714         rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
5715         rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
5716         rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
5717         if (rdev->num_crtc >= 4) {
5718                 rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
5719                 rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
5720         }
5721         if (rdev->num_crtc >= 6) {
5722                 rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
5723                 rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
5724         }
5725
5726         if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
5727                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5728         if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
5729                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5730         if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
5731                 WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
5732         if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
5733                 WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
5734         if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
5735                 WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
5736         if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
5737                 WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
5738
5739         if (rdev->num_crtc >= 4) {
5740                 if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
5741                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5742                 if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
5743                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5744                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
5745                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
5746                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
5747                         WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
5748                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
5749                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
5750                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
5751                         WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
5752         }
5753
5754         if (rdev->num_crtc >= 6) {
5755                 if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
5756                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5757                 if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
5758                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5759                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
5760                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
5761                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
5762                         WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
5763                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
5764                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
5765                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
5766                         WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
5767         }
5768
5769         if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
5770                 tmp = RREG32(DC_HPD1_INT_CONTROL);
5771                 tmp |= DC_HPDx_INT_ACK;
5772                 WREG32(DC_HPD1_INT_CONTROL, tmp);
5773         }
5774         if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
5775                 tmp = RREG32(DC_HPD2_INT_CONTROL);
5776                 tmp |= DC_HPDx_INT_ACK;
5777                 WREG32(DC_HPD2_INT_CONTROL, tmp);
5778         }
5779         if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5780                 tmp = RREG32(DC_HPD3_INT_CONTROL);
5781                 tmp |= DC_HPDx_INT_ACK;
5782                 WREG32(DC_HPD3_INT_CONTROL, tmp);
5783         }
5784         if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5785                 tmp = RREG32(DC_HPD4_INT_CONTROL);
5786                 tmp |= DC_HPDx_INT_ACK;
5787                 WREG32(DC_HPD4_INT_CONTROL, tmp);
5788         }
5789         if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5790                 tmp = RREG32(DC_HPD5_INT_CONTROL);
5791                 tmp |= DC_HPDx_INT_ACK;
5792                 WREG32(DC_HPD5_INT_CONTROL, tmp);
5793         }
5794         if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5795                 tmp = RREG32(DC_HPD5_INT_CONTROL);
5796                 tmp |= DC_HPDx_INT_ACK;
5797                 WREG32(DC_HPD6_INT_CONTROL, tmp);
5798         }
5799 }
5800
/* si_irq_disable - disable interrupt delivery and acknowledge stragglers
 *
 * The sequence is order-sensitive: first stop the IH from delivering new
 * interrupts, give in-flight ones a moment to land, ack anything still
 * pending, and only then clear the per-source interrupt enable state.
 */
static void si_irq_disable(struct radeon_device *rdev)
{
	si_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	si_irq_ack(rdev);
	si_disable_interrupt_state(rdev);
}
5809
/* si_irq_suspend - quiesce interrupts for suspend: disable/ack IRQs,
 * then stop the RLC (the on-chip graphics interrupt/power controller).
 */
static void si_irq_suspend(struct radeon_device *rdev)
{
	si_irq_disable(rdev);
	si_rlc_stop(rdev);
}
5815
/* si_irq_fini - full interrupt teardown: suspend interrupt processing,
 * then free the IH ring buffer.
 */
static void si_irq_fini(struct radeon_device *rdev)
{
	si_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
5821
5822 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
5823 {
5824         u32 wptr, tmp;
5825
5826         if (rdev->wb.enabled)
5827                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
5828         else
5829                 wptr = RREG32(IH_RB_WPTR);
5830
5831         if (wptr & RB_OVERFLOW) {
5832                 /* When a ring buffer overflow happen start parsing interrupt
5833                  * from the last not overwritten vector (wptr + 16). Hopefully
5834                  * this should allow us to catchup.
5835                  */
5836                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
5837                         wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
5838                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
5839                 tmp = RREG32(IH_RB_CNTL);
5840                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
5841                 WREG32(IH_RB_CNTL, tmp);
5842         }
5843         return (wptr & rdev->ih.ptr_mask);
5844 }
5845
5846 /*        SI IV Ring
5847  * Each IV ring entry is 128 bits:
5848  * [7:0]    - interrupt source id
5849  * [31:8]   - reserved
5850  * [59:32]  - interrupt source data
5851  * [63:60]  - reserved
5852  * [71:64]  - RINGID
5853  * [79:72]  - VMID
5854  * [127:80] - reserved
5855  */
/**
 * si_irq_process - main SI interrupt dispatch loop
 *
 * @rdev: radeon_device pointer
 *
 * Walks the IH ring from rptr to wptr and dispatches each 128-bit IV
 * entry (layout described above): CRTC vblank/vline, HPD hotplug,
 * VM protection faults, CP/DMA fence completions and thermal events.
 * Hotplug and thermal handling is deferred to work queues.  Processing
 * is serialized via rdev->ih.lock; if wptr moved while we were
 * draining, we loop back and drain again.
 *
 * Returns IRQ_HANDLED, or IRQ_NONE if the IH is disabled or another
 * thread already holds the processing lock.
 */
int si_irq_process(struct radeon_device *rdev)
{
	u32 wptr;
	u32 rptr;
	u32 src_id, src_data, ring_id;
	u32 ring_index;
	bool queue_hotplug = false;
	bool queue_thermal = false;
	u32 status, addr;

	if (!rdev->ih.enabled || rdev->shutdown)
		return IRQ_NONE;

	wptr = si_get_ih_wptr(rdev);

restart_ih:
	/* is somebody else already processing irqs? */
	if (atomic_xchg(&rdev->ih.lock, 1))
		return IRQ_NONE;

	rptr = rdev->ih.rptr;
	DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);

	/* Order reading of wptr vs. reading of IH ring data */
	rmb();

	/* display interrupts */
	si_irq_ack(rdev);

	while (rptr != wptr) {
		/* wptr/rptr are in bytes! */
		ring_index = rptr / 4;
		/* decode the IV entry per the ring layout documented above */
		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;

		switch (src_id) {
		case 1: /* D1 vblank/vline */
			switch (src_data) {
			case 0: /* D1 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[0]) {
						drm_handle_vblank(rdev->ddev, 0);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[0]))
						radeon_crtc_handle_flip(rdev, 0);
					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D1 vblank\n");
				}
				break;
			case 1: /* D1 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D1 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 2: /* D2 vblank/vline */
			switch (src_data) {
			case 0: /* D2 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[1]) {
						drm_handle_vblank(rdev->ddev, 1);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[1]))
						radeon_crtc_handle_flip(rdev, 1);
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D2 vblank\n");
				}
				break;
			case 1: /* D2 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D2 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 3: /* D3 vblank/vline */
			switch (src_data) {
			case 0: /* D3 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[2]) {
						drm_handle_vblank(rdev->ddev, 2);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[2]))
						radeon_crtc_handle_flip(rdev, 2);
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D3 vblank\n");
				}
				break;
			case 1: /* D3 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D3 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 4: /* D4 vblank/vline */
			switch (src_data) {
			case 0: /* D4 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[3]) {
						drm_handle_vblank(rdev->ddev, 3);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[3]))
						radeon_crtc_handle_flip(rdev, 3);
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D4 vblank\n");
				}
				break;
			case 1: /* D4 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D4 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 5: /* D5 vblank/vline */
			switch (src_data) {
			case 0: /* D5 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[4]) {
						drm_handle_vblank(rdev->ddev, 4);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[4]))
						radeon_crtc_handle_flip(rdev, 4);
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D5 vblank\n");
				}
				break;
			case 1: /* D5 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D5 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 6: /* D6 vblank/vline */
			switch (src_data) {
			case 0: /* D6 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[5]) {
						drm_handle_vblank(rdev->ddev, 5);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[5]))
						radeon_crtc_handle_flip(rdev, 5);
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D6 vblank\n");
				}
				break;
			case 1: /* D6 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D6 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 42: /* HPD hotplug */
			/* src_data selects the HPD pin; defer to hotplug work */
			switch (src_data) {
			case 0:
				if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD1\n");
				}
				break;
			case 1:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD2\n");
				}
				break;
			case 2:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD3\n");
				}
				break;
			case 3:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD4\n");
				}
				break;
			case 4:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD5\n");
				}
				break;
			case 5:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD6\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 146: /* VM protection fault */
		case 147: /* shared handling for both fault sources */
			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
				addr);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
				status);
			si_vm_decode_fault(rdev, status, addr);
			/* reset addr and status */
			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
			break;
		case 176: /* RINGID0 CP_INT */
			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
			break;
		case 177: /* RINGID1 CP_INT */
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
			break;
		case 178: /* RINGID2 CP_INT */
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
			break;
		case 181: /* CP EOP event */
			DRM_DEBUG("IH: CP EOP\n");
			/* ring_id from the IV entry selects which CP ring */
			switch (ring_id) {
			case 0:
				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
				break;
			case 1:
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
				break;
			case 2:
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
				break;
			}
			break;
		case 224: /* DMA trap event */
			DRM_DEBUG("IH: DMA trap\n");
			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
			break;
		case 230: /* thermal low to high */
			DRM_DEBUG("IH: thermal low to high\n");
			rdev->pm.dpm.thermal.high_to_low = false;
			queue_thermal = true;
			break;
		case 231: /* thermal high to low */
			DRM_DEBUG("IH: thermal high to low\n");
			rdev->pm.dpm.thermal.high_to_low = true;
			queue_thermal = true;
			break;
		case 233: /* GUI IDLE */
			DRM_DEBUG("IH: GUI idle\n");
			break;
		case 244: /* DMA1 trap event */
			DRM_DEBUG("IH: DMA1 trap\n");
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
			break;
		default:
			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
			break;
		}

		/* wptr/rptr are in bytes! */
		rptr += 16;
		rptr &= rdev->ih.ptr_mask;
	}
	if (queue_hotplug)
		schedule_work(&rdev->hotplug_work);
	if (queue_thermal && rdev->pm.dpm_enabled)
		schedule_work(&rdev->pm.dpm.thermal.work);
	rdev->ih.rptr = rptr;
	WREG32(IH_RB_RPTR, rdev->ih.rptr);
	atomic_set(&rdev->ih.lock, 0);

	/* make sure wptr hasn't changed while processing */
	wptr = si_get_ih_wptr(rdev);
	if (wptr != rptr)
		goto restart_ih;

	return IRQ_HANDLED;
}
6179
6180 /**
6181  * si_copy_dma - copy pages using the DMA engine
6182  *
6183  * @rdev: radeon_device pointer
6184  * @src_offset: src GPU address
6185  * @dst_offset: dst GPU address
6186  * @num_gpu_pages: number of GPU pages to xfer
6187  * @fence: radeon fence object
6188  *
6189  * Copy GPU paging using the DMA engine (SI).
6190  * Used by the radeon ttm implementation to move pages if
6191  * registered as the asic copy callback.
6192  */
6193 int si_copy_dma(struct radeon_device *rdev,
6194                 uint64_t src_offset, uint64_t dst_offset,
6195                 unsigned num_gpu_pages,
6196                 struct radeon_fence **fence)
6197 {
6198         struct radeon_semaphore *sem = NULL;
6199         int ring_index = rdev->asic->copy.dma_ring_index;
6200         struct radeon_ring *ring = &rdev->ring[ring_index];
6201         u32 size_in_bytes, cur_size_in_bytes;
6202         int i, num_loops;
6203         int r = 0;
6204
6205         r = radeon_semaphore_create(rdev, &sem);
6206         if (r) {
6207                 DRM_ERROR("radeon: moving bo (%d).\n", r);
6208                 return r;
6209         }
6210
6211         size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
6212         num_loops = DIV_ROUND_UP(size_in_bytes, 0xfffff);
6213         r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11);
6214         if (r) {
6215                 DRM_ERROR("radeon: moving bo (%d).\n", r);
6216                 radeon_semaphore_free(rdev, &sem, NULL);
6217                 return r;
6218         }
6219
6220         if (radeon_fence_need_sync(*fence, ring->idx)) {
6221                 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
6222                                             ring->idx);
6223                 radeon_fence_note_sync(*fence, ring->idx);
6224         } else {
6225                 radeon_semaphore_free(rdev, &sem, NULL);
6226         }
6227
6228         for (i = 0; i < num_loops; i++) {
6229                 cur_size_in_bytes = size_in_bytes;
6230                 if (cur_size_in_bytes > 0xFFFFF)
6231                         cur_size_in_bytes = 0xFFFFF;
6232                 size_in_bytes -= cur_size_in_bytes;
6233                 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 1, 0, 0, cur_size_in_bytes));
6234                 radeon_ring_write(ring, dst_offset & 0xffffffff);
6235                 radeon_ring_write(ring, src_offset & 0xffffffff);
6236                 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
6237                 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
6238                 src_offset += cur_size_in_bytes;
6239                 dst_offset += cur_size_in_bytes;
6240         }
6241
6242         r = radeon_fence_emit(rdev, fence, ring->idx);
6243         if (r) {
6244                 radeon_ring_unlock_undo(rdev, ring);
6245                 return r;
6246         }
6247
6248         radeon_ring_unlock_commit(rdev, ring);
6249         radeon_semaphore_free(rdev, &sem, *fence);
6250
6251         return r;
6252 }
6253
6254 /*
6255  * startup/shutdown callbacks
6256  */
6257 static int si_startup(struct radeon_device *rdev)
6258 {
6259         struct radeon_ring *ring;
6260         int r;
6261
6262         /* enable pcie gen2/3 link */
6263         si_pcie_gen3_enable(rdev);
6264         /* enable aspm */
6265         si_program_aspm(rdev);
6266
6267         si_mc_program(rdev);
6268
6269         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
6270             !rdev->rlc_fw || !rdev->mc_fw) {
6271                 r = si_init_microcode(rdev);
6272                 if (r) {
6273                         DRM_ERROR("Failed to load firmware!\n");
6274                         return r;
6275                 }
6276         }
6277
6278         r = si_mc_load_microcode(rdev);
6279         if (r) {
6280                 DRM_ERROR("Failed to load MC firmware!\n");
6281                 return r;
6282         }
6283
6284         r = r600_vram_scratch_init(rdev);
6285         if (r)
6286                 return r;
6287
6288         r = si_pcie_gart_enable(rdev);
6289         if (r)
6290                 return r;
6291         si_gpu_init(rdev);
6292
6293         /* allocate rlc buffers */
6294         if (rdev->family == CHIP_VERDE) {
6295                 rdev->rlc.reg_list = verde_rlc_save_restore_register_list;
6296                 rdev->rlc.reg_list_size =
6297                         (u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
6298         }
6299         rdev->rlc.cs_data = si_cs_data;
6300         r = sumo_rlc_init(rdev);
6301         if (r) {
6302                 DRM_ERROR("Failed to init rlc BOs!\n");
6303                 return r;
6304         }
6305
6306         /* allocate wb buffer */
6307         r = radeon_wb_init(rdev);
6308         if (r)
6309                 return r;
6310
6311         r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
6312         if (r) {
6313                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6314                 return r;
6315         }
6316
6317         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6318         if (r) {
6319                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6320                 return r;
6321         }
6322
6323         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6324         if (r) {
6325                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6326                 return r;
6327         }
6328
6329         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
6330         if (r) {
6331                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6332                 return r;
6333         }
6334
6335         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6336         if (r) {
6337                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6338                 return r;
6339         }
6340
6341         if (rdev->has_uvd) {
6342                 r = rv770_uvd_resume(rdev);
6343                 if (!r) {
6344                         r = radeon_fence_driver_start_ring(rdev,
6345                                                            R600_RING_TYPE_UVD_INDEX);
6346                         if (r)
6347                                 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
6348                 }
6349                 if (r)
6350                         rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6351         }
6352
6353         /* Enable IRQ */
6354         if (!rdev->irq.installed) {
6355                 r = radeon_irq_kms_init(rdev);
6356                 if (r)
6357                         return r;
6358         }
6359
6360         r = si_irq_init(rdev);
6361         if (r) {
6362                 DRM_ERROR("radeon: IH init failed (%d).\n", r);
6363                 radeon_irq_kms_fini(rdev);
6364                 return r;
6365         }
6366         si_irq_set(rdev);
6367
6368         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6369         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
6370                              CP_RB0_RPTR, CP_RB0_WPTR,
6371                              0, 0xfffff, RADEON_CP_PACKET2);
6372         if (r)
6373                 return r;
6374
6375         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6376         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
6377                              CP_RB1_RPTR, CP_RB1_WPTR,
6378                              0, 0xfffff, RADEON_CP_PACKET2);
6379         if (r)
6380                 return r;
6381
6382         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6383         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
6384                              CP_RB2_RPTR, CP_RB2_WPTR,
6385                              0, 0xfffff, RADEON_CP_PACKET2);
6386         if (r)
6387                 return r;
6388
6389         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6390         r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
6391                              DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
6392                              DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
6393                              2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
6394         if (r)
6395                 return r;
6396
6397         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6398         r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
6399                              DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
6400                              DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
6401                              2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
6402         if (r)
6403                 return r;
6404
6405         r = si_cp_load_microcode(rdev);
6406         if (r)
6407                 return r;
6408         r = si_cp_resume(rdev);
6409         if (r)
6410                 return r;
6411
6412         r = cayman_dma_resume(rdev);
6413         if (r)
6414                 return r;
6415
6416         if (rdev->has_uvd) {
6417                 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6418                 if (ring->ring_size) {
6419                         r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
6420                                              UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
6421                                              0, 0xfffff, RADEON_CP_PACKET2);
6422                         if (!r)
6423                                 r = r600_uvd_init(rdev, true);
6424                         if (r)
6425                                 DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
6426                 }
6427         }
6428
6429         r = radeon_ib_pool_init(rdev);
6430         if (r) {
6431                 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
6432                 return r;
6433         }
6434
6435         r = radeon_vm_manager_init(rdev);
6436         if (r) {
6437                 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
6438                 return r;
6439         }
6440
6441         return 0;
6442 }
6443
6444 int si_resume(struct radeon_device *rdev)
6445 {
6446         int r;
6447
6448         /* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
6449          * posting will perform necessary task to bring back GPU into good
6450          * shape.
6451          */
6452         /* post card */
6453         atom_asic_init(rdev->mode_info.atom_context);
6454
6455         /* init golden registers */
6456         si_init_golden_registers(rdev);
6457
6458         rdev->accel_working = true;
6459         r = si_startup(rdev);
6460         if (r) {
6461                 DRM_ERROR("si startup failed on resume\n");
6462                 rdev->accel_working = false;
6463                 return r;
6464         }
6465
6466         return r;
6467
6468 }
6469
/**
 * si_suspend - asic specific suspend
 *
 * @rdev: radeon_device pointer
 *
 * Quiesces the hardware for suspend.  Blocks are stopped roughly in the
 * reverse order of si_startup(): VM manager, CP, DMA, UVD, interrupts,
 * writeback and finally the GART.  The ordering matters — do not reorder.
 * Always returns 0.
 */
int si_suspend(struct radeon_device *rdev)
{
	radeon_vm_manager_fini(rdev);
	/* halt the command processor and DMA engines before touching
	 * anything they might still be using */
	si_cp_enable(rdev, false);
	cayman_dma_stop(rdev);
	if (rdev->has_uvd) {
		r600_uvd_stop(rdev);
		radeon_uvd_suspend(rdev);
	}
	si_irq_suspend(rdev);
	/* writeback and GART go last, once no engine can write to them */
	radeon_wb_disable(rdev);
	si_pcie_gart_disable(rdev);
	return 0;
}
6484
/* Plan is to move initialization into this function and use
 * helper functions so that radeon_device_init does pretty much
 * nothing more than call asic-specific functions. This should
 * also allow removing a bunch of callback functions like
 * vram_info.
 */
6491 int si_init(struct radeon_device *rdev)
6492 {
6493         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6494         int r;
6495
6496         /* Read BIOS */
6497         if (!radeon_get_bios(rdev)) {
6498                 if (ASIC_IS_AVIVO(rdev))
6499                         return -EINVAL;
6500         }
6501         /* Must be an ATOMBIOS */
6502         if (!rdev->is_atom_bios) {
6503                 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
6504                 return -EINVAL;
6505         }
6506         r = radeon_atombios_init(rdev);
6507         if (r)
6508                 return r;
6509
6510         /* Post card if necessary */
6511         if (!radeon_card_posted(rdev)) {
6512                 if (!rdev->bios) {
6513                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6514                         return -EINVAL;
6515                 }
6516                 DRM_INFO("GPU not posted. posting now...\n");
6517                 atom_asic_init(rdev->mode_info.atom_context);
6518         }
6519         /* init golden registers */
6520         si_init_golden_registers(rdev);
6521         /* Initialize scratch registers */
6522         si_scratch_init(rdev);
6523         /* Initialize surface registers */
6524         radeon_surface_init(rdev);
6525         /* Initialize clocks */
6526         radeon_get_clock_info(rdev->ddev);
6527
6528         /* Fence driver */
6529         r = radeon_fence_driver_init(rdev);
6530         if (r)
6531                 return r;
6532
6533         /* initialize memory controller */
6534         r = si_mc_init(rdev);
6535         if (r)
6536                 return r;
6537         /* Memory manager */
6538         r = radeon_bo_init(rdev);
6539         if (r)
6540                 return r;
6541
6542         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6543         ring->ring_obj = NULL;
6544         r600_ring_init(rdev, ring, 1024 * 1024);
6545
6546         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6547         ring->ring_obj = NULL;
6548         r600_ring_init(rdev, ring, 1024 * 1024);
6549
6550         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6551         ring->ring_obj = NULL;
6552         r600_ring_init(rdev, ring, 1024 * 1024);
6553
6554         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6555         ring->ring_obj = NULL;
6556         r600_ring_init(rdev, ring, 64 * 1024);
6557
6558         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6559         ring->ring_obj = NULL;
6560         r600_ring_init(rdev, ring, 64 * 1024);
6561
6562         if (rdev->has_uvd) {
6563                 r = radeon_uvd_init(rdev);
6564                 if (!r) {
6565                         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6566                         ring->ring_obj = NULL;
6567                         r600_ring_init(rdev, ring, 4096);
6568                 }
6569         }
6570
6571         rdev->ih.ring_obj = NULL;
6572         r600_ih_ring_init(rdev, 64 * 1024);
6573
6574         r = r600_pcie_gart_init(rdev);
6575         if (r)
6576                 return r;
6577
6578         rdev->accel_working = true;
6579         r = si_startup(rdev);
6580         if (r) {
6581                 dev_err(rdev->dev, "disabling GPU acceleration\n");
6582                 si_cp_fini(rdev);
6583                 cayman_dma_fini(rdev);
6584                 si_irq_fini(rdev);
6585                 sumo_rlc_fini(rdev);
6586                 radeon_wb_fini(rdev);
6587                 radeon_ib_pool_fini(rdev);
6588                 radeon_vm_manager_fini(rdev);
6589                 radeon_irq_kms_fini(rdev);
6590                 si_pcie_gart_fini(rdev);
6591                 rdev->accel_working = false;
6592         }
6593
6594         /* Don't start up if the MC ucode is missing.
6595          * The default clocks and voltages before the MC ucode
6596          * is loaded are not suffient for advanced operations.
6597          */
6598         if (!rdev->mc_fw) {
6599                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
6600                 return -EINVAL;
6601         }
6602
6603         return 0;
6604 }
6605
/**
 * si_fini - asic specific driver and hw teardown
 *
 * @rdev: radeon_device pointer
 *
 * Tears down everything set up by si_init()/si_startup().  Users of a
 * resource are shut down before the resource itself is released, so
 * the call order matters — do not reorder.
 */
void si_fini(struct radeon_device *rdev)
{
	si_cp_fini(rdev);
	cayman_dma_fini(rdev);
	si_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	si_fini_cg(rdev);
	si_fini_pg(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	if (rdev->has_uvd) {
		r600_uvd_stop(rdev);
		radeon_uvd_fini(rdev);
	}
	si_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	/* BIOS copy was kmalloc'ed at init time; free it and clear the
	 * stale pointer */
	kfree(rdev->bios);
	rdev->bios = NULL;
}
6631
6632 /**
6633  * si_get_gpu_clock_counter - return GPU clock counter snapshot
6634  *
6635  * @rdev: radeon_device pointer
6636  *
6637  * Fetches a GPU clock counter snapshot (SI).
6638  * Returns the 64 bit clock counter snapshot.
6639  */
6640 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
6641 {
6642         uint64_t clock;
6643
6644         mutex_lock(&rdev->gpu_clock_mutex);
6645         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
6646         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6647                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6648         mutex_unlock(&rdev->gpu_clock_mutex);
6649         return clock;
6650 }
6651
/**
 * si_set_uvd_clocks - program the UPLL that drives the UVD clocks
 *
 * @rdev: radeon_device pointer
 * @vclk: requested UVD video clock (0 keeps bypass and puts the PLL to sleep)
 * @dclk: requested UVD decode clock (0 keeps bypass and puts the PLL to sleep)
 *
 * Switches VCLK/DCLK onto the bypass clock, reprograms the UPLL dividers
 * for the requested frequencies, and switches back to the PLL outputs.
 * The write sequence and the mdelay()s follow the hardware programming
 * requirements — do not reorder.
 * Returns 0 on success, negative error code on failure.
 */
int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
	int r;

	/* bypass vclk and dclk with bclk */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	/* put PLL in bypass mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);

	if (!vclk || !dclk) {
		/* keep the Bypass mode, put PLL to sleep */
		WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
		return 0;
	}

	/* compute feedback and post dividers for the requested clocks */
	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
					  16384, 0x03FFFFFF, 0, 128, 5,
					  &fb_div, &vclk_div, &dclk_div);
	if (r)
		return r;

	/* set RESET_ANTI_MUX to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);

	/* set VCO_MODE to 1 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);

	/* toggle UPLL_SLEEP to 1 then back to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);

	/* deassert UPLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(1);

	/* handshake with the PLL before reprogramming it */
	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* assert UPLL_RESET again */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);

	/* disable spread spectrum. */
	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);

	/* set feedback divider */
	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);

	/* set ref divider to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);

	/* spare bit selects a VCO range based on the feedback divider */
	if (fb_div < 307200)
		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
	else
		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);

	/* set PDIV_A and PDIV_B */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));

	/* give the PLL some time to settle */
	mdelay(15);

	/* deassert PLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(15);

	/* switch from bypass mode to normal mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* switch VCLK and DCLK selection */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	mdelay(100);

	return 0;
}
6742
/**
 * si_pcie_gen3_enable - attempt to raise the PCIe link speed
 *
 * @rdev: radeon_device pointer
 *
 * Bumps the link to gen2 or gen3 when both the GPU and the upstream
 * bridge support it (bails out early otherwise, or when disabled via
 * the radeon.pcie_gen2 module parameter or on IGP/non-PCIE parts).
 * For gen3 it first runs the equalization retry dance on both ends of
 * the link; the exact register/config-space write order is part of the
 * hardware procedure — do not reorder.
 */
static void si_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	/* nothing to do if the link already runs at the target rate */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* PCIe capability offsets for the bridge and the GPU */
	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save the HAWD state of both link partners */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* renegotiate to the maximum detected link width first */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* up to 10 rounds of equalization redo */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				/* quiesce the link, then request equalization redo */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2 */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* program the target link speed into the GPU's LNKCTL2 */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	/* kick off the speed change and wait for the controller to ack */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
6899
/**
 * si_program_aspm - configure PCIe Active State Power Management
 *
 * @rdev: radeon_device pointer
 *
 * Programs the L0s/L1 inactivity timers, PLL power-down-in-L1 behavior
 * and (when the upstream bridge advertises clock PM) the CLKREQ-based
 * clock switching.  Does nothing when disabled via the radeon.aspm
 * module parameter or on non-PCIE parts.  The local disable_* flags are
 * compile-time policy knobs; the register write order follows the
 * hardware programming sequence — do not reorder.
 */
static void si_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	if (radeon_aspm == 0)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* override the N_FTS (fast training sequence) count */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE(PCIE_P_CNTL, data);

	/* build the L0s/L1 inactivity configuration */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* allow the PHY PLLs to power down in the off/TXS2 states */
			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);

			/* clear the PLL ramp-up times on everything but Oland/Hainan */
			if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
				data &= ~PLL_RAMP_UP_TIME_0_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
				data &= ~PLL_RAMP_UP_TIME_1_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
				data &= ~PLL_RAMP_UP_TIME_2_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
				data &= ~PLL_RAMP_UP_TIME_3_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
				data &= ~PLL_RAMP_UP_TIME_0_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
				data &= ~PLL_RAMP_UP_TIME_1_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
				data &= ~PLL_RAMP_UP_TIME_2_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
				data &= ~PLL_RAMP_UP_TIME_3_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
			}
			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			/* Oland/Hainan get a longer LS2 exit time */
			orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
			data &= ~LS2_EXIT_TIME_MASK;
			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
				data |= LS2_EXIT_TIME(5);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_CNTL, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
			data &= ~LS2_EXIT_TIME_MASK;
			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
				data |= LS2_EXIT_TIME(5);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_CNTL, data);

			/* CLKREQ is only usable when the upstream bridge
			 * advertises clock power management */
			if (!disable_clkreq) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				/* move the misc clocks off the reference clock so it
				 * can be gated */
				orig = data = RREG32(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32(THM_CLK_CNTL, data);

				orig = data = RREG32(MISC_CLK_CNTL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32(MISC_CLK_CNTL, data);

				orig = data = RREG32(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32(CG_CLKPIN_CNTL, data);

				orig = data = RREG32(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32(MPLL_BYPASSCLK_SEL, data);

				orig = data = RREG32(SPLL_CNTL_MODE);
				data &= ~SPLL_REFCLK_SEL_MASK;
				if (orig != data)
					WREG32(SPLL_CNTL_MODE, data);
			}
		}
	} else {
		/* L1 disabled: write the L0s-only configuration built above */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	orig = data = RREG32_PCIE(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE(PCIE_CNTL2, data);

	/* drop the L0s inactivity timer again on reversed lanes when the
	 * N_FTS field reads back saturated */
	if (!disable_l0s) {
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}