/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <drm/drmP.h>
#include "radeon.h"
#include "radeon_asic.h"
#include "cikd.h"
#include "atom.h"
#include "cik_blit_shaders.h"
#include "radeon_ucode.h"
#include "clearstate_ci.h"
36 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
37 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
38 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
39 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
40 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
41 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
42 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
43 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
44 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
45 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
46 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
47 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
48 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
49 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
50 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
51 MODULE_FIRMWARE("radeon/KABINI_me.bin");
52 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
53 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
54 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
55 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
57 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
58 extern void r600_ih_ring_fini(struct radeon_device *rdev);
59 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
60 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
61 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
62 extern void sumo_rlc_fini(struct radeon_device *rdev);
63 extern int sumo_rlc_init(struct radeon_device *rdev);
64 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
65 extern void si_rlc_reset(struct radeon_device *rdev);
66 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
67 extern int cik_sdma_resume(struct radeon_device *rdev);
68 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
69 extern void cik_sdma_fini(struct radeon_device *rdev);
70 extern void cik_sdma_vm_set_page(struct radeon_device *rdev,
73 uint64_t addr, unsigned count,
74 uint32_t incr, uint32_t flags);
75 static void cik_rlc_stop(struct radeon_device *rdev);
76 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
77 static void cik_program_aspm(struct radeon_device *rdev);
78 static void cik_init_pg(struct radeon_device *rdev);
79 static void cik_init_cg(struct radeon_device *rdev);
81 /* get temperature in millidegrees */
82 int ci_get_temp(struct radeon_device *rdev)
87 temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
93 actual_temp = temp & 0x1ff;
95 actual_temp = actual_temp * 1000;
100 /* get temperature in millidegrees */
101 int kv_get_temp(struct radeon_device *rdev)
106 temp = RREG32_SMC(0xC0300E0C);
109 actual_temp = (temp / 8) - 49;
113 actual_temp = actual_temp * 1000;
119 * Indirect registers accessor
121 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
125 WREG32(PCIE_INDEX, reg);
126 (void)RREG32(PCIE_INDEX);
127 r = RREG32(PCIE_DATA);
131 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
133 WREG32(PCIE_INDEX, reg);
134 (void)RREG32(PCIE_INDEX);
135 WREG32(PCIE_DATA, v);
136 (void)RREG32(PCIE_DATA);
139 static const u32 spectre_rlc_save_restore_register_list[] =
141 (0x0e00 << 16) | (0xc12c >> 2),
143 (0x0e00 << 16) | (0xc140 >> 2),
145 (0x0e00 << 16) | (0xc150 >> 2),
147 (0x0e00 << 16) | (0xc15c >> 2),
149 (0x0e00 << 16) | (0xc168 >> 2),
151 (0x0e00 << 16) | (0xc170 >> 2),
153 (0x0e00 << 16) | (0xc178 >> 2),
155 (0x0e00 << 16) | (0xc204 >> 2),
157 (0x0e00 << 16) | (0xc2b4 >> 2),
159 (0x0e00 << 16) | (0xc2b8 >> 2),
161 (0x0e00 << 16) | (0xc2bc >> 2),
163 (0x0e00 << 16) | (0xc2c0 >> 2),
165 (0x0e00 << 16) | (0x8228 >> 2),
167 (0x0e00 << 16) | (0x829c >> 2),
169 (0x0e00 << 16) | (0x869c >> 2),
171 (0x0600 << 16) | (0x98f4 >> 2),
173 (0x0e00 << 16) | (0x98f8 >> 2),
175 (0x0e00 << 16) | (0x9900 >> 2),
177 (0x0e00 << 16) | (0xc260 >> 2),
179 (0x0e00 << 16) | (0x90e8 >> 2),
181 (0x0e00 << 16) | (0x3c000 >> 2),
183 (0x0e00 << 16) | (0x3c00c >> 2),
185 (0x0e00 << 16) | (0x8c1c >> 2),
187 (0x0e00 << 16) | (0x9700 >> 2),
189 (0x0e00 << 16) | (0xcd20 >> 2),
191 (0x4e00 << 16) | (0xcd20 >> 2),
193 (0x5e00 << 16) | (0xcd20 >> 2),
195 (0x6e00 << 16) | (0xcd20 >> 2),
197 (0x7e00 << 16) | (0xcd20 >> 2),
199 (0x8e00 << 16) | (0xcd20 >> 2),
201 (0x9e00 << 16) | (0xcd20 >> 2),
203 (0xae00 << 16) | (0xcd20 >> 2),
205 (0xbe00 << 16) | (0xcd20 >> 2),
207 (0x0e00 << 16) | (0x89bc >> 2),
209 (0x0e00 << 16) | (0x8900 >> 2),
212 (0x0e00 << 16) | (0xc130 >> 2),
214 (0x0e00 << 16) | (0xc134 >> 2),
216 (0x0e00 << 16) | (0xc1fc >> 2),
218 (0x0e00 << 16) | (0xc208 >> 2),
220 (0x0e00 << 16) | (0xc264 >> 2),
222 (0x0e00 << 16) | (0xc268 >> 2),
224 (0x0e00 << 16) | (0xc26c >> 2),
226 (0x0e00 << 16) | (0xc270 >> 2),
228 (0x0e00 << 16) | (0xc274 >> 2),
230 (0x0e00 << 16) | (0xc278 >> 2),
232 (0x0e00 << 16) | (0xc27c >> 2),
234 (0x0e00 << 16) | (0xc280 >> 2),
236 (0x0e00 << 16) | (0xc284 >> 2),
238 (0x0e00 << 16) | (0xc288 >> 2),
240 (0x0e00 << 16) | (0xc28c >> 2),
242 (0x0e00 << 16) | (0xc290 >> 2),
244 (0x0e00 << 16) | (0xc294 >> 2),
246 (0x0e00 << 16) | (0xc298 >> 2),
248 (0x0e00 << 16) | (0xc29c >> 2),
250 (0x0e00 << 16) | (0xc2a0 >> 2),
252 (0x0e00 << 16) | (0xc2a4 >> 2),
254 (0x0e00 << 16) | (0xc2a8 >> 2),
256 (0x0e00 << 16) | (0xc2ac >> 2),
258 (0x0e00 << 16) | (0xc2b0 >> 2),
260 (0x0e00 << 16) | (0x301d0 >> 2),
262 (0x0e00 << 16) | (0x30238 >> 2),
264 (0x0e00 << 16) | (0x30250 >> 2),
266 (0x0e00 << 16) | (0x30254 >> 2),
268 (0x0e00 << 16) | (0x30258 >> 2),
270 (0x0e00 << 16) | (0x3025c >> 2),
272 (0x4e00 << 16) | (0xc900 >> 2),
274 (0x5e00 << 16) | (0xc900 >> 2),
276 (0x6e00 << 16) | (0xc900 >> 2),
278 (0x7e00 << 16) | (0xc900 >> 2),
280 (0x8e00 << 16) | (0xc900 >> 2),
282 (0x9e00 << 16) | (0xc900 >> 2),
284 (0xae00 << 16) | (0xc900 >> 2),
286 (0xbe00 << 16) | (0xc900 >> 2),
288 (0x4e00 << 16) | (0xc904 >> 2),
290 (0x5e00 << 16) | (0xc904 >> 2),
292 (0x6e00 << 16) | (0xc904 >> 2),
294 (0x7e00 << 16) | (0xc904 >> 2),
296 (0x8e00 << 16) | (0xc904 >> 2),
298 (0x9e00 << 16) | (0xc904 >> 2),
300 (0xae00 << 16) | (0xc904 >> 2),
302 (0xbe00 << 16) | (0xc904 >> 2),
304 (0x4e00 << 16) | (0xc908 >> 2),
306 (0x5e00 << 16) | (0xc908 >> 2),
308 (0x6e00 << 16) | (0xc908 >> 2),
310 (0x7e00 << 16) | (0xc908 >> 2),
312 (0x8e00 << 16) | (0xc908 >> 2),
314 (0x9e00 << 16) | (0xc908 >> 2),
316 (0xae00 << 16) | (0xc908 >> 2),
318 (0xbe00 << 16) | (0xc908 >> 2),
320 (0x4e00 << 16) | (0xc90c >> 2),
322 (0x5e00 << 16) | (0xc90c >> 2),
324 (0x6e00 << 16) | (0xc90c >> 2),
326 (0x7e00 << 16) | (0xc90c >> 2),
328 (0x8e00 << 16) | (0xc90c >> 2),
330 (0x9e00 << 16) | (0xc90c >> 2),
332 (0xae00 << 16) | (0xc90c >> 2),
334 (0xbe00 << 16) | (0xc90c >> 2),
336 (0x4e00 << 16) | (0xc910 >> 2),
338 (0x5e00 << 16) | (0xc910 >> 2),
340 (0x6e00 << 16) | (0xc910 >> 2),
342 (0x7e00 << 16) | (0xc910 >> 2),
344 (0x8e00 << 16) | (0xc910 >> 2),
346 (0x9e00 << 16) | (0xc910 >> 2),
348 (0xae00 << 16) | (0xc910 >> 2),
350 (0xbe00 << 16) | (0xc910 >> 2),
352 (0x0e00 << 16) | (0xc99c >> 2),
354 (0x0e00 << 16) | (0x9834 >> 2),
356 (0x0000 << 16) | (0x30f00 >> 2),
358 (0x0001 << 16) | (0x30f00 >> 2),
360 (0x0000 << 16) | (0x30f04 >> 2),
362 (0x0001 << 16) | (0x30f04 >> 2),
364 (0x0000 << 16) | (0x30f08 >> 2),
366 (0x0001 << 16) | (0x30f08 >> 2),
368 (0x0000 << 16) | (0x30f0c >> 2),
370 (0x0001 << 16) | (0x30f0c >> 2),
372 (0x0600 << 16) | (0x9b7c >> 2),
374 (0x0e00 << 16) | (0x8a14 >> 2),
376 (0x0e00 << 16) | (0x8a18 >> 2),
378 (0x0600 << 16) | (0x30a00 >> 2),
380 (0x0e00 << 16) | (0x8bf0 >> 2),
382 (0x0e00 << 16) | (0x8bcc >> 2),
384 (0x0e00 << 16) | (0x8b24 >> 2),
386 (0x0e00 << 16) | (0x30a04 >> 2),
388 (0x0600 << 16) | (0x30a10 >> 2),
390 (0x0600 << 16) | (0x30a14 >> 2),
392 (0x0600 << 16) | (0x30a18 >> 2),
394 (0x0600 << 16) | (0x30a2c >> 2),
396 (0x0e00 << 16) | (0xc700 >> 2),
398 (0x0e00 << 16) | (0xc704 >> 2),
400 (0x0e00 << 16) | (0xc708 >> 2),
402 (0x0e00 << 16) | (0xc768 >> 2),
404 (0x0400 << 16) | (0xc770 >> 2),
406 (0x0400 << 16) | (0xc774 >> 2),
408 (0x0400 << 16) | (0xc778 >> 2),
410 (0x0400 << 16) | (0xc77c >> 2),
412 (0x0400 << 16) | (0xc780 >> 2),
414 (0x0400 << 16) | (0xc784 >> 2),
416 (0x0400 << 16) | (0xc788 >> 2),
418 (0x0400 << 16) | (0xc78c >> 2),
420 (0x0400 << 16) | (0xc798 >> 2),
422 (0x0400 << 16) | (0xc79c >> 2),
424 (0x0400 << 16) | (0xc7a0 >> 2),
426 (0x0400 << 16) | (0xc7a4 >> 2),
428 (0x0400 << 16) | (0xc7a8 >> 2),
430 (0x0400 << 16) | (0xc7ac >> 2),
432 (0x0400 << 16) | (0xc7b0 >> 2),
434 (0x0400 << 16) | (0xc7b4 >> 2),
436 (0x0e00 << 16) | (0x9100 >> 2),
438 (0x0e00 << 16) | (0x3c010 >> 2),
440 (0x0e00 << 16) | (0x92a8 >> 2),
442 (0x0e00 << 16) | (0x92ac >> 2),
444 (0x0e00 << 16) | (0x92b4 >> 2),
446 (0x0e00 << 16) | (0x92b8 >> 2),
448 (0x0e00 << 16) | (0x92bc >> 2),
450 (0x0e00 << 16) | (0x92c0 >> 2),
452 (0x0e00 << 16) | (0x92c4 >> 2),
454 (0x0e00 << 16) | (0x92c8 >> 2),
456 (0x0e00 << 16) | (0x92cc >> 2),
458 (0x0e00 << 16) | (0x92d0 >> 2),
460 (0x0e00 << 16) | (0x8c00 >> 2),
462 (0x0e00 << 16) | (0x8c04 >> 2),
464 (0x0e00 << 16) | (0x8c20 >> 2),
466 (0x0e00 << 16) | (0x8c38 >> 2),
468 (0x0e00 << 16) | (0x8c3c >> 2),
470 (0x0e00 << 16) | (0xae00 >> 2),
472 (0x0e00 << 16) | (0x9604 >> 2),
474 (0x0e00 << 16) | (0xac08 >> 2),
476 (0x0e00 << 16) | (0xac0c >> 2),
478 (0x0e00 << 16) | (0xac10 >> 2),
480 (0x0e00 << 16) | (0xac14 >> 2),
482 (0x0e00 << 16) | (0xac58 >> 2),
484 (0x0e00 << 16) | (0xac68 >> 2),
486 (0x0e00 << 16) | (0xac6c >> 2),
488 (0x0e00 << 16) | (0xac70 >> 2),
490 (0x0e00 << 16) | (0xac74 >> 2),
492 (0x0e00 << 16) | (0xac78 >> 2),
494 (0x0e00 << 16) | (0xac7c >> 2),
496 (0x0e00 << 16) | (0xac80 >> 2),
498 (0x0e00 << 16) | (0xac84 >> 2),
500 (0x0e00 << 16) | (0xac88 >> 2),
502 (0x0e00 << 16) | (0xac8c >> 2),
504 (0x0e00 << 16) | (0x970c >> 2),
506 (0x0e00 << 16) | (0x9714 >> 2),
508 (0x0e00 << 16) | (0x9718 >> 2),
510 (0x0e00 << 16) | (0x971c >> 2),
512 (0x0e00 << 16) | (0x31068 >> 2),
514 (0x4e00 << 16) | (0x31068 >> 2),
516 (0x5e00 << 16) | (0x31068 >> 2),
518 (0x6e00 << 16) | (0x31068 >> 2),
520 (0x7e00 << 16) | (0x31068 >> 2),
522 (0x8e00 << 16) | (0x31068 >> 2),
524 (0x9e00 << 16) | (0x31068 >> 2),
526 (0xae00 << 16) | (0x31068 >> 2),
528 (0xbe00 << 16) | (0x31068 >> 2),
530 (0x0e00 << 16) | (0xcd10 >> 2),
532 (0x0e00 << 16) | (0xcd14 >> 2),
534 (0x0e00 << 16) | (0x88b0 >> 2),
536 (0x0e00 << 16) | (0x88b4 >> 2),
538 (0x0e00 << 16) | (0x88b8 >> 2),
540 (0x0e00 << 16) | (0x88bc >> 2),
542 (0x0400 << 16) | (0x89c0 >> 2),
544 (0x0e00 << 16) | (0x88c4 >> 2),
546 (0x0e00 << 16) | (0x88c8 >> 2),
548 (0x0e00 << 16) | (0x88d0 >> 2),
550 (0x0e00 << 16) | (0x88d4 >> 2),
552 (0x0e00 << 16) | (0x88d8 >> 2),
554 (0x0e00 << 16) | (0x8980 >> 2),
556 (0x0e00 << 16) | (0x30938 >> 2),
558 (0x0e00 << 16) | (0x3093c >> 2),
560 (0x0e00 << 16) | (0x30940 >> 2),
562 (0x0e00 << 16) | (0x89a0 >> 2),
564 (0x0e00 << 16) | (0x30900 >> 2),
566 (0x0e00 << 16) | (0x30904 >> 2),
568 (0x0e00 << 16) | (0x89b4 >> 2),
570 (0x0e00 << 16) | (0x3c210 >> 2),
572 (0x0e00 << 16) | (0x3c214 >> 2),
574 (0x0e00 << 16) | (0x3c218 >> 2),
576 (0x0e00 << 16) | (0x8904 >> 2),
579 (0x0e00 << 16) | (0x8c28 >> 2),
580 (0x0e00 << 16) | (0x8c2c >> 2),
581 (0x0e00 << 16) | (0x8c30 >> 2),
582 (0x0e00 << 16) | (0x8c34 >> 2),
583 (0x0e00 << 16) | (0x9600 >> 2),
586 static const u32 kalindi_rlc_save_restore_register_list[] =
588 (0x0e00 << 16) | (0xc12c >> 2),
590 (0x0e00 << 16) | (0xc140 >> 2),
592 (0x0e00 << 16) | (0xc150 >> 2),
594 (0x0e00 << 16) | (0xc15c >> 2),
596 (0x0e00 << 16) | (0xc168 >> 2),
598 (0x0e00 << 16) | (0xc170 >> 2),
600 (0x0e00 << 16) | (0xc204 >> 2),
602 (0x0e00 << 16) | (0xc2b4 >> 2),
604 (0x0e00 << 16) | (0xc2b8 >> 2),
606 (0x0e00 << 16) | (0xc2bc >> 2),
608 (0x0e00 << 16) | (0xc2c0 >> 2),
610 (0x0e00 << 16) | (0x8228 >> 2),
612 (0x0e00 << 16) | (0x829c >> 2),
614 (0x0e00 << 16) | (0x869c >> 2),
616 (0x0600 << 16) | (0x98f4 >> 2),
618 (0x0e00 << 16) | (0x98f8 >> 2),
620 (0x0e00 << 16) | (0x9900 >> 2),
622 (0x0e00 << 16) | (0xc260 >> 2),
624 (0x0e00 << 16) | (0x90e8 >> 2),
626 (0x0e00 << 16) | (0x3c000 >> 2),
628 (0x0e00 << 16) | (0x3c00c >> 2),
630 (0x0e00 << 16) | (0x8c1c >> 2),
632 (0x0e00 << 16) | (0x9700 >> 2),
634 (0x0e00 << 16) | (0xcd20 >> 2),
636 (0x4e00 << 16) | (0xcd20 >> 2),
638 (0x5e00 << 16) | (0xcd20 >> 2),
640 (0x6e00 << 16) | (0xcd20 >> 2),
642 (0x7e00 << 16) | (0xcd20 >> 2),
644 (0x0e00 << 16) | (0x89bc >> 2),
646 (0x0e00 << 16) | (0x8900 >> 2),
649 (0x0e00 << 16) | (0xc130 >> 2),
651 (0x0e00 << 16) | (0xc134 >> 2),
653 (0x0e00 << 16) | (0xc1fc >> 2),
655 (0x0e00 << 16) | (0xc208 >> 2),
657 (0x0e00 << 16) | (0xc264 >> 2),
659 (0x0e00 << 16) | (0xc268 >> 2),
661 (0x0e00 << 16) | (0xc26c >> 2),
663 (0x0e00 << 16) | (0xc270 >> 2),
665 (0x0e00 << 16) | (0xc274 >> 2),
667 (0x0e00 << 16) | (0xc28c >> 2),
669 (0x0e00 << 16) | (0xc290 >> 2),
671 (0x0e00 << 16) | (0xc294 >> 2),
673 (0x0e00 << 16) | (0xc298 >> 2),
675 (0x0e00 << 16) | (0xc2a0 >> 2),
677 (0x0e00 << 16) | (0xc2a4 >> 2),
679 (0x0e00 << 16) | (0xc2a8 >> 2),
681 (0x0e00 << 16) | (0xc2ac >> 2),
683 (0x0e00 << 16) | (0x301d0 >> 2),
685 (0x0e00 << 16) | (0x30238 >> 2),
687 (0x0e00 << 16) | (0x30250 >> 2),
689 (0x0e00 << 16) | (0x30254 >> 2),
691 (0x0e00 << 16) | (0x30258 >> 2),
693 (0x0e00 << 16) | (0x3025c >> 2),
695 (0x4e00 << 16) | (0xc900 >> 2),
697 (0x5e00 << 16) | (0xc900 >> 2),
699 (0x6e00 << 16) | (0xc900 >> 2),
701 (0x7e00 << 16) | (0xc900 >> 2),
703 (0x4e00 << 16) | (0xc904 >> 2),
705 (0x5e00 << 16) | (0xc904 >> 2),
707 (0x6e00 << 16) | (0xc904 >> 2),
709 (0x7e00 << 16) | (0xc904 >> 2),
711 (0x4e00 << 16) | (0xc908 >> 2),
713 (0x5e00 << 16) | (0xc908 >> 2),
715 (0x6e00 << 16) | (0xc908 >> 2),
717 (0x7e00 << 16) | (0xc908 >> 2),
719 (0x4e00 << 16) | (0xc90c >> 2),
721 (0x5e00 << 16) | (0xc90c >> 2),
723 (0x6e00 << 16) | (0xc90c >> 2),
725 (0x7e00 << 16) | (0xc90c >> 2),
727 (0x4e00 << 16) | (0xc910 >> 2),
729 (0x5e00 << 16) | (0xc910 >> 2),
731 (0x6e00 << 16) | (0xc910 >> 2),
733 (0x7e00 << 16) | (0xc910 >> 2),
735 (0x0e00 << 16) | (0xc99c >> 2),
737 (0x0e00 << 16) | (0x9834 >> 2),
739 (0x0000 << 16) | (0x30f00 >> 2),
741 (0x0000 << 16) | (0x30f04 >> 2),
743 (0x0000 << 16) | (0x30f08 >> 2),
745 (0x0000 << 16) | (0x30f0c >> 2),
747 (0x0600 << 16) | (0x9b7c >> 2),
749 (0x0e00 << 16) | (0x8a14 >> 2),
751 (0x0e00 << 16) | (0x8a18 >> 2),
753 (0x0600 << 16) | (0x30a00 >> 2),
755 (0x0e00 << 16) | (0x8bf0 >> 2),
757 (0x0e00 << 16) | (0x8bcc >> 2),
759 (0x0e00 << 16) | (0x8b24 >> 2),
761 (0x0e00 << 16) | (0x30a04 >> 2),
763 (0x0600 << 16) | (0x30a10 >> 2),
765 (0x0600 << 16) | (0x30a14 >> 2),
767 (0x0600 << 16) | (0x30a18 >> 2),
769 (0x0600 << 16) | (0x30a2c >> 2),
771 (0x0e00 << 16) | (0xc700 >> 2),
773 (0x0e00 << 16) | (0xc704 >> 2),
775 (0x0e00 << 16) | (0xc708 >> 2),
777 (0x0e00 << 16) | (0xc768 >> 2),
779 (0x0400 << 16) | (0xc770 >> 2),
781 (0x0400 << 16) | (0xc774 >> 2),
783 (0x0400 << 16) | (0xc798 >> 2),
785 (0x0400 << 16) | (0xc79c >> 2),
787 (0x0e00 << 16) | (0x9100 >> 2),
789 (0x0e00 << 16) | (0x3c010 >> 2),
791 (0x0e00 << 16) | (0x8c00 >> 2),
793 (0x0e00 << 16) | (0x8c04 >> 2),
795 (0x0e00 << 16) | (0x8c20 >> 2),
797 (0x0e00 << 16) | (0x8c38 >> 2),
799 (0x0e00 << 16) | (0x8c3c >> 2),
801 (0x0e00 << 16) | (0xae00 >> 2),
803 (0x0e00 << 16) | (0x9604 >> 2),
805 (0x0e00 << 16) | (0xac08 >> 2),
807 (0x0e00 << 16) | (0xac0c >> 2),
809 (0x0e00 << 16) | (0xac10 >> 2),
811 (0x0e00 << 16) | (0xac14 >> 2),
813 (0x0e00 << 16) | (0xac58 >> 2),
815 (0x0e00 << 16) | (0xac68 >> 2),
817 (0x0e00 << 16) | (0xac6c >> 2),
819 (0x0e00 << 16) | (0xac70 >> 2),
821 (0x0e00 << 16) | (0xac74 >> 2),
823 (0x0e00 << 16) | (0xac78 >> 2),
825 (0x0e00 << 16) | (0xac7c >> 2),
827 (0x0e00 << 16) | (0xac80 >> 2),
829 (0x0e00 << 16) | (0xac84 >> 2),
831 (0x0e00 << 16) | (0xac88 >> 2),
833 (0x0e00 << 16) | (0xac8c >> 2),
835 (0x0e00 << 16) | (0x970c >> 2),
837 (0x0e00 << 16) | (0x9714 >> 2),
839 (0x0e00 << 16) | (0x9718 >> 2),
841 (0x0e00 << 16) | (0x971c >> 2),
843 (0x0e00 << 16) | (0x31068 >> 2),
845 (0x4e00 << 16) | (0x31068 >> 2),
847 (0x5e00 << 16) | (0x31068 >> 2),
849 (0x6e00 << 16) | (0x31068 >> 2),
851 (0x7e00 << 16) | (0x31068 >> 2),
853 (0x0e00 << 16) | (0xcd10 >> 2),
855 (0x0e00 << 16) | (0xcd14 >> 2),
857 (0x0e00 << 16) | (0x88b0 >> 2),
859 (0x0e00 << 16) | (0x88b4 >> 2),
861 (0x0e00 << 16) | (0x88b8 >> 2),
863 (0x0e00 << 16) | (0x88bc >> 2),
865 (0x0400 << 16) | (0x89c0 >> 2),
867 (0x0e00 << 16) | (0x88c4 >> 2),
869 (0x0e00 << 16) | (0x88c8 >> 2),
871 (0x0e00 << 16) | (0x88d0 >> 2),
873 (0x0e00 << 16) | (0x88d4 >> 2),
875 (0x0e00 << 16) | (0x88d8 >> 2),
877 (0x0e00 << 16) | (0x8980 >> 2),
879 (0x0e00 << 16) | (0x30938 >> 2),
881 (0x0e00 << 16) | (0x3093c >> 2),
883 (0x0e00 << 16) | (0x30940 >> 2),
885 (0x0e00 << 16) | (0x89a0 >> 2),
887 (0x0e00 << 16) | (0x30900 >> 2),
889 (0x0e00 << 16) | (0x30904 >> 2),
891 (0x0e00 << 16) | (0x89b4 >> 2),
893 (0x0e00 << 16) | (0x3e1fc >> 2),
895 (0x0e00 << 16) | (0x3c210 >> 2),
897 (0x0e00 << 16) | (0x3c214 >> 2),
899 (0x0e00 << 16) | (0x3c218 >> 2),
901 (0x0e00 << 16) | (0x8904 >> 2),
904 (0x0e00 << 16) | (0x8c28 >> 2),
905 (0x0e00 << 16) | (0x8c2c >> 2),
906 (0x0e00 << 16) | (0x8c30 >> 2),
907 (0x0e00 << 16) | (0x8c34 >> 2),
908 (0x0e00 << 16) | (0x9600 >> 2),
911 static const u32 bonaire_golden_spm_registers[] =
913 0x30800, 0xe0ffffff, 0xe0000000
916 static const u32 bonaire_golden_common_registers[] =
918 0xc770, 0xffffffff, 0x00000800,
919 0xc774, 0xffffffff, 0x00000800,
920 0xc798, 0xffffffff, 0x00007fbf,
921 0xc79c, 0xffffffff, 0x00007faf
924 static const u32 bonaire_golden_registers[] =
926 0x3354, 0x00000333, 0x00000333,
927 0x3350, 0x000c0fc0, 0x00040200,
928 0x9a10, 0x00010000, 0x00058208,
929 0x3c000, 0xffff1fff, 0x00140000,
930 0x3c200, 0xfdfc0fff, 0x00000100,
931 0x3c234, 0x40000000, 0x40000200,
932 0x9830, 0xffffffff, 0x00000000,
933 0x9834, 0xf00fffff, 0x00000400,
934 0x9838, 0x0002021c, 0x00020200,
935 0xc78, 0x00000080, 0x00000000,
936 0x5bb0, 0x000000f0, 0x00000070,
937 0x5bc0, 0xf0311fff, 0x80300000,
938 0x98f8, 0x73773777, 0x12010001,
939 0x350c, 0x00810000, 0x408af000,
940 0x7030, 0x31000111, 0x00000011,
941 0x2f48, 0x73773777, 0x12010001,
942 0x220c, 0x00007fb6, 0x0021a1b1,
943 0x2210, 0x00007fb6, 0x002021b1,
944 0x2180, 0x00007fb6, 0x00002191,
945 0x2218, 0x00007fb6, 0x002121b1,
946 0x221c, 0x00007fb6, 0x002021b1,
947 0x21dc, 0x00007fb6, 0x00002191,
948 0x21e0, 0x00007fb6, 0x00002191,
949 0x3628, 0x0000003f, 0x0000000a,
950 0x362c, 0x0000003f, 0x0000000a,
951 0x2ae4, 0x00073ffe, 0x000022a2,
952 0x240c, 0x000007ff, 0x00000000,
953 0x8a14, 0xf000003f, 0x00000007,
954 0x8bf0, 0x00002001, 0x00000001,
955 0x8b24, 0xffffffff, 0x00ffffff,
956 0x30a04, 0x0000ff0f, 0x00000000,
957 0x28a4c, 0x07ffffff, 0x06000000,
958 0x4d8, 0x00000fff, 0x00000100,
959 0x3e78, 0x00000001, 0x00000002,
960 0x9100, 0x03000000, 0x0362c688,
961 0x8c00, 0x000000ff, 0x00000001,
962 0xe40, 0x00001fff, 0x00001fff,
963 0x9060, 0x0000007f, 0x00000020,
964 0x9508, 0x00010000, 0x00010000,
965 0xac14, 0x000003ff, 0x000000f3,
966 0xac0c, 0xffffffff, 0x00001032
969 static const u32 bonaire_mgcg_cgcg_init[] =
971 0xc420, 0xffffffff, 0xfffffffc,
972 0x30800, 0xffffffff, 0xe0000000,
973 0x3c2a0, 0xffffffff, 0x00000100,
974 0x3c208, 0xffffffff, 0x00000100,
975 0x3c2c0, 0xffffffff, 0xc0000100,
976 0x3c2c8, 0xffffffff, 0xc0000100,
977 0x3c2c4, 0xffffffff, 0xc0000100,
978 0x55e4, 0xffffffff, 0x00600100,
979 0x3c280, 0xffffffff, 0x00000100,
980 0x3c214, 0xffffffff, 0x06000100,
981 0x3c220, 0xffffffff, 0x00000100,
982 0x3c218, 0xffffffff, 0x06000100,
983 0x3c204, 0xffffffff, 0x00000100,
984 0x3c2e0, 0xffffffff, 0x00000100,
985 0x3c224, 0xffffffff, 0x00000100,
986 0x3c200, 0xffffffff, 0x00000100,
987 0x3c230, 0xffffffff, 0x00000100,
988 0x3c234, 0xffffffff, 0x00000100,
989 0x3c250, 0xffffffff, 0x00000100,
990 0x3c254, 0xffffffff, 0x00000100,
991 0x3c258, 0xffffffff, 0x00000100,
992 0x3c25c, 0xffffffff, 0x00000100,
993 0x3c260, 0xffffffff, 0x00000100,
994 0x3c27c, 0xffffffff, 0x00000100,
995 0x3c278, 0xffffffff, 0x00000100,
996 0x3c210, 0xffffffff, 0x06000100,
997 0x3c290, 0xffffffff, 0x00000100,
998 0x3c274, 0xffffffff, 0x00000100,
999 0x3c2b4, 0xffffffff, 0x00000100,
1000 0x3c2b0, 0xffffffff, 0x00000100,
1001 0x3c270, 0xffffffff, 0x00000100,
1002 0x30800, 0xffffffff, 0xe0000000,
1003 0x3c020, 0xffffffff, 0x00010000,
1004 0x3c024, 0xffffffff, 0x00030002,
1005 0x3c028, 0xffffffff, 0x00040007,
1006 0x3c02c, 0xffffffff, 0x00060005,
1007 0x3c030, 0xffffffff, 0x00090008,
1008 0x3c034, 0xffffffff, 0x00010000,
1009 0x3c038, 0xffffffff, 0x00030002,
1010 0x3c03c, 0xffffffff, 0x00040007,
1011 0x3c040, 0xffffffff, 0x00060005,
1012 0x3c044, 0xffffffff, 0x00090008,
1013 0x3c048, 0xffffffff, 0x00010000,
1014 0x3c04c, 0xffffffff, 0x00030002,
1015 0x3c050, 0xffffffff, 0x00040007,
1016 0x3c054, 0xffffffff, 0x00060005,
1017 0x3c058, 0xffffffff, 0x00090008,
1018 0x3c05c, 0xffffffff, 0x00010000,
1019 0x3c060, 0xffffffff, 0x00030002,
1020 0x3c064, 0xffffffff, 0x00040007,
1021 0x3c068, 0xffffffff, 0x00060005,
1022 0x3c06c, 0xffffffff, 0x00090008,
1023 0x3c070, 0xffffffff, 0x00010000,
1024 0x3c074, 0xffffffff, 0x00030002,
1025 0x3c078, 0xffffffff, 0x00040007,
1026 0x3c07c, 0xffffffff, 0x00060005,
1027 0x3c080, 0xffffffff, 0x00090008,
1028 0x3c084, 0xffffffff, 0x00010000,
1029 0x3c088, 0xffffffff, 0x00030002,
1030 0x3c08c, 0xffffffff, 0x00040007,
1031 0x3c090, 0xffffffff, 0x00060005,
1032 0x3c094, 0xffffffff, 0x00090008,
1033 0x3c098, 0xffffffff, 0x00010000,
1034 0x3c09c, 0xffffffff, 0x00030002,
1035 0x3c0a0, 0xffffffff, 0x00040007,
1036 0x3c0a4, 0xffffffff, 0x00060005,
1037 0x3c0a8, 0xffffffff, 0x00090008,
1038 0x3c000, 0xffffffff, 0x96e00200,
1039 0x8708, 0xffffffff, 0x00900100,
1040 0xc424, 0xffffffff, 0x0020003f,
1041 0x38, 0xffffffff, 0x0140001c,
1042 0x3c, 0x000f0000, 0x000f0000,
1043 0x220, 0xffffffff, 0xC060000C,
1044 0x224, 0xc0000fff, 0x00000100,
1045 0xf90, 0xffffffff, 0x00000100,
1046 0xf98, 0x00000101, 0x00000000,
1047 0x20a8, 0xffffffff, 0x00000104,
1048 0x55e4, 0xff000fff, 0x00000100,
1049 0x30cc, 0xc0000fff, 0x00000104,
1050 0xc1e4, 0x00000001, 0x00000001,
1051 0xd00c, 0xff000ff0, 0x00000100,
1052 0xd80c, 0xff000ff0, 0x00000100
1055 static const u32 spectre_golden_spm_registers[] =
1057 0x30800, 0xe0ffffff, 0xe0000000
1060 static const u32 spectre_golden_common_registers[] =
1062 0xc770, 0xffffffff, 0x00000800,
1063 0xc774, 0xffffffff, 0x00000800,
1064 0xc798, 0xffffffff, 0x00007fbf,
1065 0xc79c, 0xffffffff, 0x00007faf
1068 static const u32 spectre_golden_registers[] =
1070 0x3c000, 0xffff1fff, 0x96940200,
1071 0x3c00c, 0xffff0001, 0xff000000,
1072 0x3c200, 0xfffc0fff, 0x00000100,
1073 0x6ed8, 0x00010101, 0x00010000,
1074 0x9834, 0xf00fffff, 0x00000400,
1075 0x9838, 0xfffffffc, 0x00020200,
1076 0x5bb0, 0x000000f0, 0x00000070,
1077 0x5bc0, 0xf0311fff, 0x80300000,
1078 0x98f8, 0x73773777, 0x12010001,
1079 0x9b7c, 0x00ff0000, 0x00fc0000,
1080 0x2f48, 0x73773777, 0x12010001,
1081 0x8a14, 0xf000003f, 0x00000007,
1082 0x8b24, 0xffffffff, 0x00ffffff,
1083 0x28350, 0x3f3f3fff, 0x00000082,
1084 0x28355, 0x0000003f, 0x00000000,
1085 0x3e78, 0x00000001, 0x00000002,
1086 0x913c, 0xffff03df, 0x00000004,
1087 0xc768, 0x00000008, 0x00000008,
1088 0x8c00, 0x000008ff, 0x00000800,
1089 0x9508, 0x00010000, 0x00010000,
1090 0xac0c, 0xffffffff, 0x54763210,
1091 0x214f8, 0x01ff01ff, 0x00000002,
1092 0x21498, 0x007ff800, 0x00200000,
1093 0x2015c, 0xffffffff, 0x00000f40,
1094 0x30934, 0xffffffff, 0x00000001
1097 static const u32 spectre_mgcg_cgcg_init[] =
1099 0xc420, 0xffffffff, 0xfffffffc,
1100 0x30800, 0xffffffff, 0xe0000000,
1101 0x3c2a0, 0xffffffff, 0x00000100,
1102 0x3c208, 0xffffffff, 0x00000100,
1103 0x3c2c0, 0xffffffff, 0x00000100,
1104 0x3c2c8, 0xffffffff, 0x00000100,
1105 0x3c2c4, 0xffffffff, 0x00000100,
1106 0x55e4, 0xffffffff, 0x00600100,
1107 0x3c280, 0xffffffff, 0x00000100,
1108 0x3c214, 0xffffffff, 0x06000100,
1109 0x3c220, 0xffffffff, 0x00000100,
1110 0x3c218, 0xffffffff, 0x06000100,
1111 0x3c204, 0xffffffff, 0x00000100,
1112 0x3c2e0, 0xffffffff, 0x00000100,
1113 0x3c224, 0xffffffff, 0x00000100,
1114 0x3c200, 0xffffffff, 0x00000100,
1115 0x3c230, 0xffffffff, 0x00000100,
1116 0x3c234, 0xffffffff, 0x00000100,
1117 0x3c250, 0xffffffff, 0x00000100,
1118 0x3c254, 0xffffffff, 0x00000100,
1119 0x3c258, 0xffffffff, 0x00000100,
1120 0x3c25c, 0xffffffff, 0x00000100,
1121 0x3c260, 0xffffffff, 0x00000100,
1122 0x3c27c, 0xffffffff, 0x00000100,
1123 0x3c278, 0xffffffff, 0x00000100,
1124 0x3c210, 0xffffffff, 0x06000100,
1125 0x3c290, 0xffffffff, 0x00000100,
1126 0x3c274, 0xffffffff, 0x00000100,
1127 0x3c2b4, 0xffffffff, 0x00000100,
1128 0x3c2b0, 0xffffffff, 0x00000100,
1129 0x3c270, 0xffffffff, 0x00000100,
1130 0x30800, 0xffffffff, 0xe0000000,
1131 0x3c020, 0xffffffff, 0x00010000,
1132 0x3c024, 0xffffffff, 0x00030002,
1133 0x3c028, 0xffffffff, 0x00040007,
1134 0x3c02c, 0xffffffff, 0x00060005,
1135 0x3c030, 0xffffffff, 0x00090008,
1136 0x3c034, 0xffffffff, 0x00010000,
1137 0x3c038, 0xffffffff, 0x00030002,
1138 0x3c03c, 0xffffffff, 0x00040007,
1139 0x3c040, 0xffffffff, 0x00060005,
1140 0x3c044, 0xffffffff, 0x00090008,
1141 0x3c048, 0xffffffff, 0x00010000,
1142 0x3c04c, 0xffffffff, 0x00030002,
1143 0x3c050, 0xffffffff, 0x00040007,
1144 0x3c054, 0xffffffff, 0x00060005,
1145 0x3c058, 0xffffffff, 0x00090008,
1146 0x3c05c, 0xffffffff, 0x00010000,
1147 0x3c060, 0xffffffff, 0x00030002,
1148 0x3c064, 0xffffffff, 0x00040007,
1149 0x3c068, 0xffffffff, 0x00060005,
1150 0x3c06c, 0xffffffff, 0x00090008,
1151 0x3c070, 0xffffffff, 0x00010000,
1152 0x3c074, 0xffffffff, 0x00030002,
1153 0x3c078, 0xffffffff, 0x00040007,
1154 0x3c07c, 0xffffffff, 0x00060005,
1155 0x3c080, 0xffffffff, 0x00090008,
1156 0x3c084, 0xffffffff, 0x00010000,
1157 0x3c088, 0xffffffff, 0x00030002,
1158 0x3c08c, 0xffffffff, 0x00040007,
1159 0x3c090, 0xffffffff, 0x00060005,
1160 0x3c094, 0xffffffff, 0x00090008,
1161 0x3c098, 0xffffffff, 0x00010000,
1162 0x3c09c, 0xffffffff, 0x00030002,
1163 0x3c0a0, 0xffffffff, 0x00040007,
1164 0x3c0a4, 0xffffffff, 0x00060005,
1165 0x3c0a8, 0xffffffff, 0x00090008,
1166 0x3c0ac, 0xffffffff, 0x00010000,
1167 0x3c0b0, 0xffffffff, 0x00030002,
1168 0x3c0b4, 0xffffffff, 0x00040007,
1169 0x3c0b8, 0xffffffff, 0x00060005,
1170 0x3c0bc, 0xffffffff, 0x00090008,
1171 0x3c000, 0xffffffff, 0x96e00200,
1172 0x8708, 0xffffffff, 0x00900100,
1173 0xc424, 0xffffffff, 0x0020003f,
1174 0x38, 0xffffffff, 0x0140001c,
1175 0x3c, 0x000f0000, 0x000f0000,
1176 0x220, 0xffffffff, 0xC060000C,
1177 0x224, 0xc0000fff, 0x00000100,
1178 0xf90, 0xffffffff, 0x00000100,
1179 0xf98, 0x00000101, 0x00000000,
1180 0x20a8, 0xffffffff, 0x00000104,
1181 0x55e4, 0xff000fff, 0x00000100,
1182 0x30cc, 0xc0000fff, 0x00000104,
1183 0xc1e4, 0x00000001, 0x00000001,
1184 0xd00c, 0xff000ff0, 0x00000100,
1185 0xd80c, 0xff000ff0, 0x00000100
1188 static const u32 kalindi_golden_spm_registers[] =
1190 0x30800, 0xe0ffffff, 0xe0000000
1193 static const u32 kalindi_golden_common_registers[] =
1195 0xc770, 0xffffffff, 0x00000800,
1196 0xc774, 0xffffffff, 0x00000800,
1197 0xc798, 0xffffffff, 0x00007fbf,
1198 0xc79c, 0xffffffff, 0x00007faf
1201 static const u32 kalindi_golden_registers[] =
1203 0x3c000, 0xffffdfff, 0x6e944040,
1204 0x55e4, 0xff607fff, 0xfc000100,
1205 0x3c220, 0xff000fff, 0x00000100,
1206 0x3c224, 0xff000fff, 0x00000100,
1207 0x3c200, 0xfffc0fff, 0x00000100,
1208 0x6ed8, 0x00010101, 0x00010000,
1209 0x9830, 0xffffffff, 0x00000000,
1210 0x9834, 0xf00fffff, 0x00000400,
1211 0x5bb0, 0x000000f0, 0x00000070,
1212 0x5bc0, 0xf0311fff, 0x80300000,
1213 0x98f8, 0x73773777, 0x12010001,
1214 0x98fc, 0xffffffff, 0x00000010,
1215 0x9b7c, 0x00ff0000, 0x00fc0000,
1216 0x8030, 0x00001f0f, 0x0000100a,
1217 0x2f48, 0x73773777, 0x12010001,
1218 0x2408, 0x000fffff, 0x000c007f,
1219 0x8a14, 0xf000003f, 0x00000007,
1220 0x8b24, 0x3fff3fff, 0x00ffcfff,
1221 0x30a04, 0x0000ff0f, 0x00000000,
1222 0x28a4c, 0x07ffffff, 0x06000000,
1223 0x4d8, 0x00000fff, 0x00000100,
1224 0x3e78, 0x00000001, 0x00000002,
1225 0xc768, 0x00000008, 0x00000008,
1226 0x8c00, 0x000000ff, 0x00000003,
1227 0x214f8, 0x01ff01ff, 0x00000002,
1228 0x21498, 0x007ff800, 0x00200000,
1229 0x2015c, 0xffffffff, 0x00000f40,
1230 0x88c4, 0x001f3ae3, 0x00000082,
1231 0x88d4, 0x0000001f, 0x00000010,
1232 0x30934, 0xffffffff, 0x00000000
1235 static const u32 kalindi_mgcg_cgcg_init[] =
1237 0xc420, 0xffffffff, 0xfffffffc,
1238 0x30800, 0xffffffff, 0xe0000000,
1239 0x3c2a0, 0xffffffff, 0x00000100,
1240 0x3c208, 0xffffffff, 0x00000100,
1241 0x3c2c0, 0xffffffff, 0x00000100,
1242 0x3c2c8, 0xffffffff, 0x00000100,
1243 0x3c2c4, 0xffffffff, 0x00000100,
1244 0x55e4, 0xffffffff, 0x00600100,
1245 0x3c280, 0xffffffff, 0x00000100,
1246 0x3c214, 0xffffffff, 0x06000100,
1247 0x3c220, 0xffffffff, 0x00000100,
1248 0x3c218, 0xffffffff, 0x06000100,
1249 0x3c204, 0xffffffff, 0x00000100,
1250 0x3c2e0, 0xffffffff, 0x00000100,
1251 0x3c224, 0xffffffff, 0x00000100,
1252 0x3c200, 0xffffffff, 0x00000100,
1253 0x3c230, 0xffffffff, 0x00000100,
1254 0x3c234, 0xffffffff, 0x00000100,
1255 0x3c250, 0xffffffff, 0x00000100,
1256 0x3c254, 0xffffffff, 0x00000100,
1257 0x3c258, 0xffffffff, 0x00000100,
1258 0x3c25c, 0xffffffff, 0x00000100,
1259 0x3c260, 0xffffffff, 0x00000100,
1260 0x3c27c, 0xffffffff, 0x00000100,
1261 0x3c278, 0xffffffff, 0x00000100,
1262 0x3c210, 0xffffffff, 0x06000100,
1263 0x3c290, 0xffffffff, 0x00000100,
1264 0x3c274, 0xffffffff, 0x00000100,
1265 0x3c2b4, 0xffffffff, 0x00000100,
1266 0x3c2b0, 0xffffffff, 0x00000100,
1267 0x3c270, 0xffffffff, 0x00000100,
1268 0x30800, 0xffffffff, 0xe0000000,
1269 0x3c020, 0xffffffff, 0x00010000,
1270 0x3c024, 0xffffffff, 0x00030002,
1271 0x3c028, 0xffffffff, 0x00040007,
1272 0x3c02c, 0xffffffff, 0x00060005,
1273 0x3c030, 0xffffffff, 0x00090008,
1274 0x3c034, 0xffffffff, 0x00010000,
1275 0x3c038, 0xffffffff, 0x00030002,
1276 0x3c03c, 0xffffffff, 0x00040007,
1277 0x3c040, 0xffffffff, 0x00060005,
1278 0x3c044, 0xffffffff, 0x00090008,
1279 0x3c000, 0xffffffff, 0x96e00200,
1280 0x8708, 0xffffffff, 0x00900100,
1281 0xc424, 0xffffffff, 0x0020003f,
1282 0x38, 0xffffffff, 0x0140001c,
1283 0x3c, 0x000f0000, 0x000f0000,
1284 0x220, 0xffffffff, 0xC060000C,
1285 0x224, 0xc0000fff, 0x00000100,
1286 0x20a8, 0xffffffff, 0x00000104,
1287 0x55e4, 0xff000fff, 0x00000100,
1288 0x30cc, 0xc0000fff, 0x00000104,
1289 0xc1e4, 0x00000001, 0x00000001,
1290 0xd00c, 0xff000ff0, 0x00000100,
1291 0xd80c, 0xff000ff0, 0x00000100
1294 static void cik_init_golden_registers(struct radeon_device *rdev)
1296 switch (rdev->family) {
1298 radeon_program_register_sequence(rdev,
1299 bonaire_mgcg_cgcg_init,
1300 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1301 radeon_program_register_sequence(rdev,
1302 bonaire_golden_registers,
1303 (const u32)ARRAY_SIZE(bonaire_golden_registers));
1304 radeon_program_register_sequence(rdev,
1305 bonaire_golden_common_registers,
1306 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1307 radeon_program_register_sequence(rdev,
1308 bonaire_golden_spm_registers,
1309 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1312 radeon_program_register_sequence(rdev,
1313 kalindi_mgcg_cgcg_init,
1314 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1315 radeon_program_register_sequence(rdev,
1316 kalindi_golden_registers,
1317 (const u32)ARRAY_SIZE(kalindi_golden_registers));
1318 radeon_program_register_sequence(rdev,
1319 kalindi_golden_common_registers,
1320 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1321 radeon_program_register_sequence(rdev,
1322 kalindi_golden_spm_registers,
1323 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1326 radeon_program_register_sequence(rdev,
1327 spectre_mgcg_cgcg_init,
1328 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1329 radeon_program_register_sequence(rdev,
1330 spectre_golden_registers,
1331 (const u32)ARRAY_SIZE(spectre_golden_registers));
1332 radeon_program_register_sequence(rdev,
1333 spectre_golden_common_registers,
1334 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1335 radeon_program_register_sequence(rdev,
1336 spectre_golden_spm_registers,
1337 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1345 * cik_get_xclk - get the xclk
1347 * @rdev: radeon_device pointer
1349 * Returns the reference clock used by the gfx engine
1352 u32 cik_get_xclk(struct radeon_device *rdev)
1354 u32 reference_clock = rdev->clock.spll.reference_freq;
1356 if (rdev->flags & RADEON_IS_IGP) {
1357 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1358 return reference_clock / 2;
1360 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1361 return reference_clock / 4;
1363 return reference_clock;
1367 * cik_mm_rdoorbell - read a doorbell dword
1369 * @rdev: radeon_device pointer
1370 * @offset: byte offset into the aperture
1372 * Returns the value in the doorbell aperture at the
1373 * requested offset (CIK).
1375 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
1377 if (offset < rdev->doorbell.size) {
1378 return readl(((void __iomem *)rdev->doorbell.ptr) + offset);
1380 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
1386 * cik_mm_wdoorbell - write a doorbell dword
1388 * @rdev: radeon_device pointer
1389 * @offset: byte offset into the aperture
1390 * @v: value to write
1392 * Writes @v to the doorbell aperture at the
1393 * requested offset (CIK).
1395 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
1397 if (offset < rdev->doorbell.size) {
1398 writel(v, ((void __iomem *)rdev->doorbell.ptr) + offset);
1400 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
1404 #define BONAIRE_IO_MC_REGS_SIZE 36
1406 static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
1408 {0x00000070, 0x04400000},
1409 {0x00000071, 0x80c01803},
1410 {0x00000072, 0x00004004},
1411 {0x00000073, 0x00000100},
1412 {0x00000074, 0x00ff0000},
1413 {0x00000075, 0x34000000},
1414 {0x00000076, 0x08000014},
1415 {0x00000077, 0x00cc08ec},
1416 {0x00000078, 0x00000400},
1417 {0x00000079, 0x00000000},
1418 {0x0000007a, 0x04090000},
1419 {0x0000007c, 0x00000000},
1420 {0x0000007e, 0x4408a8e8},
1421 {0x0000007f, 0x00000304},
1422 {0x00000080, 0x00000000},
1423 {0x00000082, 0x00000001},
1424 {0x00000083, 0x00000002},
1425 {0x00000084, 0xf3e4f400},
1426 {0x00000085, 0x052024e3},
1427 {0x00000087, 0x00000000},
1428 {0x00000088, 0x01000000},
1429 {0x0000008a, 0x1c0a0000},
1430 {0x0000008b, 0xff010000},
1431 {0x0000008d, 0xffffefff},
1432 {0x0000008e, 0xfff3efff},
1433 {0x0000008f, 0xfff3efbf},
1434 {0x00000092, 0xf7ffffff},
1435 {0x00000093, 0xffffff7f},
1436 {0x00000095, 0x00101101},
1437 {0x00000096, 0x00000fff},
1438 {0x00000097, 0x00116fff},
1439 {0x00000098, 0x60010000},
1440 {0x00000099, 0x10010000},
1441 {0x0000009a, 0x00006000},
1442 {0x0000009b, 0x00001000},
1443 {0x0000009f, 0x00b48000}
1447 * cik_srbm_select - select specific register instances
1449 * @rdev: radeon_device pointer
1450 * @me: selected ME (micro engine)
1455 * Switches the currently active registers instances. Some
1456 * registers are instanced per VMID, others are instanced per
1457 * me/pipe/queue combination.
1459 static void cik_srbm_select(struct radeon_device *rdev,
1460 u32 me, u32 pipe, u32 queue, u32 vmid)
1462 u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1465 QUEUEID(queue & 0x7));
1466 WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1471 * ci_mc_load_microcode - load MC ucode into the hw
1473 * @rdev: radeon_device pointer
1475 * Load the GDDR MC ucode into the hw (CIK).
1476 * Returns 0 on success, error on failure.
1478 static int ci_mc_load_microcode(struct radeon_device *rdev)
1480 const __be32 *fw_data;
1481 u32 running, blackout = 0;
1483 int i, ucode_size, regs_size;
1488 switch (rdev->family) {
1491 io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1492 ucode_size = CIK_MC_UCODE_SIZE;
1493 regs_size = BONAIRE_IO_MC_REGS_SIZE;
1497 running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1501 blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1502 WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1505 /* reset the engine and set to writable */
1506 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1507 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1509 /* load mc io regs */
1510 for (i = 0; i < regs_size; i++) {
1511 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1512 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1514 /* load the MC ucode */
1515 fw_data = (const __be32 *)rdev->mc_fw->data;
1516 for (i = 0; i < ucode_size; i++)
1517 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1519 /* put the engine back into the active state */
1520 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1521 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1522 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1524 /* wait for training to complete */
1525 for (i = 0; i < rdev->usec_timeout; i++) {
1526 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1530 for (i = 0; i < rdev->usec_timeout; i++) {
1531 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1537 WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1544 * cik_init_microcode - load ucode images from disk
1546 * @rdev: radeon_device pointer
1548 * Use the firmware interface to load the ucode images into
1549 * the driver (not loaded into hw).
1550 * Returns 0 on success, error on failure.
1552 static int cik_init_microcode(struct radeon_device *rdev)
1554 const char *chip_name;
1555 size_t pfp_req_size, me_req_size, ce_req_size,
1556 mec_req_size, rlc_req_size, mc_req_size,
1557 sdma_req_size, smc_req_size;
1563 switch (rdev->family) {
1565 chip_name = "BONAIRE";
1566 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1567 me_req_size = CIK_ME_UCODE_SIZE * 4;
1568 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1569 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1570 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1571 mc_req_size = CIK_MC_UCODE_SIZE * 4;
1572 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1573 smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1576 chip_name = "KAVERI";
1577 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1578 me_req_size = CIK_ME_UCODE_SIZE * 4;
1579 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1580 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1581 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1582 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1585 chip_name = "KABINI";
1586 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1587 me_req_size = CIK_ME_UCODE_SIZE * 4;
1588 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1589 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1590 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1591 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1596 DRM_INFO("Loading %s Microcode\n", chip_name);
1598 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1599 err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1602 if (rdev->pfp_fw->size != pfp_req_size) {
1604 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1605 rdev->pfp_fw->size, fw_name);
1610 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1611 err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1614 if (rdev->me_fw->size != me_req_size) {
1616 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1617 rdev->me_fw->size, fw_name);
1621 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1622 err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1625 if (rdev->ce_fw->size != ce_req_size) {
1627 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1628 rdev->ce_fw->size, fw_name);
1632 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
1633 err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
1636 if (rdev->mec_fw->size != mec_req_size) {
1638 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1639 rdev->mec_fw->size, fw_name);
1643 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1644 err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1647 if (rdev->rlc_fw->size != rlc_req_size) {
1649 "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
1650 rdev->rlc_fw->size, fw_name);
1654 snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
1655 err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
1658 if (rdev->sdma_fw->size != sdma_req_size) {
1660 "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
1661 rdev->sdma_fw->size, fw_name);
1665 /* No SMC, MC ucode on APUs */
1666 if (!(rdev->flags & RADEON_IS_IGP)) {
1667 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1668 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1671 if (rdev->mc_fw->size != mc_req_size) {
1673 "cik_mc: Bogus length %zu in firmware \"%s\"\n",
1674 rdev->mc_fw->size, fw_name);
1678 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1679 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1682 "smc: error loading firmware \"%s\"\n",
1684 release_firmware(rdev->smc_fw);
1685 rdev->smc_fw = NULL;
1686 } else if (rdev->smc_fw->size != smc_req_size) {
1688 "cik_smc: Bogus length %zu in firmware \"%s\"\n",
1689 rdev->smc_fw->size, fw_name);
1698 "cik_cp: Failed to load firmware \"%s\"\n",
1700 release_firmware(rdev->pfp_fw);
1701 rdev->pfp_fw = NULL;
1702 release_firmware(rdev->me_fw);
1704 release_firmware(rdev->ce_fw);
1706 release_firmware(rdev->rlc_fw);
1707 rdev->rlc_fw = NULL;
1708 release_firmware(rdev->mc_fw);
1710 release_firmware(rdev->smc_fw);
1711 rdev->smc_fw = NULL;
1720 * cik_tiling_mode_table_init - init the hw tiling table
1722 * @rdev: radeon_device pointer
1724 * Starting with SI, the tiling setup is done globally in a
1725 * set of 32 tiling modes. Rather than selecting each set of
1726 * parameters per surface as on older asics, we just select
1727 * which index in the tiling table we want to use, and the
1728 * surface uses those parameters (CIK).
1730 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
1732 const u32 num_tile_mode_states = 32;
1733 const u32 num_secondary_tile_mode_states = 16;
1734 u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
1735 u32 num_pipe_configs;
1736 u32 num_rbs = rdev->config.cik.max_backends_per_se *
1737 rdev->config.cik.max_shader_engines;
1739 switch (rdev->config.cik.mem_row_size_in_kb) {
1741 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
1745 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
1748 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
1752 num_pipe_configs = rdev->config.cik.max_tile_pipes;
1753 if (num_pipe_configs > 8)
1754 num_pipe_configs = 8; /* ??? */
1756 if (num_pipe_configs == 8) {
1757 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1758 switch (reg_offset) {
1760 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1761 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1762 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1763 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1766 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1767 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1768 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1769 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1772 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1773 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1774 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1775 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1778 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1779 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1780 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1781 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1784 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1785 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1786 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1787 TILE_SPLIT(split_equal_to_row_size));
1790 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1791 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1794 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1795 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1796 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1797 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1800 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1801 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1802 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1803 TILE_SPLIT(split_equal_to_row_size));
1806 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1807 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
1810 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1811 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1814 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1815 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1816 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1817 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1820 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1821 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1822 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1823 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1826 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1827 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1828 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1829 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1832 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1833 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1836 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1837 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1838 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1839 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1842 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1843 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1844 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1845 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1848 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1849 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1850 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1851 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1854 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1855 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1858 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1859 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1860 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1861 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1864 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1865 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1866 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1867 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1870 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1871 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1872 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1873 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1879 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1880 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1882 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1883 switch (reg_offset) {
1885 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1886 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1887 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1888 NUM_BANKS(ADDR_SURF_16_BANK));
1891 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1892 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1893 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1894 NUM_BANKS(ADDR_SURF_16_BANK));
1897 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1898 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1899 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1900 NUM_BANKS(ADDR_SURF_16_BANK));
1903 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1904 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1905 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1906 NUM_BANKS(ADDR_SURF_16_BANK));
1909 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1910 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1911 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1912 NUM_BANKS(ADDR_SURF_8_BANK));
1915 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1916 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1917 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1918 NUM_BANKS(ADDR_SURF_4_BANK));
1921 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1922 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1923 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1924 NUM_BANKS(ADDR_SURF_2_BANK));
1927 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1928 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1929 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1930 NUM_BANKS(ADDR_SURF_16_BANK));
1933 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1934 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1935 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1936 NUM_BANKS(ADDR_SURF_16_BANK));
1939 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1940 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1941 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1942 NUM_BANKS(ADDR_SURF_16_BANK));
1945 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1946 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1947 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1948 NUM_BANKS(ADDR_SURF_16_BANK));
1951 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1952 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1953 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1954 NUM_BANKS(ADDR_SURF_8_BANK));
1957 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1958 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1959 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1960 NUM_BANKS(ADDR_SURF_4_BANK));
1963 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1964 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1965 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1966 NUM_BANKS(ADDR_SURF_2_BANK));
1972 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1974 } else if (num_pipe_configs == 4) {
1976 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1977 switch (reg_offset) {
1979 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1980 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1981 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1982 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1985 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1986 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1987 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1988 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1991 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1992 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1993 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1994 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1997 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1998 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1999 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2000 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2003 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2004 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2005 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2006 TILE_SPLIT(split_equal_to_row_size));
2009 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2010 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2013 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2014 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2015 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2016 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2019 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2020 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2021 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2022 TILE_SPLIT(split_equal_to_row_size));
2025 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2026 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2029 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2030 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2033 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2034 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2035 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2036 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2039 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2040 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2041 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2042 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2045 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2046 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2047 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2048 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2051 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2052 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2055 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2056 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2057 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2058 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2061 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2062 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2063 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2064 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2067 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2068 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2069 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2070 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2073 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2074 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2077 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2078 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2079 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2080 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2083 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2084 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2085 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2086 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2089 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2090 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2091 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2092 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2098 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2099 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2101 } else if (num_rbs < 4) {
2102 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2103 switch (reg_offset) {
2105 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2106 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2107 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2108 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2111 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2112 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2113 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2114 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2117 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2118 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2119 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2120 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2123 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2124 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2125 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2126 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2129 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2130 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2131 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2132 TILE_SPLIT(split_equal_to_row_size));
2135 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2136 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2139 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2140 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2141 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2142 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2145 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2146 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2147 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2148 TILE_SPLIT(split_equal_to_row_size));
2151 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2152 PIPE_CONFIG(ADDR_SURF_P4_8x16));
2155 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2156 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2159 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2160 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2161 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2162 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2165 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2166 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2167 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2168 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2171 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2172 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2173 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2174 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2177 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2178 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2181 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2182 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2183 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2184 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2187 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2188 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2189 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2190 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2193 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2194 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2195 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2196 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2199 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2200 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2203 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2204 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2205 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2206 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2209 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2210 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2211 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2212 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2215 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2216 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2217 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2218 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2224 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2225 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2228 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2229 switch (reg_offset) {
2231 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2232 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2233 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2234 NUM_BANKS(ADDR_SURF_16_BANK));
2237 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2238 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2239 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2240 NUM_BANKS(ADDR_SURF_16_BANK));
2243 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2244 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2245 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2246 NUM_BANKS(ADDR_SURF_16_BANK));
2249 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2250 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2251 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2252 NUM_BANKS(ADDR_SURF_16_BANK));
2255 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2256 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2257 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2258 NUM_BANKS(ADDR_SURF_16_BANK));
2261 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2262 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2263 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2264 NUM_BANKS(ADDR_SURF_8_BANK));
2267 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2268 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2269 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2270 NUM_BANKS(ADDR_SURF_4_BANK));
2273 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2274 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2275 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2276 NUM_BANKS(ADDR_SURF_16_BANK));
2279 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2280 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2281 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2282 NUM_BANKS(ADDR_SURF_16_BANK));
2285 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2286 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2287 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2288 NUM_BANKS(ADDR_SURF_16_BANK));
2291 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2292 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2293 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2294 NUM_BANKS(ADDR_SURF_16_BANK));
2297 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2298 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2299 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2300 NUM_BANKS(ADDR_SURF_16_BANK));
2303 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2304 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2305 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2306 NUM_BANKS(ADDR_SURF_8_BANK));
2309 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2310 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2311 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2312 NUM_BANKS(ADDR_SURF_4_BANK));
2318 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2320 } else if (num_pipe_configs == 2) {
2321 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2322 switch (reg_offset) {
2324 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2325 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2326 PIPE_CONFIG(ADDR_SURF_P2) |
2327 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2330 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2331 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2332 PIPE_CONFIG(ADDR_SURF_P2) |
2333 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2336 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2337 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2338 PIPE_CONFIG(ADDR_SURF_P2) |
2339 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2342 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2343 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2344 PIPE_CONFIG(ADDR_SURF_P2) |
2345 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2348 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2349 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2350 PIPE_CONFIG(ADDR_SURF_P2) |
2351 TILE_SPLIT(split_equal_to_row_size));
2354 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2355 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2358 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2359 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2360 PIPE_CONFIG(ADDR_SURF_P2) |
2361 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2364 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2365 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2366 PIPE_CONFIG(ADDR_SURF_P2) |
2367 TILE_SPLIT(split_equal_to_row_size));
2370 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
2373 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2374 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2377 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2378 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2379 PIPE_CONFIG(ADDR_SURF_P2) |
2380 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2383 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2384 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2385 PIPE_CONFIG(ADDR_SURF_P2) |
2386 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2389 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2390 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2391 PIPE_CONFIG(ADDR_SURF_P2) |
2392 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2395 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2396 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2399 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2400 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2401 PIPE_CONFIG(ADDR_SURF_P2) |
2402 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2405 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2406 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2407 PIPE_CONFIG(ADDR_SURF_P2) |
2408 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2411 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2412 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2413 PIPE_CONFIG(ADDR_SURF_P2) |
2414 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2417 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2418 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2421 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2422 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2423 PIPE_CONFIG(ADDR_SURF_P2) |
2424 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2427 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2428 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2429 PIPE_CONFIG(ADDR_SURF_P2) |
2430 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2433 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2434 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2435 PIPE_CONFIG(ADDR_SURF_P2) |
2436 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2442 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2443 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2445 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2446 switch (reg_offset) {
2448 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2449 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2450 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2451 NUM_BANKS(ADDR_SURF_16_BANK));
2454 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2455 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2456 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2457 NUM_BANKS(ADDR_SURF_16_BANK));
2460 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2461 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2462 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2463 NUM_BANKS(ADDR_SURF_16_BANK));
2466 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2467 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2468 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2469 NUM_BANKS(ADDR_SURF_16_BANK));
2472 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2473 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2474 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2475 NUM_BANKS(ADDR_SURF_16_BANK));
2478 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2479 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2480 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2481 NUM_BANKS(ADDR_SURF_16_BANK));
2484 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2485 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2486 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2487 NUM_BANKS(ADDR_SURF_8_BANK));
2490 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2491 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2492 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2493 NUM_BANKS(ADDR_SURF_16_BANK));
2496 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2497 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2498 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2499 NUM_BANKS(ADDR_SURF_16_BANK));
2502 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2503 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2504 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2505 NUM_BANKS(ADDR_SURF_16_BANK));
2508 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2509 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2510 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2511 NUM_BANKS(ADDR_SURF_16_BANK));
2514 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2515 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2516 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2517 NUM_BANKS(ADDR_SURF_16_BANK));
2520 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2521 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2522 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2523 NUM_BANKS(ADDR_SURF_16_BANK));
2526 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2527 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2528 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2529 NUM_BANKS(ADDR_SURF_8_BANK));
2535 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2538 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
2542 * cik_select_se_sh - select which SE, SH to address
2544 * @rdev: radeon_device pointer
2545 * @se_num: shader engine to address
2546 * @sh_num: sh block to address
2548 * Select which SE, SH combinations to address. Certain
2549 * registers are instanced per SE or SH. 0xffffffff means
2550 * broadcast to all SEs or SHs (CIK).
2552 static void cik_select_se_sh(struct radeon_device *rdev,
2553 u32 se_num, u32 sh_num)
2555 u32 data = INSTANCE_BROADCAST_WRITES;
2557 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2558 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2559 else if (se_num == 0xffffffff)
2560 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2561 else if (sh_num == 0xffffffff)
2562 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2564 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2565 WREG32(GRBM_GFX_INDEX, data);
2569 * cik_create_bitmask - create a bitmask
2571 * @bit_width: length of the mask
2573 * create a variable length bit mask (CIK).
2574 * Returns the bitmask.
2576 static u32 cik_create_bitmask(u32 bit_width)
2580 for (i = 0; i < bit_width; i++) {
2588 * cik_select_se_sh - select which SE, SH to address
2590 * @rdev: radeon_device pointer
2591 * @max_rb_num: max RBs (render backends) for the asic
2592 * @se_num: number of SEs (shader engines) for the asic
2593 * @sh_per_se: number of SH blocks per SE for the asic
2595 * Calculates the bitmask of disabled RBs (CIK).
2596 * Returns the disabled RB bitmask.
2598 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
2599 u32 max_rb_num, u32 se_num,
2604 data = RREG32(CC_RB_BACKEND_DISABLE);
2606 data &= BACKEND_DISABLE_MASK;
2609 data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2611 data >>= BACKEND_DISABLE_SHIFT;
2613 mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);
2619 * cik_setup_rb - setup the RBs on the asic
2621 * @rdev: radeon_device pointer
2622 * @se_num: number of SEs (shader engines) for the asic
2623 * @sh_per_se: number of SH blocks per SE for the asic
2624 * @max_rb_num: max RBs (render backends) for the asic
2626 * Configures per-SE/SH RB registers (CIK).
2628 static void cik_setup_rb(struct radeon_device *rdev,
2629 u32 se_num, u32 sh_per_se,
2634 u32 disabled_rbs = 0;
2635 u32 enabled_rbs = 0;
2637 for (i = 0; i < se_num; i++) {
2638 for (j = 0; j < sh_per_se; j++) {
2639 cik_select_se_sh(rdev, i, j);
2640 data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
2641 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
2644 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2647 for (i = 0; i < max_rb_num; i++) {
2648 if (!(disabled_rbs & mask))
2649 enabled_rbs |= mask;
2653 for (i = 0; i < se_num; i++) {
2654 cik_select_se_sh(rdev, i, 0xffffffff);
2656 for (j = 0; j < sh_per_se; j++) {
2657 switch (enabled_rbs & 3) {
2659 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
2662 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
2666 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
2671 WREG32(PA_SC_RASTER_CONFIG, data);
2673 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2677 * cik_gpu_init - setup the 3D engine
2679 * @rdev: radeon_device pointer
2681 * Configures the 3D engine and tiling configuration
2682 * registers so that the 3D engine is usable.
2684 static void cik_gpu_init(struct radeon_device *rdev)
2686 u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
2687 u32 mc_shared_chmap, mc_arb_ramcfg;
2688 u32 hdp_host_path_cntl;
2692 switch (rdev->family) {
2694 rdev->config.cik.max_shader_engines = 2;
2695 rdev->config.cik.max_tile_pipes = 4;
2696 rdev->config.cik.max_cu_per_sh = 7;
2697 rdev->config.cik.max_sh_per_se = 1;
2698 rdev->config.cik.max_backends_per_se = 2;
2699 rdev->config.cik.max_texture_channel_caches = 4;
2700 rdev->config.cik.max_gprs = 256;
2701 rdev->config.cik.max_gs_threads = 32;
2702 rdev->config.cik.max_hw_contexts = 8;
2704 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
2705 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
2706 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
2707 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
2708 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2711 rdev->config.cik.max_shader_engines = 1;
2712 rdev->config.cik.max_tile_pipes = 4;
2713 if ((rdev->pdev->device == 0x1304) ||
2714 (rdev->pdev->device == 0x1305) ||
2715 (rdev->pdev->device == 0x130C) ||
2716 (rdev->pdev->device == 0x130F) ||
2717 (rdev->pdev->device == 0x1310) ||
2718 (rdev->pdev->device == 0x1311) ||
2719 (rdev->pdev->device == 0x131C)) {
2720 rdev->config.cik.max_cu_per_sh = 8;
2721 rdev->config.cik.max_backends_per_se = 2;
2722 } else if ((rdev->pdev->device == 0x1309) ||
2723 (rdev->pdev->device == 0x130A) ||
2724 (rdev->pdev->device == 0x130D) ||
2725 (rdev->pdev->device == 0x1313)) {
2726 rdev->config.cik.max_cu_per_sh = 6;
2727 rdev->config.cik.max_backends_per_se = 2;
2728 } else if ((rdev->pdev->device == 0x1306) ||
2729 (rdev->pdev->device == 0x1307) ||
2730 (rdev->pdev->device == 0x130B) ||
2731 (rdev->pdev->device == 0x130E) ||
2732 (rdev->pdev->device == 0x1315) ||
2733 (rdev->pdev->device == 0x131B)) {
2734 rdev->config.cik.max_cu_per_sh = 4;
2735 rdev->config.cik.max_backends_per_se = 1;
2737 rdev->config.cik.max_cu_per_sh = 3;
2738 rdev->config.cik.max_backends_per_se = 1;
2740 rdev->config.cik.max_sh_per_se = 1;
2741 rdev->config.cik.max_texture_channel_caches = 4;
2742 rdev->config.cik.max_gprs = 256;
2743 rdev->config.cik.max_gs_threads = 16;
2744 rdev->config.cik.max_hw_contexts = 8;
2746 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
2747 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
2748 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
2749 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
2750 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2754 rdev->config.cik.max_shader_engines = 1;
2755 rdev->config.cik.max_tile_pipes = 2;
2756 rdev->config.cik.max_cu_per_sh = 2;
2757 rdev->config.cik.max_sh_per_se = 1;
2758 rdev->config.cik.max_backends_per_se = 1;
2759 rdev->config.cik.max_texture_channel_caches = 2;
2760 rdev->config.cik.max_gprs = 256;
2761 rdev->config.cik.max_gs_threads = 16;
2762 rdev->config.cik.max_hw_contexts = 8;
2764 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
2765 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
2766 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
2767 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
2768 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2772 /* Initialize HDP */
2773 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
2774 WREG32((0x2c14 + j), 0x00000000);
2775 WREG32((0x2c18 + j), 0x00000000);
2776 WREG32((0x2c1c + j), 0x00000000);
2777 WREG32((0x2c20 + j), 0x00000000);
2778 WREG32((0x2c24 + j), 0x00000000);
2781 WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
2783 WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
2785 mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
2786 mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
2788 rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
2789 rdev->config.cik.mem_max_burst_length_bytes = 256;
2790 tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
2791 rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
2792 if (rdev->config.cik.mem_row_size_in_kb > 4)
2793 rdev->config.cik.mem_row_size_in_kb = 4;
2794 /* XXX use MC settings? */
2795 rdev->config.cik.shader_engine_tile_size = 32;
2796 rdev->config.cik.num_gpus = 1;
2797 rdev->config.cik.multi_gpu_tile_size = 64;
2799 /* fix up row size */
2800 gb_addr_config &= ~ROW_SIZE_MASK;
2801 switch (rdev->config.cik.mem_row_size_in_kb) {
2804 gb_addr_config |= ROW_SIZE(0);
2807 gb_addr_config |= ROW_SIZE(1);
2810 gb_addr_config |= ROW_SIZE(2);
2814 /* setup tiling info dword. gb_addr_config is not adequate since it does
2815 * not have bank info, so create a custom tiling dword.
2816 * bits 3:0 num_pipes
2817 * bits 7:4 num_banks
2818 * bits 11:8 group_size
2819 * bits 15:12 row_size
2821 rdev->config.cik.tile_config = 0;
2822 switch (rdev->config.cik.num_tile_pipes) {
2824 rdev->config.cik.tile_config |= (0 << 0);
2827 rdev->config.cik.tile_config |= (1 << 0);
2830 rdev->config.cik.tile_config |= (2 << 0);
2834 /* XXX what about 12? */
2835 rdev->config.cik.tile_config |= (3 << 0);
2838 if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
2839 rdev->config.cik.tile_config |= 1 << 4;
2841 rdev->config.cik.tile_config |= 0 << 4;
2842 rdev->config.cik.tile_config |=
2843 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
2844 rdev->config.cik.tile_config |=
2845 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
2847 WREG32(GB_ADDR_CONFIG, gb_addr_config);
2848 WREG32(HDP_ADDR_CONFIG, gb_addr_config);
2849 WREG32(DMIF_ADDR_CALC, gb_addr_config);
2850 WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
2851 WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
2852 WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
2853 WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
2854 WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
2856 cik_tiling_mode_table_init(rdev);
2858 cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
2859 rdev->config.cik.max_sh_per_se,
2860 rdev->config.cik.max_backends_per_se);
2862 /* set HW defaults for 3D engine */
2863 WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
2865 WREG32(SX_DEBUG_1, 0x20);
2867 WREG32(TA_CNTL_AUX, 0x00010000);
2869 tmp = RREG32(SPI_CONFIG_CNTL);
2871 WREG32(SPI_CONFIG_CNTL, tmp);
2873 WREG32(SQ_CONFIG, 1);
2875 WREG32(DB_DEBUG, 0);
2877 tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
2879 WREG32(DB_DEBUG2, tmp);
2881 tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
2883 WREG32(DB_DEBUG3, tmp);
2885 tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
2887 WREG32(CB_HW_CONTROL, tmp);
2889 WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
2891 WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
2892 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
2893 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
2894 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
2896 WREG32(VGT_NUM_INSTANCES, 1);
2898 WREG32(CP_PERFMON_CNTL, 0);
2900 WREG32(SQ_CONFIG, 0);
2902 WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
2903 FORCE_EOV_MAX_REZ_CNT(255)));
2905 WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
2906 AUTO_INVLD_EN(ES_AND_GS_AUTO));
2908 WREG32(VGT_GS_VERTEX_REUSE, 16);
2909 WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
2911 tmp = RREG32(HDP_MISC_CNTL);
2912 tmp |= HDP_FLUSH_INVALIDATE_CACHE;
2913 WREG32(HDP_MISC_CNTL, tmp);
2915 hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
2916 WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
2918 WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
2919 WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
2925 * GPU scratch registers helpers function.
2928 * cik_scratch_init - setup driver info for CP scratch regs
2930 * @rdev: radeon_device pointer
2932 * Set up the number and offset of the CP scratch registers.
2933 * NOTE: use of CP scratch registers is a legacy inferface and
2934 * is not used by default on newer asics (r6xx+). On newer asics,
2935 * memory buffers are used for fences rather than scratch regs.
2937 static void cik_scratch_init(struct radeon_device *rdev)
2941 rdev->scratch.num_reg = 7;
2942 rdev->scratch.reg_base = SCRATCH_REG0;
2943 for (i = 0; i < rdev->scratch.num_reg; i++) {
2944 rdev->scratch.free[i] = true;
2945 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
2950 * cik_ring_test - basic gfx ring test
2952 * @rdev: radeon_device pointer
2953 * @ring: radeon_ring structure holding ring information
2955 * Allocate a scratch register and write to it using the gfx ring (CIK).
2956 * Provides a basic gfx ring test to verify that the ring is working.
2957 * Used by cik_cp_gfx_resume();
2958 * Returns 0 on success, error on failure.
2960 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
2967 r = radeon_scratch_get(rdev, &scratch);
2969 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
2972 WREG32(scratch, 0xCAFEDEAD);
2973 r = radeon_ring_lock(rdev, ring, 3);
2975 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
2976 radeon_scratch_free(rdev, scratch);
2979 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2980 radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
2981 radeon_ring_write(ring, 0xDEADBEEF);
2982 radeon_ring_unlock_commit(rdev, ring);
2984 for (i = 0; i < rdev->usec_timeout; i++) {
2985 tmp = RREG32(scratch);
2986 if (tmp == 0xDEADBEEF)
2990 if (i < rdev->usec_timeout) {
2991 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
2993 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
2994 ring->idx, scratch, tmp);
2997 radeon_scratch_free(rdev, scratch);
3002 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3004 * @rdev: radeon_device pointer
3005 * @fence: radeon fence object
3007 * Emits a fence sequnce number on the gfx ring and flushes
3010 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3011 struct radeon_fence *fence)
3013 struct radeon_ring *ring = &rdev->ring[fence->ring];
3014 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3016 /* EVENT_WRITE_EOP - flush caches, send int */
3017 radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3018 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3020 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3022 radeon_ring_write(ring, addr & 0xfffffffc);
3023 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3024 radeon_ring_write(ring, fence->seq);
3025 radeon_ring_write(ring, 0);
3027 /* We should be using the new WAIT_REG_MEM special op packet here
3028 * but it causes the CP to hang
3030 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3031 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3032 WRITE_DATA_DST_SEL(0)));
3033 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
3034 radeon_ring_write(ring, 0);
3035 radeon_ring_write(ring, 0);
3039 * cik_fence_compute_ring_emit - emit a fence on the compute ring
3041 * @rdev: radeon_device pointer
3042 * @fence: radeon fence object
3044 * Emits a fence sequnce number on the compute ring and flushes
3047 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3048 struct radeon_fence *fence)
3050 struct radeon_ring *ring = &rdev->ring[fence->ring];
3051 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3053 /* RELEASE_MEM - flush caches, send int */
3054 radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3055 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3057 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3059 radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3060 radeon_ring_write(ring, addr & 0xfffffffc);
3061 radeon_ring_write(ring, upper_32_bits(addr));
3062 radeon_ring_write(ring, fence->seq);
3063 radeon_ring_write(ring, 0);
3065 /* We should be using the new WAIT_REG_MEM special op packet here
3066 * but it causes the CP to hang
3068 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3069 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3070 WRITE_DATA_DST_SEL(0)));
3071 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
3072 radeon_ring_write(ring, 0);
3073 radeon_ring_write(ring, 0);
3076 void cik_semaphore_ring_emit(struct radeon_device *rdev,
3077 struct radeon_ring *ring,
3078 struct radeon_semaphore *semaphore,
3081 uint64_t addr = semaphore->gpu_addr;
3082 unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3084 radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3085 radeon_ring_write(ring, addr & 0xffffffff);
3086 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3093 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3095 * @rdev: radeon_device pointer
3096 * @ib: radeon indirect buffer object
3098 * Emits an DE (drawing engine) or CE (constant engine) IB
3099 * on the gfx ring. IBs are usually generated by userspace
3100 * acceleration drivers and submitted to the kernel for
3101 * sheduling on the ring. This function schedules the IB
3102 * on the gfx ring for execution by the GPU.
3104 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3106 struct radeon_ring *ring = &rdev->ring[ib->ring];
3107 u32 header, control = INDIRECT_BUFFER_VALID;
3109 if (ib->is_const_ib) {
3110 /* set switch buffer packet before const IB */
3111 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3112 radeon_ring_write(ring, 0);
3114 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3117 if (ring->rptr_save_reg) {
3118 next_rptr = ring->wptr + 3 + 4;
3119 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3120 radeon_ring_write(ring, ((ring->rptr_save_reg -
3121 PACKET3_SET_UCONFIG_REG_START) >> 2));
3122 radeon_ring_write(ring, next_rptr);
3123 } else if (rdev->wb.enabled) {
3124 next_rptr = ring->wptr + 5 + 4;
3125 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3126 radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3127 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3128 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3129 radeon_ring_write(ring, next_rptr);
3132 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3135 control |= ib->length_dw |
3136 (ib->vm ? (ib->vm->id << 24) : 0);
3138 radeon_ring_write(ring, header);
3139 radeon_ring_write(ring,
3143 (ib->gpu_addr & 0xFFFFFFFC));
3144 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3145 radeon_ring_write(ring, control);
3149 * cik_ib_test - basic gfx ring IB test
3151 * @rdev: radeon_device pointer
3152 * @ring: radeon_ring structure holding ring information
3154 * Allocate an IB and execute it on the gfx ring (CIK).
3155 * Provides a basic gfx ring test to verify that IBs are working.
3156 * Returns 0 on success, error on failure.
3158 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3160 struct radeon_ib ib;
3166 r = radeon_scratch_get(rdev, &scratch);
3168 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3171 WREG32(scratch, 0xCAFEDEAD);
3172 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3174 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3177 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3178 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3179 ib.ptr[2] = 0xDEADBEEF;
3181 r = radeon_ib_schedule(rdev, &ib, NULL);
3183 radeon_scratch_free(rdev, scratch);
3184 radeon_ib_free(rdev, &ib);
3185 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3188 r = radeon_fence_wait(ib.fence, false);
3190 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3193 for (i = 0; i < rdev->usec_timeout; i++) {
3194 tmp = RREG32(scratch);
3195 if (tmp == 0xDEADBEEF)
3199 if (i < rdev->usec_timeout) {
3200 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3202 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3206 radeon_scratch_free(rdev, scratch);
3207 radeon_ib_free(rdev, &ib);
3213 * On CIK, gfx and compute now have independant command processors.
3216 * Gfx consists of a single ring and can process both gfx jobs and
3217 * compute jobs. The gfx CP consists of three microengines (ME):
3218 * PFP - Pre-Fetch Parser
3220 * CE - Constant Engine
3221 * The PFP and ME make up what is considered the Drawing Engine (DE).
3222 * The CE is an asynchronous engine used for updating buffer desciptors
3223 * used by the DE so that they can be loaded into cache in parallel
3224 * while the DE is processing state update packets.
3227 * The compute CP consists of two microengines (ME):
3228 * MEC1 - Compute MicroEngine 1
3229 * MEC2 - Compute MicroEngine 2
3230 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3231 * The queues are exposed to userspace and are programmed directly
3232 * by the compute runtime.
3235 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3237 * @rdev: radeon_device pointer
3238 * @enable: enable or disable the MEs
3240 * Halts or unhalts the gfx MEs.
3242 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3245 WREG32(CP_ME_CNTL, 0);
3247 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3248 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3254 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3256 * @rdev: radeon_device pointer
3258 * Loads the gfx PFP, ME, and CE ucode.
3259 * Returns 0 for success, -EINVAL if the ucode is not available.
3261 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3263 const __be32 *fw_data;
3266 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3269 cik_cp_gfx_enable(rdev, false);
3272 fw_data = (const __be32 *)rdev->pfp_fw->data;
3273 WREG32(CP_PFP_UCODE_ADDR, 0);
3274 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3275 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3276 WREG32(CP_PFP_UCODE_ADDR, 0);
3279 fw_data = (const __be32 *)rdev->ce_fw->data;
3280 WREG32(CP_CE_UCODE_ADDR, 0);
3281 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3282 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3283 WREG32(CP_CE_UCODE_ADDR, 0);
3286 fw_data = (const __be32 *)rdev->me_fw->data;
3287 WREG32(CP_ME_RAM_WADDR, 0);
3288 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3289 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3290 WREG32(CP_ME_RAM_WADDR, 0);
3292 WREG32(CP_PFP_UCODE_ADDR, 0);
3293 WREG32(CP_CE_UCODE_ADDR, 0);
3294 WREG32(CP_ME_RAM_WADDR, 0);
3295 WREG32(CP_ME_RAM_RADDR, 0);
3300 * cik_cp_gfx_start - start the gfx ring
3302 * @rdev: radeon_device pointer
3304 * Enables the ring and loads the clear state context and other
3305 * packets required to init the ring.
3306 * Returns 0 for success, error for failure.
3308 static int cik_cp_gfx_start(struct radeon_device *rdev)
3310 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3314 WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
3315 WREG32(CP_ENDIAN_SWAP, 0);
3316 WREG32(CP_DEVICE_ID, 1);
3318 cik_cp_gfx_enable(rdev, true);
3320 r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
3322 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3326 /* init the CE partitions. CE only used for gfx on CIK */
3327 radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3328 radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3329 radeon_ring_write(ring, 0xc000);
3330 radeon_ring_write(ring, 0xc000);
3332 /* setup clear context state */
3333 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3334 radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3336 radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3337 radeon_ring_write(ring, 0x80000000);
3338 radeon_ring_write(ring, 0x80000000);
3340 for (i = 0; i < cik_default_size; i++)
3341 radeon_ring_write(ring, cik_default_state[i]);
3343 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3344 radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3346 /* set clear context state */
3347 radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3348 radeon_ring_write(ring, 0);
3350 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3351 radeon_ring_write(ring, 0x00000316);
3352 radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3353 radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3355 radeon_ring_unlock_commit(rdev, ring);
3361 * cik_cp_gfx_fini - stop the gfx ring
3363 * @rdev: radeon_device pointer
3365 * Stop the gfx ring and tear down the driver ring
3368 static void cik_cp_gfx_fini(struct radeon_device *rdev)
3370 cik_cp_gfx_enable(rdev, false);
3371 radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3375 * cik_cp_gfx_resume - setup the gfx ring buffer registers
3377 * @rdev: radeon_device pointer
3379 * Program the location and size of the gfx ring buffer
3380 * and test it to make sure it's working.
3381 * Returns 0 for success, error for failure.
3383 static int cik_cp_gfx_resume(struct radeon_device *rdev)
3385 struct radeon_ring *ring;
3391 WREG32(CP_SEM_WAIT_TIMER, 0x0);
3392 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3394 /* Set the write pointer delay */
3395 WREG32(CP_RB_WPTR_DELAY, 0);
3397 /* set the RB to use vmid 0 */
3398 WREG32(CP_RB_VMID, 0);
3400 WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3402 /* ring 0 - compute and gfx */
3403 /* Set ring buffer size */
3404 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3405 rb_bufsz = order_base_2(ring->ring_size / 8);
3406 tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3408 tmp |= BUF_SWAP_32BIT;
3410 WREG32(CP_RB0_CNTL, tmp);
3412 /* Initialize the ring buffer's read and write pointers */
3413 WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3415 WREG32(CP_RB0_WPTR, ring->wptr);
3417 /* set the wb address wether it's enabled or not */
3418 WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3419 WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3421 /* scratch register shadowing is no longer supported */
3422 WREG32(SCRATCH_UMSK, 0);
3424 if (!rdev->wb.enabled)
3425 tmp |= RB_NO_UPDATE;
3428 WREG32(CP_RB0_CNTL, tmp);
3430 rb_addr = ring->gpu_addr >> 8;
3431 WREG32(CP_RB0_BASE, rb_addr);
3432 WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
3434 ring->rptr = RREG32(CP_RB0_RPTR);
3436 /* start the ring */
3437 cik_cp_gfx_start(rdev);
3438 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
3439 r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3441 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3447 u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
3448 struct radeon_ring *ring)
3454 if (rdev->wb.enabled) {
3455 rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
3457 mutex_lock(&rdev->srbm_mutex);
3458 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3459 rptr = RREG32(CP_HQD_PQ_RPTR);
3460 cik_srbm_select(rdev, 0, 0, 0, 0);
3461 mutex_unlock(&rdev->srbm_mutex);
3467 u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
3468 struct radeon_ring *ring)
3472 if (rdev->wb.enabled) {
3473 wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
3475 mutex_lock(&rdev->srbm_mutex);
3476 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3477 wptr = RREG32(CP_HQD_PQ_WPTR);
3478 cik_srbm_select(rdev, 0, 0, 0, 0);
3479 mutex_unlock(&rdev->srbm_mutex);
3485 void cik_compute_ring_set_wptr(struct radeon_device *rdev,
3486 struct radeon_ring *ring)
3488 rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(ring->wptr);
3489 WDOORBELL32(ring->doorbell_offset, ring->wptr);
3493 * cik_cp_compute_enable - enable/disable the compute CP MEs
3495 * @rdev: radeon_device pointer
3496 * @enable: enable or disable the MEs
3498 * Halts or unhalts the compute MEs.
3500 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
3503 WREG32(CP_MEC_CNTL, 0);
3505 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
3510 * cik_cp_compute_load_microcode - load the compute CP ME ucode
3512 * @rdev: radeon_device pointer
3514 * Loads the compute MEC1&2 ucode.
3515 * Returns 0 for success, -EINVAL if the ucode is not available.
3517 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
3519 const __be32 *fw_data;
3525 cik_cp_compute_enable(rdev, false);
3528 fw_data = (const __be32 *)rdev->mec_fw->data;
3529 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
3530 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
3531 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
3532 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
3534 if (rdev->family == CHIP_KAVERI) {
3536 fw_data = (const __be32 *)rdev->mec_fw->data;
3537 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
3538 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
3539 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
3540 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
3547 * cik_cp_compute_start - start the compute queues
3549 * @rdev: radeon_device pointer
3551 * Enable the compute queues.
3552 * Returns 0 for success, error for failure.
3554 static int cik_cp_compute_start(struct radeon_device *rdev)
/* Un-halt the MECs; the per-queue programming is done elsewhere
 * (cik_cp_compute_resume). */
3556 cik_cp_compute_enable(rdev, true);
3562 * cik_cp_compute_fini - stop the compute queues
3564 * @rdev: radeon_device pointer
3566 * Stop the compute queues and tear down the driver queue
3569 static void cik_cp_compute_fini(struct radeon_device *rdev)
/* Halt the MECs before freeing the queue state they reference. */
3573 cik_cp_compute_enable(rdev, false);
/* Tear down the MQD BO of each of the two driver compute rings. */
3575 for (i = 0; i < 2; i++) {
3577 idx = CAYMAN_RING_TYPE_CP1_INDEX;
3579 idx = CAYMAN_RING_TYPE_CP2_INDEX;
3581 if (rdev->ring[idx].mqd_obj) {
3582 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
3583 if (unlikely(r != 0))
/* Warn but keep going; unref below still releases the object. */
3584 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
3586 radeon_bo_unpin(rdev->ring[idx].mqd_obj);
3587 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
/* Drop the last reference and clear the pointer so fini is idempotent. */
3589 radeon_bo_unref(&rdev->ring[idx].mqd_obj);
3590 rdev->ring[idx].mqd_obj = NULL;
/*
 * cik_mec_fini - free the MEC HPD EOP buffer object allocated by
 * cik_mec_init(). Safe to call when the BO was never created.
 */
3595 static void cik_mec_fini(struct radeon_device *rdev)
3599 if (rdev->mec.hpd_eop_obj) {
3600 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
3601 if (unlikely(r != 0))
/* Warn but continue; the unref below still drops the object. */
3602 dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
3603 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
3604 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
/* Release the last reference and clear the pointer (idempotent fini). */
3606 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
3607 rdev->mec.hpd_eop_obj = NULL;
3611 #define MEC_HPD_SIZE 2048
/*
 * cik_mec_init - set up MEC topology counts and allocate/pin/clear the
 * HPD EOP buffer in GTT (MEC_HPD_SIZE*2 bytes per pipe).
 * NOTE(review): the dev_warn strings below say "HDP EOP" although the
 * object is the HPD EOP bo — pre-existing message typo, left as-is.
 */
3613 static int cik_mec_init(struct radeon_device *rdev)
3619 * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
3620 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
3622 if (rdev->family == CHIP_KAVERI)
3623 rdev->mec.num_mec = 2;
3625 rdev->mec.num_mec = 1;
3626 rdev->mec.num_pipe = 4;
/* 8 queues per pipe, per the table above. */
3627 rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
/* Allocate the EOP buffer only once; resume paths may re-enter here. */
3629 if (rdev->mec.hpd_eop_obj == NULL) {
3630 r = radeon_bo_create(rdev,
3631 rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
3633 RADEON_GEM_DOMAIN_GTT, NULL,
3634 &rdev->mec.hpd_eop_obj);
3636 dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
3641 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
3642 if (unlikely(r != 0)) {
/* Pin in GTT and record the GPU address programmed into each pipe. */
3646 r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
3647 &rdev->mec.hpd_eop_gpu_addr);
3649 dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
3653 r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
3655 dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
3660 /* clear memory. Not sure if this is required or not */
3661 memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
3663 radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
3664 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
/*
 * struct hqd_registers - CPU-side mirror of the per-queue CP_HQD_* /
 * CP_MQD_* register state that cik_cp_compute_resume() records into the
 * MQD while programming each hardware queue descriptor. Field names
 * match the corresponding registers one-for-one.
 */
3669 struct hqd_registers
3671 u32 cp_mqd_base_addr;
3672 u32 cp_mqd_base_addr_hi;
3675 u32 cp_hqd_persistent_state;
3676 u32 cp_hqd_pipe_priority;
3677 u32 cp_hqd_queue_priority;
3680 u32 cp_hqd_pq_base_hi;
3682 u32 cp_hqd_pq_rptr_report_addr;
3683 u32 cp_hqd_pq_rptr_report_addr_hi;
3684 u32 cp_hqd_pq_wptr_poll_addr;
3685 u32 cp_hqd_pq_wptr_poll_addr_hi;
3686 u32 cp_hqd_pq_doorbell_control;
3688 u32 cp_hqd_pq_control;
3689 u32 cp_hqd_ib_base_addr;
3690 u32 cp_hqd_ib_base_addr_hi;
3692 u32 cp_hqd_ib_control;
3693 u32 cp_hqd_iq_timer;
3695 u32 cp_hqd_dequeue_request;
3696 u32 cp_hqd_dma_offload;
3697 u32 cp_hqd_sema_cmd;
3698 u32 cp_hqd_msg_type;
3699 u32 cp_hqd_atomic0_preop_lo;
3700 u32 cp_hqd_atomic0_preop_hi;
3701 u32 cp_hqd_atomic1_preop_lo;
3702 u32 cp_hqd_atomic1_preop_hi;
3703 u32 cp_hqd_hq_scheduler0;
3704 u32 cp_hqd_hq_scheduler1;
/*
 * Trailing fields of struct bonaire_mqd (the memory queue descriptor the
 * driver writes into a GTT BO and points CP_MQD_BASE_ADDR at).
 * NOTE(review): the struct's opening line and some leading fields are not
 * visible in this extract.
 */
3711 u32 dispatch_initiator;
3715 u32 pipeline_stat_enable;
3716 u32 perf_counter_enable;
3722 u32 resource_limits;
/* All-ones here enables every CU; see cik_cp_compute_resume(). */
3723 u32 static_thread_mgmt01[2];
3725 u32 static_thread_mgmt23[2];
3727 u32 thread_trace_enable;
3730 u32 vgtcs_invoke_count[2];
/* Snapshot of the HQD/MQD registers programmed for this queue. */
3731 struct hqd_registers queue_state;
3733 u32 interrupt_queue[64];
3737 * cik_cp_compute_resume - setup the compute queue registers
3739 * @rdev: radeon_device pointer
3741 * Program the compute queues and test them to make sure they
3743 * Returns 0 for success, error for failure.
/*
 * cik_cp_compute_resume - bring up the compute queues: start the MECs,
 * program the per-pipe EOP buffers, then build an MQD in a GTT BO for
 * each of the two driver compute rings (CP1/CP2), program the HQD from
 * it, and ring-test the result.
 * NOTE(review): the inner dequeue-wait loop at line 3870 reuses loop
 * variable 'i' of the outer two-queue loop at 3793, clobbering the outer
 * index if a queue was active — should use a separate index. Flagged
 * only; code left byte-identical in this documentation pass.
 */
3745 static int cik_cp_compute_resume(struct radeon_device *rdev)
3749 bool use_doorbell = true;
3755 struct bonaire_mqd *mqd;
3757 r = cik_cp_compute_start(rdev);
3761 /* fix up chicken bits */
3762 tmp = RREG32(CP_CPF_DEBUG);
3764 WREG32(CP_CPF_DEBUG, tmp);
3766 /* init the pipes */
3767 mutex_lock(&rdev->srbm_mutex);
3768 for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
/* Pipes 0-3 live on MEC1, 4-7 on MEC2 (KAVERI only has the latter). */
3769 int me = (i < 4) ? 1 : 2;
3770 int pipe = (i < 4) ? i : (i - 4);
/* Each pipe gets its own MEC_HPD_SIZE*2 slice of the EOP buffer. */
3772 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
3774 cik_srbm_select(rdev, me, pipe, 0, 0);
3776 /* write the EOP addr */
3777 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
3778 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
3780 /* set the VMID assigned */
3781 WREG32(CP_HPD_EOP_VMID, 0);
3783 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3784 tmp = RREG32(CP_HPD_EOP_CONTROL);
3785 tmp &= ~EOP_SIZE_MASK;
3786 tmp |= order_base_2(MEC_HPD_SIZE / 8);
3787 WREG32(CP_HPD_EOP_CONTROL, tmp);
3789 cik_srbm_select(rdev, 0, 0, 0, 0);
3790 mutex_unlock(&rdev->srbm_mutex);
3792 /* init the queues. Just two for now. */
3793 for (i = 0; i < 2; i++) {
3795 idx = CAYMAN_RING_TYPE_CP1_INDEX;
3797 idx = CAYMAN_RING_TYPE_CP2_INDEX;
/* Lazily allocate the MQD BO; it survives suspend/resume cycles. */
3799 if (rdev->ring[idx].mqd_obj == NULL) {
3800 r = radeon_bo_create(rdev,
3801 sizeof(struct bonaire_mqd),
3803 RADEON_GEM_DOMAIN_GTT, NULL,
3804 &rdev->ring[idx].mqd_obj);
3806 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
3811 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
3812 if (unlikely(r != 0)) {
/* On any BO failure, tear down everything built so far. */
3813 cik_cp_compute_fini(rdev);
3816 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
3819 dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
3820 cik_cp_compute_fini(rdev);
3823 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
3825 dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
3826 cik_cp_compute_fini(rdev);
3830 /* doorbell offset */
3831 rdev->ring[idx].doorbell_offset =
3832 (rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0;
3834 /* init the mqd struct */
3835 memset(buf, 0, sizeof(struct bonaire_mqd));
3837 mqd = (struct bonaire_mqd *)buf;
3838 mqd->header = 0xC0310800;
/* All-ones: no static CU masking for this queue. */
3839 mqd->static_thread_mgmt01[0] = 0xffffffff;
3840 mqd->static_thread_mgmt01[1] = 0xffffffff;
3841 mqd->static_thread_mgmt23[0] = 0xffffffff;
3842 mqd->static_thread_mgmt23[1] = 0xffffffff;
/* All HQD register programming below runs with this queue SRBM-selected. */
3844 mutex_lock(&rdev->srbm_mutex);
3845 cik_srbm_select(rdev, rdev->ring[idx].me,
3846 rdev->ring[idx].pipe,
3847 rdev->ring[idx].queue, 0);
3849 /* disable wptr polling */
3850 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
3851 tmp &= ~WPTR_POLL_EN;
3852 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
3854 /* enable doorbell? */
3855 mqd->queue_state.cp_hqd_pq_doorbell_control =
3856 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3858 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3860 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
3861 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3862 mqd->queue_state.cp_hqd_pq_doorbell_control);
3864 /* disable the queue if it's active */
3865 mqd->queue_state.cp_hqd_dequeue_request = 0;
3866 mqd->queue_state.cp_hqd_pq_rptr = 0;
3867 mqd->queue_state.cp_hqd_pq_wptr= 0;
3868 if (RREG32(CP_HQD_ACTIVE) & 1) {
3869 WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
/* NOTE(review): 'i' here shadows the outer queue-loop index — see
 * header note. */
3870 for (i = 0; i < rdev->usec_timeout; i++) {
3871 if (!(RREG32(CP_HQD_ACTIVE) & 1))
3875 WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
3876 WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
3877 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3880 /* set the pointer to the MQD */
3881 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
3882 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
3883 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
3884 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
3885 /* set MQD vmid to 0 */
3886 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
3887 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
3888 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
3890 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3891 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
3892 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
3893 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3894 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
3895 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
3897 /* set up the HQD, this is similar to CP_RB0_CNTL */
3898 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
3899 mqd->queue_state.cp_hqd_pq_control &=
3900 ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
/* Queue size and rptr-block size are encoded as log2 values. */
3902 mqd->queue_state.cp_hqd_pq_control |=
3903 order_base_2(rdev->ring[idx].ring_size / 8);
3904 mqd->queue_state.cp_hqd_pq_control |=
3905 (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
3907 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
3909 mqd->queue_state.cp_hqd_pq_control &=
3910 ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
3911 mqd->queue_state.cp_hqd_pq_control |=
3912 PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
3913 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
3915 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
3917 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
3919 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
3920 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
3921 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3922 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
3923 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
3924 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
3926 /* set the wb address whether it's enabled or not */
3928 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
3930 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
3931 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
3932 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
3933 upper_32_bits(wb_gpu_addr) & 0xffff;
3934 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
3935 mqd->queue_state.cp_hqd_pq_rptr_report_addr);
3936 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3937 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
3939 /* enable the doorbell if requested */
3941 mqd->queue_state.cp_hqd_pq_doorbell_control =
3942 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3943 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
/* Doorbell offset is programmed in dword units. */
3944 mqd->queue_state.cp_hqd_pq_doorbell_control |=
3945 DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4);
3946 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3947 mqd->queue_state.cp_hqd_pq_doorbell_control &=
3948 ~(DOORBELL_SOURCE | DOORBELL_HIT);
3951 mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
3953 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3954 mqd->queue_state.cp_hqd_pq_doorbell_control);
3956 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3957 rdev->ring[idx].wptr = 0;
3958 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
3959 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3960 rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
3961 mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
3963 /* set the vmid for the queue */
3964 mqd->queue_state.cp_hqd_vmid = 0;
3965 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
3967 /* activate the queue */
3968 mqd->queue_state.cp_hqd_active = 1;
3969 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
3971 cik_srbm_select(rdev, 0, 0, 0, 0);
3972 mutex_unlock(&rdev->srbm_mutex);
3974 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
3975 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
/* Mark ready optimistically; ring test below clears it on failure. */
3977 rdev->ring[idx].ready = true;
3978 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
3980 rdev->ring[idx].ready = false;
/* cik_cp_enable - enable/disable both the gfx and compute CPs together. */
3986 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
3988 cik_cp_gfx_enable(rdev, enable);
3989 cik_cp_compute_enable(rdev, enable);
/* cik_cp_load_microcode - load gfx then compute CP ucode; stops on the
 * first failure (error-return lines not visible in this extract). */
3992 static int cik_cp_load_microcode(struct radeon_device *rdev)
3996 r = cik_cp_gfx_load_microcode(rdev);
3999 r = cik_cp_compute_load_microcode(rdev);
/* cik_cp_fini - tear down both the gfx and compute command processors. */
4006 static void cik_cp_fini(struct radeon_device *rdev)
4008 cik_cp_gfx_fini(rdev);
4009 cik_cp_compute_fini(rdev);
/* cik_cp_resume - load ucode, then bring up gfx and compute rings in
 * order; each step's error check lines are not visible in this extract. */
4012 static int cik_cp_resume(struct radeon_device *rdev)
4016 r = cik_cp_load_microcode(rdev);
4020 r = cik_cp_gfx_resume(rdev);
4023 r = cik_cp_compute_resume(rdev);
/*
 * cik_print_gpu_status_regs - dump the GRBM/SRBM/SDMA/CP status registers
 * to the kernel log; used before and after a soft reset for diagnosis.
 * Read-only: no hardware state is modified.
 */
4030 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
4032 dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
4033 RREG32(GRBM_STATUS));
4034 dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
4035 RREG32(GRBM_STATUS2));
4036 dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
4037 RREG32(GRBM_STATUS_SE0));
4038 dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
4039 RREG32(GRBM_STATUS_SE1));
4040 dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
4041 RREG32(GRBM_STATUS_SE2));
4042 dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
4043 RREG32(GRBM_STATUS_SE3));
4044 dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
4045 RREG32(SRBM_STATUS));
4046 dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
4047 RREG32(SRBM_STATUS2));
/* Both SDMA engines share the register layout; only the offset differs. */
4048 dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
4049 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
4050 dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
4051 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
4052 dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
4053 dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
4054 RREG32(CP_STALLED_STAT1));
4055 dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
4056 RREG32(CP_STALLED_STAT2));
4057 dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
4058 RREG32(CP_STALLED_STAT3));
4059 dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
4060 RREG32(CP_CPF_BUSY_STAT));
4061 dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
4062 RREG32(CP_CPF_STALLED_STAT1));
4063 dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
4064 dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
4065 dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
4066 RREG32(CP_CPC_STALLED_STAT1));
4067 dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
4071 * cik_gpu_check_soft_reset - check which blocks are busy
4073 * @rdev: radeon_device pointer
4075 * Check which blocks are busy and return the relevant reset
4076 * mask to be used by cik_gpu_soft_reset().
4077 * Returns a mask of the blocks to be reset.
4079 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
/* GRBM_STATUS: busy gfx-pipeline units -> GFX reset; busy CP -> CP reset. */
4085 tmp = RREG32(GRBM_STATUS);
4086 if (tmp & (PA_BUSY | SC_BUSY |
4087 BCI_BUSY | SX_BUSY |
4088 TA_BUSY | VGT_BUSY |
4090 GDS_BUSY | SPI_BUSY |
4091 IA_BUSY | IA_BUSY_NO_DMA))
4092 reset_mask |= RADEON_RESET_GFX;
4094 if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4095 reset_mask |= RADEON_RESET_CP;
4098 tmp = RREG32(GRBM_STATUS2);
4100 reset_mask |= RADEON_RESET_RLC;
4102 /* SDMA0_STATUS_REG */
4103 tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4104 if (!(tmp & SDMA_IDLE))
4105 reset_mask |= RADEON_RESET_DMA;
4107 /* SDMA1_STATUS_REG */
4108 tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4109 if (!(tmp & SDMA_IDLE))
4110 reset_mask |= RADEON_RESET_DMA1;
4113 tmp = RREG32(SRBM_STATUS2);
4114 if (tmp & SDMA_BUSY)
4115 reset_mask |= RADEON_RESET_DMA;
4117 if (tmp & SDMA1_BUSY)
4118 reset_mask |= RADEON_RESET_DMA1;
4121 tmp = RREG32(SRBM_STATUS);
4124 reset_mask |= RADEON_RESET_IH;
4127 reset_mask |= RADEON_RESET_SEM;
4129 if (tmp & GRBM_RQ_PENDING)
4130 reset_mask |= RADEON_RESET_GRBM;
4133 reset_mask |= RADEON_RESET_VMC;
4135 if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4136 MCC_BUSY | MCD_BUSY))
4137 reset_mask |= RADEON_RESET_MC;
4139 if (evergreen_is_display_hung(rdev))
4140 reset_mask |= RADEON_RESET_DISPLAY;
4142 /* Skip MC reset as it's mostly likely not hung, just busy */
4143 if (reset_mask & RADEON_RESET_MC) {
4144 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4145 reset_mask &= ~RADEON_RESET_MC;
4152 * cik_gpu_soft_reset - soft reset GPU
4154 * @rdev: radeon_device pointer
4155 * @reset_mask: mask of which blocks to reset
4157 * Soft reset the blocks specified in @reset_mask.
4159 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4161 struct evergreen_mc_save save;
4162 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
/* Nothing flagged hung: no reset needed. */
4165 if (reset_mask == 0)
4168 dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
/* Log pre-reset state for bug reports. */
4170 cik_print_gpu_status_regs(rdev);
4171 dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4172 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4173 dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4174 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4179 /* Disable GFX parsing/prefetching */
4180 WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4182 /* Disable MEC parsing/prefetching */
4183 WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4185 if (reset_mask & RADEON_RESET_DMA) {
4187 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4189 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4191 if (reset_mask & RADEON_RESET_DMA1) {
4193 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4195 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
/* Quiesce the memory controller before pulling reset lines. */
4198 evergreen_mc_stop(rdev, &save);
4199 if (evergreen_mc_wait_for_idle(rdev)) {
4200 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
/* Translate the hang mask into GRBM/SRBM soft-reset bit sets. */
4203 if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4204 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4206 if (reset_mask & RADEON_RESET_CP) {
4207 grbm_soft_reset |= SOFT_RESET_CP;
4209 srbm_soft_reset |= SOFT_RESET_GRBM;
4212 if (reset_mask & RADEON_RESET_DMA)
4213 srbm_soft_reset |= SOFT_RESET_SDMA;
4215 if (reset_mask & RADEON_RESET_DMA1)
4216 srbm_soft_reset |= SOFT_RESET_SDMA1;
4218 if (reset_mask & RADEON_RESET_DISPLAY)
4219 srbm_soft_reset |= SOFT_RESET_DC;
4221 if (reset_mask & RADEON_RESET_RLC)
4222 grbm_soft_reset |= SOFT_RESET_RLC;
4224 if (reset_mask & RADEON_RESET_SEM)
4225 srbm_soft_reset |= SOFT_RESET_SEM;
4227 if (reset_mask & RADEON_RESET_IH)
4228 srbm_soft_reset |= SOFT_RESET_IH;
4230 if (reset_mask & RADEON_RESET_GRBM)
4231 srbm_soft_reset |= SOFT_RESET_GRBM;
4233 if (reset_mask & RADEON_RESET_VMC)
4234 srbm_soft_reset |= SOFT_RESET_VMC;
/* MC reset is only valid on dGPUs; IGPs share the MC with the CPU. */
4236 if (!(rdev->flags & RADEON_IS_IGP)) {
4237 if (reset_mask & RADEON_RESET_MC)
4238 srbm_soft_reset |= SOFT_RESET_MC;
/* Assert then deassert the reset bits; the re-reads post the writes
 * (delay lines between them are not visible in this extract). */
4241 if (grbm_soft_reset) {
4242 tmp = RREG32(GRBM_SOFT_RESET);
4243 tmp |= grbm_soft_reset;
4244 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4245 WREG32(GRBM_SOFT_RESET, tmp);
4246 tmp = RREG32(GRBM_SOFT_RESET);
4250 tmp &= ~grbm_soft_reset;
4251 WREG32(GRBM_SOFT_RESET, tmp);
4252 tmp = RREG32(GRBM_SOFT_RESET);
4255 if (srbm_soft_reset) {
4256 tmp = RREG32(SRBM_SOFT_RESET);
4257 tmp |= srbm_soft_reset;
4258 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4259 WREG32(SRBM_SOFT_RESET, tmp);
4260 tmp = RREG32(SRBM_SOFT_RESET);
4264 tmp &= ~srbm_soft_reset;
4265 WREG32(SRBM_SOFT_RESET, tmp);
4266 tmp = RREG32(SRBM_SOFT_RESET);
4269 /* Wait a little for things to settle down */
4272 evergreen_mc_resume(rdev, &save);
/* Log post-reset state for comparison. */
4275 cik_print_gpu_status_regs(rdev);
4279 * cik_asic_reset - soft reset GPU
4281 * @rdev: radeon_device pointer
4283 * Look up which blocks are hung and attempt
4285 * Returns 0 for success.
4287 int cik_asic_reset(struct radeon_device *rdev)
/* Determine which blocks are hung before and after the reset attempt. */
4291 reset_mask = cik_gpu_check_soft_reset(rdev);
/* Tell the BIOS (via scratch reg) the engine is hung during the reset. */
4294 r600_set_bios_scratch_engine_hung(rdev, true);
4296 cik_gpu_soft_reset(rdev, reset_mask);
4298 reset_mask = cik_gpu_check_soft_reset(rdev);
4301 r600_set_bios_scratch_engine_hung(rdev, false);
4307 * cik_gfx_is_lockup - check if the 3D engine is locked up
4309 * @rdev: radeon_device pointer
4310 * @ring: radeon_ring structure holding ring information
4312 * Check if the 3D engine is locked up (CIK).
4313 * Returns true if the engine is locked, false if not.
4315 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4317 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
/* No gfx/compute/CP block is hung: refresh the lockup tracker and report
 * not locked up (return line not visible in this extract). */
4319 if (!(reset_mask & (RADEON_RESET_GFX |
4320 RADEON_RESET_COMPUTE |
4321 RADEON_RESET_CP))) {
4322 radeon_ring_lockup_update(ring);
4325 /* force CP activities */
4326 radeon_ring_force_activity(rdev, ring);
4327 return radeon_ring_test_lockup(rdev, ring);
4332 * cik_mc_program - program the GPU memory controller
4334 * @rdev: radeon_device pointer
4336 * Set the location of vram, gart, and AGP in the GPU's
4337 * physical address space (CIK).
4339 static void cik_mc_program(struct radeon_device *rdev)
4341 struct evergreen_mc_save save;
4345 /* Initialize HDP */
/* 32 HDP tiling contexts, 0x18 bytes apart; zero all five regs of each. */
4346 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4347 WREG32((0x2c14 + j), 0x00000000);
4348 WREG32((0x2c18 + j), 0x00000000);
4349 WREG32((0x2c1c + j), 0x00000000);
4350 WREG32((0x2c20 + j), 0x00000000);
4351 WREG32((0x2c24 + j), 0x00000000);
4353 WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
/* MC must be idle while the aperture registers are reprogrammed. */
4355 evergreen_mc_stop(rdev, &save);
4356 if (radeon_mc_wait_for_idle(rdev)) {
4357 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4359 /* Lockout access through VGA aperture*/
4360 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4361 /* Update configuration */
4362 WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4363 rdev->mc.vram_start >> 12);
4364 WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4365 rdev->mc.vram_end >> 12);
4366 WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4367 rdev->vram_scratch.gpu_addr >> 12);
/* FB_LOCATION packs end (hi 16 bits) and start (lo 16) in 16MB units. */
4368 tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4369 tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4370 WREG32(MC_VM_FB_LOCATION, tmp);
4371 /* XXX double check these! */
4372 WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4373 WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4374 WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
/* AGP is unused on CIK: bottom above top disables the aperture. */
4375 WREG32(MC_VM_AGP_BASE, 0);
4376 WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4377 WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4378 if (radeon_mc_wait_for_idle(rdev)) {
4379 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4381 evergreen_mc_resume(rdev, &save);
4382 /* we need to own VRAM, so turn off the VGA renderer here
4383 * to stop it overwriting our objects */
4384 rv515_vga_render_disable(rdev);
4388 * cik_mc_init - initialize the memory controller driver params
4390 * @rdev: radeon_device pointer
4392 * Look up the amount of vram, vram width, and decide how to place
4393 * vram and gart within the GPU's physical address space (CIK).
4394 * Returns 0 for success.
4396 static int cik_mc_init(struct radeon_device *rdev)
4399 int chansize, numchan;
4401 /* Get VRAM informations */
4402 rdev->mc.vram_is_ddr = true;
/* Channel size from RAMCFG (branch bodies not visible in this extract). */
4403 tmp = RREG32(MC_ARB_RAMCFG);
4404 if (tmp & CHANSIZE_MASK) {
/* Number of memory channels from CHMAP (switch cases not visible). */
4409 tmp = RREG32(MC_SHARED_CHMAP);
4410 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
/* Bus width = channels * per-channel width. */
4440 rdev->mc.vram_width = numchan * chansize;
4441 /* Could aper size report 0 ? */
4442 rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4443 rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4444 /* size in MB on si */
4445 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
4446 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
/* CPU-visible VRAM is limited by the PCI BAR aperture. */
4447 rdev->mc.visible_vram_size = rdev->mc.aper_size;
/* CIK reuses the SI vram/gtt placement logic. */
4448 si_vram_gtt_location(rdev, &rdev->mc);
4449 radeon_update_bandwidth_info(rdev);
4456 * VMID 0 is the physical GPU addresses as used by the kernel.
4457 * VMIDs 1-15 are used for userspace clients and are handled
4458 * by the radeon vm/hsa code.
4461 * cik_pcie_gart_tlb_flush - gart tlb flush callback
4463 * @rdev: radeon_device pointer
4465 * Flush the TLB for the VMID 0 page table (CIK).
4467 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
4469 /* flush hdp cache */
/* Writing 0 triggers the HDP flush; ensures CPU writes hit memory
 * before the TLB invalidate below. */
4470 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
4472 /* bits 0-15 are the VM contexts0-15 */
/* Only bit 0 (context 0, the kernel GART context) is invalidated here. */
4473 WREG32(VM_INVALIDATE_REQUEST, 0x1);
4477 * cik_pcie_gart_enable - gart enable
4479 * @rdev: radeon_device pointer
4481 * This sets up the TLBs, programs the page tables for VMID0,
4482 * sets up the hw for VMIDs 1-15 which are allocated on
4483 * demand, and sets up the global locations for the LDS, GDS,
4484 * and GPUVM for FSA64 clients (CIK).
4485 * Returns 0 for success, errors for failure.
4487 static int cik_pcie_gart_enable(struct radeon_device *rdev)
4491 if (rdev->gart.robj == NULL) {
4492 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4495 r = radeon_gart_table_vram_pin(rdev);
4498 radeon_gart_restore(rdev);
4499 /* Setup TLB control */
4500 WREG32(MC_VM_MX_L1_TLB_CNTL,
4503 SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4504 ENABLE_ADVANCED_DRIVER_MODEL |
4505 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4506 /* Setup L2 cache */
4507 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4508 ENABLE_L2_FRAGMENT_PROCESSING |
4509 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4510 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4511 EFFECTIVE_L2_QUEUE_SIZE(7) |
4512 CONTEXT1_IDENTITY_ACCESS_MODE(1));
/* Start clean: drop any stale L1/L2 VM translations. */
4513 WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4514 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4515 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4516 /* setup context0 */
/* Context 0 is the kernel GART: flat page table covering the GTT range. */
4517 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4518 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4519 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
/* Faulting accesses are redirected to the dummy page. */
4520 WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4521 (u32)(rdev->dummy_page.addr >> 12));
4522 WREG32(VM_CONTEXT0_CNTL2, 0);
4523 WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4524 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4530 /* empty context1-15 */
4531 /* FIXME start with 4G, once using 2 level pt switch to full
4534 /* set vm size, must be a multiple of 4 */
4535 WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4536 WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
/* Base-address regs for ctx 1-7 and 8-15 live in two separate banks. */
4537 for (i = 1; i < 16; i++) {
4539 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4540 rdev->gart.table_addr >> 12);
4542 WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4543 rdev->gart.table_addr >> 12);
4546 /* enable context1-15 */
4547 WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4548 (u32)(rdev->dummy_page.addr >> 12));
4549 WREG32(VM_CONTEXT1_CNTL2, 4);
/* Two-level page tables for user VMs; fault on every illegal access
 * class, both interrupt and redirect-to-default. */
4550 WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4551 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4552 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4553 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4554 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4555 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4556 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4557 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4558 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4559 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4560 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4561 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4562 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4564 /* TC cache setup ??? */
4565 WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
4566 WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
4567 WREG32(TC_CFG_L1_STORE_POLICY, 0);
4569 WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
4570 WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
4571 WREG32(TC_CFG_L2_STORE_POLICY0, 0);
4572 WREG32(TC_CFG_L2_STORE_POLICY1, 0);
4573 WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
4575 WREG32(TC_CFG_L1_VOLATILE, 0);
4576 WREG32(TC_CFG_L2_VOLATILE, 0);
4578 if (rdev->family == CHIP_KAVERI) {
4579 u32 tmp = RREG32(CHUB_CONTROL);
4581 WREG32(CHUB_CONTROL, tmp);
4584 /* XXX SH_MEM regs */
4585 /* where to put LDS, scratch, GPUVM in FSA64 space */
/* Program SH_MEM_* identically for all 16 VMIDs via SRBM select. */
4586 mutex_lock(&rdev->srbm_mutex);
4587 for (i = 0; i < 16; i++) {
4588 cik_srbm_select(rdev, 0, 0, 0, i);
4589 /* CP and shaders */
4590 WREG32(SH_MEM_CONFIG, 0);
4591 WREG32(SH_MEM_APE1_BASE, 1);
4592 WREG32(SH_MEM_APE1_LIMIT, 0);
4593 WREG32(SH_MEM_BASES, 0);
4595 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
4596 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
4597 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
4598 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
4599 /* XXX SDMA RLC - todo */
4601 cik_srbm_select(rdev, 0, 0, 0, 0);
4602 mutex_unlock(&rdev->srbm_mutex);
/* Invalidate stale translations now that the tables are live. */
4604 cik_pcie_gart_tlb_flush(rdev);
4605 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4606 (unsigned)(rdev->mc.gtt_size >> 20),
4607 (unsigned long long)rdev->gart.table_addr);
4608 rdev->gart.ready = true;
4613 * cik_pcie_gart_disable - gart disable
4615 * @rdev: radeon_device pointer
4617 * This disables all VM page table (CIK).
4619 static void cik_pcie_gart_disable(struct radeon_device *rdev)
4621 /* Disable all tables */
4622 WREG32(VM_CONTEXT0_CNTL, 0);
4623 WREG32(VM_CONTEXT1_CNTL, 0);
4624 /* Setup TLB control */
/* Leave the TLB in pass-through mode with the L1 disabled. */
4625 WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4626 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4627 /* Setup L2 cache */
4629 ENABLE_L2_FRAGMENT_PROCESSING |
4630 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4631 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4632 EFFECTIVE_L2_QUEUE_SIZE(7) |
4633 CONTEXT1_IDENTITY_ACCESS_MODE(1));
4634 WREG32(VM_L2_CNTL2, 0);
4635 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4636 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
/* The table BO stays allocated; it is only unpinned here. */
4637 radeon_gart_table_vram_unpin(rdev);
4641 * cik_pcie_gart_fini - vm fini callback
4643 * @rdev: radeon_device pointer
4645 * Tears down the driver GART/VM setup (CIK).
4647 static void cik_pcie_gart_fini(struct radeon_device *rdev)
/* Disable the hardware first, then free the table BO and core state. */
4649 cik_pcie_gart_disable(rdev);
4650 radeon_gart_table_vram_free(rdev);
4651 radeon_gart_fini(rdev);
4656 * cik_ib_parse - vm ib_parse callback
4658 * @rdev: radeon_device pointer
4659 * @ib: indirect buffer pointer
4661 * CIK uses hw IB checking so this is a nop (CIK).
/* cik_ib_parse - vm ib_parse callback; body not visible in this extract
 * (per the kernel-doc above, a nop since CIK uses hw IB checking). */
4663 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4670 * VMID 0 is the physical GPU addresses as used by the kernel.
4671 * VMIDs 1-15 are used for userspace clients and are handled
4672 * by the radeon vm/hsa code.
4675 * cik_vm_init - cik vm init callback
4677 * @rdev: radeon_device pointer
4679 * Inits cik specific vm parameters (number of VMs, base of vram for
4680 * VMIDs 1-15) (CIK).
4681 * Returns 0 for success.
4683 int cik_vm_init(struct radeon_device *rdev)
/* 16 VM contexts: VMID 0 for the kernel, 1-15 for userspace clients. */
4686 rdev->vm_manager.nvm = 16;
4687 /* base offset of vram pages */
/* On IGPs carve-out VRAM lives at an offset in system memory. */
4688 if (rdev->flags & RADEON_IS_IGP) {
4689 u64 tmp = RREG32(MC_VM_FB_OFFSET);
4691 rdev->vm_manager.vram_base_offset = tmp;
4693 rdev->vm_manager.vram_base_offset = 0;
4699 * cik_vm_fini - cik vm fini callback
4701 * @rdev: radeon_device pointer
4703 * Tear down any asic specific VM setup (CIK).
/* cik_vm_fini - vm fini callback; body not visible in this extract
 * (per the kernel-doc above, no asic-specific teardown is required). */
4705 void cik_vm_fini(struct radeon_device *rdev)
4710 * cik_vm_decode_fault - print human readable fault info
4712 * @rdev: radeon_device pointer
4713 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4714 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4716 * Print human readable fault information (CIK).
4718 static void cik_vm_decode_fault(struct radeon_device *rdev,
4719 u32 status, u32 addr, u32 mc_client)
/* Unpack the packed fault-status fields. */
4721 u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4722 u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4723 u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
/* mc_client holds four ASCII characters naming the client block; print
 * it by aliasing its bytes as a char array. */
4724 char *block = (char *)&mc_client;
4726 printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
4727 protections, vmid, addr,
4728 (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
4733 * cik_vm_flush - cik vm flush using the CP
4735 * @rdev: radeon_device pointer
4737 * Update the page table base and flush the VM TLB
4738 * using the CP (CIK).
4740 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4742 struct radeon_ring *ring = &rdev->ring[ridx];
4747 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4748 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4749 WRITE_DATA_DST_SEL(0)));
4751 radeon_ring_write(ring,
4752 (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4754 radeon_ring_write(ring,
4755 (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4757 radeon_ring_write(ring, 0);
4758 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4760 /* update SH_MEM_* regs */
4761 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4762 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4763 WRITE_DATA_DST_SEL(0)));
4764 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4765 radeon_ring_write(ring, 0);
4766 radeon_ring_write(ring, VMID(vm->id));
4768 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
4769 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4770 WRITE_DATA_DST_SEL(0)));
4771 radeon_ring_write(ring, SH_MEM_BASES >> 2);
4772 radeon_ring_write(ring, 0);
4774 radeon_ring_write(ring, 0); /* SH_MEM_BASES */
4775 radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
4776 radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
4777 radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
4779 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4780 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4781 WRITE_DATA_DST_SEL(0)));
4782 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4783 radeon_ring_write(ring, 0);
4784 radeon_ring_write(ring, VMID(0));
4787 /* We should be using the WAIT_REG_MEM packet here like in
4788 * cik_fence_ring_emit(), but it causes the CP to hang in this
4791 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4792 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4793 WRITE_DATA_DST_SEL(0)));
4794 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
4795 radeon_ring_write(ring, 0);
4796 radeon_ring_write(ring, 0);
4798 /* bits 0-15 are the VM contexts0-15 */
4799 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4800 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4801 WRITE_DATA_DST_SEL(0)));
4802 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4803 radeon_ring_write(ring, 0);
4804 radeon_ring_write(ring, 1 << vm->id);
4806 /* compute doesn't have PFP */
4807 if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
4808 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4809 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4810 radeon_ring_write(ring, 0x0);
4815 * cik_vm_set_page - update the page tables using sDMA
4817 * @rdev: radeon_device pointer
4818 * @ib: indirect buffer to fill with commands
4819 * @pe: addr of the page entry
4820 * @addr: dst addr to write into pe
4821 * @count: number of page entries to update
4822 * @incr: increase next addr by incr bytes
4823 * @flags: access flags
4825 * Update the page tables using CP or sDMA (CIK).
4827 void cik_vm_set_page(struct radeon_device *rdev,
4828 struct radeon_ib *ib,
4830 uint64_t addr, unsigned count,
4831 uint32_t incr, uint32_t flags)
4833 uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
4837 if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
4840 ndw = 2 + count * 2;
4844 ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
4845 ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
4846 WRITE_DATA_DST_SEL(1));
4847 ib->ptr[ib->length_dw++] = pe;
4848 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4849 for (; ndw > 2; ndw -= 2, --count, pe += 8) {
4850 if (flags & RADEON_VM_PAGE_SYSTEM) {
4851 value = radeon_vm_map_gart(rdev, addr);
4852 value &= 0xFFFFFFFFFFFFF000ULL;
4853 } else if (flags & RADEON_VM_PAGE_VALID) {
4859 value |= r600_flags;
4860 ib->ptr[ib->length_dw++] = value;
4861 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4866 cik_sdma_vm_set_page(rdev, ib, pe, addr, count, incr, flags);
4872 * The RLC is a multi-purpose microengine that handles a
4873 * variety of functions, the most important of which is
4874 * the interrupt controller.
4876 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
4879 u32 tmp = RREG32(CP_INT_CNTL_RING0);
4882 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4884 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4885 WREG32(CP_INT_CNTL_RING0, tmp);
4888 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
4892 tmp = RREG32(RLC_LB_CNTL);
4894 tmp |= LOAD_BALANCE_ENABLE;
4896 tmp &= ~LOAD_BALANCE_ENABLE;
4897 WREG32(RLC_LB_CNTL, tmp);
4900 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
4905 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
4906 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
4907 cik_select_se_sh(rdev, i, j);
4908 for (k = 0; k < rdev->usec_timeout; k++) {
4909 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
4915 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4917 mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
4918 for (k = 0; k < rdev->usec_timeout; k++) {
4919 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
4925 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
4929 tmp = RREG32(RLC_CNTL);
4931 WREG32(RLC_CNTL, rlc);
4934 static u32 cik_halt_rlc(struct radeon_device *rdev)
4938 orig = data = RREG32(RLC_CNTL);
4940 if (data & RLC_ENABLE) {
4943 data &= ~RLC_ENABLE;
4944 WREG32(RLC_CNTL, data);
4946 for (i = 0; i < rdev->usec_timeout; i++) {
4947 if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
4952 cik_wait_for_rlc_serdes(rdev);
4958 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
4962 tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
4963 WREG32(RLC_GPR_REG2, tmp);
4965 mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
4966 for (i = 0; i < rdev->usec_timeout; i++) {
4967 if ((RREG32(RLC_GPM_STAT) & mask) == mask)
4972 for (i = 0; i < rdev->usec_timeout; i++) {
4973 if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
4979 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
4983 tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
4984 WREG32(RLC_GPR_REG2, tmp);
4988 * cik_rlc_stop - stop the RLC ME
4990 * @rdev: radeon_device pointer
4992 * Halt the RLC ME (MicroEngine) (CIK).
4994 static void cik_rlc_stop(struct radeon_device *rdev)
4996 WREG32(RLC_CNTL, 0);
4998 cik_enable_gui_idle_interrupt(rdev, false);
5000 cik_wait_for_rlc_serdes(rdev);
5004 * cik_rlc_start - start the RLC ME
5006 * @rdev: radeon_device pointer
5008 * Unhalt the RLC ME (MicroEngine) (CIK).
5010 static void cik_rlc_start(struct radeon_device *rdev)
5012 WREG32(RLC_CNTL, RLC_ENABLE);
5014 cik_enable_gui_idle_interrupt(rdev, true);
5020 * cik_rlc_resume - setup the RLC hw
5022 * @rdev: radeon_device pointer
5024 * Initialize the RLC registers, load the ucode,
5025 * and start the RLC (CIK).
5026 * Returns 0 for success, -EINVAL if the ucode is not available.
5028 static int cik_rlc_resume(struct radeon_device *rdev)
5031 const __be32 *fw_data;
5036 switch (rdev->family) {
5039 size = BONAIRE_RLC_UCODE_SIZE;
5042 size = KV_RLC_UCODE_SIZE;
5045 size = KB_RLC_UCODE_SIZE;
5052 tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5053 WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5061 WREG32(RLC_LB_CNTR_INIT, 0);
5062 WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5064 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5065 WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5066 WREG32(RLC_LB_PARAMS, 0x00600408);
5067 WREG32(RLC_LB_CNTL, 0x80000004);
5069 WREG32(RLC_MC_CNTL, 0);
5070 WREG32(RLC_UCODE_CNTL, 0);
5072 fw_data = (const __be32 *)rdev->rlc_fw->data;
5073 WREG32(RLC_GPM_UCODE_ADDR, 0);
5074 for (i = 0; i < size; i++)
5075 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5076 WREG32(RLC_GPM_UCODE_ADDR, 0);
5078 /* XXX - find out what chips support lbpw */
5079 cik_enable_lbpw(rdev, false);
5081 if (rdev->family == CHIP_BONAIRE)
5082 WREG32(RLC_DRIVER_DMA_STATUS, 0);
5084 cik_rlc_start(rdev);
5089 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
5091 u32 data, orig, tmp, tmp2;
5093 orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5095 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5096 cik_enable_gui_idle_interrupt(rdev, true);
5098 tmp = cik_halt_rlc(rdev);
5100 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5101 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5102 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5103 tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
5104 WREG32(RLC_SERDES_WR_CTRL, tmp2);
5106 cik_update_rlc(rdev, tmp);
5108 data |= CGCG_EN | CGLS_EN;
5110 cik_enable_gui_idle_interrupt(rdev, false);
5112 RREG32(CB_CGTT_SCLK_CTRL);
5113 RREG32(CB_CGTT_SCLK_CTRL);
5114 RREG32(CB_CGTT_SCLK_CTRL);
5115 RREG32(CB_CGTT_SCLK_CTRL);
5117 data &= ~(CGCG_EN | CGLS_EN);
5121 WREG32(RLC_CGCG_CGLS_CTRL, data);
5125 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
5127 u32 data, orig, tmp = 0;
5129 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
5130 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
5131 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
5132 orig = data = RREG32(CP_MEM_SLP_CNTL);
5133 data |= CP_MEM_LS_EN;
5135 WREG32(CP_MEM_SLP_CNTL, data);
5139 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5142 WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5144 tmp = cik_halt_rlc(rdev);
5146 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5147 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5148 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5149 data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
5150 WREG32(RLC_SERDES_WR_CTRL, data);
5152 cik_update_rlc(rdev, tmp);
5154 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
5155 orig = data = RREG32(CGTS_SM_CTRL_REG);
5156 data &= ~SM_MODE_MASK;
5157 data |= SM_MODE(0x2);
5158 data |= SM_MODE_ENABLE;
5159 data &= ~CGTS_OVERRIDE;
5160 if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
5161 (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
5162 data &= ~CGTS_LS_OVERRIDE;
5163 data &= ~ON_MONITOR_ADD_MASK;
5164 data |= ON_MONITOR_ADD_EN;
5165 data |= ON_MONITOR_ADD(0x96);
5167 WREG32(CGTS_SM_CTRL_REG, data);
5170 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5173 WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5175 data = RREG32(RLC_MEM_SLP_CNTL);
5176 if (data & RLC_MEM_LS_EN) {
5177 data &= ~RLC_MEM_LS_EN;
5178 WREG32(RLC_MEM_SLP_CNTL, data);
5181 data = RREG32(CP_MEM_SLP_CNTL);
5182 if (data & CP_MEM_LS_EN) {
5183 data &= ~CP_MEM_LS_EN;
5184 WREG32(CP_MEM_SLP_CNTL, data);
5187 orig = data = RREG32(CGTS_SM_CTRL_REG);
5188 data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
5190 WREG32(CGTS_SM_CTRL_REG, data);
5192 tmp = cik_halt_rlc(rdev);
5194 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5195 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5196 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5197 data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
5198 WREG32(RLC_SERDES_WR_CTRL, data);
5200 cik_update_rlc(rdev, tmp);
5204 static const u32 mc_cg_registers[] =
5217 static void cik_enable_mc_ls(struct radeon_device *rdev,
5223 for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5224 orig = data = RREG32(mc_cg_registers[i]);
5225 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5226 data |= MC_LS_ENABLE;
5228 data &= ~MC_LS_ENABLE;
5230 WREG32(mc_cg_registers[i], data);
5234 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
5240 for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5241 orig = data = RREG32(mc_cg_registers[i]);
5242 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5243 data |= MC_CG_ENABLE;
5245 data &= ~MC_CG_ENABLE;
5247 WREG32(mc_cg_registers[i], data);
5251 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
5256 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5257 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
5258 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
5260 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
5263 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
5265 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
5268 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
5272 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
5277 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
5278 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5281 WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5283 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5286 WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
5288 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5291 WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5293 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5296 WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
5300 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
5305 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
5306 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5308 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
5310 orig = data = RREG32(UVD_CGC_CTRL);
5313 WREG32(UVD_CGC_CTRL, data);
5315 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5317 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
5319 orig = data = RREG32(UVD_CGC_CTRL);
5322 WREG32(UVD_CGC_CTRL, data);
5326 static void cik_enable_bif_mgls(struct radeon_device *rdev,
5331 orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
5333 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5334 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5335 REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5337 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5338 REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5341 WREG32_PCIE_PORT(PCIE_CNTL2, data);
5344 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
5349 orig = data = RREG32(HDP_HOST_PATH_CNTL);
5351 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5352 data &= ~CLOCK_GATING_DIS;
5354 data |= CLOCK_GATING_DIS;
5357 WREG32(HDP_HOST_PATH_CNTL, data);
5360 static void cik_enable_hdp_ls(struct radeon_device *rdev,
5365 orig = data = RREG32(HDP_MEM_POWER_LS);
5367 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5368 data |= HDP_LS_ENABLE;
5370 data &= ~HDP_LS_ENABLE;
5373 WREG32(HDP_MEM_POWER_LS, data);
5376 void cik_update_cg(struct radeon_device *rdev,
5377 u32 block, bool enable)
5379 if (block & RADEON_CG_BLOCK_GFX) {
5380 /* order matters! */
5382 cik_enable_mgcg(rdev, true);
5383 cik_enable_cgcg(rdev, true);
5385 cik_enable_cgcg(rdev, false);
5386 cik_enable_mgcg(rdev, false);
5390 if (block & RADEON_CG_BLOCK_MC) {
5391 if (!(rdev->flags & RADEON_IS_IGP)) {
5392 cik_enable_mc_mgcg(rdev, enable);
5393 cik_enable_mc_ls(rdev, enable);
5397 if (block & RADEON_CG_BLOCK_SDMA) {
5398 cik_enable_sdma_mgcg(rdev, enable);
5399 cik_enable_sdma_mgls(rdev, enable);
5402 if (block & RADEON_CG_BLOCK_BIF) {
5403 cik_enable_bif_mgls(rdev, enable);
5406 if (block & RADEON_CG_BLOCK_UVD) {
5408 cik_enable_uvd_mgcg(rdev, enable);
5411 if (block & RADEON_CG_BLOCK_HDP) {
5412 cik_enable_hdp_mgcg(rdev, enable);
5413 cik_enable_hdp_ls(rdev, enable);
5417 static void cik_init_cg(struct radeon_device *rdev)
5420 cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
5423 si_init_uvd_internal_cg(rdev);
5425 cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
5426 RADEON_CG_BLOCK_SDMA |
5427 RADEON_CG_BLOCK_BIF |
5428 RADEON_CG_BLOCK_UVD |
5429 RADEON_CG_BLOCK_HDP), true);
5432 static void cik_fini_cg(struct radeon_device *rdev)
5434 cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
5435 RADEON_CG_BLOCK_SDMA |
5436 RADEON_CG_BLOCK_BIF |
5437 RADEON_CG_BLOCK_UVD |
5438 RADEON_CG_BLOCK_HDP), false);
5440 cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
5443 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
5448 orig = data = RREG32(RLC_PG_CNTL);
5449 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
5450 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
5452 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
5454 WREG32(RLC_PG_CNTL, data);
5457 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
5462 orig = data = RREG32(RLC_PG_CNTL);
5463 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
5464 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
5466 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
5468 WREG32(RLC_PG_CNTL, data);
5471 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
5475 orig = data = RREG32(RLC_PG_CNTL);
5476 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
5477 data &= ~DISABLE_CP_PG;
5479 data |= DISABLE_CP_PG;
5481 WREG32(RLC_PG_CNTL, data);
5484 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
5488 orig = data = RREG32(RLC_PG_CNTL);
5489 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
5490 data &= ~DISABLE_GDS_PG;
5492 data |= DISABLE_GDS_PG;
5494 WREG32(RLC_PG_CNTL, data);
5497 #define CP_ME_TABLE_SIZE 96
5498 #define CP_ME_TABLE_OFFSET 2048
5499 #define CP_MEC_TABLE_OFFSET 4096
5501 void cik_init_cp_pg_table(struct radeon_device *rdev)
5503 const __be32 *fw_data;
5504 volatile u32 *dst_ptr;
5505 int me, i, max_me = 4;
5509 if (rdev->family == CHIP_KAVERI)
5512 if (rdev->rlc.cp_table_ptr == NULL)
5515 /* write the cp table buffer */
5516 dst_ptr = rdev->rlc.cp_table_ptr;
5517 for (me = 0; me < max_me; me++) {
5519 fw_data = (const __be32 *)rdev->ce_fw->data;
5520 table_offset = CP_ME_TABLE_OFFSET;
5521 } else if (me == 1) {
5522 fw_data = (const __be32 *)rdev->pfp_fw->data;
5523 table_offset = CP_ME_TABLE_OFFSET;
5524 } else if (me == 2) {
5525 fw_data = (const __be32 *)rdev->me_fw->data;
5526 table_offset = CP_ME_TABLE_OFFSET;
5528 fw_data = (const __be32 *)rdev->mec_fw->data;
5529 table_offset = CP_MEC_TABLE_OFFSET;
5532 for (i = 0; i < CP_ME_TABLE_SIZE; i ++) {
5533 dst_ptr[bo_offset + i] = be32_to_cpu(fw_data[table_offset + i]);
5535 bo_offset += CP_ME_TABLE_SIZE;
5539 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
5544 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_CG)) {
5545 orig = data = RREG32(RLC_PG_CNTL);
5546 data |= GFX_PG_ENABLE;
5548 WREG32(RLC_PG_CNTL, data);
5550 orig = data = RREG32(RLC_AUTO_PG_CTRL);
5553 WREG32(RLC_AUTO_PG_CTRL, data);
5555 orig = data = RREG32(RLC_PG_CNTL);
5556 data &= ~GFX_PG_ENABLE;
5558 WREG32(RLC_PG_CNTL, data);
5560 orig = data = RREG32(RLC_AUTO_PG_CTRL);
5561 data &= ~AUTO_PG_EN;
5563 WREG32(RLC_AUTO_PG_CTRL, data);
5565 data = RREG32(DB_RENDER_CONTROL);
5569 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5571 u32 mask = 0, tmp, tmp1;
5574 cik_select_se_sh(rdev, se, sh);
5575 tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5576 tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5577 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5584 for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
5589 return (~tmp) & mask;
5592 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
5594 u32 i, j, k, active_cu_number = 0;
5595 u32 mask, counter, cu_bitmap;
5598 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5599 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5603 for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
5604 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
5612 active_cu_number += counter;
5613 tmp |= (cu_bitmap << (i * 16 + j * 8));
5617 WREG32(RLC_PG_AO_CU_MASK, tmp);
5619 tmp = RREG32(RLC_MAX_PG_CU);
5620 tmp &= ~MAX_PU_CU_MASK;
5621 tmp |= MAX_PU_CU(active_cu_number);
5622 WREG32(RLC_MAX_PG_CU, tmp);
5625 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
5630 orig = data = RREG32(RLC_PG_CNTL);
5631 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
5632 data |= STATIC_PER_CU_PG_ENABLE;
5634 data &= ~STATIC_PER_CU_PG_ENABLE;
5636 WREG32(RLC_PG_CNTL, data);
5639 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
5644 orig = data = RREG32(RLC_PG_CNTL);
5645 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
5646 data |= DYN_PER_CU_PG_ENABLE;
5648 data &= ~DYN_PER_CU_PG_ENABLE;
5650 WREG32(RLC_PG_CNTL, data);
5653 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
5654 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET 0x3D
5656 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
5661 if (rdev->rlc.cs_data) {
5662 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
5663 WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
5664 WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
5665 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
5667 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
5668 for (i = 0; i < 3; i++)
5669 WREG32(RLC_GPM_SCRATCH_DATA, 0);
5671 if (rdev->rlc.reg_list) {
5672 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
5673 for (i = 0; i < rdev->rlc.reg_list_size; i++)
5674 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
5677 orig = data = RREG32(RLC_PG_CNTL);
5680 WREG32(RLC_PG_CNTL, data);
5682 WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5683 WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
5685 data = RREG32(CP_RB_WPTR_POLL_CNTL);
5686 data &= ~IDLE_POLL_COUNT_MASK;
5687 data |= IDLE_POLL_COUNT(0x60);
5688 WREG32(CP_RB_WPTR_POLL_CNTL, data);
5691 WREG32(RLC_PG_DELAY, data);
5693 data = RREG32(RLC_PG_DELAY_2);
5696 WREG32(RLC_PG_DELAY_2, data);
5698 data = RREG32(RLC_AUTO_PG_CTRL);
5699 data &= ~GRBM_REG_SGIT_MASK;
5700 data |= GRBM_REG_SGIT(0x700);
5701 WREG32(RLC_AUTO_PG_CTRL, data);
5705 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
5707 cik_enable_gfx_cgpg(rdev, enable);
5708 cik_enable_gfx_static_mgpg(rdev, enable);
5709 cik_enable_gfx_dynamic_mgpg(rdev, enable);
5712 u32 cik_get_csb_size(struct radeon_device *rdev)
5715 const struct cs_section_def *sect = NULL;
5716 const struct cs_extent_def *ext = NULL;
5718 if (rdev->rlc.cs_data == NULL)
5721 /* begin clear state */
5723 /* context control state */
5726 for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5727 for (ext = sect->section; ext->extent != NULL; ++ext) {
5728 if (sect->id == SECT_CONTEXT)
5729 count += 2 + ext->reg_count;
5734 /* pa_sc_raster_config/pa_sc_raster_config1 */
5736 /* end clear state */
5744 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
5747 const struct cs_section_def *sect = NULL;
5748 const struct cs_extent_def *ext = NULL;
5750 if (rdev->rlc.cs_data == NULL)
5755 buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
5756 buffer[count++] = PACKET3_PREAMBLE_BEGIN_CLEAR_STATE;
5758 buffer[count++] = PACKET3(PACKET3_CONTEXT_CONTROL, 1);
5759 buffer[count++] = 0x80000000;
5760 buffer[count++] = 0x80000000;
5762 for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5763 for (ext = sect->section; ext->extent != NULL; ++ext) {
5764 if (sect->id == SECT_CONTEXT) {
5765 buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count);
5766 buffer[count++] = ext->reg_index - 0xa000;
5767 for (i = 0; i < ext->reg_count; i++)
5768 buffer[count++] = ext->extent[i];
5775 buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
5776 buffer[count++] = PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START;
5777 switch (rdev->family) {
5779 buffer[count++] = 0x16000012;
5780 buffer[count++] = 0x00000000;
5783 buffer[count++] = 0x00000000; /* XXX */
5784 buffer[count++] = 0x00000000;
5787 buffer[count++] = 0x00000000; /* XXX */
5788 buffer[count++] = 0x00000000;
5791 buffer[count++] = 0x00000000;
5792 buffer[count++] = 0x00000000;
5796 buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
5797 buffer[count++] = PACKET3_PREAMBLE_END_CLEAR_STATE;
5799 buffer[count++] = PACKET3(PACKET3_CLEAR_STATE, 0);
5800 buffer[count++] = 0;
5803 static void cik_init_pg(struct radeon_device *rdev)
5805 if (rdev->pg_flags) {
5806 cik_enable_sck_slowdown_on_pu(rdev, true);
5807 cik_enable_sck_slowdown_on_pd(rdev, true);
5808 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_CG) {
5809 cik_init_gfx_cgpg(rdev);
5810 cik_enable_cp_pg(rdev, true);
5811 cik_enable_gds_pg(rdev, true);
5813 cik_init_ao_cu_mask(rdev);
5814 cik_update_gfx_pg(rdev, true);
5818 static void cik_fini_pg(struct radeon_device *rdev)
5820 if (rdev->pg_flags) {
5821 cik_update_gfx_pg(rdev, false);
5822 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_CG) {
5823 cik_enable_cp_pg(rdev, false);
5824 cik_enable_gds_pg(rdev, false);
5831 * Starting with r6xx, interrupts are handled via a ring buffer.
5832 * Ring buffers are areas of GPU accessible memory that the GPU
5833 * writes interrupt vectors into and the host reads vectors out of.
5834 * There is a rptr (read pointer) that determines where the
5835 * host is currently reading, and a wptr (write pointer)
5836 * which determines where the GPU has written. When the
5837 * pointers are equal, the ring is idle. When the GPU
5838 * writes vectors to the ring buffer, it increments the
5839 * wptr. When there is an interrupt, the host then starts
5840 * fetching commands and processing them until the pointers are
5841 * equal again at which point it updates the rptr.
5845 * cik_enable_interrupts - Enable the interrupt ring buffer
5847 * @rdev: radeon_device pointer
5849 * Enable the interrupt ring buffer (CIK).
5851 static void cik_enable_interrupts(struct radeon_device *rdev)
5853 u32 ih_cntl = RREG32(IH_CNTL);
5854 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5856 ih_cntl |= ENABLE_INTR;
5857 ih_rb_cntl |= IH_RB_ENABLE;
5858 WREG32(IH_CNTL, ih_cntl);
5859 WREG32(IH_RB_CNTL, ih_rb_cntl);
5860 rdev->ih.enabled = true;
5864 * cik_disable_interrupts - Disable the interrupt ring buffer
5866 * @rdev: radeon_device pointer
5868 * Disable the interrupt ring buffer (CIK).
5870 static void cik_disable_interrupts(struct radeon_device *rdev)
5872 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5873 u32 ih_cntl = RREG32(IH_CNTL);
5875 ih_rb_cntl &= ~IH_RB_ENABLE;
5876 ih_cntl &= ~ENABLE_INTR;
5877 WREG32(IH_RB_CNTL, ih_rb_cntl);
5878 WREG32(IH_CNTL, ih_cntl);
5879 /* set rptr, wptr to 0 */
5880 WREG32(IH_RB_RPTR, 0);
5881 WREG32(IH_RB_WPTR, 0);
5882 rdev->ih.enabled = false;
5887 * cik_disable_interrupt_state - Disable all interrupt sources
5889 * @rdev: radeon_device pointer
5891 * Clear all interrupt enable bits used by the driver (CIK).
5893 static void cik_disable_interrupt_state(struct radeon_device *rdev)
5898 WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5900 tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5901 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5902 tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5903 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5904 /* compute queues */
5905 WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
5906 WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
5907 WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
5908 WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
5909 WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
5910 WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
5911 WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
5912 WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
5914 WREG32(GRBM_INT_CNTL, 0);
5915 /* vline/vblank, etc. */
5916 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5917 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5918 if (rdev->num_crtc >= 4) {
5919 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5920 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5922 if (rdev->num_crtc >= 6) {
5923 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5924 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5928 WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
5930 /* digital hotplug */
5931 tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5932 WREG32(DC_HPD1_INT_CONTROL, tmp);
5933 tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5934 WREG32(DC_HPD2_INT_CONTROL, tmp);
5935 tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5936 WREG32(DC_HPD3_INT_CONTROL, tmp);
5937 tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5938 WREG32(DC_HPD4_INT_CONTROL, tmp);
5939 tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5940 WREG32(DC_HPD5_INT_CONTROL, tmp);
5941 tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5942 WREG32(DC_HPD6_INT_CONTROL, tmp);
5947 * cik_irq_init - init and enable the interrupt ring
5949 * @rdev: radeon_device pointer
5951 * Allocate a ring buffer for the interrupt controller,
5952 * enable the RLC, disable interrupts, enable the IH
5953 * ring buffer and enable it (CIK).
5954 * Called at device load and reume.
5955 * Returns 0 for success, errors for failure.
5957 static int cik_irq_init(struct radeon_device *rdev)
5961 u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
5964 ret = r600_ih_ring_alloc(rdev);
5969 cik_disable_interrupts(rdev);
5972 ret = cik_rlc_resume(rdev);
5974 r600_ih_ring_fini(rdev);
5978 /* setup interrupt control */
5979 /* XXX this should actually be a bus address, not an MC address. same on older asics */
5980 WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
5981 interrupt_cntl = RREG32(INTERRUPT_CNTL);
5982 /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
5983 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
5985 interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
5986 /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
5987 interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
5988 WREG32(INTERRUPT_CNTL, interrupt_cntl);
5990 WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
5991 rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
5993 ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
5994 IH_WPTR_OVERFLOW_CLEAR |
5997 if (rdev->wb.enabled)
5998 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6000 /* set the writeback address whether it's enabled or not */
6001 WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6002 WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6004 WREG32(IH_RB_CNTL, ih_rb_cntl);
6006 /* set rptr, wptr to 0 */
6007 WREG32(IH_RB_RPTR, 0);
6008 WREG32(IH_RB_WPTR, 0);
6010 /* Default settings for IH_CNTL (disabled at first) */
6011 ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6012 /* RPTR_REARM only works if msi's are enabled */
6013 if (rdev->msi_enabled)
6014 ih_cntl |= RPTR_REARM;
6015 WREG32(IH_CNTL, ih_cntl);
6017 /* force the active interrupt state to all disabled */
6018 cik_disable_interrupt_state(rdev);
6020 pci_set_master(rdev->pdev);
6023 cik_enable_interrupts(rdev);
6029 * cik_irq_set - enable/disable interrupt sources
6031 * @rdev: radeon_device pointer
6033 * Enable interrupt sources on the GPU (vblanks, hpd,
6035 * Returns 0 for success, errors for failure.
6037 int cik_irq_set(struct radeon_device *rdev)
6039 u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE |
6040 PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
6041 u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
6042 u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
6043 u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6044 u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
6045 u32 grbm_int_cntl = 0;
6046 u32 dma_cntl, dma_cntl1;
6049 if (!rdev->irq.installed) {
6050 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6053 /* don't enable anything if the ih is disabled */
6054 if (!rdev->ih.enabled) {
6055 cik_disable_interrupts(rdev);
6056 /* force the active interrupt state to all disabled */
6057 cik_disable_interrupt_state(rdev);
6061 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
6062 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
6063 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
6064 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
6065 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
6066 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
6068 dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6069 dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6071 cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6072 cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6073 cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6074 cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6075 cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6076 cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6077 cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6078 cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6080 if (rdev->flags & RADEON_IS_IGP)
6081 thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
6082 ~(THERM_INTH_MASK | THERM_INTL_MASK);
6084 thermal_int = RREG32_SMC(CG_THERMAL_INT) &
6085 ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6087 /* enable CP interrupts on all rings */
6088 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6089 DRM_DEBUG("cik_irq_set: sw int gfx\n");
6090 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6092 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6093 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6094 DRM_DEBUG("si_irq_set: sw int cp1\n");
6095 if (ring->me == 1) {
6096 switch (ring->pipe) {
6098 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6101 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6104 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6107 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6110 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6113 } else if (ring->me == 2) {
6114 switch (ring->pipe) {
6116 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6119 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6122 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6125 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6128 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6132 DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
6135 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6136 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6137 DRM_DEBUG("si_irq_set: sw int cp2\n");
6138 if (ring->me == 1) {
6139 switch (ring->pipe) {
6141 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6144 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6147 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6150 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6153 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6156 } else if (ring->me == 2) {
6157 switch (ring->pipe) {
6159 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6162 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6165 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6168 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6171 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6175 DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
6179 if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6180 DRM_DEBUG("cik_irq_set: sw int dma\n");
6181 dma_cntl |= TRAP_ENABLE;
6184 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6185 DRM_DEBUG("cik_irq_set: sw int dma1\n");
6186 dma_cntl1 |= TRAP_ENABLE;
6189 if (rdev->irq.crtc_vblank_int[0] ||
6190 atomic_read(&rdev->irq.pflip[0])) {
6191 DRM_DEBUG("cik_irq_set: vblank 0\n");
6192 crtc1 |= VBLANK_INTERRUPT_MASK;
6194 if (rdev->irq.crtc_vblank_int[1] ||
6195 atomic_read(&rdev->irq.pflip[1])) {
6196 DRM_DEBUG("cik_irq_set: vblank 1\n");
6197 crtc2 |= VBLANK_INTERRUPT_MASK;
6199 if (rdev->irq.crtc_vblank_int[2] ||
6200 atomic_read(&rdev->irq.pflip[2])) {
6201 DRM_DEBUG("cik_irq_set: vblank 2\n");
6202 crtc3 |= VBLANK_INTERRUPT_MASK;
6204 if (rdev->irq.crtc_vblank_int[3] ||
6205 atomic_read(&rdev->irq.pflip[3])) {
6206 DRM_DEBUG("cik_irq_set: vblank 3\n");
6207 crtc4 |= VBLANK_INTERRUPT_MASK;
6209 if (rdev->irq.crtc_vblank_int[4] ||
6210 atomic_read(&rdev->irq.pflip[4])) {
6211 DRM_DEBUG("cik_irq_set: vblank 4\n");
6212 crtc5 |= VBLANK_INTERRUPT_MASK;
6214 if (rdev->irq.crtc_vblank_int[5] ||
6215 atomic_read(&rdev->irq.pflip[5])) {
6216 DRM_DEBUG("cik_irq_set: vblank 5\n");
6217 crtc6 |= VBLANK_INTERRUPT_MASK;
6219 if (rdev->irq.hpd[0]) {
6220 DRM_DEBUG("cik_irq_set: hpd 1\n");
6221 hpd1 |= DC_HPDx_INT_EN;
6223 if (rdev->irq.hpd[1]) {
6224 DRM_DEBUG("cik_irq_set: hpd 2\n");
6225 hpd2 |= DC_HPDx_INT_EN;
6227 if (rdev->irq.hpd[2]) {
6228 DRM_DEBUG("cik_irq_set: hpd 3\n");
6229 hpd3 |= DC_HPDx_INT_EN;
6231 if (rdev->irq.hpd[3]) {
6232 DRM_DEBUG("cik_irq_set: hpd 4\n");
6233 hpd4 |= DC_HPDx_INT_EN;
6235 if (rdev->irq.hpd[4]) {
6236 DRM_DEBUG("cik_irq_set: hpd 5\n");
6237 hpd5 |= DC_HPDx_INT_EN;
6239 if (rdev->irq.hpd[5]) {
6240 DRM_DEBUG("cik_irq_set: hpd 6\n");
6241 hpd6 |= DC_HPDx_INT_EN;
6244 if (rdev->irq.dpm_thermal) {
6245 DRM_DEBUG("dpm thermal\n");
6246 if (rdev->flags & RADEON_IS_IGP)
6247 thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
6249 thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
6252 WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6254 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
6255 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
6257 WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
6258 WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
6259 WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
6260 WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
6261 WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
6262 WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
6263 WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
6264 WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
6266 WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6268 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
6269 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
6270 if (rdev->num_crtc >= 4) {
6271 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
6272 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
6274 if (rdev->num_crtc >= 6) {
6275 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
6276 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
6279 WREG32(DC_HPD1_INT_CONTROL, hpd1);
6280 WREG32(DC_HPD2_INT_CONTROL, hpd2);
6281 WREG32(DC_HPD3_INT_CONTROL, hpd3);
6282 WREG32(DC_HPD4_INT_CONTROL, hpd4);
6283 WREG32(DC_HPD5_INT_CONTROL, hpd5);
6284 WREG32(DC_HPD6_INT_CONTROL, hpd6);
6286 if (rdev->flags & RADEON_IS_IGP)
6287 WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
6289 WREG32_SMC(CG_THERMAL_INT, thermal_int);
6295 * cik_irq_ack - ack interrupt sources
6297 * @rdev: radeon_device pointer
6299 * Ack interrupt sources on the GPU (vblanks, hpd,
6300 * etc.) (CIK). Certain interrupts sources are sw
6301 * generated and do not require an explicit ack.
6303 static inline void cik_irq_ack(struct radeon_device *rdev)
6307 rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
6308 rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
6309 rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
6310 rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
6311 rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
6312 rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
6313 rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
6315 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
6316 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
6317 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
6318 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
6319 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
6320 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6321 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6322 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
6324 if (rdev->num_crtc >= 4) {
6325 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6326 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6327 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6328 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6329 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6330 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6331 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6332 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6335 if (rdev->num_crtc >= 6) {
6336 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6337 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6338 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6339 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6340 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6341 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6342 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6343 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6346 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
6347 tmp = RREG32(DC_HPD1_INT_CONTROL);
6348 tmp |= DC_HPDx_INT_ACK;
6349 WREG32(DC_HPD1_INT_CONTROL, tmp);
6351 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
6352 tmp = RREG32(DC_HPD2_INT_CONTROL);
6353 tmp |= DC_HPDx_INT_ACK;
6354 WREG32(DC_HPD2_INT_CONTROL, tmp);
6356 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6357 tmp = RREG32(DC_HPD3_INT_CONTROL);
6358 tmp |= DC_HPDx_INT_ACK;
6359 WREG32(DC_HPD3_INT_CONTROL, tmp);
6361 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6362 tmp = RREG32(DC_HPD4_INT_CONTROL);
6363 tmp |= DC_HPDx_INT_ACK;
6364 WREG32(DC_HPD4_INT_CONTROL, tmp);
6366 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6367 tmp = RREG32(DC_HPD5_INT_CONTROL);
6368 tmp |= DC_HPDx_INT_ACK;
6369 WREG32(DC_HPD5_INT_CONTROL, tmp);
6371 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6372 tmp = RREG32(DC_HPD5_INT_CONTROL);
6373 tmp |= DC_HPDx_INT_ACK;
6374 WREG32(DC_HPD6_INT_CONTROL, tmp);
6379 * cik_irq_disable - disable interrupts
6381 * @rdev: radeon_device pointer
6383 * Disable interrupts on the hw (CIK).
/* Disable IH delivery and force all interrupt-enable state off.
 * NOTE(review): listing is elided here — upstream inserts a short
 * delay between the two calls so in-flight irqs settle; confirm
 * against the full file. */
static void cik_irq_disable(struct radeon_device *rdev)
cik_disable_interrupts(rdev);
/* Wait and acknowledge irq */
cik_disable_interrupt_state(rdev);
* cik_irq_suspend - disable interrupts for suspend
6397 * @rdev: radeon_device pointer
6399 * Disable interrupts and stop the RLC (CIK).
/* Quiesce interrupts for suspend: full irq disable first.
 * NOTE(review): elided listing — upstream also stops the RLC here;
 * verify against the full file. */
static void cik_irq_suspend(struct radeon_device *rdev)
cik_irq_disable(rdev);
6409 * cik_irq_fini - tear down interrupt support
6411 * @rdev: radeon_device pointer
6413 * Disable interrupts on the hw and free the IH ring
6415 * Used for driver unload.
/* Tear down interrupt support: quiesce the hw, then free the IH
 * ring buffer allocated by r600_ih_ring_init(). */
static void cik_irq_fini(struct radeon_device *rdev)
cik_irq_suspend(rdev);
r600_ih_ring_fini(rdev);
6424 * cik_get_ih_wptr - get the IH ring buffer wptr
6426 * @rdev: radeon_device pointer
6428 * Get the IH ring buffer wptr from either the register
6429 * or the writeback memory buffer (CIK). Also check for
6430 * ring buffer overflow and deal with it.
6431 * Used by cik_irq_process().
6432 * Returns the value of the wptr.
6434 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
6438 if (rdev->wb.enabled)
6439 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6441 wptr = RREG32(IH_RB_WPTR);
6443 if (wptr & RB_OVERFLOW) {
6444 /* When a ring buffer overflow happen start parsing interrupt
6445 * from the last not overwritten vector (wptr + 16). Hopefully
6446 * this should allow us to catchup.
6448 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
6449 wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
6450 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6451 tmp = RREG32(IH_RB_CNTL);
6452 tmp |= IH_WPTR_OVERFLOW_CLEAR;
6453 WREG32(IH_RB_CNTL, tmp);
6455 return (wptr & rdev->ih.ptr_mask);
6459 * Each IV ring entry is 128 bits:
6460 * [7:0] - interrupt source id
6462 * [59:32] - interrupt source data
6463 * [63:60] - reserved
6466 * ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
6467 * QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
6468 * - for gfx, hw shader state (0=PS...5=LS, 6=CS)
6469 * ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
6470 * PIPE_ID - ME0 0=3D
6471 * - ME1&2 compute dispatcher (4 pipes each)
6473 * INSTANCE_ID [1:0], QUEUE_ID[1:0]
6474 * INSTANCE_ID - 0 = sdma0, 1 = sdma1
6475 * QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
6478 * [127:96] - reserved
6481 * cik_irq_process - interrupt handler
6483 * @rdev: radeon_device pointer
* Interrupt handler (CIK). Walk the IH ring,
6486 * ack interrupts and schedule work to handle
6488 * Returns irq process return code.
/* Walk the IH ring between our cached rptr and the hw wptr, decoding
 * each 16-byte interrupt vector and dispatching to fences, vblank,
 * hotplug, reset and thermal work.  NOTE(review): this listing is
 * elided (missing braces, case labels and several handler bodies);
 * comments below annotate only what is visible. */
int cik_irq_process(struct radeon_device *rdev)
struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
u32 src_id, src_data, ring_id;
u8 me_id, pipe_id, queue_id;
/* deferred-work flags: set inside the loop, acted on after it */
bool queue_hotplug = false;
bool queue_reset = false;
u32 addr, status, mc_client;
bool queue_thermal = false;
if (!rdev->ih.enabled || rdev->shutdown)
wptr = cik_get_ih_wptr(rdev);
/* is somebody else already processing irqs? */
if (atomic_xchg(&rdev->ih.lock, 1))
rptr = rdev->ih.rptr;
DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
/* Order reading of wptr vs. reading of IH ring data */
/* display interrupts */
/* each IV entry is 128 bits = 16 bytes; rptr/wptr advance in bytes */
while (rptr != wptr) {
/* wptr/rptr are in bytes! */
ring_index = rptr / 4;
src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
case 1: /* D1 vblank/vline */
case 0: /* D1 vblank */
if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
if (rdev->irq.crtc_vblank_int[0]) {
drm_handle_vblank(rdev->ddev, 0);
rdev->pm.vblank_sync = true;
wake_up(&rdev->irq.vblank_queue);
if (atomic_read(&rdev->irq.pflip[0]))
radeon_crtc_handle_flip(rdev, 0);
/* clear the cached status bit so a spurious repeat is ignored */
rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
DRM_DEBUG("IH: D1 vblank\n");
case 1: /* D1 vline */
if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
DRM_DEBUG("IH: D1 vline\n");
DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
case 2: /* D2 vblank/vline */
case 0: /* D2 vblank */
if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
if (rdev->irq.crtc_vblank_int[1]) {
drm_handle_vblank(rdev->ddev, 1);
rdev->pm.vblank_sync = true;
wake_up(&rdev->irq.vblank_queue);
if (atomic_read(&rdev->irq.pflip[1]))
radeon_crtc_handle_flip(rdev, 1);
rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
DRM_DEBUG("IH: D2 vblank\n");
case 1: /* D2 vline */
if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
DRM_DEBUG("IH: D2 vline\n");
DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
case 3: /* D3 vblank/vline */
case 0: /* D3 vblank */
if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
if (rdev->irq.crtc_vblank_int[2]) {
drm_handle_vblank(rdev->ddev, 2);
rdev->pm.vblank_sync = true;
wake_up(&rdev->irq.vblank_queue);
if (atomic_read(&rdev->irq.pflip[2]))
radeon_crtc_handle_flip(rdev, 2);
rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
DRM_DEBUG("IH: D3 vblank\n");
case 1: /* D3 vline */
if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
DRM_DEBUG("IH: D3 vline\n");
DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
case 4: /* D4 vblank/vline */
case 0: /* D4 vblank */
if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
if (rdev->irq.crtc_vblank_int[3]) {
drm_handle_vblank(rdev->ddev, 3);
rdev->pm.vblank_sync = true;
wake_up(&rdev->irq.vblank_queue);
if (atomic_read(&rdev->irq.pflip[3]))
radeon_crtc_handle_flip(rdev, 3);
rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
DRM_DEBUG("IH: D4 vblank\n");
case 1: /* D4 vline */
if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
DRM_DEBUG("IH: D4 vline\n");
DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
case 5: /* D5 vblank/vline */
case 0: /* D5 vblank */
if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
if (rdev->irq.crtc_vblank_int[4]) {
drm_handle_vblank(rdev->ddev, 4);
rdev->pm.vblank_sync = true;
wake_up(&rdev->irq.vblank_queue);
if (atomic_read(&rdev->irq.pflip[4]))
radeon_crtc_handle_flip(rdev, 4);
rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
DRM_DEBUG("IH: D5 vblank\n");
case 1: /* D5 vline */
if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
DRM_DEBUG("IH: D5 vline\n");
DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
case 6: /* D6 vblank/vline */
case 0: /* D6 vblank */
if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
if (rdev->irq.crtc_vblank_int[5]) {
drm_handle_vblank(rdev->ddev, 5);
rdev->pm.vblank_sync = true;
wake_up(&rdev->irq.vblank_queue);
if (atomic_read(&rdev->irq.pflip[5]))
radeon_crtc_handle_flip(rdev, 5);
rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
DRM_DEBUG("IH: D6 vblank\n");
case 1: /* D6 vline */
if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
DRM_DEBUG("IH: D6 vline\n");
DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
case 42: /* HPD hotplug */
if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
queue_hotplug = true;
DRM_DEBUG("IH: HPD1\n");
if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
queue_hotplug = true;
DRM_DEBUG("IH: HPD2\n");
if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
queue_hotplug = true;
DRM_DEBUG("IH: HPD3\n");
if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
queue_hotplug = true;
DRM_DEBUG("IH: HPD4\n");
if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
queue_hotplug = true;
DRM_DEBUG("IH: HPD5\n");
if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
queue_hotplug = true;
DRM_DEBUG("IH: HPD6\n");
DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
/* VM protection fault: dump fault address/status for debugging */
addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
cik_vm_decode_fault(rdev, status, addr, mc_client);
/* reset addr and status */
WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
case 176: /* GFX RB CP_INT */
case 177: /* GFX IB CP_INT */
radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
case 181: /* CP EOP event */
DRM_DEBUG("IH: CP EOP\n");
/* XXX check the bitfield order! */
me_id = (ring_id & 0x60) >> 5;
pipe_id = (ring_id & 0x18) >> 3;
queue_id = (ring_id & 0x7) >> 0;
radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
/* NOTE(review): bitwise '&' used on two boolean comparisons here and
 * below — equivalent result for 0/1 operands, but '&&' is the intended
 * operator and avoids evaluating both sides needlessly. */
if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
case 184: /* CP Privileged reg access */
DRM_ERROR("Illegal register access in command stream\n");
/* XXX check the bitfield order! */
me_id = (ring_id & 0x60) >> 5;
pipe_id = (ring_id & 0x18) >> 3;
queue_id = (ring_id & 0x7) >> 0;
/* This results in a full GPU reset, but all we need to do is soft
 * reset the CP for gfx
 */
case 185: /* CP Privileged inst */
DRM_ERROR("Illegal instruction in command stream\n");
/* XXX check the bitfield order! */
me_id = (ring_id & 0x60) >> 5;
pipe_id = (ring_id & 0x18) >> 3;
queue_id = (ring_id & 0x7) >> 0;
/* This results in a full GPU reset, but all we need to do is soft
 * reset the CP for gfx
 */
case 224: /* SDMA trap event */
/* XXX check the bitfield order! */
me_id = (ring_id & 0x3) >> 0;
queue_id = (ring_id & 0xc) >> 2;
DRM_DEBUG("IH: SDMA trap\n");
radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
case 230: /* thermal low to high */
DRM_DEBUG("IH: thermal low to high\n");
rdev->pm.dpm.thermal.high_to_low = false;
queue_thermal = true;
case 231: /* thermal high to low */
DRM_DEBUG("IH: thermal high to low\n");
rdev->pm.dpm.thermal.high_to_low = true;
queue_thermal = true;
case 233: /* GUI IDLE */
DRM_DEBUG("IH: GUI idle\n");
case 241: /* SDMA Privileged inst */
case 247: /* SDMA Privileged inst */
DRM_ERROR("Illegal instruction in SDMA command stream\n");
/* XXX check the bitfield order! */
me_id = (ring_id & 0x3) >> 0;
queue_id = (ring_id & 0xc) >> 2;
DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
/* wptr/rptr are in bytes! */
rptr &= rdev->ih.ptr_mask;
/* loop done: kick off any deferred work collected above */
schedule_work(&rdev->hotplug_work);
schedule_work(&rdev->reset_work);
schedule_work(&rdev->pm.dpm.thermal.work);
/* publish our new rptr to the hw and release the processing lock */
rdev->ih.rptr = rptr;
WREG32(IH_RB_RPTR, rdev->ih.rptr);
atomic_set(&rdev->ih.lock, 0);
/* make sure wptr hasn't changed while processing */
wptr = cik_get_ih_wptr(rdev);
6937 * startup/shutdown callbacks
6940 * cik_startup - program the asic to a functional state
6942 * @rdev: radeon_device pointer
6944 * Programs the asic to a functional state (CIK).
6945 * Called by cik_init() and cik_resume().
6946 * Returns 0 for success, error for failure.
/* Bring the asic to a functional state: microcode, MC, GART, RLC,
 * writeback, fences, IH, and every ring.  NOTE(review): this listing
 * is elided (missing braces and error-return paths); comments annotate
 * only what is visible. */
static int cik_startup(struct radeon_device *rdev)
struct radeon_ring *ring;
/* enable pcie gen2/3 link */
cik_pcie_gen3_enable(rdev);
cik_program_aspm(rdev);
/* scratch needs to be initialized before MC */
r = r600_vram_scratch_init(rdev);
cik_mc_program(rdev);
/* IGP parts have no MC firmware; dGPUs additionally need mc_fw */
if (rdev->flags & RADEON_IS_IGP) {
if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
!rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
r = cik_init_microcode(rdev);
DRM_ERROR("Failed to load firmware!\n");
if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
!rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
r = cik_init_microcode(rdev);
DRM_ERROR("Failed to load firmware!\n");
r = ci_mc_load_microcode(rdev);
DRM_ERROR("Failed to load MC firmware!\n");
r = cik_pcie_gart_enable(rdev);
/* allocate rlc buffers */
if (rdev->flags & RADEON_IS_IGP) {
if (rdev->family == CHIP_KAVERI) {
rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
rdev->rlc.reg_list_size =
(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
rdev->rlc.reg_list_size =
(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
rdev->rlc.cs_data = ci_cs_data;
rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
r = sumo_rlc_init(rdev);
DRM_ERROR("Failed to init rlc BOs!\n");
/* allocate wb buffer */
r = radeon_wb_init(rdev);
/* allocate mec buffers */
r = cik_mec_init(rdev);
DRM_ERROR("Failed to init MEC BOs!\n");
/* start the fence driver on every ring before the rings run */
r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
/* UVD is optional: on failure the ring size is zeroed below */
r = radeon_uvd_resume(rdev);
r = uvd_v4_2_resume(rdev);
r = radeon_fence_driver_start_ring(rdev,
R600_RING_TYPE_UVD_INDEX);
dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
/* Enable IRQ */
if (!rdev->irq.installed) {
r = radeon_irq_kms_init(rdev);
r = cik_irq_init(rdev);
DRM_ERROR("radeon: IH init failed (%d).\n", r);
radeon_irq_kms_fini(rdev);
/* gfx ring */
ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
CP_RB0_RPTR, CP_RB0_WPTR,
/* set up the compute queues */
/* type-2 packets are deprecated on MEC, use type-3 instead */
ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
PACKET3(PACKET3_NOP, 0x3FFF));
ring->me = 1; /* first MEC */
ring->pipe = 0; /* first pipe */
ring->queue = 0; /* first queue */
ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
/* type-2 packets are deprecated on MEC, use type-3 instead */
ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
PACKET3(PACKET3_NOP, 0x3FFF));
/* dGPU only have 1 MEC */
ring->me = 1; /* first MEC */
ring->pipe = 0; /* first pipe */
ring->queue = 1; /* second queue */
ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
/* sdma0 gfx ring */
ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
r = cik_cp_resume(rdev);
r = cik_sdma_resume(rdev);
/* UVD ring is only initialized when its resume above succeeded */
ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
if (ring->ring_size) {
r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
r = uvd_v1_0_init(rdev);
DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
r = radeon_ib_pool_init(rdev);
dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
r = radeon_vm_manager_init(rdev);
dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
r = dce6_audio_init(rdev);
7175 * cik_resume - resume the asic to a functional state
7177 * @rdev: radeon_device pointer
7179 * Programs the asic to a functional state (CIK).
7181 * Returns 0 for success, error for failure.
/* Resume from suspend: re-post the asic via atombios, restore golden
 * register settings, then run the common startup path.
 * accel_working is optimistically set and cleared again on failure. */
int cik_resume(struct radeon_device *rdev)
atom_asic_init(rdev->mode_info.atom_context);
/* init golden registers */
cik_init_golden_registers(rdev);
rdev->accel_working = true;
r = cik_startup(rdev);
DRM_ERROR("cik startup failed on resume\n");
rdev->accel_working = false;
7206 * cik_suspend - suspend the asic
7208 * @rdev: radeon_device pointer
7210 * Bring the chip into a state suitable for suspend (CIK).
7211 * Called at suspend.
7212 * Returns 0 for success.
/* Quiesce the hw for suspend, tearing down in reverse dependency
 * order: audio and VM first, then CP/SDMA engines, UVD, irqs,
 * writeback and finally the GART. */
int cik_suspend(struct radeon_device *rdev)
dce6_audio_fini(rdev);
radeon_vm_manager_fini(rdev);
cik_cp_enable(rdev, false);
cik_sdma_enable(rdev, false);
uvd_v1_0_fini(rdev);
radeon_uvd_suspend(rdev);
cik_irq_suspend(rdev);
radeon_wb_disable(rdev);
cik_pcie_gart_disable(rdev);
7230 /* Plan is to move initialization in that function and use
7231 * helper function so that radeon_device_init pretty much
7232 * do nothing more than calling asic specific function. This
7233 * should also allow to remove a bunch of callback function
7237 * cik_init - asic specific driver and hw init
7239 * @rdev: radeon_device pointer
7241 * Setup asic specific driver variables and program the hw
7242 * to a functional state (CIK).
7243 * Called at driver startup.
7244 * Returns 0 for success, errors for failure.
/* One-time driver init: BIOS discovery, atombios, clocks, memory
 * controller, ring objects, IH ring, GART, then first startup.
 * NOTE(review): this listing is elided (missing braces and error
 * returns); comments annotate only what is visible. */
int cik_init(struct radeon_device *rdev)
struct radeon_ring *ring;
/* Read BIOS */
if (!radeon_get_bios(rdev)) {
if (ASIC_IS_AVIVO(rdev))
/* Must be an ATOMBIOS */
if (!rdev->is_atom_bios) {
dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
r = radeon_atombios_init(rdev);
/* Post card if necessary */
if (!radeon_card_posted(rdev)) {
dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
DRM_INFO("GPU not posted. posting now...\n");
atom_asic_init(rdev->mode_info.atom_context);
/* init golden registers */
cik_init_golden_registers(rdev);
/* Initialize scratch registers */
cik_scratch_init(rdev);
/* Initialize surface registers */
radeon_surface_init(rdev);
/* Initialize clocks */
radeon_get_clock_info(rdev->ddev);
/* Fence driver */
r = radeon_fence_driver_init(rdev);
/* initialize memory controller */
r = cik_mc_init(rdev);
/* Memory manager */
r = radeon_bo_init(rdev);
/* gfx + two compute rings; compute rings also need a doorbell page */
ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
ring->ring_obj = NULL;
r600_ring_init(rdev, ring, 1024 * 1024);
ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
ring->ring_obj = NULL;
r600_ring_init(rdev, ring, 1024 * 1024);
r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
ring->ring_obj = NULL;
r600_ring_init(rdev, ring, 1024 * 1024);
r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
/* two sdma rings */
ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
ring->ring_obj = NULL;
r600_ring_init(rdev, ring, 256 * 1024);
ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
ring->ring_obj = NULL;
r600_ring_init(rdev, ring, 256 * 1024);
r = radeon_uvd_init(rdev);
ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
ring->ring_obj = NULL;
r600_ring_init(rdev, ring, 4096);
rdev->ih.ring_obj = NULL;
r600_ih_ring_init(rdev, 64 * 1024);
r = r600_pcie_gart_init(rdev);
rdev->accel_working = true;
r = cik_startup(rdev);
/* startup failed: unwind everything and disable acceleration */
dev_err(rdev->dev, "disabling GPU acceleration\n");
cik_sdma_fini(rdev);
sumo_rlc_fini(rdev);
radeon_wb_fini(rdev);
radeon_ib_pool_fini(rdev);
radeon_vm_manager_fini(rdev);
radeon_irq_kms_fini(rdev);
cik_pcie_gart_fini(rdev);
rdev->accel_working = false;
/* Don't start up if the MC ucode is missing.
 * The default clocks and voltages before the MC ucode
 * is loaded are not sufficient for advanced operations.
 */
if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
DRM_ERROR("radeon: MC ucode required for NI+.\n");
7367 * cik_fini - asic specific driver and hw fini
7369 * @rdev: radeon_device pointer
7371 * Tear down the asic specific driver variables and program the hw
7372 * to an idle state (CIK).
7373 * Called at driver unload.
/* Driver-unload teardown, mirroring cik_init()/cik_startup() in
 * reverse: engines and RLC first, then kms/ib/vm layers, UVD, GART,
 * and finally core object/fence/bios state. */
void cik_fini(struct radeon_device *rdev)
cik_sdma_fini(rdev);
sumo_rlc_fini(rdev);
radeon_wb_fini(rdev);
radeon_vm_manager_fini(rdev);
radeon_ib_pool_fini(rdev);
radeon_irq_kms_fini(rdev);
uvd_v1_0_fini(rdev);
radeon_uvd_fini(rdev);
cik_pcie_gart_fini(rdev);
r600_vram_scratch_fini(rdev);
radeon_gem_fini(rdev);
radeon_fence_driver_fini(rdev);
radeon_bo_fini(rdev);
radeon_atombios_fini(rdev);
7400 /* display watermark setup */
7402 * dce8_line_buffer_adjust - Set up the line buffer
7404 * @rdev: radeon_device pointer
7405 * @radeon_crtc: the selected display controller
7406 * @mode: the current display mode on the selected display
7409 * Set up the line buffer allocation for
7410 * the selected display controller (CIK).
7411 * Returns the line buffer size in pixels.
7413 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
7414 struct radeon_crtc *radeon_crtc,
7415 struct drm_display_mode *mode)
7417 u32 tmp, buffer_alloc, i;
/* Each pipe's DMIF buffer control register is 0x20 apart. */
7418 u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
7421 * There are 6 line buffers, one for each display controllers.
7422 * There are 3 partitions per LB. Select the number of partitions
7423 * to enable based on the display width. For display widths larger
7424 * than 4096, you need to use 2 display controllers and combine
7425 * them using the stereo blender.
7427 if (radeon_crtc->base.enabled && mode) {
/* Partition/buffer selection keyed off horizontal resolution;
 * IGPs get fewer DMIF buffers than discrete parts. */
7428 if (mode->crtc_hdisplay < 1920) {
7431 } else if (mode->crtc_hdisplay < 2560) {
7434 } else if (mode->crtc_hdisplay < 4096) {
7436 buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
7438 DRM_DEBUG_KMS("Mode too big for LB!\n");
7440 buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
7447 WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
7448 LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
7450 WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
7451 DMIF_BUFFERS_ALLOCATED(buffer_alloc));
/* Poll (bounded by usec_timeout) until the hw acks the allocation. */
7452 for (i = 0; i < rdev->usec_timeout; i++) {
7453 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
7454 DMIF_BUFFERS_ALLOCATED_COMPLETED)
7459 if (radeon_crtc->base.enabled && mode) {
7471 /* controller not enabled, so no lb used */
7476 * cik_get_number_of_dram_channels - get the number of dram channels
7478 * @rdev: radeon_device pointer
7480 * Look up the number of video ram channels (CIK).
7481 * Used for display watermark bandwidth calculations
7482 * Returns the number of dram channels
7484 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
7486 u32 tmp = RREG32(MC_SHARED_CHMAP);
/* The NOOFCHAN field of MC_SHARED_CHMAP encodes the channel count. */
7488 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
/* Input parameters for the DCE8 display watermark calculations below. */
7511 struct dce8_wm_params {
7512 u32 dram_channels; /* number of dram channels */
7513 u32 yclk; /* bandwidth per dram data pin in kHz */
7514 u32 sclk; /* engine clock in kHz */
7515 u32 disp_clk; /* display clock in kHz */
7516 u32 src_width; /* viewport width */
7517 u32 active_time; /* active display time in ns */
7518 u32 blank_time; /* blank time in ns */
7519 bool interlaced; /* mode is interlaced */
7520 fixed20_12 vsc; /* vertical scale ratio */
7521 u32 num_heads; /* number of active crtcs */
7522 u32 bytes_per_pixel; /* bytes per pixel display + overlay */
7523 u32 lb_size; /* line buffer allocated to pipe */
7524 u32 vtaps; /* vertical scaler taps */
7528 * dce8_dram_bandwidth - get the dram bandwidth
7530 * @wm: watermark calculation data
7532 * Calculate the raw dram bandwidth (CIK).
7533 * Used for display watermark bandwidth calculations
7534 * Returns the dram bandwidth in MBytes/s
7536 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
7538 /* Calculate raw DRAM Bandwidth */
7539 fixed20_12 dram_efficiency; /* 0.7 */
7540 fixed20_12 yclk, dram_channels, bandwidth;
/* yclk is in kHz; divide by 1000 to work in MHz (result in MB/s). */
7543 a.full = dfixed_const(1000);
7544 yclk.full = dfixed_const(wm->yclk);
7545 yclk.full = dfixed_div(yclk, a);
/* 4 bytes transferred per channel per clock. */
7546 dram_channels.full = dfixed_const(wm->dram_channels * 4);
/* dram_efficiency = 7/10 = 0.7 in 20.12 fixed point. */
7547 a.full = dfixed_const(10);
7548 dram_efficiency.full = dfixed_const(7);
7549 dram_efficiency.full = dfixed_div(dram_efficiency, a);
/* bandwidth = yclk * channels * 4 * 0.7 */
7550 bandwidth.full = dfixed_mul(dram_channels, yclk);
7551 bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
7553 return dfixed_trunc(bandwidth);
7557 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
7559 * @wm: watermark calculation data
7561 * Calculate the dram bandwidth used for display (CIK).
7562 * Used for display watermark bandwidth calculations
7563 * Returns the dram bandwidth for display in MBytes/s
7565 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
7567 /* Calculate DRAM Bandwidth and the part allocated to display. */
7568 fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
7569 fixed20_12 yclk, dram_channels, bandwidth;
/* yclk is in kHz; divide by 1000 to work in MHz (result in MB/s). */
7572 a.full = dfixed_const(1000);
7573 yclk.full = dfixed_const(wm->yclk);
7574 yclk.full = dfixed_div(yclk, a);
7575 dram_channels.full = dfixed_const(wm->dram_channels * 4);
/* Use the pessimistic 0.3 display allocation (3/10). */
7576 a.full = dfixed_const(10);
7577 disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
7578 disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
/* bandwidth = yclk * channels * 4 * 0.3 */
7579 bandwidth.full = dfixed_mul(dram_channels, yclk);
7580 bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
7582 return dfixed_trunc(bandwidth);
7586 * dce8_data_return_bandwidth - get the data return bandwidth
7588 * @wm: watermark calculation data
7590 * Calculate the data return bandwidth used for display (CIK).
7591 * Used for display watermark bandwidth calculations
7592 * Returns the data return bandwidth in MBytes/s
7594 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
7596 /* Calculate the display Data return Bandwidth */
7597 fixed20_12 return_efficiency; /* 0.8 */
7598 fixed20_12 sclk, bandwidth;
/* sclk is in kHz; divide by 1000 to work in MHz (result in MB/s). */
7601 a.full = dfixed_const(1000);
7602 sclk.full = dfixed_const(wm->sclk);
7603 sclk.full = dfixed_div(sclk, a);
/* return_efficiency = 8/10 = 0.8 in 20.12 fixed point. */
7604 a.full = dfixed_const(10);
7605 return_efficiency.full = dfixed_const(8);
7606 return_efficiency.full = dfixed_div(return_efficiency, a);
/* bandwidth = 32 bytes/clk * sclk * 0.8 */
7607 a.full = dfixed_const(32);
7608 bandwidth.full = dfixed_mul(a, sclk);
7609 bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
7611 return dfixed_trunc(bandwidth);
7615 * dce8_dmif_request_bandwidth - get the dmif bandwidth
7617 * @wm: watermark calculation data
7619 * Calculate the dmif bandwidth used for display (CIK).
7620 * Used for display watermark bandwidth calculations
7621 * Returns the dmif bandwidth in MBytes/s
7623 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
7625 /* Calculate the DMIF Request Bandwidth */
7626 fixed20_12 disp_clk_request_efficiency; /* 0.8 */
7627 fixed20_12 disp_clk, bandwidth;
/* disp_clk is in kHz; divide by 1000 to work in MHz (result in MB/s). */
7630 a.full = dfixed_const(1000);
7631 disp_clk.full = dfixed_const(wm->disp_clk);
7632 disp_clk.full = dfixed_div(disp_clk, a);
/* b = 32 bytes/request * disp_clk */
7633 a.full = dfixed_const(32);
7634 b.full = dfixed_mul(a, disp_clk);
/* request efficiency = 8/10 = 0.8 in 20.12 fixed point. */
7636 a.full = dfixed_const(10);
7637 disp_clk_request_efficiency.full = dfixed_const(8);
7638 disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
7640 bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
7642 return dfixed_trunc(bandwidth);
7646 * dce8_available_bandwidth - get the min available bandwidth
7648 * @wm: watermark calculation data
7650 * Calculate the min available bandwidth used for display (CIK).
7651 * Used for display watermark bandwidth calculations
7652 * Returns the min available bandwidth in MBytes/s
7654 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
7656 /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
/* The bottleneck is the smallest of dram, data-return and dmif bandwidth. */
7657 u32 dram_bandwidth = dce8_dram_bandwidth(wm);
7658 u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
7659 u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
7661 return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
7665 * dce8_average_bandwidth - get the average available bandwidth
7667 * @wm: watermark calculation data
7669 * Calculate the average available bandwidth used for display (CIK).
7670 * Used for display watermark bandwidth calculations
7671 * Returns the average available bandwidth in MBytes/s
7673 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
7675 /* Calculate the display mode Average Bandwidth
7676 * DisplayMode should contain the source and destination dimensions,
7680 fixed20_12 line_time;
7681 fixed20_12 src_width;
7682 fixed20_12 bandwidth;
/* line_time = (active + blank) time in ns, scaled down by 1000. */
7685 a.full = dfixed_const(1000);
7686 line_time.full = dfixed_const(wm->active_time + wm->blank_time);
7687 line_time.full = dfixed_div(line_time, a);
7688 bpp.full = dfixed_const(wm->bytes_per_pixel);
7689 src_width.full = dfixed_const(wm->src_width);
/* bytes fetched per line (width * bpp, scaled by vsc) over line time. */
7690 bandwidth.full = dfixed_mul(src_width, bpp);
7691 bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
7692 bandwidth.full = dfixed_div(bandwidth, line_time);
7694 return dfixed_trunc(bandwidth);
7698 * dce8_latency_watermark - get the latency watermark
7700 * @wm: watermark calculation data
7702 * Calculate the latency watermark (CIK).
7703 * Used for display watermark bandwidth calculations
7704 * Returns the latency watermark in ns
7706 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
7708 /* First calculate the latency in ns */
7709 u32 mc_latency = 2000; /* 2000 ns. */
7710 u32 available_bandwidth = dce8_available_bandwidth(wm);
/* 512-byte * 8 chunk / bandwidth (MB/s) -> time in ns. */
7711 u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
7712 u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
7713 u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
7714 u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
7715 (wm->num_heads * cursor_line_pair_return_time);
7716 u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
7717 u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
7718 u32 tmp, dmif_size = 12288;
7721 if (wm->num_heads == 0)
/* Heavy downscale (vsc > 2, or vsc > 1 with >= 3 scaler taps, or
 * interlaced with vsc >= 2) needs 4 source lines per dest line. */
7724 a.full = dfixed_const(2);
7725 b.full = dfixed_const(1);
7726 if ((wm->vsc.full > a.full) ||
7727 ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
7729 ((wm->vsc.full >= a.full) && wm->interlaced))
7730 max_src_lines_per_dst_line = 4;
7732 max_src_lines_per_dst_line = 2;
/* a = per-head share of the available bandwidth. */
7734 a.full = dfixed_const(available_bandwidth);
7735 b.full = dfixed_const(wm->num_heads);
7736 a.full = dfixed_div(a, b);
/* b = dmif_size / ((mc_latency + 512) / disp_clk). */
7738 b.full = dfixed_const(mc_latency + 512);
7739 c.full = dfixed_const(wm->disp_clk);
7740 b.full = dfixed_div(b, c);
7742 c.full = dfixed_const(dmif_size);
7743 b.full = dfixed_div(c, b);
7745 tmp = min(dfixed_trunc(a), dfixed_trunc(b));
7747 b.full = dfixed_const(1000);
7748 c.full = dfixed_const(wm->disp_clk);
7749 b.full = dfixed_div(c, b);
7750 c.full = dfixed_const(wm->bytes_per_pixel);
7751 b.full = dfixed_mul(b, c);
/* Line-buffer fill bandwidth is the smaller of the two limits above. */
7753 lb_fill_bw = min(tmp, dfixed_trunc(b));
/* Time to refill one destination line at lb_fill_bw. */
7755 a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
7756 b.full = dfixed_const(1000);
7757 c.full = dfixed_const(lb_fill_bw);
7758 b.full = dfixed_div(c, b);
7759 a.full = dfixed_div(a, b);
7760 line_fill_time = dfixed_trunc(a);
7762 if (line_fill_time < wm->active_time)
/* LB cannot refill within the active period: pad the watermark by
 * the shortfall. */
7765 return latency + (line_fill_time - wm->active_time);
7770 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
7771 * average and available dram bandwidth
7773 * @wm: watermark calculation data
7775 * Check if the display average bandwidth fits in the display
7776 * dram bandwidth (CIK).
7777 * Used for display watermark bandwidth calculations
7778 * Returns true if the display fits, false if not.
7780 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
/* Each active head is assumed to get an equal share of the display
 * dram bandwidth. */
7782 if (dce8_average_bandwidth(wm) <=
7783 (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
7790 * dce8_average_bandwidth_vs_available_bandwidth - check
7791 * average and available bandwidth
7793 * @wm: watermark calculation data
7795 * Check if the display average bandwidth fits in the display
7796 * available bandwidth (CIK).
7797 * Used for display watermark bandwidth calculations
7798 * Returns true if the display fits, false if not.
7800 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
/* Each active head is assumed to get an equal share of the minimum
 * available bandwidth. */
7802 if (dce8_average_bandwidth(wm) <=
7803 (dce8_available_bandwidth(wm) / wm->num_heads))
7810 * dce8_check_latency_hiding - check latency hiding
7812 * @wm: watermark calculation data
7814 * Check latency hiding (CIK).
7815 * Used for display watermark bandwidth calculations
7816 * Returns true if the display fits, false if not.
7818 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
7820 u32 lb_partitions = wm->lb_size / wm->src_width;
7821 u32 line_time = wm->active_time + wm->blank_time;
7822 u32 latency_tolerant_lines;
/* When downscaling (vsc > 1), or when the LB holds too few partitions
 * for the scaler taps, only one line of latency can be tolerated;
 * otherwise two. */
7826 a.full = dfixed_const(1);
7827 if (wm->vsc.full > a.full)
7828 latency_tolerant_lines = 1;
7830 if (lb_partitions <= (wm->vtaps + 1))
7831 latency_tolerant_lines = 1;
7833 latency_tolerant_lines = 2;
7836 latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
/* Display fits if the latency watermark is within what the LB hides. */
7838 if (dce8_latency_watermark(wm) <= latency_hiding)
7845 * dce8_program_watermarks - program display watermarks
7847 * @rdev: radeon_device pointer
7848 * @radeon_crtc: the selected display controller
7849 * @lb_size: line buffer size
7850 * @num_heads: number of display controllers in use
7852 * Calculate and program the display watermarks for the
7853 * selected display controller (CIK).
7855 static void dce8_program_watermarks(struct radeon_device *rdev,
7856 struct radeon_crtc *radeon_crtc,
7857 u32 lb_size, u32 num_heads)
7859 struct drm_display_mode *mode = &radeon_crtc->base.mode;
7860 struct dce8_wm_params wm_low, wm_high;
7863 u32 latency_watermark_a = 0, latency_watermark_b = 0;
7866 if (radeon_crtc->base.enabled && num_heads && mode) {
/* pixel_period in ns; line_time is clamped to the 16-bit field max. */
7867 pixel_period = 1000000 / (u32)mode->clock;
7868 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
7870 /* watermark for high clocks */
/* With DPM, query the max (false = high) mclk/sclk; otherwise fall
 * back to the current clocks. Values are *10 to convert to kHz. */
7871 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
7872 rdev->pm.dpm_enabled) {
7874 radeon_dpm_get_mclk(rdev, false) * 10;
7876 radeon_dpm_get_sclk(rdev, false) * 10;
7878 wm_high.yclk = rdev->pm.current_mclk * 10;
7879 wm_high.sclk = rdev->pm.current_sclk * 10;
7882 wm_high.disp_clk = mode->clock;
7883 wm_high.src_width = mode->crtc_hdisplay;
7884 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
7885 wm_high.blank_time = line_time - wm_high.active_time;
7886 wm_high.interlaced = false;
7887 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
7888 wm_high.interlaced = true;
7889 wm_high.vsc = radeon_crtc->vsc;
7891 if (radeon_crtc->rmx_type != RMX_OFF)
7893 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
7894 wm_high.lb_size = lb_size;
7895 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
7896 wm_high.num_heads = num_heads;
7898 /* set for high clocks */
7899 latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
7901 /* possibly force display priority to high */
7902 /* should really do this at mode validation time... */
7903 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
7904 !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
7905 !dce8_check_latency_hiding(&wm_high) ||
7906 (rdev->disp_priority == 2)) {
7907 DRM_DEBUG_KMS("force priority to high\n");
7910 /* watermark for low clocks */
/* Same as above but with the min (true = low) DPM clocks. */
7911 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
7912 rdev->pm.dpm_enabled) {
7914 radeon_dpm_get_mclk(rdev, true) * 10;
7916 radeon_dpm_get_sclk(rdev, true) * 10;
7918 wm_low.yclk = rdev->pm.current_mclk * 10;
7919 wm_low.sclk = rdev->pm.current_sclk * 10;
7922 wm_low.disp_clk = mode->clock;
7923 wm_low.src_width = mode->crtc_hdisplay;
7924 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
7925 wm_low.blank_time = line_time - wm_low.active_time;
7926 wm_low.interlaced = false;
7927 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
7928 wm_low.interlaced = true;
7929 wm_low.vsc = radeon_crtc->vsc;
7931 if (radeon_crtc->rmx_type != RMX_OFF)
7933 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
7934 wm_low.lb_size = lb_size;
7935 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
7936 wm_low.num_heads = num_heads;
7938 /* set for low clocks */
7939 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
7941 /* possibly force display priority to high */
7942 /* should really do this at mode validation time... */
7943 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
7944 !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
7945 !dce8_check_latency_hiding(&wm_low) ||
7946 (rdev->disp_priority == 2)) {
7947 DRM_DEBUG_KMS("force priority to high\n");
/* Select watermark set A via the mask register, write the high-clock
 * watermark, then do the same for set B with the low-clock value. */
7952 wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
7954 tmp &= ~LATENCY_WATERMARK_MASK(3);
7955 tmp |= LATENCY_WATERMARK_MASK(1);
7956 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
7957 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
7958 (LATENCY_LOW_WATERMARK(latency_watermark_a) |
7959 LATENCY_HIGH_WATERMARK(line_time)));
7961 tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
7962 tmp &= ~LATENCY_WATERMARK_MASK(3);
7963 tmp |= LATENCY_WATERMARK_MASK(2);
7964 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
7965 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
7966 (LATENCY_LOW_WATERMARK(latency_watermark_b) |
7967 LATENCY_HIGH_WATERMARK(line_time)));
7968 /* restore original selection */
7969 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
7971 /* save values for DPM */
7972 radeon_crtc->line_time = line_time;
7973 radeon_crtc->wm_high = latency_watermark_a;
7974 radeon_crtc->wm_low = latency_watermark_b;
7978 * dce8_bandwidth_update - program display watermarks
7980 * @rdev: radeon_device pointer
7982 * Calculate and program the display watermarks and line
7983 * buffer allocation (CIK).
7985 void dce8_bandwidth_update(struct radeon_device *rdev)
7987 struct drm_display_mode *mode = NULL;
7988 u32 num_heads = 0, lb_size;
7991 radeon_update_display_priority(rdev);
/* First pass: count enabled CRTCs; every per-crtc watermark
 * calculation needs the total head count. */
7993 for (i = 0; i < rdev->num_crtc; i++) {
7994 if (rdev->mode_info.crtcs[i]->base.enabled)
/* Second pass: size the line buffer, then program watermarks. */
7997 for (i = 0; i < rdev->num_crtc; i++) {
7998 mode = &rdev->mode_info.crtcs[i]->base.mode;
7999 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
8000 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
8005 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
8007 * @rdev: radeon_device pointer
8009 * Fetches a GPU clock counter snapshot (CIK).
8010 * Returns the 64 bit clock counter snapshot.
8012 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
/* gpu_clock_mutex serializes the capture write and the two 32-bit
 * reads so the LSB/MSB halves belong to the same snapshot. */
8016 mutex_lock(&rdev->gpu_clock_mutex);
8017 WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
8018 clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
8019 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
8020 mutex_unlock(&rdev->gpu_clock_mutex);
/*
 * cik_set_uvd_clock - program one UVD clock (vclk or dclk)
 *
 * @rdev: radeon_device pointer
 * @clock: target clock frequency
 * @cntl_reg: SMC clock control register to program
 * @status_reg: SMC status register to poll for the divider update
 *
 * Looks up the post divider for @clock via atombios, programs it into
 * @cntl_reg and polls @status_reg (bounded, 100 iterations) until the
 * new divider takes effect.
 */
8024 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
8025 u32 cntl_reg, u32 status_reg)
8028 struct atom_clock_dividers dividers;
/* Fix: "÷rs" was HTML-entity mojibake ("&divide;rs") for "&dividers". */
8031 r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
8032 clock, false, &dividers);
/* Replace the divider field, keeping the other control bits. */
8036 tmp = RREG32_SMC(cntl_reg);
8037 tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
8038 tmp |= dividers.post_divider;
8039 WREG32_SMC(cntl_reg, tmp);
/* Bounded wait for the status register to report the change. */
8041 for (i = 0; i < 100; i++) {
8042 if (RREG32_SMC(status_reg) & DCLK_STATUS)
8052 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
/* Program the UVD video clock first, then the decode clock. */
8056 r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
8060 r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
/*
 * cik_pcie_gen3_enable - train the PCIe link up to gen2/gen3
 *
 * Attempts to raise the link speed to the highest rate reported by
 * drm_pcie_get_speed_cap_mask(). Skipped for IGPs, non-PCIE parts,
 * or when the radeon.pcie_gen2=0 module parameter is set.
 */
8064 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
8066 struct pci_dev *root = rdev->pdev->bus->self;
8067 int bridge_pos, gpu_pos;
8068 u32 speed_cntl, mask, current_data_rate;
8072 if (radeon_pcie_gen2 == 0)
8075 if (rdev->flags & RADEON_IS_IGP)
8078 if (!(rdev->flags & RADEON_IS_PCIE))
8081 ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
8085 if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
/* current_data_rate: 0 = gen1, 1 = gen2, 2 = gen3. */
8088 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8089 current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
8090 LC_CURRENT_DATA_RATE_SHIFT;
8091 if (mask & DRM_PCIE_SPEED_80) {
8092 if (current_data_rate == 2) {
8093 DRM_INFO("PCIE gen 3 link speeds already enabled\n");
8096 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n")\u003b
8097 } else if (mask & DRM_PCIE_SPEED_50) {
8098 if (current_data_rate == 1) {
8099 DRM_INFO("PCIE gen 2 link speeds already enabled\n");
8102 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
/* PCIe capability offsets of the upstream bridge and the GPU. */
8105 bridge_pos = pci_pcie_cap(root);
8109 gpu_pos = pci_pcie_cap(rdev->pdev);
8113 if (mask & DRM_PCIE_SPEED_80) {
8114 /* re-try equalization if gen3 is not already enabled */
8115 if (current_data_rate != 2) {
8116 u16 bridge_cfg, gpu_cfg;
8117 u16 bridge_cfg2, gpu_cfg2;
8118 u32 max_lw, current_lw, tmp;
/* Save LNKCTL on both ends, then set hw autonomous width disable
 * (HAWD) for the retraining sequence. */
8120 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
8121 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
8123 tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
8124 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
8126 tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
8127 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
/* Upconfigure the link width to the detected maximum if possible. */
8129 tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
8130 max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
8131 current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
8133 if (current_lw < max_lw) {
8134 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
8135 if (tmp & LC_RENEGOTIATION_SUPPORT) {
8136 tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
8137 tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
8138 tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
8139 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
/* Up to 10 retraining attempts. */
8143 for (i = 0; i < 10; i++) {
/* Wait for any pending transactions to complete. */
8145 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
8146 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
8149 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
8150 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
8152 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
8153 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
/* Quiesce the link around the equalization redo. */
8155 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8156 tmp |= LC_SET_QUIESCE;
8157 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8159 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8161 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
/* Restore the saved HAWD bits on both ends. */
8166 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
8167 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
8168 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
8169 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
8171 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
8172 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
8173 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
8174 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
/* Restore the saved LNKCTL2 bits (bit 4 and bits 9-11) on both ends. */
8177 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
8178 tmp16 &= ~((1 << 4) | (7 << 9));
8179 tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
8180 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
8182 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
8183 tmp16 &= ~((1 << 4) | (7 << 9));
8184 tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
8185 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
8187 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8188 tmp &= ~LC_SET_QUIESCE;
8189 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8194 /* set the link speed */
8195 speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
8196 speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
8197 WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
/* Set the target link speed field in LNKCTL2 (1/2/3 = gen1/2/3). */
8199 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
8201 if (mask & DRM_PCIE_SPEED_80)
8202 tmp16 |= 3; /* gen3 */
8203 else if (mask & DRM_PCIE_SPEED_50)
8204 tmp16 |= 2; /* gen2 */
8206 tmp16 |= 1; /* gen1 */
8207 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
/* Kick off the speed change and wait (bounded) for it to complete. */
8209 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8210 speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
8211 WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
8213 for (i = 0; i < rdev->usec_timeout; i++) {
8214 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8215 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
8221 static void cik_program_aspm(struct radeon_device *rdev)
8224 bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
8225 bool disable_clkreq = false;
8227 if (radeon_aspm == 0)
8230 /* XXX double check IGPs */
8231 if (rdev->flags & RADEON_IS_IGP)
8234 if (!(rdev->flags & RADEON_IS_PCIE))
8237 orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
8238 data &= ~LC_XMIT_N_FTS_MASK;
8239 data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
8241 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
8243 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
8244 data |= LC_GO_TO_RECOVERY;
8246 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
8248 orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
8249 data |= P_IGNORE_EDB_ERR;
8251 WREG32_PCIE_PORT(PCIE_P_CNTL, data);
8253 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
8254 data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
8255 data |= LC_PMI_TO_L1_DIS;
8257 data |= LC_L0S_INACTIVITY(7);
8260 data |= LC_L1_INACTIVITY(7);
8261 data &= ~LC_PMI_TO_L1_DIS;
8263 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
8265 if (!disable_plloff_in_l1) {
8266 bool clk_req_support;
8268 orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
8269 data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
8270 data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
8272 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
8274 orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
8275 data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
8276 data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
8278 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
8280 orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
8281 data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
8282 data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
8284 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
8286 orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
8287 data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
8288 data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
8290 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
8292 orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
8293 data &= ~LC_DYN_LANES_PWR_STATE_MASK;
8294 data |= LC_DYN_LANES_PWR_STATE(3);
8296 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
8298 if (!disable_clkreq) {
8299 struct pci_dev *root = rdev->pdev->bus->self;
8302 clk_req_support = false;
8303 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
8304 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
8305 clk_req_support = true;
8307 clk_req_support = false;
8310 if (clk_req_support) {
8311 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
8312 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
8314 WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
8316 orig = data = RREG32_SMC(THM_CLK_CNTL);
8317 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
8318 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
8320 WREG32_SMC(THM_CLK_CNTL, data);
8322 orig = data = RREG32_SMC(MISC_CLK_CTRL);
8323 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
8324 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
8326 WREG32_SMC(MISC_CLK_CTRL, data);
8328 orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
8329 data &= ~BCLK_AS_XCLK;
8331 WREG32_SMC(CG_CLKPIN_CNTL, data);
8333 orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
8334 data &= ~FORCE_BIF_REFCLK_EN;
8336 WREG32_SMC(CG_CLKPIN_CNTL_2, data);
8338 orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
8339 data &= ~MPLL_CLKOUT_SEL_MASK;
8340 data |= MPLL_CLKOUT_SEL(4);
8342 WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
8347 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
8350 orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
8351 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
8353 WREG32_PCIE_PORT(PCIE_CNTL2, data);
8356 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
8357 if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
8358 data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
8359 if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
8360 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
8361 data &= ~LC_L0S_INACTIVITY_MASK;
8363 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);