From 5444e2e68bf8f75357167bb44e3087810fb7ccb2 Mon Sep 17 00:00:00 2001 From: chenzhen Date: Mon, 20 Jun 2016 10:03:27 +0800 Subject: [PATCH] MALI: rockchip: upgrade midgard DDK to r12p0-04rel0 Signed-off-by: chenzhen Conflicts: drivers/gpu/arm/midgard/mali_kbase_core_linux.c Change-Id: I17d67d066f6e029c2d8f1711002c347002a48399 Signed-off-by: chenzhen --- drivers/gpu/arm/midgard/Kbuild | 9 +- drivers/gpu/arm/midgard/Kconfig | 9 + drivers/gpu/arm/midgard/Makefile | 6 +- .../backend/gpu/mali_kbase_device_hw.c | 6 +- .../arm/midgard/backend/gpu/mali_kbase_gpu.c | 6 +- .../backend/gpu/mali_kbase_irq_linux.c | 8 +- .../midgard/backend/gpu/mali_kbase_jm_hw.c | 6 - .../backend/gpu/mali_kbase_js_affinity.c | 2 +- .../backend/gpu/mali_kbase_pm_policy.c | 2 +- .../arm/midgard/mali_base_hwconfig_features.h | 2 + .../arm/midgard/mali_base_hwconfig_issues.h | 58 ++ drivers/gpu/arm/midgard/mali_base_kernel.h | 12 +- drivers/gpu/arm/midgard/mali_kbase.h | 3 +- drivers/gpu/arm/midgard/mali_kbase_context.c | 13 +- .../gpu/arm/midgard/mali_kbase_core_linux.c | 47 +- .../arm/midgard/mali_kbase_debug_job_fault.c | 66 ++- .../arm/midgard/mali_kbase_debug_mem_view.c | 31 +- drivers/gpu/arm/midgard/mali_kbase_defs.h | 71 ++- .../gpu/arm/midgard/mali_kbase_dma_fence.c | 524 ++++++++++++++++++ .../gpu/arm/midgard/mali_kbase_dma_fence.h | 150 +++++ .../gpu/arm/midgard/mali_kbase_gator_api.c | 230 ++++---- .../midgard/mali_kbase_gator_hwcnt_names.h | 6 +- .../mali_kbase_gator_hwcnt_names_tmix.h | 291 ++++++++++ .../arm/midgard/mali_kbase_gpuprops_types.h | 2 +- drivers/gpu/arm/midgard/mali_kbase_jd.c | 261 +++++++-- drivers/gpu/arm/midgard/mali_kbase_js.c | 2 + drivers/gpu/arm/midgard/mali_kbase_mem.c | 77 +-- drivers/gpu/arm/midgard/mali_kbase_mem.h | 45 +- .../gpu/arm/midgard/mali_kbase_mem_linux.c | 186 ++++++- drivers/gpu/arm/midgard/mali_kbase_mem_pool.c | 2 + .../arm/midgard/mali_kbase_platform_fake.c | 6 +- drivers/gpu/arm/midgard/mali_kbase_replay.c | 28 +- drivers/gpu/arm/midgard/mali_kbase_softjobs.c | 332 +++++++++-- drivers/gpu/arm/midgard/mali_kbase_strings.c | 23 + drivers/gpu/arm/midgard/mali_kbase_strings.h | 19 + drivers/gpu/arm/midgard/mali_kbase_tlstream.c | 66 +++ drivers/gpu/arm/midgard/mali_kbase_tlstream.h | 24 + .../arm/midgard/mali_kbase_trace_timeline.h | 4 +- drivers/gpu/arm/midgard/mali_kbase_vinstr.c | 30 +- drivers/gpu/arm/midgard/mali_midg_regmap.h | 1 - drivers/gpu/arm/midgard/mali_timeline.h | 3 +- .../vexpress/mali_kbase_config_vexpress.c | 2 +- drivers/gpu/arm/midgard/sconscript | 7 +- 43 files changed, 2276 insertions(+), 402 deletions(-) create mode 100644 drivers/gpu/arm/midgard/mali_kbase_dma_fence.c create mode 100644 drivers/gpu/arm/midgard/mali_kbase_dma_fence.h create mode 100644 drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tmix.h create mode 100644 drivers/gpu/arm/midgard/mali_kbase_strings.c create mode 100644 drivers/gpu/arm/midgard/mali_kbase_strings.h diff --git a/drivers/gpu/arm/midgard/Kbuild b/drivers/gpu/arm/midgard/Kbuild index fbfc618a526d..e25b7d944e98 100644 --- a/drivers/gpu/arm/midgard/Kbuild +++ b/drivers/gpu/arm/midgard/Kbuild @@ -15,7 +15,7 @@ # Driver version string which is returned to userspace via an ioctl -MALI_RELEASE_NAME ?= "r11p0-00rel0" +MALI_RELEASE_NAME ?= "r12p0-04rel0" # Paths required for build KBASE_PATH = $(src) @@ -111,7 +111,8 @@ SRC := \ mali_kbase_smc.c \ mali_kbase_mem_pool.c \ mali_kbase_mem_pool_debugfs.c \ - mali_kbase_tlstream.c + mali_kbase_tlstream.c \ + mali_kbase_strings.c ifeq ($(MALI_UNIT_TEST),1) SRC += 
mali_kbase_tlstream_test.c @@ -121,6 +122,7 @@ ifeq ($(MALI_CUSTOMER_RELEASE),0) SRC += mali_kbase_regs_dump_debugfs.c endif + # Job Scheduler Policy: Completely Fair Scheduler SRC += mali_kbase_js_policy_cfs.c @@ -198,9 +200,10 @@ obj-$(CONFIG_MALI_MIDGARD) += mali_kbase.o # Tell the Linux build system to enable building of our .c files mali_kbase-y := $(SRC:.c=.o) +mali_kbase-$(CONFIG_MALI_DMA_FENCE) += mali_kbase_dma_fence.o + ifneq ($(wildcard $(src)/internal/Kbuild),) ifeq ($(MALI_CUSTOMER_RELEASE),0) -# This include may set MALI_BACKEND_PATH and CONFIG_MALI_BACKEND_REAL include $(src)/internal/Kbuild mali_kbase-y += $(INTERNAL:.c=.o) endif diff --git a/drivers/gpu/arm/midgard/Kconfig b/drivers/gpu/arm/midgard/Kconfig index 8a33841af5a5..367f0c787531 100644 --- a/drivers/gpu/arm/midgard/Kconfig +++ b/drivers/gpu/arm/midgard/Kconfig @@ -59,6 +59,15 @@ config MALI_DEVFREQ governor, the frequency of Mali will be dynamically selected from the available OPPs. +config MALI_DMA_FENCE + bool "DMA_BUF fence support for Mali (EXPERIMENTAL)" + depends on MALI_MIDGARD && !KDS + default n + help + Support DMA_BUF fences for Mali. + + This option should only be enabled if KDS is not present and + the Linux kernel has built-in support for DMA_BUF fences. # MALI_EXPERT configuration options diff --git a/drivers/gpu/arm/midgard/Makefile b/drivers/gpu/arm/midgard/Makefile index d4d5de4cd512..e1625e6dba79 100644 --- a/drivers/gpu/arm/midgard/Makefile +++ b/drivers/gpu/arm/midgard/Makefile @@ -1,5 +1,5 @@ # -# (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -26,10 +26,6 @@ ifeq ($(MALI_UNIT_TEST), 1) EXTRA_SYMBOLS += $(KBASE_PATH_RELATIVE)/tests/internal/src/kernel_assert_module/linux/Module.symvers endif -ifneq ($(wildcard $(CURDIR)/internal/Makefile.in),) -include $(CURDIR)/internal/Makefile.in -endif - ifeq ($(MALI_BUS_LOG), 1) #Add bus logger symbols EXTRA_SYMBOLS += $(BUSLOG_PATH_RELATIVE)/drivers/base/bus_logger/Module.symvers diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c index 83d5ec9f7a93..b9238a305177 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -33,7 +33,9 @@ void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value, KBASE_DEBUG_ASSERT(kctx == NULL || kctx->as_nr != KBASEP_AS_NR_INVALID); KBASE_DEBUG_ASSERT(kbdev->dev != NULL); dev_dbg(kbdev->dev, "w: reg %04x val %08x", offset, value); + writel(value, kbdev->reg + offset); + if (kctx && kctx->jctx.tb) kbase_device_trace_register_access(kctx, REG_WRITE, offset, value); @@ -48,7 +50,9 @@ u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset, KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered); KBASE_DEBUG_ASSERT(kctx == NULL || kctx->as_nr != KBASEP_AS_NR_INVALID); KBASE_DEBUG_ASSERT(kbdev->dev != NULL); + val = readl(kbdev->reg + offset); + dev_dbg(kbdev->dev, "r: reg %04x val %08x", offset, val); if (kctx && kctx->jctx.tb) kbase_device_trace_register_access(kctx, REG_READ, offset, val); diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c index 72a98d0f7952..033caf9fa210 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -81,7 +81,6 @@ int kbase_backend_late_init(struct kbase_device *kbdev) if (err) goto fail_timer; -/* Currently disabled on the prototype */ #ifdef CONFIG_MALI_DEBUG #ifndef CONFIG_MALI_NO_MALI if (kbasep_common_test_interrupt_handlers(kbdev) != 0) { @@ -101,12 +100,13 @@ int kbase_backend_late_init(struct kbase_device *kbdev) return 0; fail_job_slot: -/* Currently disabled on the prototype */ + #ifdef CONFIG_MALI_DEBUG #ifndef CONFIG_MALI_NO_MALI fail_interrupt_test: #endif /* !CONFIG_MALI_NO_MALI */ #endif /* CONFIG_MALI_DEBUG */ + kbase_backend_timer_term(kbdev); fail_timer: kbase_hwaccess_pm_halt(kbdev); diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c index 49c72f90aac6..b891b12a3299 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -28,7 +28,6 @@ #define MMU_IRQ_TAG 1 #define GPU_IRQ_TAG 2 - static void *kbase_tag(void *ptr, u32 tag) { return (void *)(((uintptr_t) ptr) | tag); @@ -39,9 +38,6 @@ static void *kbase_untag(void *ptr) return (void *)(((uintptr_t) ptr) & ~3); } - - - static irqreturn_t kbase_job_irq_handler(int irq, void *data) { unsigned long flags; @@ -151,13 +147,13 @@ static irqreturn_t kbase_gpu_irq_handler(int irq, void *data) return IRQ_HANDLED; } + static irq_handler_t kbase_handler_table[] = { [JOB_IRQ_TAG] = kbase_job_irq_handler, [MMU_IRQ_TAG] = kbase_mmu_irq_handler, [GPU_IRQ_TAG] = kbase_gpu_irq_handler, }; - #ifdef CONFIG_MALI_DEBUG #define JOB_IRQ_HANDLER JOB_IRQ_TAG #define MMU_IRQ_HANDLER MMU_IRQ_TAG diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c index ddaae344951f..657717579c1c 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c @@ -207,8 +207,6 @@ static void kbasep_job_slot_update_head_start_timestamp( } } -#if (defined(MALI_MIPE_ENABLED) && MALI_MIPE_ENABLED) || \ - !defined(MALI_MIPE_ENABLED) /** * kbasep_trace_tl_nret_atom_lpu - Call nret_atom_lpu timeline tracepoint * @kbdev: kbase device @@ -226,7 +224,6 @@ static void kbasep_trace_tl_nret_atom_lpu(struct kbase_device *kbdev, int i) kbase_tlstream_tl_nret_atom_lpu(katom, &kbdev->gpu_props.props.raw_props.js_features[i]); } -#endif void kbase_job_done(struct kbase_device *kbdev, u32 done) { @@ -291,11 +288,8 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) kbase_tlstream_aux_job_softstop(i); -#if (defined(MALI_MIPE_ENABLED) && MALI_MIPE_ENABLED) || \ - !defined(MALI_MIPE_ENABLED) kbasep_trace_tl_nret_atom_lpu( kbdev, i); -#endif /* Soft-stopped job - read the value of * JS_TAIL so that the job chain can diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c index 6a49669af630..5459281aa968 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c index 343436fc353d..ad0e98be4571 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software diff --git a/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h b/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h index 29c78c2ba6d8..a6e08da7035c 100644 --- a/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h +++ b/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h @@ -186,4 +186,6 @@ static const enum base_hw_feature base_hw_features_tMIx[] = { BASE_HW_FEATURE_END }; + + #endif /* _BASE_HWCONFIG_FEATURES_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h b/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h index e111b0777f1f..55e5e5a4f5d2 100644 --- a/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h +++ b/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h @@ -90,6 +90,7 @@ enum base_hw_issue { BASE_HW_ISSUE_11035, BASE_HW_ISSUE_11042, BASE_HW_ISSUE_11051, + BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_26, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, @@ -102,6 +103,7 @@ enum base_hw_issue { BASE_HW_ISSUE_T76X_3960, BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_T76X_3979, BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_TMIX_7940, BASE_HW_ISSUE_TMIX_8042, @@ -173,6 +175,7 @@ static const enum base_hw_issue base_hw_issues_t60x_r0p0_15dev0[] = { BASE_HW_ISSUE_11020, BASE_HW_ISSUE_11035, BASE_HW_ISSUE_11051, + BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_3964, GPUCORE_1619, @@ -211,6 +214,7 @@ static const enum base_hw_issue base_hw_issues_t60x_r0p0_eac[] = { BASE_HW_ISSUE_11020, BASE_HW_ISSUE_11035, BASE_HW_ISSUE_11051, + BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_END @@ -245,6 +249,7 @@ static const enum base_hw_issue base_hw_issues_t60x_r0p1[] = { BASE_HW_ISSUE_11020, BASE_HW_ISSUE_11035, BASE_HW_ISSUE_11051, + BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3964, @@ -277,6 +282,7 @@ static const enum base_hw_issue base_hw_issues_t62x_r0p1[] = { BASE_HW_ISSUE_11035, BASE_HW_ISSUE_11042, BASE_HW_ISSUE_11051, + BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_END @@ -298,6 +304,7 @@ static const enum base_hw_issue base_hw_issues_t62x_r1p0[] = { BASE_HW_ISSUE_11024, BASE_HW_ISSUE_11042, BASE_HW_ISSUE_11051, + BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3964, @@ -318,6 +325,7 @@ static const enum base_hw_issue base_hw_issues_t62x_r1p1[] = { BASE_HW_ISSUE_11012, BASE_HW_ISSUE_11042, BASE_HW_ISSUE_11051, + BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_END @@ -332,6 +340,7 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p0[] = { BASE_HW_ISSUE_11024, BASE_HW_ISSUE_11042, BASE_HW_ISSUE_11051, + BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_26, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, @@ -344,6 +353,7 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p0[] = { BASE_HW_ISSUE_T76X_3960, BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_T76X_3979, BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -357,6 +367,7 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p1[] = { BASE_HW_ISSUE_11024, BASE_HW_ISSUE_11042, BASE_HW_ISSUE_11051, + BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_26, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, @@ -369,6 +380,7 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p1[] = { BASE_HW_ISSUE_T76X_3960, 
BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_T76X_3979, BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -380,6 +392,7 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p1_50rel0[] = { BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11042, BASE_HW_ISSUE_11051, + BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_26, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, @@ -392,6 +405,7 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p1_50rel0[] = { BASE_HW_ISSUE_T76X_3960, BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_T76X_3979, BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -405,6 +419,7 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p2[] = { BASE_HW_ISSUE_11024, BASE_HW_ISSUE_11042, BASE_HW_ISSUE_11051, + BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_26, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, @@ -417,6 +432,7 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p2[] = { BASE_HW_ISSUE_T76X_3960, BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_T76X_3979, BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -428,6 +444,7 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p3[] = { BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11042, BASE_HW_ISSUE_11051, + BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_26, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, @@ -440,6 +457,7 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p3[] = { BASE_HW_ISSUE_T76X_3960, BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_T76X_3979, BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -451,6 +469,7 @@ static const enum base_hw_issue base_hw_issues_t76x_r1p0[] = { BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11042, BASE_HW_ISSUE_11051, + BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, @@ -460,6 +479,7 @@ static const enum base_hw_issue base_hw_issues_t76x_r1p0[] = { BASE_HW_ISSUE_T76X_3960, BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_T76X_3979, BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -475,6 +495,7 @@ static const enum base_hw_issue base_hw_issues_t72x_r0p0[] = { BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11042, BASE_HW_ISSUE_11051, + BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3964, @@ -492,6 +513,7 @@ static const enum base_hw_issue base_hw_issues_t72x_r1p0[] = { BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11042, BASE_HW_ISSUE_11051, + BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3964, @@ -509,6 +531,7 @@ static const enum base_hw_issue base_hw_issues_t72x_r1p1[] = { BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11042, BASE_HW_ISSUE_11051, + BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3964, @@ -543,6 +566,7 @@ static const enum base_hw_issue base_hw_issues_model_t76x[] = { BASE_HW_ISSUE_T76X_3700, BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3964, + BASE_HW_ISSUE_T76X_3979, BASE_HW_ISSUE_TMIX_7891, GPUCORE_1619, BASE_HW_ISSUE_END @@ -590,6 +614,7 @@ static const enum base_hw_issue base_hw_issues_tFRx_r0p1[] = { BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11051, + BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, @@ -599,6 +624,7 @@ static const enum base_hw_issue base_hw_issues_tFRx_r0p1[] = { BASE_HW_ISSUE_T76X_3960, BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_T76X_3979, BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -609,6 +635,7 @@ static const enum base_hw_issue 
base_hw_issues_tFRx_r0p2[] = { BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11051, + BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, @@ -617,6 +644,7 @@ static const enum base_hw_issue base_hw_issues_tFRx_r0p2[] = { BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_T76X_3979, BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -627,12 +655,14 @@ static const enum base_hw_issue base_hw_issues_tFRx_r1p0[] = { BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11051, + BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, BASE_HW_ISSUE_T76X_3700, BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_T76X_3979, BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -643,12 +673,14 @@ static const enum base_hw_issue base_hw_issues_tFRx_r2p0[] = { BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11051, + BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, BASE_HW_ISSUE_T76X_3700, BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_T76X_3979, BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -662,6 +694,7 @@ static const enum base_hw_issue base_hw_issues_model_tFRx[] = { BASE_HW_ISSUE_T76X_3700, BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3964, + BASE_HW_ISSUE_T76X_3979, BASE_HW_ISSUE_TMIX_7891, GPUCORE_1619, BASE_HW_ISSUE_END @@ -673,6 +706,7 @@ static const enum base_hw_issue base_hw_issues_t86x_r0p2[] = { BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11051, + BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, @@ -681,6 +715,7 @@ static const enum base_hw_issue base_hw_issues_t86x_r0p2[] = { BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_T76X_3979, BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -691,12 +726,14 @@ static const enum base_hw_issue base_hw_issues_t86x_r1p0[] = { BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11051, + BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, BASE_HW_ISSUE_T76X_3700, BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_T76X_3979, BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -707,12 +744,14 @@ static const enum base_hw_issue base_hw_issues_t86x_r2p0[] = { BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11051, + BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, BASE_HW_ISSUE_T76X_3700, BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_T76X_3979, BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -725,6 +764,7 @@ static const enum base_hw_issue base_hw_issues_model_t86x[] = { BASE_HW_ISSUE_T76X_3086, BASE_HW_ISSUE_T76X_3700, BASE_HW_ISSUE_T76X_3793, + BASE_HW_ISSUE_T76X_3979, BASE_HW_ISSUE_TMIX_7891, GPUCORE_1619, BASE_HW_ISSUE_END @@ -736,6 +776,7 @@ static const enum base_hw_issue base_hw_issues_t83x_r0p1[] = { BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11051, + BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, @@ -743,6 +784,7 @@ static const enum base_hw_issue base_hw_issues_t83x_r0p1[] = { BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3960, + BASE_HW_ISSUE_T76X_3979, BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -753,12 +795,14 @@ static const enum base_hw_issue base_hw_issues_t83x_r1p0[] = { BASE_HW_ISSUE_10883, 
BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11051, + BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, BASE_HW_ISSUE_T76X_3700, BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3960, + BASE_HW_ISSUE_T76X_3979, BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -773,6 +817,7 @@ static const enum base_hw_issue base_hw_issues_model_t83x[] = { BASE_HW_ISSUE_T76X_3700, BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3964, + BASE_HW_ISSUE_T76X_3979, BASE_HW_ISSUE_TMIX_7891, GPUCORE_1619, BASE_HW_ISSUE_END @@ -784,6 +829,7 @@ static const enum base_hw_issue base_hw_issues_t82x_r0p0[] = { BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11051, + BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, @@ -792,6 +838,7 @@ static const enum base_hw_issue base_hw_issues_t82x_r0p0[] = { BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3960, BASE_HW_ISSUE_T76X_3964, + BASE_HW_ISSUE_T76X_3979, BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -802,6 +849,7 @@ static const enum base_hw_issue base_hw_issues_t82x_r0p1[] = { BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11051, + BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, @@ -809,6 +857,7 @@ static const enum base_hw_issue base_hw_issues_t82x_r0p1[] = { BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3960, + BASE_HW_ISSUE_T76X_3979, BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -819,12 +868,14 @@ static const enum base_hw_issue base_hw_issues_t82x_r1p0[] = { BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11051, + BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, BASE_HW_ISSUE_T76X_3700, BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3960, + BASE_HW_ISSUE_T76X_3979, BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -838,6 +889,7 @@ static const enum base_hw_issue base_hw_issues_model_t82x[] = { BASE_HW_ISSUE_T76X_3086, BASE_HW_ISSUE_T76X_3700, BASE_HW_ISSUE_T76X_3793, + BASE_HW_ISSUE_T76X_3979, BASE_HW_ISSUE_TMIX_7891, GPUCORE_1619, BASE_HW_ISSUE_END @@ -847,6 +899,7 @@ static const enum base_hw_issue base_hw_issues_tMIx_r0p0_05dev0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10682, BASE_HW_ISSUE_10821, + BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_3700, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_TMIX_7891, @@ -860,6 +913,7 @@ static const enum base_hw_issue base_hw_issues_tMIx_r0p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10682, BASE_HW_ISSUE_10821, + BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_3700, BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_TMIX_7940, @@ -884,4 +938,8 @@ static const enum base_hw_issue base_hw_issues_model_tMIx[] = { BASE_HW_ISSUE_END }; + + + + #endif /* _BASE_HWCONFIG_ISSUES_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_base_kernel.h b/drivers/gpu/arm/midgard/mali_base_kernel.h index c59e8b2c04ca..873cd929dc78 100644 --- a/drivers/gpu/arm/midgard/mali_base_kernel.h +++ b/drivers/gpu/arm/midgard/mali_base_kernel.h @@ -173,7 +173,10 @@ enum { BASE_MEM_SECURE = (1U << 16), /**< Secure memory */ BASE_MEM_DONT_NEED = (1U << 17), /**< Not needed physical memory */ - + BASE_MEM_IMPORT_SHARED = (1U << 18), /**< Must use shared CPU/GPU zone + (SAME_VA zone) but doesn't + require the addresses to + be the same */ }; /** @@ -181,7 +184,7 @@ enum { * * Must be kept in sync with the ::base_mem_alloc_flags flags */ -#define BASE_MEM_FLAGS_NR_BITS 18 +#define BASE_MEM_FLAGS_NR_BITS 19 /** * A mask for all output bits, excluding IN/OUT bits. 
@@ -470,6 +473,8 @@ typedef u16 base_jd_core_req; * If this bit is set then completion of this atom will not cause an event to * be sent to userspace, whether successful or not; completion events will be * deferred until an atom completes which does not have this bit set. + * + * This bit may not be used in combination with BASE_JD_REQ_EXTERNAL_RESOURCES. */ #define BASE_JD_REQ_EVENT_COALESCE (1U << 5) @@ -492,6 +497,8 @@ typedef u16 base_jd_core_req; * but should instead be part of a NULL jobs inserted into the dependency tree. * The first pre_dep object must be configured for the external resouces to use, * the second pre_dep object can be used to create other dependencies. + * + * This bit may not be used in combination with BASE_JD_REQ_EVENT_COALESCE. */ #define BASE_JD_REQ_EXTERNAL_RESOURCES (1U << 8) @@ -817,6 +824,7 @@ struct base_external_resource_list { struct base_jd_debug_copy_buffer { u64 address; u64 size; + struct base_external_resource extres; }; /** diff --git a/drivers/gpu/arm/midgard/mali_kbase.h b/drivers/gpu/arm/midgard/mali_kbase.h index 3483acfcfaaf..f09b30a3dff3 100644 --- a/drivers/gpu/arm/midgard/mali_kbase.h +++ b/drivers/gpu/arm/midgard/mali_kbase.h @@ -45,6 +45,7 @@ #include #include +#include "mali_kbase_strings.h" #include "mali_kbase_pm.h" #include "mali_kbase_mem_lowlevel.h" #include "mali_kbase_defs.h" @@ -146,6 +147,7 @@ void kbase_jd_free_external_resources(struct kbase_jd_atom *katom); bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *user_atom, struct kbase_jd_atom *katom); +void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom); void kbase_job_done(struct kbase_device *kbdev, u32 done); @@ -211,7 +213,6 @@ void kbase_device_trace_buffer_uninstall(struct kbase_context *kctx); void kbase_os_reg_write(struct kbase_device *kbdev, u16 offset, u32 value); u32 kbase_os_reg_read(struct kbase_device *kbdev, u16 offset); - void kbasep_as_do_poke(struct work_struct *work); /** Returns the name associated with a Mali exception code diff --git a/drivers/gpu/arm/midgard/mali_kbase_context.c b/drivers/gpu/arm/midgard/mali_kbase_context.c index d53f728d08e8..0548d2b124ac 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_context.c +++ b/drivers/gpu/arm/midgard/mali_kbase_context.c @@ -65,6 +65,8 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat) kctx->process_mm = NULL; atomic_set(&kctx->nonmapped_pages, 0); kctx->slots_pullable = 0; + kctx->tgid = current->tgid; + kctx->pid = current->pid; err = kbase_mem_pool_init(&kctx->mem_pool, kbdev->mem_pool_max_size_default, @@ -97,10 +99,13 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat) #ifdef CONFIG_KDS INIT_LIST_HEAD(&kctx->waiting_kds_resource); #endif + err = kbase_dma_fence_init(kctx); + if (err) + goto free_event; err = kbase_mmu_init(kctx); if (err) - goto free_event; + goto term_dma_fence; kctx->pgd = kbase_mmu_alloc_pgd(kctx); if (!kctx->pgd) @@ -110,8 +115,6 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat) if (!kctx->aliasing_sink_page) goto no_sink_page; - kctx->tgid = current->tgid; - kctx->pid = current->pid; init_waitqueue_head(&kctx->event_queue); kctx->cookies = KBASE_COOKIE_MASK; @@ -160,6 +163,8 @@ no_sink_page: kbase_gpu_vm_unlock(kctx); free_mmu: kbase_mmu_term(kctx); +term_dma_fence: + kbase_dma_fence_term(kctx); free_event: kbase_event_cleanup(kctx); free_jd: @@ -254,6 +259,8 @@ void kbase_destroy_context(struct kbase_context *kctx) kbase_pm_context_idle(kbdev); + kbase_dma_fence_term(kctx); + kbase_mmu_term(kctx); 
pages = atomic_read(&kctx->used_pages); diff --git a/drivers/gpu/arm/midgard/mali_kbase_core_linux.c b/drivers/gpu/arm/midgard/mali_kbase_core_linux.c index e27481a07a0f..e17708c9cde7 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_core_linux.c +++ b/drivers/gpu/arm/midgard/mali_kbase_core_linux.c @@ -87,9 +87,6 @@ #include -#ifdef CONFIG_MACH_MANTA -#include -#endif #if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)) #include @@ -109,12 +106,9 @@ static struct kbase_exported_test_data shared_kernel_test_data; EXPORT_SYMBOL(shared_kernel_test_data); #endif /* MALI_UNIT_TEST */ -#define KBASE_DRV_NAME "mali" /** rk_ext : version of rk_ext on mali_ko, aka. rk_ko_ver. */ #define ROCKCHIP_VERSION (13) -static const char kbase_drv_name[] = KBASE_DRV_NAME; - static int kbase_dev_nr; static DEFINE_MUTEX(kbase_dev_list_lock); @@ -1768,7 +1762,6 @@ u32 kbase_os_reg_read(struct kbase_device *kbdev, u16 offset) } #endif /* !CONFIG_MALI_NO_MALI */ - /** Show callback for the @c power_policy sysfs file. * * This function is called to get the contents of the @c power_policy sysfs @@ -2887,7 +2880,7 @@ static ssize_t kbase_show_gpuinfo(struct device *dev, { .id = GPU_ID_PI_T86X, .name = "Mali-T86x" }, { .id = GPU_ID_PI_TFRX, .name = "Mali-T88x" }, { .id = GPU_ID2_PRODUCT_TMIX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, - .name = "Mali-TMIx" }, + .name = "Mali-G71" }, }; const char *product_name = "(Unknown Mali GPU)"; struct kbase_device *kbdev; @@ -3312,6 +3305,7 @@ static void kbase_common_reg_unmap(struct kbase_device * const kbdev) static int registers_map(struct kbase_device * const kbdev) { + /* the first memory resource is the physical address of the GPU * registers */ struct platform_device *pdev = to_platform_device(kbdev->dev); @@ -3479,30 +3473,6 @@ MAKE_QUIRK_ACCESSORS(mmu); #endif /* KBASE_GPU_RESET_EN */ -static int kbasep_secure_mode_seq_show(struct seq_file *m, void *p) -{ - struct kbase_device *kbdev = m->private; - - if (!kbdev->secure_mode_support) - seq_puts(m, "unsupported\n"); - else - seq_printf(m, "%s\n", kbdev->secure_mode ? 
"Y" : "N"); - - return 0; -} - -static int kbasep_secure_mode_debugfs_open(struct inode *in, struct file *file) -{ - return single_open(file, kbasep_secure_mode_seq_show, in->i_private); -} - -static const struct file_operations kbasep_secure_mode_debugfs_fops = { - .open = kbasep_secure_mode_debugfs_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int kbase_device_debugfs_init(struct kbase_device *kbdev) { struct dentry *debugfs_ctx_defaults_directory; @@ -3568,10 +3538,6 @@ static int kbase_device_debugfs_init(struct kbase_device *kbdev) kbasep_trace_timeline_debugfs_init(kbdev); #endif /* CONFIG_MALI_TRACE_TIMELINE */ - debugfs_create_file("secure_mode", S_IRUGO, - kbdev->mali_debugfs_directory, kbdev, - &kbasep_secure_mode_debugfs_fops); - return 0; out: @@ -3650,7 +3616,6 @@ static void kbase_logging_started_cb(void *data) } #endif - static struct attribute *kbase_attrs[] = { #ifdef CONFIG_MALI_DEBUG &dev_attr_debug_command.attr, @@ -3787,7 +3752,6 @@ static int kbase_platform_device_remove(struct platform_device *pdev) kbdev->inited_subsys &= ~inited_backend_early; } - if (kbdev->inited_subsys & inited_power_control) { power_control_term(kbdev); kbdev->inited_subsys &= ~inited_power_control; @@ -3881,7 +3845,6 @@ static int kbase_platform_device_probe(struct platform_device *pdev) } kbdev->inited_subsys |= inited_power_control; - err = kbase_backend_early_init(kbdev); if (err) { dev_err(kbdev->dev, "Early backend initialization failed\n"); @@ -4253,19 +4216,15 @@ static int __init kbase_driver_init(void) if (ret) return ret; -#ifndef CONFIG_MACH_MANTA #ifdef CONFIG_MALI_PLATFORM_FAKE ret = kbase_platform_fake_register(); if (ret) return ret; -#endif #endif ret = platform_driver_register(&kbase_platform_driver); -#ifndef CONFIG_MACH_MANTA #ifdef CONFIG_MALI_PLATFORM_FAKE if (ret) kbase_platform_fake_unregister(); -#endif #endif return ret; } @@ -4273,11 +4232,9 @@ static int __init kbase_driver_init(void) static void __exit kbase_driver_exit(void) { platform_driver_unregister(&kbase_platform_driver); -#ifndef CONFIG_MACH_MANTA #ifdef CONFIG_MALI_PLATFORM_FAKE kbase_platform_fake_unregister(); #endif -#endif } module_init(kbase_driver_init); diff --git a/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c b/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c index f3e426f9539b..bcd2569549c9 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c +++ b/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -15,51 +15,71 @@ +#include <mali_kbase.h> #include "mali_kbase_debug_job_fault.h" #ifdef CONFIG_DEBUG_FS -static bool kbase_is_job_fault_event_pending(struct list_head *event_list) +static bool kbase_is_job_fault_event_pending(struct kbase_device *kbdev) { - bool ret; + struct list_head *event_list = &kbdev->job_fault_event_list; + unsigned long flags; + bool ret; - ret = (!list_empty(event_list)); + spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); + ret = !list_empty(event_list); + spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); return ret; } -static bool kbase_ctx_has_no_event_pending( - struct kbase_context *kctx, struct list_head *event_list) +static bool kbase_ctx_has_no_event_pending(struct kbase_context *kctx) { + struct kbase_device *kbdev = kctx->kbdev; + struct list_head *event_list = &kctx->kbdev->job_fault_event_list; struct base_job_fault_event *event; + unsigned long flags; - if (list_empty(event_list)) + spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); + if (list_empty(event_list)) { + spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); return true; + } list_for_each_entry(event, event_list, head) { - if (event->katom->kctx == kctx) + if (event->katom->kctx == kctx) { + spin_unlock_irqrestore(&kbdev->job_fault_event_lock, + flags); return false; + } } - return false; + spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); + return true; } /* wait until the fault happen and copy the event */ static int kbase_job_fault_event_wait(struct kbase_device *kbdev, - struct list_head *event_list, struct base_job_fault_event *event) { + struct list_head *event_list = &kbdev->job_fault_event_list; struct base_job_fault_event *event_in; + unsigned long flags; + spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); if (list_empty(event_list)) { + spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); if (wait_event_interruptible(kbdev->job_fault_wq, - kbase_is_job_fault_event_pending(event_list))) + kbase_is_job_fault_event_pending(kbdev))) return -ERESTARTSYS; + spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); } event_in = list_entry(event_list->next, struct base_job_fault_event, head); - event->event_code = event_in->event_code; event->katom = event_in->katom; + + spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); + return 0; } @@ -102,12 +122,17 @@ static void kbase_job_fault_resume_event_cleanup(struct kbase_context *kctx) static void kbase_job_fault_event_cleanup(struct kbase_device *kbdev) { struct list_head *event_list = &kbdev->job_fault_event_list; + unsigned long flags; + spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); while (!list_empty(event_list)) { kbase_job_fault_event_dequeue(kbdev, event_list); + spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); wake_up(&kbdev->job_fault_resume_wq); + spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); } + spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); } static void kbase_job_fault_resume_worker(struct work_struct *data) @@ -129,8 +153,7 @@ static void kbase_job_fault_resume_worker(struct work_struct *data) * atoms belong to the same context.
*/ wait_event(kctx->kbdev->job_fault_resume_wq, - kbase_ctx_has_no_event_pending(kctx, - &kctx->kbdev->job_fault_event_list)); + kbase_ctx_has_no_event_pending(kctx)); atomic_set(&kctx->job_fault_count, 0); kbase_jd_done_worker(&katom->work); @@ -166,9 +189,12 @@ static void kbase_job_fault_event_post(struct kbase_device *kbdev, struct kbase_jd_atom *katom, u32 completion_code) { struct base_job_fault_event *event; + unsigned long flags; + spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); event = kbase_job_fault_event_queue(&kbdev->job_fault_event_list, katom, completion_code); + spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); wake_up_interruptible(&kbdev->job_fault_wq); @@ -293,9 +319,10 @@ static void *debug_job_fault_start(struct seq_file *m, loff_t *pos) */ if (*pos == 0) { event = kmalloc(sizeof(*event), GFP_KERNEL); + if (!event) + return NULL; event->reg_offset = 0; - if (kbase_job_fault_event_wait(kbdev, - &kbdev->job_fault_event_list, event)) { + if (kbase_job_fault_event_wait(kbdev, event)) { kfree(event); return NULL; } @@ -329,11 +356,15 @@ static void debug_job_fault_stop(struct seq_file *m, void *v) dev_info(kbdev->dev, "debug job fault seq stop stage 1"); } else { + unsigned long flags; + + spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); if (!list_empty(&kbdev->job_fault_event_list)) { kbase_job_fault_event_dequeue(kbdev, &kbdev->job_fault_event_list); wake_up(&kbdev->job_fault_resume_wq); } + spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); dev_info(kbdev->dev, "debug job fault seq stop stage 2"); } @@ -404,6 +435,7 @@ int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev) init_waitqueue_head(&(kbdev->job_fault_wq)); init_waitqueue_head(&(kbdev->job_fault_resume_wq)); + spin_lock_init(&kbdev->job_fault_event_lock); kbdev->job_fault_resume_workq = alloc_workqueue( "kbase_job_fault_resume_work_queue", WQ_MEM_RECLAIM, 1); diff --git a/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c b/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c index 42d1d832c0a3..28d93e30794f 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c +++ b/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2013-2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -125,6 +125,8 @@ static int debug_mem_show(struct seq_file *m, void *v) page = pfn_to_page(PFN_DOWN(map->alloc->pages[data->offset])); mapping = vmap(&page, 1, VM_MAP, prot); + if (!mapping) + goto out; for (i = 0; i < PAGE_SIZE; i += 4*sizeof(*mapping)) { seq_printf(m, "%016llx:", i + ((map->start_pfn + @@ -160,11 +162,15 @@ static int debug_mem_open(struct inode *i, struct file *file) int ret; ret = seq_open(file, &ops); - if (ret) return ret; mem_data = kmalloc(sizeof(*mem_data), GFP_KERNEL); + if (!mem_data) { + ret = -ENOMEM; + goto out; + } + mem_data->kctx = kctx; INIT_LIST_HEAD(&mem_data->mapping_list); @@ -184,6 +190,11 @@ static int debug_mem_open(struct inode *i, struct file *file) continue; mapping = kmalloc(sizeof(*mapping), GFP_KERNEL); + if (!mapping) { + ret = -ENOMEM; + kbase_gpu_vm_unlock(kctx); + goto out; + } mapping->alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); mapping->start_pfn = reg->start_pfn; @@ -197,6 +208,22 @@ static int debug_mem_open(struct inode *i, struct file *file) ((struct seq_file *)file->private_data)->private = mem_data; return 0; + +out: + if (mem_data) { + while (!list_empty(&mem_data->mapping_list)) { + struct debug_mem_mapping *mapping; + + mapping = list_first_entry(&mem_data->mapping_list, + struct debug_mem_mapping, node); + kbase_mem_phy_alloc_put(mapping->alloc); + list_del(&mapping->node); + kfree(mapping); + } + fput(kctx_file); + } + seq_release(i, file); + return ret; } static int debug_mem_release(struct inode *inode, struct file *file) diff --git a/drivers/gpu/arm/midgard/mali_kbase_defs.h b/drivers/gpu/arm/midgard/mali_kbase_defs.h index c6008dbbc03c..d0eae8e3486b 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_defs.h +++ b/drivers/gpu/arm/midgard/mali_kbase_defs.h @@ -53,6 +53,8 @@ #include "sync.h" #endif /* CONFIG_SYNC */ +#include "mali_kbase_dma_fence.h" + #ifdef CONFIG_DEBUG_FS #include #endif /* CONFIG_DEBUG_FS */ @@ -350,6 +352,59 @@ struct kbase_jd_atom { struct sync_fence *fence; struct sync_fence_waiter sync_waiter; #endif /* CONFIG_SYNC */ +#ifdef CONFIG_MALI_DMA_FENCE + struct { + /* This points to the dma-buf fence for this atom. If this is + * NULL then there is no fence for this atom and the other + * fields related to dma_fence may have invalid data. + * + * The context and seqno fields contain the details for this + * fence. + * + * This fence is signaled when the katom is completed, + * regardless of the event_code of the katom (signal also on + * failure). + */ + struct fence *fence; + /* The dma-buf fence context number for this atom. A unique + * context number is allocated to each katom in the context on + * context creation. + */ + unsigned int context; + /* The dma-buf fence sequence number for this atom. This is + * increased every time this katom uses a dma-buf fence. + */ + atomic_t seqno; + /* This contains a list of all callbacks set up to wait on + * other fences. This atom must be held back from JS until all + * these callbacks have been called and dep_count has reached + * 0. The initial value of dep_count must be equal to the + * number of callbacks on this list. + * + * This list is protected by jctx.lock. Callbacks are added to + * this list when the atom is built and the waits are set up.
+ * All the callbacks then stay on the list until all callbacks + * have been called and the atom is queued, or cancelled, and + * then all callbacks are taken off the list and freed. + */ + struct list_head callbacks; + /* Atomic counter of number of outstanding dma-buf fence + * dependencies for this atom. When dep_count reaches 0 the + * atom may be queued. + * + * The special value "-1" may only be set after the count + * reaches 0, while holding jctx.lock. This indicates that the + * atom has been handled, either queued in JS or cancelled. + * + * If anyone but the dma-fence worker sets this to -1 they must + * ensure that any potentially queued worker has + * completed before allowing the atom to be marked as unused. + * This can be done by flushing the fence work queue: + * kctx->dma_fence.wq. + */ + atomic_t dep_count; + } dma_fence; +#endif /* CONFIG_MALI_DMA_FENCE */ /* Note: refer to kbasep_js_atom_retained_state, which will take a copy of some of the following members */ enum base_jd_event_code event_code; @@ -799,10 +854,12 @@ struct kbase_device { u64 reg_start; size_t reg_size; void __iomem *reg; + struct { int irq; int flags; } irqs[3]; + struct clk *clock; #ifdef CONFIG_REGULATOR struct regulator *regulator; @@ -817,7 +874,7 @@ struct kbase_device { atomic_t serving_gpu_irq; atomic_t serving_mmu_irq; spinlock_t reg_op_lock; -#endif /* CONFIG_MALI_NO_MALI */ +#endif /* CONFIG_MALI_NO_MALI */ struct kbase_pm_device_data pm; struct kbasep_js_device_data js_data; @@ -982,6 +1039,7 @@ struct kbase_device { wait_queue_head_t job_fault_resume_wq; struct workqueue_struct *job_fault_resume_workq; struct list_head job_fault_event_list; + spinlock_t job_fault_event_lock; struct kbase_context *kctx_fault; #if !MALI_CUSTOMER_RELEASE @@ -1203,6 +1261,12 @@ struct kbase_context { #ifdef CONFIG_KDS struct list_head waiting_kds_resource; #endif +#ifdef CONFIG_MALI_DMA_FENCE + struct { + struct list_head waiting_resource; + struct workqueue_struct *wq; + } dma_fence; +#endif /* CONFIG_MALI_DMA_FENCE */ /** This is effectively part of the Run Pool, because it only has a valid * setting (!=KBASEP_AS_NR_INVALID) whilst the context is scheduled in * @@ -1318,8 +1382,6 @@ struct kbase_context { * which is mapped. * @gpu_addr: The GPU virtual address the resource is * mapped to. - * @refcount: Refcount to keep track of the number of - * active mappings. * * External resources can be mapped into multiple contexts as well as the same * context multiple times. @@ -1327,7 +1389,7 @@ struct kbase_context { * information to it as it could be removed under our feet leaving external * resources pinned. * This metadata structure binds a single external resource to a single - * context, ensuring that per context refcount is tracked separately so it can + * context, ensuring that per context mapping is tracked separately so it can * be overridden when needed and abuses by the application (freeing the resource * multiple times) don't effect the refcount of the physical allocation. */ @@ -1335,7 +1397,6 @@ struct kbase_ctx_ext_res_meta { struct list_head ext_res_node; struct kbase_mem_phy_alloc *alloc; u64 gpu_addr; - u64 refcount; }; enum kbase_reg_access_type { diff --git a/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c b/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c new file mode 100644 index 000000000000..29f3324a9ddd --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c @@ -0,0 +1,524 @@ +/* + * + * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms * of such GNU licence. * * A copy of the licence is included with the program, and can also be obtained * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, * Boston, MA 02110-1301, USA. * */ + + + + +/* Include mali_kbase_dma_fence.h before checking for CONFIG_MALI_DMA_FENCE as + * it will be set there. + */ +#include "mali_kbase_dma_fence.h" + +#include <linux/atomic.h> +#include <linux/fence.h> +#include <linux/list.h> +#include <linux/lockdep.h> +#include <linux/mutex.h> +#include <linux/reservation.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/workqueue.h> +#include <linux/ww_mutex.h> + +#include <mali_kbase.h> + + +/* Spin lock protecting all Mali fences as fence->lock. */ +static DEFINE_SPINLOCK(kbase_dma_fence_lock); + + +static void +kbase_dma_fence_waiters_add(struct kbase_jd_atom *katom) +{ + struct kbase_context *kctx = katom->kctx; + + list_add_tail(&katom->queue, &kctx->dma_fence.waiting_resource); +} + +void +kbase_dma_fence_waiters_remove(struct kbase_jd_atom *katom) +{ + list_del(&katom->queue); +} + +static const char * +kbase_dma_fence_get_driver_name(struct fence *fence) +{ + return kbase_drv_name; +} + +static const char * +kbase_dma_fence_get_timeline_name(struct fence *fence) +{ + return kbase_timeline_name; +} + +static bool +kbase_dma_fence_enable_signaling(struct fence *fence) +{ + /* If in the future we need to add code here, remember + * to get a reference to the fence and release it when signaling + * as stated in fence.h + */ + return true; +} + +static void +kbase_dma_fence_fence_value_str(struct fence *fence, char *str, int size) +{ + snprintf(str, size, "%u", fence->seqno); +} + +static const struct fence_ops kbase_dma_fence_ops = { + .get_driver_name = kbase_dma_fence_get_driver_name, + .get_timeline_name = kbase_dma_fence_get_timeline_name, + .enable_signaling = kbase_dma_fence_enable_signaling, + /* Use the default wait */ + .wait = fence_default_wait, + .fence_value_str = kbase_dma_fence_fence_value_str, +}; + +static struct fence * +kbase_dma_fence_new(unsigned int context, unsigned int seqno) +{ + struct fence *fence; + + fence = kzalloc(sizeof(*fence), GFP_KERNEL); + if (!fence) + return NULL; + + fence_init(fence, + &kbase_dma_fence_ops, + &kbase_dma_fence_lock, + context, + seqno); + + return fence; +} + +static int +kbase_dma_fence_lock_reservations(struct kbase_dma_fence_resv_info *info, + struct ww_acquire_ctx *ctx) +{ + struct reservation_object *content_res = NULL; + unsigned int content_res_idx = 0; + unsigned int r; + int err = 0; + + ww_acquire_init(ctx, &reservation_ww_class); + +retry: + for (r = 0; r < info->dma_fence_resv_count; r++) { + if (info->resv_objs[r] == content_res) { + content_res = NULL; + continue; + } + + err = ww_mutex_lock(&info->resv_objs[r]->lock, ctx); + if (err) + goto error; + } + + ww_acquire_done(ctx); + return err; + +error: + content_res_idx = r; + + /* Unlock the ones we have already locked */ + while (r--) + ww_mutex_unlock(&info->resv_objs[r]->lock); + + if (content_res) + ww_mutex_unlock(&content_res->lock); + + /* If we deadlock try with lock_slow and retry */ + if (err == -EDEADLK) { + content_res = info->resv_objs[content_res_idx]; + ww_mutex_lock_slow(&content_res->lock, ctx); + goto retry; + } + + /* If we are here the function failed */ + ww_acquire_fini(ctx); + return err; +} + +static void +kbase_dma_fence_unlock_reservations(struct kbase_dma_fence_resv_info *info, + struct ww_acquire_ctx *ctx) +{ +
unsigned int r; + + for (r = 0; r < info->dma_fence_resv_count; r++) + ww_mutex_unlock(&info->resv_objs[r]->lock); + ww_acquire_fini(ctx); +} + +/** + * kbase_dma_fence_free_callbacks - Free dma-fence callbacks on a katom + * @katom: Pointer to katom + * + * This function will free all fence callbacks on the katom's list of + * callbacks. Callbacks that have not yet been called, because their fence + * hasn't yet signaled, will first be removed from the fence. + * + * Locking: jctx.lock must be held while accessing the + * katom->dma_fence.callbacks list. + */ +static void +kbase_dma_fence_free_callbacks(struct kbase_jd_atom *katom) +{ + struct kbase_dma_fence_cb *cb, *tmp; + + lockdep_assert_held(&katom->kctx->jctx.lock); + + /* Clean up and free callbacks. */ + list_for_each_entry_safe(cb, tmp, &katom->dma_fence.callbacks, node) { + bool ret; + + /* Cancel callbacks that haven't been called yet. */ + ret = fence_remove_callback(cb->fence, &cb->fence_cb); + if (ret) { + /* Fence had not signaled, clean up after + * canceling. + */ + atomic_dec(&katom->dma_fence.dep_count); + } + + fence_put(cb->fence); + list_del(&cb->node); + kfree(cb); + } +} + +/** + * kbase_dma_fence_cancel_atom() - Cancels waiting on an atom + * @katom: Katom to cancel + * + * Locking: jctx.lock must be held while accessing the + * katom->dma_fence.callbacks list. + */ +static void +kbase_dma_fence_cancel_atom(struct kbase_jd_atom *katom) +{ + lockdep_assert_held(&katom->kctx->jctx.lock); + + /* Cancel callbacks and clean up. */ + kbase_dma_fence_free_callbacks(katom); + + KBASE_DEBUG_ASSERT(atomic_read(&katom->dma_fence.dep_count) == 0); + + /* Mark the atom as handled in case all fences signaled just before + * canceling the callbacks and the worker was queued. + */ + atomic_set(&katom->dma_fence.dep_count, -1); + + /* Prevent jd_done_nolock from being called twice on an atom when + * there is a race between job completion and cancellation. + */ + + if (katom->status == KBASE_JD_ATOM_STATE_QUEUED) { + /* Wait was cancelled - zap the atom */ + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + if (jd_done_nolock(katom, NULL)) + kbase_js_sched_all(katom->kctx->kbdev); + } +} + +/** + * kbase_dma_fence_work() - Worker thread called when a fence is signaled + * @pwork: work_struct containing a pointer to a katom + * + * This function will clean up the atom's callbacks and mark all of its + * dependencies as satisfied. + */ +static void +kbase_dma_fence_work(struct work_struct *pwork) +{ + struct kbase_jd_atom *katom; + struct kbase_jd_context *ctx; + + katom = container_of(pwork, struct kbase_jd_atom, work); + ctx = &katom->kctx->jctx; + + mutex_lock(&ctx->lock); + if (atomic_read(&katom->dma_fence.dep_count) != 0) + goto out; + + atomic_set(&katom->dma_fence.dep_count, -1); + + /* Remove atom from list of dma-fence waiting atoms. */ + kbase_dma_fence_waiters_remove(katom); + /* Cleanup callbacks. */ + kbase_dma_fence_free_callbacks(katom); + /* Queue atom on GPU.
*/ + kbase_jd_dep_clear_locked(katom); + +out: + mutex_unlock(&ctx->lock); +} + +static int +kbase_dma_fence_add_callback(struct kbase_jd_atom *katom, + struct fence *fence, + fence_func_t callback) +{ + int err = 0; + struct kbase_dma_fence_cb *kbase_fence_cb; + + kbase_fence_cb = kmalloc(sizeof(*kbase_fence_cb), GFP_KERNEL); + if (!kbase_fence_cb) + return -ENOMEM; + + fence_get(fence); + + kbase_fence_cb->fence = fence; + kbase_fence_cb->katom = katom; + INIT_LIST_HEAD(&kbase_fence_cb->node); + + err = fence_add_callback(fence, &kbase_fence_cb->fence_cb, callback); + if (err == -ENOENT) { + /* Fence signaled, clear the error and return */ + err = 0; + kbase_fence_cb->fence = NULL; + fence_put(fence); + kfree(kbase_fence_cb); + } else if (err) { + /* Do nothing, just return the error */ + fence_put(fence); + kfree(kbase_fence_cb); + } else { + atomic_inc(&katom->dma_fence.dep_count); + /* Add callback to katom's list of callbacks */ + list_add(&kbase_fence_cb->node, &katom->dma_fence.callbacks); + } + + return err; +} + +static void +kbase_dma_fence_cb(struct fence *fence, struct fence_cb *cb) +{ + struct kbase_dma_fence_cb *kcb = container_of(cb, + struct kbase_dma_fence_cb, + fence_cb); + struct kbase_jd_atom *katom = kcb->katom; + struct kbase_context *kctx = katom->kctx; + + /* If the atom is zapped, dep_count will be forced to a negative number, + * preventing this callback from ever scheduling work, which in turn + * would reschedule the atom. + */ + if (atomic_dec_and_test(&katom->dma_fence.dep_count)) { + bool ret; + + INIT_WORK(&katom->work, kbase_dma_fence_work); + ret = queue_work(kctx->dma_fence.wq, &katom->work); + /* Warn if work was already queued, that should not happen. */ + WARN_ON(!ret); + } +} + +static int +kbase_dma_fence_add_reservation_callback(struct kbase_jd_atom *katom, + struct reservation_object *resv, + bool exclusive) +{ + struct fence *excl_fence = NULL; + struct fence **shared_fences = NULL; + unsigned int shared_count = 0; + int err, i; + + err = reservation_object_get_fences_rcu(resv, + &excl_fence, + &shared_count, + &shared_fences); + if (err) + return err; + + if (excl_fence) { + err = kbase_dma_fence_add_callback(katom, + excl_fence, + kbase_dma_fence_cb); + if (err) + goto error; + } + + if (exclusive) { + for (i = 0; i < shared_count; i++) { + err = kbase_dma_fence_add_callback(katom, + shared_fences[i], + kbase_dma_fence_cb); + if (err) + goto error; + } + } + kfree(shared_fences); + + return err; + +error: + /* Cancel and clean up all callbacks that were set up before the error.
+ */ + kbase_dma_fence_free_callbacks(katom); + kfree(shared_fences); + + return err; +} + +void kbase_dma_fence_add_reservation(struct reservation_object *resv, + struct kbase_dma_fence_resv_info *info, + bool exclusive) +{ + unsigned int i; + + for (i = 0; i < info->dma_fence_resv_count; i++) { + /* Duplicate resource, ignore */ + if (info->resv_objs[i] == resv) + return; + } + + info->resv_objs[info->dma_fence_resv_count] = resv; + if (exclusive) + set_bit(info->dma_fence_resv_count, + info->dma_fence_excl_bitmap); + (info->dma_fence_resv_count)++; +} + +int kbase_dma_fence_wait(struct kbase_jd_atom *katom, + struct kbase_dma_fence_resv_info *info) +{ + int err, i; + struct fence *fence; + struct ww_acquire_ctx ww_ctx; + + lockdep_assert_held(&katom->kctx->jctx.lock); + + atomic_set(&katom->dma_fence.dep_count, 1); + fence = kbase_dma_fence_new(katom->dma_fence.context, + atomic_inc_return(&katom->dma_fence.seqno)); + if (!fence) { + err = -ENOMEM; + dev_err(katom->kctx->kbdev->dev, + "Error %d creating fence.\n", err); + return err; + } + + katom->dma_fence.fence = fence; + + err = kbase_dma_fence_lock_reservations(info, &ww_ctx); + if (err) { + dev_err(katom->kctx->kbdev->dev, + "Error %d locking reservations.\n", err); + return err; + } + + for (i = 0; i < info->dma_fence_resv_count; i++) { + struct reservation_object *obj = info->resv_objs[i]; + + if (!test_bit(i, info->dma_fence_excl_bitmap)) { + err = reservation_object_reserve_shared(obj); + if (err) { + dev_err(katom->kctx->kbdev->dev, + "Error %d reserving space for shared fence.\n", err); + goto end; + } + + err = kbase_dma_fence_add_reservation_callback(katom, obj, false); + if (err) { + dev_err(katom->kctx->kbdev->dev, + "Error %d adding reservation to callback.\n", err); + goto end; + } + + reservation_object_add_shared_fence(obj, katom->dma_fence.fence); + } else { + err = kbase_dma_fence_add_reservation_callback(katom, obj, true); + if (err) { + dev_err(katom->kctx->kbdev->dev, + "Error %d adding reservation to callback.\n", err); + goto end; + } + + reservation_object_add_excl_fence(obj, katom->dma_fence.fence); + } + } + +end: + kbase_dma_fence_unlock_reservations(info, &ww_ctx); + + if (!err) { + /* Test if the callbacks are already triggered */ + if (atomic_dec_and_test(&katom->dma_fence.dep_count)) { + atomic_set(&katom->dma_fence.dep_count, -1); + kbase_dma_fence_free_callbacks(katom); + } else { + /* Add katom to the list of dma-buf fence waiting atoms + * only if it is still waiting. + */ + kbase_dma_fence_waiters_add(katom); + } + } + + return err; +} + +void kbase_dma_fence_cancel_all_atoms(struct kbase_context *kctx) +{ + struct kbase_jd_atom *katom, *katom_tmp; + + list_for_each_entry_safe(katom, katom_tmp, + &kctx->dma_fence.waiting_resource, queue) { + kbase_dma_fence_waiters_remove(katom); + kbase_dma_fence_cancel_atom(katom); + } +} + +void kbase_dma_fence_cancel_callbacks(struct kbase_jd_atom *katom) +{ + /* Cancel callbacks and clean up. */ + kbase_dma_fence_free_callbacks(katom); +} + +void kbase_dma_fence_signal(struct kbase_jd_atom *katom) +{ + if (!katom->dma_fence.fence) + return; + + KBASE_DEBUG_ASSERT(atomic_read(&katom->dma_fence.dep_count) == -1); + + /* Signal the atom's fence. 
+
+void kbase_dma_fence_signal(struct kbase_jd_atom *katom)
+{
+	if (!katom->dma_fence.fence)
+		return;
+
+	KBASE_DEBUG_ASSERT(atomic_read(&katom->dma_fence.dep_count) == -1);
+
+	/* Signal the atom's fence. */
+	fence_signal(katom->dma_fence.fence);
+	fence_put(katom->dma_fence.fence);
+	katom->dma_fence.fence = NULL;
+
+	kbase_dma_fence_free_callbacks(katom);
+}
+
+void kbase_dma_fence_term(struct kbase_context *kctx)
+{
+	destroy_workqueue(kctx->dma_fence.wq);
+	kctx->dma_fence.wq = NULL;
+}
+
+int kbase_dma_fence_init(struct kbase_context *kctx)
+{
+	INIT_LIST_HEAD(&kctx->dma_fence.waiting_resource);
+
+	kctx->dma_fence.wq = alloc_workqueue("mali-fence-%d",
+					     WQ_UNBOUND, 1, kctx->pid);
+	if (!kctx->dma_fence.wq)
+		return -ENOMEM;
+
+	return 0;
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_dma_fence.h b/drivers/gpu/arm/midgard/mali_kbase_dma_fence.h
new file mode 100644
index 000000000000..3b0a69b3dd03
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_dma_fence.h
@@ -0,0 +1,150 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_DMA_FENCE_H_
+#define _KBASE_DMA_FENCE_H_
+
+#ifdef CONFIG_MALI_DMA_FENCE
+
+#include <linux/fence.h>
+#include <linux/list.h>
+#include <linux/reservation.h>
+
+
+/* Forward declaration from mali_kbase_defs.h */
+struct kbase_jd_atom;
+struct kbase_context;
+
+/**
+ * struct kbase_dma_fence_cb - Mali dma-fence callback data struct
+ * @fence_cb: Callback function
+ * @katom:    Pointer to katom that is waiting on this callback
+ * @fence:    Pointer to the fence object on which this callback is waiting
+ * @node:     List head for linking this callback to the katom
+ */
+struct kbase_dma_fence_cb {
+	struct fence_cb fence_cb;
+	struct kbase_jd_atom *katom;
+	struct fence *fence;
+	struct list_head node;
+};
+
+/**
+ * struct kbase_dma_fence_resv_info - Structure with list of reservation objects
+ * @resv_objs:             Array of reservation objects to attach the
+ *                         new fence to.
+ * @dma_fence_resv_count:  Number of reservation objects in the array.
+ * @dma_fence_excl_bitmap: Specifies which resv_objs are exclusive.
+ *
+ * This is used by some functions to pass around a collection of data about
+ * reservation objects.
+ */
+struct kbase_dma_fence_resv_info {
+	struct reservation_object **resv_objs;
+	unsigned int dma_fence_resv_count;
+	unsigned long *dma_fence_excl_bitmap;
+};
+
+/**
+ * kbase_dma_fence_add_reservation() - Adds a resv to the array of resv_objs
+ * @resv:      Reservation object to add to the array.
+ * @info:      Pointer to struct with current reservation info
+ * @exclusive: Boolean indicating if exclusive access is needed
+ *
+ * The function adds a new reservation_object to an existing array of
+ * reservation_objects. At the same time it keeps track of which objects
+ * require exclusive access in dma_fence_excl_bitmap.
+ */
+void kbase_dma_fence_add_reservation(struct reservation_object *resv,
+				     struct kbase_dma_fence_resv_info *info,
+				     bool exclusive);
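
For reference, the expected call sequence for a submitter is: size the arrays to the number of external resources, collect each buffer's reservation object with kbase_dma_fence_add_reservation(), then hand the whole set to kbase_dma_fence_wait(). A condensed sketch of that flow (editorial illustration mirroring the mali_kbase_jd.c hunk later in this patch; atom, bufs, gpu_writes and nr_bufs are hypothetical, and real callers must also undo fence callbacks when the wait fails):

static int example_collect_and_wait(struct kbase_jd_atom *atom,
				    struct dma_buf **bufs,
				    bool *gpu_writes, unsigned int nr_bufs)
{
	struct kbase_dma_fence_resv_info info = {
		.dma_fence_resv_count = 0,
	};
	unsigned int i;
	int err = -ENOMEM;

	info.resv_objs = kmalloc_array(nr_bufs, sizeof(*info.resv_objs),
				       GFP_KERNEL);
	if (!info.resv_objs)
		return err;

	info.dma_fence_excl_bitmap = kcalloc(BITS_TO_LONGS(nr_bufs),
					     sizeof(unsigned long), GFP_KERNEL);
	if (!info.dma_fence_excl_bitmap)
		goto out_free;

	/* A buffer the GPU will write needs the exclusive fence slot. */
	for (i = 0; i < nr_bufs; i++)
		if (bufs[i]->resv)
			kbase_dma_fence_add_reservation(bufs[i]->resv, &info,
							gpu_writes[i]);

	err = 0;
	if (info.dma_fence_resv_count)
		err = kbase_dma_fence_wait(atom, &info);

	kfree(info.dma_fence_excl_bitmap);
out_free:
	kfree(info.resv_objs);
	return err;
}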
+
+/**
+ * kbase_dma_fence_wait() - Creates a new fence and attaches it to the resv_objs
+ * @katom: Katom with the external dependency.
+ * @info:  Pointer to struct with current reservation info
+ *
+ * Return: 0 on success, or an error code on failure.
+ */
+int kbase_dma_fence_wait(struct kbase_jd_atom *katom,
+			 struct kbase_dma_fence_resv_info *info);
+
+/**
+ * kbase_dma_fence_cancel_all_atoms() - Cancel all dma-fence blocked atoms on kctx
+ * @kctx: Pointer to kbase context
+ *
+ * This function will cancel and clean up all katoms on @kctx that are waiting
+ * on dma-buf fences.
+ *
+ * Locking: jctx.lock needs to be held when calling this function.
+ */
+void kbase_dma_fence_cancel_all_atoms(struct kbase_context *kctx);
+
+/**
+ * kbase_dma_fence_cancel_callbacks() - Cancel only callbacks on katom
+ * @katom: Pointer to katom whose callbacks are to be canceled
+ *
+ * This function cancels all dma-buf fence callbacks on @katom, but does not
+ * cancel the katom itself.
+ *
+ * The caller is responsible for ensuring that jd_done_nolock is called on
+ * @katom.
+ *
+ * Locking: jctx.lock must be held when calling this function.
+ */
+void kbase_dma_fence_cancel_callbacks(struct kbase_jd_atom *katom);
+
+/**
+ * kbase_dma_fence_signal() - Signal katom's fence and clean up after wait
+ * @katom: Pointer to katom to signal and clean up
+ *
+ * This function will signal the @katom's fence, if it has one, and clean up
+ * the callback data from the katom's wait on earlier fences.
+ *
+ * Locking: jctx.lock must be held while calling this function.
+ */
+void kbase_dma_fence_signal(struct kbase_jd_atom *katom);
+
+/**
+ * kbase_dma_fence_term() - Terminate Mali dma-fence context
+ * @kctx: kbase context to terminate
+ */
+void kbase_dma_fence_term(struct kbase_context *kctx);
+
+/**
+ * kbase_dma_fence_init() - Initialize Mali dma-fence context
+ * @kctx: kbase context to initialize
+ *
+ * Return: 0 on success, or -ENOMEM on allocation failure.
+ */
+int kbase_dma_fence_init(struct kbase_context *kctx);
+
+/**
+ * kbase_dma_fence_waiters_remove() - Remove katom from dma-fence wait list
+ * @katom: Pointer to katom to remove from list
+ */
+void kbase_dma_fence_waiters_remove(struct kbase_jd_atom *katom);
+
+#else /* CONFIG_MALI_DMA_FENCE */
+/* Dummy functions for when dma-buf fence isn't enabled. */
+
+static inline int kbase_dma_fence_init(struct kbase_context *kctx)
+{
+	return 0;
+}
+
+static inline void kbase_dma_fence_term(struct kbase_context *kctx) {}
+#endif /* CONFIG_MALI_DMA_FENCE */
+#endif
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_api.c b/drivers/gpu/arm/midgard/mali_kbase_gator_api.c
index 061564104b67..b9077ea78e5d 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_gator_api.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_gator_api.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -28,18 +28,23 @@ #define MALI_BYTES_PER_COUNTER 4 struct kbase_gator_hwcnt_handles { - struct kbase_device *kbdev; - struct kbase_context *kctx; - u64 hwcnt_gpu_va; - void *hwcnt_cpu_va; - struct kbase_vmap_struct hwcnt_map; + struct kbase_device *kbdev; + struct kbase_vinstr_client *vinstr_cli; + void *vinstr_buffer; + struct work_struct dump_work; + int dump_complete; + spinlock_t dump_lock; }; +static void dump_worker(struct work_struct *work); + const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_counters) { - uint32_t gpu_id; const char * const *hardware_counters; struct kbase_device *kbdev; + uint32_t gpu_id; + uint32_t product_id; + uint32_t count; if (!total_counters) return NULL; @@ -50,58 +55,78 @@ const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_counters) return NULL; gpu_id = kbdev->gpu_props.props.core_props.product_id; - - switch (gpu_id) { - /* If we are using a Mali-T60x device */ - case GPU_ID_PI_T60X: - hardware_counters = hardware_counters_mali_t60x; - *total_counters = ARRAY_SIZE(hardware_counters_mali_t60x); - break; - /* If we are using a Mali-T62x device */ - case GPU_ID_PI_T62X: - hardware_counters = hardware_counters_mali_t62x; - *total_counters = ARRAY_SIZE(hardware_counters_mali_t62x); - break; - /* If we are using a Mali-T72x device */ - case GPU_ID_PI_T72X: - hardware_counters = hardware_counters_mali_t72x; - *total_counters = ARRAY_SIZE(hardware_counters_mali_t72x); - break; - /* If we are using a Mali-T76x device */ - case GPU_ID_PI_T76X: - hardware_counters = hardware_counters_mali_t76x; - *total_counters = ARRAY_SIZE(hardware_counters_mali_t76x); - break; - /* If we are using a Mali-T82x device */ - case GPU_ID_PI_T82X: - hardware_counters = hardware_counters_mali_t82x; - *total_counters = ARRAY_SIZE(hardware_counters_mali_t82x); - break; - /* If we are using a Mali-T83x device */ - case GPU_ID_PI_T83X: - hardware_counters = hardware_counters_mali_t83x; - *total_counters = ARRAY_SIZE(hardware_counters_mali_t83x); - break; - /* If we are using a Mali-T86x device */ - case GPU_ID_PI_T86X: - hardware_counters = hardware_counters_mali_t86x; - *total_counters = ARRAY_SIZE(hardware_counters_mali_t86x); - break; - /* If we are using a Mali-T88x device */ - case GPU_ID_PI_TFRX: - hardware_counters = hardware_counters_mali_t88x; - *total_counters = ARRAY_SIZE(hardware_counters_mali_t88x); - break; - default: - hardware_counters = NULL; - *total_counters = 0; - dev_err(kbdev->dev, "Unrecognized gpu ID: %u\n", gpu_id); - break; + product_id = gpu_id & GPU_ID_VERSION_PRODUCT_ID; + product_id >>= GPU_ID_VERSION_PRODUCT_ID_SHIFT; + + if (GPU_ID_IS_NEW_FORMAT(product_id)) { + switch (gpu_id & GPU_ID2_PRODUCT_MODEL) { + case GPU_ID2_PRODUCT_TMIX: + hardware_counters = hardware_counters_mali_tMIx; + count = ARRAY_SIZE(hardware_counters_mali_tMIx); + break; + default: + hardware_counters = NULL; + count = 0; + dev_err(kbdev->dev, "Unrecognized gpu ID: %u\n", + gpu_id); + break; + } + } else { + switch (gpu_id) { + /* If we are using a Mali-T60x device */ + case GPU_ID_PI_T60X: + hardware_counters = hardware_counters_mali_t60x; + count = ARRAY_SIZE(hardware_counters_mali_t60x); + break; + /* If we are using a Mali-T62x device */ + case GPU_ID_PI_T62X: + hardware_counters = hardware_counters_mali_t62x; + count = ARRAY_SIZE(hardware_counters_mali_t62x); + break; + /* If we are using a Mali-T72x 
device */ + case GPU_ID_PI_T72X: + hardware_counters = hardware_counters_mali_t72x; + count = ARRAY_SIZE(hardware_counters_mali_t72x); + break; + /* If we are using a Mali-T76x device */ + case GPU_ID_PI_T76X: + hardware_counters = hardware_counters_mali_t76x; + count = ARRAY_SIZE(hardware_counters_mali_t76x); + break; + /* If we are using a Mali-T82x device */ + case GPU_ID_PI_T82X: + hardware_counters = hardware_counters_mali_t82x; + count = ARRAY_SIZE(hardware_counters_mali_t82x); + break; + /* If we are using a Mali-T83x device */ + case GPU_ID_PI_T83X: + hardware_counters = hardware_counters_mali_t83x; + count = ARRAY_SIZE(hardware_counters_mali_t83x); + break; + /* If we are using a Mali-T86x device */ + case GPU_ID_PI_T86X: + hardware_counters = hardware_counters_mali_t86x; + count = ARRAY_SIZE(hardware_counters_mali_t86x); + break; + /* If we are using a Mali-T88x device */ + case GPU_ID_PI_TFRX: + hardware_counters = hardware_counters_mali_t88x; + count = ARRAY_SIZE(hardware_counters_mali_t88x); + break; + default: + hardware_counters = NULL; + count = 0; + dev_err(kbdev->dev, "Unrecognized gpu ID: %u\n", + gpu_id); + break; + } } /* Release the kbdev reference. */ kbase_release_device(kbdev); + *total_counters = count; + /* If we return a string array take a reference on the module (or fail). */ if (hardware_counters && !try_module_get(THIS_MODULE)) return NULL; @@ -120,13 +145,8 @@ KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_term_names); struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcnt_info *in_out_info) { struct kbase_gator_hwcnt_handles *hand; - struct kbase_uk_hwcnt_setup setup; - int err; + struct kbase_uk_hwcnt_reader_setup setup; uint32_t dump_size = 0, i = 0; - struct kbase_va_region *reg; - u64 flags; - u64 nr_pages; - u16 va_alignment = 0; if (!in_out_info) return NULL; @@ -135,15 +155,19 @@ struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcn if (!hand) return NULL; + INIT_WORK(&hand->dump_work, dump_worker); + spin_lock_init(&hand->dump_lock); + /* Get the first device */ hand->kbdev = kbase_find_device(-1); if (!hand->kbdev) goto free_hand; - /* Create a kbase_context */ - hand->kctx = kbase_create_context(hand->kbdev, true); - if (!hand->kctx) + dump_size = kbase_vinstr_dump_size(hand->kbdev); + hand->vinstr_buffer = kzalloc(dump_size, GFP_KERNEL); + if (!hand->vinstr_buffer) goto release_device; + in_out_info->kernel_dump_buffer = hand->vinstr_buffer; in_out_info->nr_cores = hand->kbdev->gpu_props.num_cores; in_out_info->nr_core_groups = hand->kbdev->gpu_props.num_core_groups; @@ -160,7 +184,7 @@ struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcn in_out_info->nr_core_groups, GFP_KERNEL); if (!in_out_info->hwc_layout) - goto destroy_context; + goto free_vinstr_buffer; dump_size = in_out_info->nr_core_groups * MALI_MAX_NUM_BLOCKS_PER_GROUP * @@ -203,7 +227,7 @@ struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcn in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) * (2 + nr_sc_bits + nr_l2), GFP_KERNEL); if (!in_out_info->hwc_layout) - goto destroy_context; + goto free_vinstr_buffer; dump_size = (2 + nr_sc_bits + nr_l2) * MALI_COUNTERS_PER_BLOCK * MALI_BYTES_PER_COUNTER; @@ -223,58 +247,32 @@ struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcn } in_out_info->nr_hwc_blocks = i; - in_out_info->size = dump_size; - flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_CPU_WR | BASE_MEM_PROT_GPU_WR; - nr_pages = PFN_UP(dump_size); - reg 
= kbase_mem_alloc(hand->kctx, nr_pages, nr_pages, 0, - &flags, &hand->hwcnt_gpu_va, &va_alignment); - if (!reg) - goto free_layout; - - hand->hwcnt_cpu_va = kbase_vmap(hand->kctx, hand->hwcnt_gpu_va, - dump_size, &hand->hwcnt_map); - - if (!hand->hwcnt_cpu_va) - goto free_buffer; - - in_out_info->kernel_dump_buffer = hand->hwcnt_cpu_va; - memset(in_out_info->kernel_dump_buffer, 0, nr_pages * PAGE_SIZE); - - /*setup.dump_buffer = (uintptr_t)in_out_info->kernel_dump_buffer;*/ - setup.dump_buffer = hand->hwcnt_gpu_va; setup.jm_bm = in_out_info->bitmask[0]; setup.tiler_bm = in_out_info->bitmask[1]; setup.shader_bm = in_out_info->bitmask[2]; setup.mmu_l2_bm = in_out_info->bitmask[3]; - - err = kbase_instr_hwcnt_enable(hand->kctx, &setup); - if (err) - goto free_unmap; - - kbase_instr_hwcnt_clear(hand->kctx); + hand->vinstr_cli = kbase_vinstr_hwcnt_kernel_setup(hand->kbdev->vinstr_ctx, + &setup, hand->vinstr_buffer); + if (!hand->vinstr_cli) { + dev_err(hand->kbdev->dev, "Failed to register gator with vinstr core"); + goto free_layout; + } return hand; -free_unmap: - kbase_vunmap(hand->kctx, &hand->hwcnt_map); - -free_buffer: - kbase_mem_free(hand->kctx, hand->hwcnt_gpu_va); - free_layout: kfree(in_out_info->hwc_layout); -destroy_context: - kbase_destroy_context(hand->kctx); +free_vinstr_buffer: + kfree(hand->vinstr_buffer); release_device: kbase_release_device(hand->kbdev); free_hand: kfree(hand); - return NULL; } KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_init); @@ -285,27 +283,39 @@ void kbase_gator_hwcnt_term(struct kbase_gator_hwcnt_info *in_out_info, struct k kfree(in_out_info->hwc_layout); if (opaque_handles) { - kbase_instr_hwcnt_disable(opaque_handles->kctx); - kbase_vunmap(opaque_handles->kctx, &opaque_handles->hwcnt_map); - kbase_mem_free(opaque_handles->kctx, opaque_handles->hwcnt_gpu_va); - kbase_destroy_context(opaque_handles->kctx); + cancel_work_sync(&opaque_handles->dump_work); + kbase_vinstr_detach_client(opaque_handles->vinstr_cli); + kfree(opaque_handles->vinstr_buffer); kbase_release_device(opaque_handles->kbdev); kfree(opaque_handles); } } KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_term); +static void dump_worker(struct work_struct *work) +{ + struct kbase_gator_hwcnt_handles *hand; + + hand = container_of(work, struct kbase_gator_hwcnt_handles, dump_work); + if (!kbase_vinstr_hwc_dump(hand->vinstr_cli, + BASE_HWCNT_READER_EVENT_MANUAL)) { + spin_lock_bh(&hand->dump_lock); + hand->dump_complete = 1; + spin_unlock_bh(&hand->dump_lock); + } else { + schedule_work(&hand->dump_work); + } +} + uint32_t kbase_gator_instr_hwcnt_dump_complete( struct kbase_gator_hwcnt_handles *opaque_handles, uint32_t * const success) { - bool ret_res, success_res; if (opaque_handles && success) { - ret_res = kbase_instr_hwcnt_dump_complete(opaque_handles->kctx, - &success_res); - *success = (uint32_t)success_res; - return (uint32_t)(ret_res != 0); + *success = opaque_handles->dump_complete; + opaque_handles->dump_complete = 0; + return *success; } return 0; } @@ -314,9 +324,7 @@ KBASE_EXPORT_SYMBOL(kbase_gator_instr_hwcnt_dump_complete); uint32_t kbase_gator_instr_hwcnt_dump_irq(struct kbase_gator_hwcnt_handles *opaque_handles) { if (opaque_handles) - return (kbase_instr_hwcnt_request_dump( - opaque_handles->kctx) == 0); - + schedule_work(&opaque_handles->dump_work); return 0; } KBASE_EXPORT_SYMBOL(kbase_gator_instr_hwcnt_dump_irq); diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h index eb76f01b0fda..c247dd698e19 100644 --- 
a/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h +++ b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -2156,4 +2156,8 @@ static const char * const hardware_counters_mali_t88x[] = { "T88x_L2_REPLAY_FULL" }; +#include "mali_kbase_gator_hwcnt_names_tmix.h" + + + #endif diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tmix.h b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tmix.h new file mode 100644 index 000000000000..682f98efcd13 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tmix.h @@ -0,0 +1,291 @@ +/* + * + * (C) COPYRIGHT 2016 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +/* + * This header was autogenerated, it should not be edited. + */ + +#ifndef _KBASE_GATOR_HWCNT_NAMES_TMIX_H_ +#define _KBASE_GATOR_HWCNT_NAMES_TMIX_H_ + +static const char * const hardware_counters_mali_tMIx[] = { + /* Performance counters for the Job Manager */ + "", + "", + "", + "", + "TMIx_MESSAGES_SENT", + "TMIx_MESSAGES_RECEIVED", + "TMIx_GPU_ACTIVE", + "TMIx_IRQ_ACTIVE", + "TMIx_JS0_JOBS", + "TMIx_JS0_TASKS", + "TMIx_JS0_ACTIVE", + "", + "TMIx_JS0_WAIT_READ", + "TMIx_JS0_WAIT_ISSUE", + "TMIx_JS0_WAIT_DEPEND", + "TMIx_JS0_WAIT_FINISH", + "TMIx_JS1_JOBS", + "TMIx_JS1_TASKS", + "TMIx_JS1_ACTIVE", + "", + "TMIx_JS1_WAIT_READ", + "TMIx_JS1_WAIT_ISSUE", + "TMIx_JS1_WAIT_DEPEND", + "TMIx_JS1_WAIT_FINISH", + "TMIx_JS2_JOBS", + "TMIx_JS2_TASKS", + "TMIx_JS2_ACTIVE", + "", + "TMIx_JS2_WAIT_READ", + "TMIx_JS2_WAIT_ISSUE", + "TMIx_JS2_WAIT_DEPEND", + "TMIx_JS2_WAIT_FINISH", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + + /* Performance counters for the Tiler */ + "", + "", + "", + "", + "TMIx_TILER_ACTIVE", + "TMIx_JOBS_PROCESSED", + "TMIx_TRIANGLES", + "TMIx_LINES", + "TMIx_POINTS", + "TMIx_FRONT_FACING", + "TMIx_BACK_FACING", + "TMIx_PRIM_VISIBLE", + "TMIx_PRIM_CULLED", + "TMIx_PRIM_CLIPPED", + "TMIx_PRIM_SAT_CULLED", + "", + "", + "TMIx_BUS_READ", + "", + "TMIx_BUS_WRITE", + "TMIx_LOADING_DESC", + "TMIx_IDVS_POS_SHAD_REQ", + "TMIx_IDVS_POS_SHAD_WAIT", + "TMIx_IDVS_POS_SHAD_STALL", + "TMIx_IDVS_POS_FIFO_FULL", + "TMIx_PREFETCH_STALL", + "TMIx_VCACHE_HIT", + "TMIx_VCACHE_MISS", + "TMIx_VCACHE_LINE_WAIT", + "TMIx_VFETCH_POS_READ_WAIT", + "TMIx_VFETCH_VERTEX_WAIT", + "TMIx_VFETCH_STALL", + "TMIx_PRIMASSY_STALL", + "TMIx_BBOX_GEN_STALL", + "TMIx_IDVS_VBU_HIT", + "TMIx_IDVS_VBU_MISS", + "TMIx_IDVS_VBU_LINE_DEALLOCATE", + "TMIx_IDVS_VAR_SHAD_REQ", + "TMIx_IDVS_VAR_SHAD_STALL", + "TMIx_BINNER_STALL", + "TMIx_ITER_STALL", + "TMIx_COMPRESS_MISS", + "TMIx_COMPRESS_STALL", + "TMIx_PCACHE_HIT", + "TMIx_PCACHE_MISS", + "TMIx_PCACHE_MISS_STALL", + "TMIx_PCACHE_EVICT_STALL", + 
"TMIx_PMGR_PTR_WR_STALL", + "TMIx_PMGR_PTR_RD_STALL", + "TMIx_PMGR_CMD_WR_STALL", + "TMIx_WRBUF_ACTIVE", + "TMIx_WRBUF_HIT", + "TMIx_WRBUF_MISS", + "TMIx_WRBUF_NO_FREE_LINE_STALL", + "TMIx_WRBUF_NO_AXI_ID_STALL", + "TMIx_WRBUF_AXI_STALL", + "", + "", + "", + "TMIx_UTLB_TRANS", + "TMIx_UTLB_TRANS_HIT", + "TMIx_UTLB_TRANS_STALL", + "TMIx_UTLB_TRANS_MISS_DELAY", + "TMIx_UTLB_MMU_REQ", + + /* Performance counters for the Shader Core */ + "", + "", + "", + "", + "TMIx_FRAG_ACTIVE", + "TMIx_FRAG_PRIMITIVES", + "TMIx_FRAG_PRIM_RAST", + "TMIx_FRAG_FPK_ACTIVE", + "TMIx_FRAG_STARVING", + "TMIx_FRAG_WARPS", + "TMIx_FRAG_PARTIAL_WARPS", + "TMIx_FRAG_QUADS_RAST", + "TMIx_FRAG_QUADS_EZS_TEST", + "TMIx_FRAG_QUADS_EZS_UPDATE", + "TMIx_FRAG_QUADS_EZS_KILL", + "TMIx_FRAG_LZS_TEST", + "TMIx_FRAG_LZS_KILL", + "", + "TMIx_FRAG_PTILES", + "TMIx_FRAG_TRANS_ELIM", + "TMIx_QUAD_FPK_KILLER", + "", + "TMIx_COMPUTE_ACTIVE", + "TMIx_COMPUTE_TASKS", + "TMIx_COMPUTE_WARPS", + "TMIx_COMPUTE_STARVING", + "TMIx_EXEC_CORE_ACTIVE", + "TMIx_EXEC_ACTIVE", + "TMIx_EXEC_INSTR_COUNT", + "TMIx_EXEC_INSTR_DIVERGED", + "TMIx_EXEC_INSTR_STARVING", + "TMIx_ARITH_INSTR_SINGLE_FMA", + "TMIx_ARITH_INSTR_DOUBLE", + "TMIx_ARITH_INSTR_MSG", + "TMIx_ARITH_INSTR_MSG_ONLY", + "TMIx_TEX_INSTR", + "TMIx_TEX_INSTR_MIPMAP", + "TMIx_TEX_INSTR_COMPRESSED", + "TMIx_TEX_INSTR_3D", + "TMIx_TEX_INSTR_TRILINEAR", + "TMIx_TEX_COORD_ISSUE", + "TMIx_TEX_COORD_STALL", + "TMIx_TEX_STARVE_CACHE", + "TMIx_TEX_STARVE_FILTER", + "TMIx_LS_MEM_READ_FULL", + "TMIx_LS_MEM_READ_SHORT", + "TMIx_LS_MEM_WRITE_FULL", + "TMIx_LS_MEM_WRITE_SHORT", + "TMIx_LS_MEM_ATOMIC", + "TMIx_VARY_INSTR", + "", + "", + "TMIx_ATTR_INSTR", + "TMIx_ARITH_INSTR_FP_MUL", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + + /* Performance counters for the Memory System */ + "", + "", + "", + "", + "TMIx_MMU_REQUESTS", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "TMIx_L2_RD_MSG_IN", + "TMIx_L2_RD_MSG_IN_STALL", + "TMIx_L2_WR_MSG_IN", + "TMIx_L2_WR_MSG_IN_STALL", + "TMIx_L2_SNP_MSG_IN", + "TMIx_L2_SNP_MSG_IN_STALL", + "TMIx_L2_RD_MSG_OUT", + "TMIx_L2_RD_MSG_OUT_STALL", + "TMIx_L2_WR_MSG_OUT", + "TMIx_L2_ANY_LOOKUP", + "TMIx_L2_READ_LOOKUP", + "TMIx_L2_WRITE_LOOKUP", + "TMIx_L2_EXT_SNOOP_LOOKUP", + "TMIx_L2_EXT_READ", + "TMIx_L2_EXT_READ_NOSNP", + "TMIx_L2_EXT_READ_UNIQUE", + "TMIx_L2_EXT_READ_BEATS", + "TMIx_L2_EXT_AR_STALL", + "TMIx_L2_EXT_AR_CNT_Q1", + "TMIx_L2_EXT_AR_CNT_Q2", + "TMIx_L2_EXT_AR_CNT_Q3", + "", + "", + "", + "", + "", + "TMIx_L2_EXT_WRITE", + "TMIx_L2_EXT_WRITE_NOSNP_FULL", + "TMIx_L2_EXT_WRITE_NOSNP_PTL", + "TMIx_L2_EXT_WRITE_SNP_FULL", + "TMIx_L2_EXT_WRITE_SNP_PTL", + "TMIx_L2_EXT_WRITE_BEATS", + "TMIx_L2_EXT_W_STALL", + "TMIx_L2_EXT_AW_CNT_Q1", + "TMIx_L2_EXT_AW_CNT_Q2", + "TMIx_L2_EXT_AW_CNT_Q3", + "TMIx_L2_EXT_SNOOP", + "TMIx_L2_EXT_SNOOP_STALL", + "TMIx_L2_EXT_SNOOP_RESP_CLEAN", + "TMIx_L2_EXT_SNOOP_RESP_DATA", + "TMIx_L2_EXT_SNOOP_INTERNAL", + "", + "", + "", + "", + "", + "", + "", +}; + +#endif /* _KBASE_GATOR_HWCNT_NAMES_TMIX_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h b/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h index 781375a9a97f..f42e91b6daa1 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h +++ b/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software diff --git a/drivers/gpu/arm/midgard/mali_kbase_jd.c b/drivers/gpu/arm/midgard/mali_kbase_jd.c index c091ffef643f..0ffd8beb29fe 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_jd.c +++ b/drivers/gpu/arm/midgard/mali_kbase_jd.c @@ -34,6 +34,8 @@ #include #include +#include "mali_kbase_dma_fence.h" + #define beenthere(kctx, f, a...) dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a) #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0) @@ -105,6 +107,39 @@ static int jd_run_atom(struct kbase_jd_atom *katom) return kbasep_js_add_job(kctx, katom); } +#if defined(CONFIG_KDS) || defined(CONFIG_MALI_DMA_FENCE) +void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom) +{ + struct kbase_device *kbdev; + + KBASE_DEBUG_ASSERT(katom); + kbdev = katom->kctx->kbdev; + KBASE_DEBUG_ASSERT(kbdev); + + /* Check whether the atom's other dependencies were already met. If + * katom is a GPU atom then the job scheduler may be able to represent + * the dependencies, hence we may attempt to submit it before they are + * met. Other atoms must have had both dependencies resolved. + */ + if (IS_GPU_ATOM(katom) || + (!kbase_jd_katom_dep_atom(&katom->dep[0]) && + !kbase_jd_katom_dep_atom(&katom->dep[1]))) { + /* katom dep complete, attempt to run it */ + bool resched = false; + + resched = jd_run_atom(katom); + + if (katom->status == KBASE_JD_ATOM_STATE_COMPLETED) { + /* The atom has already finished */ + resched |= jd_done_nolock(katom, NULL); + } + + if (resched) + kbase_js_sched_all(kbdev); + } +} +#endif + #ifdef CONFIG_KDS /* Add the katom to the kds waiting list. @@ -137,44 +172,20 @@ static void kds_dep_clear(void *callback_parameter, void *callback_extra_paramet { struct kbase_jd_atom *katom; struct kbase_jd_context *ctx; - struct kbase_device *kbdev; katom = (struct kbase_jd_atom *)callback_parameter; KBASE_DEBUG_ASSERT(katom); + ctx = &katom->kctx->jctx; - kbdev = katom->kctx->kbdev; - KBASE_DEBUG_ASSERT(kbdev); + /* If KDS resource has already been satisfied (e.g. due to zapping) + * do nothing. + */ mutex_lock(&ctx->lock); - - /* KDS resource has already been satisfied (e.g. due to zapping) */ - if (katom->kds_dep_satisfied) - goto out; - - /* This atom's KDS dependency has now been met */ - katom->kds_dep_satisfied = true; - - /* Check whether the atom's other dependencies were already met. If - * katom is a GPU atom then the job scheduler may be able to represent - * the dependencies, hence we may attempt to submit it before they are - * met. Other atoms must have had both dependencies resolved */ - if (IS_GPU_ATOM(katom) || - (!kbase_jd_katom_dep_atom(&katom->dep[0]) && - !kbase_jd_katom_dep_atom(&katom->dep[1]))) { - /* katom dep complete, attempt to run it */ - bool resched = false; - - resched = jd_run_atom(katom); - - if (katom->status == KBASE_JD_ATOM_STATE_COMPLETED) { - /* The atom has already finished */ - resched |= jd_done_nolock(katom, NULL); - } - - if (resched) - kbase_js_sched_all(kbdev); + if (!katom->kds_dep_satisfied) { + katom->kds_dep_satisfied = true; + kbase_jd_dep_clear_locked(katom); } - out: mutex_unlock(&ctx->lock); } @@ -213,6 +224,16 @@ void kbase_jd_free_external_resources(struct kbase_jd_atom *katom) kds_resource_set_release_sync(&katom->kds_rset); } #endif /* CONFIG_KDS */ + +#ifdef CONFIG_MALI_DMA_FENCE + /* Flush dma-fence workqueue to ensure that any callbacks that may have + * been queued are done before continuing. 
+	 * Any successfully completed atom would have had all its callbacks
+	 * completed before the atom was run, so only flush for failed atoms.
+	 */
+	if (katom->event_code != BASE_JD_EVENT_DONE)
+		flush_workqueue(katom->kctx->dma_fence.wq);
+#endif /* CONFIG_MALI_DMA_FENCE */
 }
 
 static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom)
@@ -226,6 +247,10 @@ static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom)
 	katom->kds_dep_satisfied = true;
 #endif /* CONFIG_KDS */
 
+#ifdef CONFIG_MALI_DMA_FENCE
+	kbase_dma_fence_signal(katom);
+#endif /* CONFIG_MALI_DMA_FENCE */
+
 	kbase_gpu_vm_lock(katom->kctx);
 	/* only roll back if extres is non-NULL */
 	if (katom->extres) {
@@ -262,6 +287,11 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st
 	struct kds_resource **kds_resources = NULL;
 	unsigned long *kds_access_bitmap = NULL;
 #endif /* CONFIG_KDS */
+#ifdef CONFIG_MALI_DMA_FENCE
+	struct kbase_dma_fence_resv_info info = {
+		.dma_fence_resv_count = 0,
+	};
+#endif
 	struct base_external_resource *input_extres;
 
 	KBASE_DEBUG_ASSERT(katom);
@@ -297,20 +327,39 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st
 	KBASE_DEBUG_ASSERT(0 != katom->nr_extres);
 	kds_resources = kmalloc_array(katom->nr_extres, sizeof(struct kds_resource *), GFP_KERNEL);
 
-	if (NULL == kds_resources) {
+	if (!kds_resources) {
 		err_ret_val = -ENOMEM;
 		goto early_err_out;
 	}
 
 	KBASE_DEBUG_ASSERT(0 != katom->nr_extres);
-	kds_access_bitmap = kzalloc(sizeof(unsigned long) * ((katom->nr_extres + BITS_PER_LONG - 1) / BITS_PER_LONG), GFP_KERNEL);
-
-	if (NULL == kds_access_bitmap) {
+	kds_access_bitmap = kcalloc(BITS_TO_LONGS(katom->nr_extres),
+				    sizeof(unsigned long),
+				    GFP_KERNEL);
+	if (!kds_access_bitmap) {
 		err_ret_val = -ENOMEM;
 		goto early_err_out;
 	}
 #endif /* CONFIG_KDS */
 
+#ifdef CONFIG_MALI_DMA_FENCE
+	info.resv_objs = kmalloc_array(katom->nr_extres,
+				       sizeof(struct reservation_object *),
+				       GFP_KERNEL);
+	if (!info.resv_objs) {
+		err_ret_val = -ENOMEM;
+		goto early_err_out;
+	}
+
+	info.dma_fence_excl_bitmap = kcalloc(BITS_TO_LONGS(katom->nr_extres),
+					     sizeof(unsigned long),
+					     GFP_KERNEL);
+	if (!info.dma_fence_excl_bitmap) {
+		err_ret_val = -ENOMEM;
+		goto early_err_out;
+	}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
 	/* Take the processes mmap lock */
 	down_read(&current->mm->mmap_sem);
 
@@ -351,6 +400,17 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st
 			goto failed_loop;
 		}
 
+#ifdef CONFIG_MALI_DMA_FENCE
+		if (reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) {
+			struct reservation_object *resv;
+
+			resv = reg->gpu_alloc->imported.umm.dma_buf->resv;
+			if (resv)
+				kbase_dma_fence_add_reservation(resv, &info,
+								exclusive);
+		}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
 		/* finish with updating out array with the data we found */
 		/* NOTE: It is important that this is the last thing we do (or
 		 * at least not before the first write) as we overwrite elements
@@ -391,19 +451,50 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st
 	kfree(kds_access_bitmap);
 #endif /* CONFIG_KDS */
 
+#ifdef CONFIG_MALI_DMA_FENCE
+	if (info.dma_fence_resv_count) {
+		int ret;
+
+		ret = kbase_dma_fence_wait(katom, &info);
+		if (ret < 0)
+			goto failed_dma_fence_setup;
+	}
+
+	kfree(info.resv_objs);
+	kfree(info.dma_fence_excl_bitmap);
+#endif /* CONFIG_MALI_DMA_FENCE */
+
 	/* all done OK */
 	return 0;
 
 /* error handling section */
 
+#ifdef CONFIG_MALI_DMA_FENCE
+failed_dma_fence_setup:
 #ifdef CONFIG_KDS
-	failed_kds_setup:
+	/* If we are
here, dma_fence setup failed but KDS didn't. + * Revert KDS setup if any. + */ + if (kds_res_count) { + mutex_unlock(&katom->kctx->jctx.lock); + kds_resource_set_release_sync(&katom->kds_rset); + mutex_lock(&katom->kctx->jctx.lock); + + kbase_jd_kds_waiters_remove(katom); + katom->kds_dep_satisfied = true; + } +#endif /* CONFIG_KDS */ +#endif /* CONFIG_MALI_DMA_FENCE */ +#ifdef CONFIG_KDS +failed_kds_setup: +#endif +#if defined(CONFIG_KDS) || defined(CONFIG_MALI_DMA_FENCE) /* Lock the processes mmap lock */ down_read(¤t->mm->mmap_sem); /* lock before we unmap */ kbase_gpu_vm_lock(katom->kctx); -#endif /* CONFIG_KDS */ +#endif failed_loop: /* undo the loop work */ @@ -424,6 +515,10 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st kfree(kds_resources); kfree(kds_access_bitmap); #endif /* CONFIG_KDS */ +#ifdef CONFIG_MALI_DMA_FENCE + kfree(info.resv_objs); + kfree(info.dma_fence_excl_bitmap); +#endif return err_ret_val; } @@ -456,6 +551,10 @@ static inline void jd_resolve_dep(struct list_head *out_list, } #endif +#ifdef CONFIG_MALI_DMA_FENCE + kbase_dma_fence_cancel_callbacks(dep_atom); +#endif + dep_atom->event_code = katom->event_code; KBASE_DEBUG_ASSERT(dep_atom->status != KBASE_JD_ATOM_STATE_UNUSED); @@ -470,9 +569,40 @@ static inline void jd_resolve_dep(struct list_head *out_list, } } if (!kbase_jd_katom_dep_atom(&dep_atom->dep[other_d])) { + bool dep_satisfied = true; +#ifdef CONFIG_MALI_DMA_FENCE + int dep_count; + + dep_count = atomic_read(&dep_atom->dma_fence.dep_count); + if (likely(dep_count == -1)) { + dep_satisfied = true; + } else if (dep_count == 0) { + /* + * All fences for this atom has signaled, but + * the worker that will queue the atom has not + * yet run. + * + * Mark the atom as handled by setting + * dep_count to -1 so that the worker doesn't + * queue the atom again. + */ + atomic_set(&dep_atom->dma_fence.dep_count, -1); + /* + * Remove the atom from the list of dma-fence + * waiting atoms. + */ + kbase_dma_fence_waiters_remove(dep_atom); + dep_satisfied = true; + } else { + dep_satisfied = false; + } +#endif /* CONFIG_MALI_DMA_FENCE */ + #ifdef CONFIG_KDS - if (dep_atom->kds_dep_satisfied) + dep_satisfied = dep_satisfied && dep_atom->kds_dep_satisfied; #endif + + if (dep_satisfied) list_add_tail(&dep_atom->dep_item[0], out_list); } } @@ -756,6 +886,9 @@ bool jd_submit_atom(struct kbase_context *kctx, katom->kds_dep_satisfied = true; katom->kds_rset = NULL; #endif /* CONFIG_KDS */ +#ifdef CONFIG_MALI_DMA_FENCE + atomic_set(&katom->dma_fence.dep_count, -1); +#endif /* Don't do anything if there is a mess up with dependencies. This is done in a separate cycle to check both the dependencies at ones, otherwise @@ -868,11 +1001,23 @@ bool jd_submit_atom(struct kbase_context *kctx, kbase_tlstream_tl_ret_atom_ctx(katom, kctx); for (i = 0; i < 2; i++) if (BASE_JD_DEP_TYPE_INVALID != kbase_jd_katom_dep_type( - &katom->dep[i])) + &katom->dep[i])) { kbase_tlstream_tl_dep_atom_atom( (void *)kbase_jd_katom_dep_atom( &katom->dep[i]), (void *)katom); + } else if (BASE_JD_DEP_TYPE_INVALID != + user_atom->pre_dep[i].dependency_type) { + /* Resolved dependency. 
*/ + int dep_atom_number = + user_atom->pre_dep[i].atom_id; + struct kbase_jd_atom *dep_atom = + &jctx->atoms[dep_atom_number]; + + kbase_tlstream_tl_rdep_atom_atom( + (void *)dep_atom, + (void *)katom); + } /* Reject atoms with job chain = NULL, as these cause issues with soft-stop */ if (!katom->jc && (katom->core_req & BASEP_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) { @@ -893,6 +1038,17 @@ bool jd_submit_atom(struct kbase_context *kctx, goto out; } + /* Reject atoms with invalid core requirements */ + if ((katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) && + (katom->core_req & BASE_JD_REQ_EVENT_COALESCE)) { + dev_warn(kctx->kbdev->dev, + "Rejecting atom with invalid core requirements"); + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + katom->core_req &= ~BASE_JD_REQ_EVENT_COALESCE; + ret = jd_done_nolock(katom, NULL); + goto out; + } + /* For invalid priority, be most lenient and choose the default */ sched_prio = kbasep_js_atom_prio_to_sched_prio(user_atom->prio); if (sched_prio == KBASE_JS_ATOM_SCHED_PRIO_INVALID) @@ -949,6 +1105,13 @@ bool jd_submit_atom(struct kbase_context *kctx, } #endif /* CONFIG_KDS */ +#ifdef CONFIG_MALI_DMA_FENCE + if (atomic_read(&katom->dma_fence.dep_count) != -1) { + ret = false; + goto out; + } +#endif /* CONFIG_MALI_DMA_FENCE */ + if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_SOFT_REPLAY) { if (kbase_replay_process(katom)) @@ -1522,8 +1685,19 @@ void kbase_jd_zap_context(struct kbase_context *kctx) } #endif +#ifdef CONFIG_MALI_DMA_FENCE + kbase_dma_fence_cancel_all_atoms(kctx); +#endif + mutex_unlock(&kctx->jctx.lock); +#ifdef CONFIG_MALI_DMA_FENCE + /* Flush dma-fence workqueue to ensure that any callbacks that may have + * been queued are done before continuing. + */ + flush_workqueue(kctx->dma_fence.wq); +#endif + kbase_jm_wait_for_zero_jobs(kctx); } @@ -1539,7 +1713,8 @@ int kbase_jd_init(struct kbase_context *kctx) KBASE_DEBUG_ASSERT(kctx); - kctx->jctx.job_done_wq = alloc_workqueue("mali_jd", 0, 1); + kctx->jctx.job_done_wq = alloc_workqueue("mali_jd", + WQ_HIGHPRI | WQ_UNBOUND, 1); if (NULL == kctx->jctx.job_done_wq) { mali_err = -ENOMEM; goto out1; @@ -1554,6 +1729,12 @@ int kbase_jd_init(struct kbase_context *kctx) /* Catch userspace attempting to use an atom which doesn't exist as a pre-dependency */ kctx->jctx.atoms[i].event_code = BASE_JD_EVENT_JOB_INVALID; kctx->jctx.atoms[i].status = KBASE_JD_ATOM_STATE_UNUSED; + +#ifdef CONFIG_MALI_DMA_FENCE + kctx->jctx.atoms[i].dma_fence.context = fence_context_alloc(1); + atomic_set(&kctx->jctx.atoms[i].dma_fence.seqno, 0); + INIT_LIST_HEAD(&kctx->jctx.atoms[i].dma_fence.callbacks); +#endif } mutex_init(&kctx->jctx.lock); diff --git a/drivers/gpu/arm/midgard/mali_kbase_js.c b/drivers/gpu/arm/midgard/mali_kbase_js.c index 83228c05883b..c1a8e9ceb2fa 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_js.c +++ b/drivers/gpu/arm/midgard/mali_kbase_js.c @@ -2665,6 +2665,7 @@ static void js_return_worker(struct work_struct *data) WARN_ON(!kctx->ctx_runnable_ref); kctx->ctx_runnable_ref = false; atomic_dec(&kbdev->js_data.nr_contexts_runnable); + timer_sync = true; } if (kctx->as_nr != KBASEP_AS_NR_INVALID && @@ -2763,6 +2764,7 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, WARN_ON(!kctx->ctx_runnable_ref); kctx->ctx_runnable_ref = false; atomic_dec(&kbdev->js_data.nr_contexts_runnable); + timer_sync = true; } } WARN_ON(katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_LL); diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem.c b/drivers/gpu/arm/midgard/mali_kbase_mem.c index 
1484a6a688cc..0c69bf4d5c62 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_mem.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem.c
@@ -777,7 +777,7 @@ void kbase_free_alloced_region(struct kbase_va_region *reg)
 	 * list should it be there.
 	 */
 	kbase_sticky_resource_release(reg->kctx, NULL,
-			reg->start_pfn << PAGE_SHIFT, true);
+			reg->start_pfn << PAGE_SHIFT);
 
 	kbase_mem_phy_alloc_put(reg->cpu_alloc);
 	kbase_mem_phy_alloc_put(reg->gpu_alloc);
@@ -1309,8 +1309,8 @@ int kbase_alloc_phy_pages_helper(
 		size_t nr_pages_requested)
 {
 	int new_page_count __maybe_unused;
+	size_t old_page_count = alloc->nents;
 
-	KBASE_DEBUG_ASSERT(alloc);
 	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
 	KBASE_DEBUG_ASSERT(alloc->imported.kctx);
@@ -1326,9 +1326,18 @@ int kbase_alloc_phy_pages_helper(
 	kbase_process_page_usage_inc(alloc->imported.kctx, nr_pages_requested);
 
 	if (kbase_mem_pool_alloc_pages(&alloc->imported.kctx->mem_pool,
-			nr_pages_requested, alloc->pages + alloc->nents) != 0)
+			nr_pages_requested, alloc->pages + old_page_count) != 0)
 		goto no_alloc;
 
+	/*
+	 * Request a zone cache update; this scans only the new pages and
+	 * appends their information to the zone cache. If the update
+	 * fails then clear the cache so we fall back to doing things
+	 * page by page.
+	 */
+	if (kbase_zone_cache_update(alloc, old_page_count) != 0)
+		kbase_zone_cache_clear(alloc);
+
 	kbase_tlstream_aux_pagesalloc(
 			(u32)alloc->imported.kctx->id,
 			(u64)new_page_count);
@@ -1367,6 +1376,14 @@ int kbase_free_phy_pages_helper(
 
 	syncback = alloc->properties & KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED;
 
+	/*
+	 * Clear the zone cache; we don't expect JIT allocations to be
+	 * shrunk in parts so there is no point trying to optimize for that
+	 * by scanning for the changes caused by freeing this memory and
+	 * updating the existing cache entries.
+ */ + kbase_zone_cache_clear(alloc); + kbase_mem_pool_free_pages(&kctx->mem_pool, nr_pages_to_free, start_free, @@ -1441,6 +1458,8 @@ void kbase_mem_kref_free(struct kref *kref) break; #endif case KBASE_MEM_TYPE_IMPORTED_USER_BUF: + if (alloc->imported.user_buf.mm) + mmdrop(alloc->imported.user_buf.mm); kfree(alloc->imported.user_buf.pages); break; case KBASE_MEM_TYPE_TB:{ @@ -1529,6 +1548,10 @@ bool kbase_check_alloc_flags(unsigned long flags) if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0) return false; + /* BASE_MEM_IMPORT_SHARED is only valid for imported memory */ + if ((flags & BASE_MEM_IMPORT_SHARED) == BASE_MEM_IMPORT_SHARED) + return false; + return true; } @@ -1897,7 +1920,8 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, } else { /* No suitable JIT allocation was found so create a new one */ u64 flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD | - BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF; + BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF | + BASE_MEM_COHERENT_LOCAL; u64 gpu_addr; u16 alignment; @@ -2031,7 +2055,7 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx, long i; int err = -ENOMEM; unsigned long address; - struct task_struct *owner; + struct mm_struct *mm; struct device *dev; unsigned long offset; unsigned long local_size; @@ -2039,13 +2063,13 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx, alloc = reg->gpu_alloc; pa = kbase_get_gpu_phy_pages(reg); address = alloc->imported.user_buf.address; - owner = alloc->imported.user_buf.owner; + mm = alloc->imported.user_buf.mm; KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF); pages = alloc->imported.user_buf.pages; - pinned_pages = get_user_pages(owner, owner->mm, + pinned_pages = get_user_pages(NULL, mm, address, alloc->imported.user_buf.nr_pages, reg->flags & KBASE_REG_GPU_WR, @@ -2256,7 +2280,7 @@ struct kbase_mem_phy_alloc *kbase_map_external_resource( /* decide what needs to happen for this resource */ switch (reg->gpu_alloc->type) { case BASE_MEM_IMPORT_TYPE_USER_BUFFER: { - if (reg->gpu_alloc->imported.user_buf.owner->mm != locked_mm) + if (reg->gpu_alloc->imported.user_buf.mm != locked_mm) goto exit; reg->gpu_alloc->imported.user_buf.current_mapping_usage_count++; @@ -2372,7 +2396,7 @@ struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire( lockdep_assert_held(&kctx->reg_lock); /* - * Walk the per context externel resource metadata list for the + * Walk the per context external resource metadata list for the * metadata which matches the region which is being acquired. */ list_for_each_entry(walker, &kctx->ext_res_meta_head, ext_res_node) { @@ -2412,14 +2436,8 @@ struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire( goto fail_map; meta->gpu_addr = reg->start_pfn << PAGE_SHIFT; - meta->refcount = 1; list_add(&meta->ext_res_node, &kctx->ext_res_meta_head); - } else { - if (meta->refcount == UINT_MAX) - goto failed; - - meta->refcount++; } return meta; @@ -2431,16 +2449,17 @@ failed: } bool kbase_sticky_resource_release(struct kbase_context *kctx, - struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr, bool force) + struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr) { struct kbase_ctx_ext_res_meta *walker; + struct kbase_va_region *reg; lockdep_assert_held(&kctx->reg_lock); /* Search of the metadata if one isn't provided. 
*/ if (!meta) { /* - * Walk the per context externel resource metadata list for the + * Walk the per context external resource metadata list for the * metadata which matches the region which is being released. */ list_for_each_entry(walker, &kctx->ext_res_meta_head, @@ -2456,22 +2475,14 @@ bool kbase_sticky_resource_release(struct kbase_context *kctx, if (!meta) return false; - meta->refcount--; - if ((meta->refcount == 0) || force) { - /* - * Last reference to the metadata, drop the physical memory - * reference and free the metadata. - */ - struct kbase_va_region *reg; - - reg = kbase_region_tracker_find_region_enclosing_address( - kctx, - meta->gpu_addr); + /* Drop the physical memory reference and free the metadata. */ + reg = kbase_region_tracker_find_region_enclosing_address( + kctx, + meta->gpu_addr); - kbase_unmap_external_resource(kctx, reg, meta->alloc); - list_del(&meta->ext_res_node); - kfree(meta); - } + kbase_unmap_external_resource(kctx, reg, meta->alloc); + list_del(&meta->ext_res_node); + kfree(meta); return true; } @@ -2502,6 +2513,6 @@ void kbase_sticky_resource_term(struct kbase_context *kctx) walker = list_first_entry(&kctx->ext_res_meta_head, struct kbase_ctx_ext_res_meta, ext_res_node); - kbase_sticky_resource_release(kctx, walker, 0, true); + kbase_sticky_resource_release(kctx, walker, 0); } } diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem.h b/drivers/gpu/arm/midgard/mali_kbase_mem.h index 8f7629a3ce5c..dea3aba0475e 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_mem.h +++ b/drivers/gpu/arm/midgard/mali_kbase_mem.h @@ -131,6 +131,8 @@ struct kbase_mem_phy_alloc { unsigned long properties; + struct list_head zone_cache; + /* member in union valid based on @a type */ union { #ifdef CONFIG_UMP @@ -157,7 +159,7 @@ struct kbase_mem_phy_alloc { unsigned long nr_pages; struct page **pages; unsigned int current_mapping_usage_count; - struct task_struct *owner; + struct mm_struct *mm; dma_addr_t *dma_addrs; } user_buf; } imported; @@ -373,6 +375,7 @@ static inline struct kbase_mem_phy_alloc *kbase_alloc_create(size_t nr_pages, en alloc->pages = (void *)(alloc + 1); INIT_LIST_HEAD(&alloc->mappings); alloc->type = type; + INIT_LIST_HEAD(&alloc->zone_cache); if (type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) alloc->imported.user_buf.dma_addrs = @@ -989,23 +992,53 @@ struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire( * @kctx: kbase context. * @meta: Binding metadata. * @gpu_addr: GPU address of the external resource. - * @force: If the release is being forced. * * If meta is NULL then gpu_addr will be used to scan the metadata list and * find the matching metadata (if any), otherwise the provided meta will be * used and gpu_addr will be ignored. * - * If force is true then the refcount in the metadata is ignored and the - * resource will be forced freed. - * * Return: True if the release found the metadata and the reference was dropped. */ bool kbase_sticky_resource_release(struct kbase_context *kctx, - struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr, bool force); + struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr); /** * kbase_sticky_resource_term - Terminate sticky resource management. * @kctx: kbase context */ void kbase_sticky_resource_term(struct kbase_context *kctx); + +/** + * kbase_zone_cache_update - Update the memory zone cache after new pages have + * been added. + * @alloc: The physical memory allocation to build the cache for. + * @start_offset: Offset to where the new pages start. 
+ *
+ * Updates an existing memory zone cache, updating the counters for the
+ * various zones.
+ * If the memory allocation doesn't already have a zone cache, nothing is
+ * done.
+ *
+ * Return: Zero if the cache was updated, negative error code on error.
+ */
+int kbase_zone_cache_update(struct kbase_mem_phy_alloc *alloc,
+		size_t start_offset);
+
+/**
+ * kbase_zone_cache_build - Build the memory zone cache.
+ * @alloc: The physical memory allocation to build the cache for.
+ *
+ * Create a new zone cache for the provided physical memory allocation if
+ * one doesn't already exist, if one does exist then just return.
+ *
+ * Return: Zero if the zone cache was created, negative error code on error.
+ */
+int kbase_zone_cache_build(struct kbase_mem_phy_alloc *alloc);
+
+/**
+ * kbase_zone_cache_clear - Clear the memory zone cache.
+ * @alloc: The physical memory allocation to clear the cache on.
+ */
+void kbase_zone_cache_clear(struct kbase_mem_phy_alloc *alloc);
+
 #endif /* _KBASE_MEM_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c
index 0abe0e6ab39c..0fb570ccc721 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c
@@ -42,10 +42,7 @@
 #include
 #include
 #include
-
-#if defined(CONFIG_MALI_MIPE_ENABLED)
 #include
-#endif
 
 static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma);
 static const struct vm_operations_struct kbase_vm_ops;
@@ -519,6 +516,100 @@ void kbase_mem_evictable_deinit(struct kbase_context *kctx)
 	unregister_shrinker(&kctx->reclaim);
 }
 
+struct kbase_mem_zone_cache_entry {
+	/* List head used to link the cache entry to the memory allocation. */
+	struct list_head zone_node;
+	/* The zone the cacheline is for. */
+	struct zone *zone;
+	/* The number of pages in the allocation which belong to this zone. */
+	u64 count;
+};
+
+static int kbase_zone_cache_builder(struct kbase_mem_phy_alloc *alloc,
+		size_t start_offset)
+{
+	struct kbase_mem_zone_cache_entry *cache = NULL;
+	size_t i;
+	int ret = 0;
+
+	for (i = start_offset; i < alloc->nents; i++) {
+		struct page *p = phys_to_page(alloc->pages[i]);
+		struct zone *zone = page_zone(p);
+		bool create = true;
+
+		if (cache && (cache->zone == zone)) {
+			/*
+			 * Fast path check as most of the time adjacent
+			 * pages come from the same zone.
+			 */
+			create = false;
+		} else {
+			/*
+			 * Slow path check, walk all the cache entries to see
+			 * if we already know about this zone.
+			 */
+			list_for_each_entry(cache, &alloc->zone_cache, zone_node) {
+				if (cache->zone == zone) {
+					create = false;
+					break;
+				}
+			}
+		}
+
+		/* This zone wasn't found in the cache, create an entry for it */
+		if (create) {
+			cache = kmalloc(sizeof(*cache), GFP_KERNEL);
+			if (!cache) {
+				ret = -ENOMEM;
+				goto bail;
+			}
+			cache->zone = zone;
+			cache->count = 0;
+			list_add(&cache->zone_node, &alloc->zone_cache);
+		}
+
+		cache->count++;
+	}
+	return 0;
+
+bail:
+	return ret;
+}
+
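
The cache trades one O(pages) scan for later O(zones) accounting updates. A short sketch of the intended life cycle (illustration only; alloc is hypothetical, and kbase_mem_evictable_mark_reclaim() below uses the same pattern against NR_SLAB_RECLAIMABLE):

static void example_account_pages(struct kbase_mem_phy_alloc *alloc)
{
	struct kbase_mem_zone_cache_entry *entry;

	/* One pass over alloc->pages; reused by every later update. */
	if (kbase_zone_cache_build(alloc) == 0) {
		/* Bulk update: one counter adjustment per zone. */
		list_for_each_entry(entry, &alloc->zone_cache, zone_node)
			zone_page_state_add(entry->count, entry->zone,
					    NR_SLAB_RECLAIMABLE);
	}

	/* Invalidate when the page set changes in a way the cache
	 * cannot track cheaply, e.g. a partial free.
	 */
	kbase_zone_cache_clear(alloc);
}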
+int kbase_zone_cache_update(struct kbase_mem_phy_alloc *alloc,
+		size_t start_offset)
+{
+	/*
+	 * Bail if the zone cache is empty; only update the cache if it
+	 * existed in the first place.
+	 */
+	if (list_empty(&alloc->zone_cache))
+		return 0;
+
+	return kbase_zone_cache_builder(alloc, start_offset);
+}
+
+int kbase_zone_cache_build(struct kbase_mem_phy_alloc *alloc)
+{
+	/* Bail if the zone cache already exists */
+	if (!list_empty(&alloc->zone_cache))
+		return 0;
+
+	return kbase_zone_cache_builder(alloc, 0);
+}
+
+void kbase_zone_cache_clear(struct kbase_mem_phy_alloc *alloc)
+{
+	struct kbase_mem_zone_cache_entry *walker;
+
+	while (!list_empty(&alloc->zone_cache)) {
+		walker = list_first_entry(&alloc->zone_cache,
+				struct kbase_mem_zone_cache_entry, zone_node);
+		list_del(&walker->zone_node);
+		kfree(walker);
+	}
+}
+
 /**
  * kbase_mem_evictable_mark_reclaim - Mark the pages as reclaimable.
  * @alloc: The physical allocation
@@ -526,13 +617,28 @@ void kbase_mem_evictable_deinit(struct kbase_context *kctx)
 static void kbase_mem_evictable_mark_reclaim(struct kbase_mem_phy_alloc *alloc)
 {
 	struct kbase_context *kctx = alloc->imported.kctx;
+	struct kbase_mem_zone_cache_entry *zone_cache;
 	int __maybe_unused new_page_count;
-	int i;
+	int err;
 
-	for (i = 0; i < alloc->nents; i++) {
-		struct page *p = phys_to_page(alloc->pages[i]);
+	/* Attempt to build a zone cache of tracking */
+	err = kbase_zone_cache_build(alloc);
+	if (err == 0) {
+		/* Bulk update all the zones */
+		list_for_each_entry(zone_cache, &alloc->zone_cache, zone_node) {
+			zone_page_state_add(zone_cache->count,
+					zone_cache->zone, NR_SLAB_RECLAIMABLE);
+		}
+	} else {
+		/* Fall back to page by page updates */
+		int i;
+
+		for (i = 0; i < alloc->nents; i++) {
+			struct page *p = phys_to_page(alloc->pages[i]);
+			struct zone *zone = page_zone(p);
 
-		zone_page_state_add(1, page_zone(p), NR_SLAB_RECLAIMABLE);
+			zone_page_state_add(1, zone, NR_SLAB_RECLAIMABLE);
+		}
 	}
 
 	kbase_process_page_usage_dec(kctx, alloc->nents);
@@ -540,11 +646,9 @@ static void kbase_mem_evictable_mark_reclaim(struct kbase_mem_phy_alloc *alloc)
 	    &kctx->used_pages);
 	kbase_atomic_sub_pages(alloc->nents, &kctx->kbdev->memdev.used_pages);
 
-#if defined(CONFIG_MALI_MIPE_ENABLED)
 	kbase_tlstream_aux_pagesalloc(
 			(u32)kctx->id,
 			(u64)new_page_count);
-#endif
 }
 
 /**
@@ -555,8 +659,9 @@ static
 void kbase_mem_evictable_unmark_reclaim(struct kbase_mem_phy_alloc *alloc)
 {
 	struct kbase_context *kctx = alloc->imported.kctx;
+	struct kbase_mem_zone_cache_entry *zone_cache;
 	int __maybe_unused new_page_count;
-	int i;
+	int err;
 
 	new_page_count = kbase_atomic_add_pages(alloc->nents,
 						&kctx->used_pages);
@@ -567,17 +672,29 @@ void kbase_mem_evictable_unmark_reclaim(struct kbase_mem_phy_alloc *alloc)
 	 * then remove it from the reclaimable accounting.
*/ kbase_process_page_usage_inc(kctx, alloc->nents); - for (i = 0; i < alloc->nents; i++) { - struct page *p = phys_to_page(alloc->pages[i]); + /* Attempt to build a zone cache of tracking */ + err = kbase_zone_cache_build(alloc); + if (err == 0) { + /* Bulk update all the zones */ + list_for_each_entry(zone_cache, &alloc->zone_cache, zone_node) { + zone_page_state_add(-zone_cache->count, + zone_cache->zone, NR_SLAB_RECLAIMABLE); + } + } else { + /* Fall-back to page by page updates */ + int i; + + for (i = 0; i < alloc->nents; i++) { + struct page *p = phys_to_page(alloc->pages[i]); + struct zone *zone = page_zone(p); - zone_page_state_add(-1, page_zone(p), NR_SLAB_RECLAIMABLE); + zone_page_state_add(-1, zone, NR_SLAB_RECLAIMABLE); + } } -#if defined(CONFIG_MALI_MIPE_ENABLED) kbase_tlstream_aux_pagesalloc( (u32)kctx->id, (u64)new_page_count); -#endif } int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc) @@ -894,6 +1011,7 @@ static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx, in struct kbase_va_region *reg; struct dma_buf *dma_buf; struct dma_buf_attachment *dma_attachment; + bool shared_zone = false; dma_buf = dma_buf_get(fd); if (IS_ERR_OR_NULL(dma_buf)) @@ -914,15 +1032,23 @@ static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx, in /* ignore SAME_VA */ *flags &= ~BASE_MEM_SAME_VA; + if (*flags & BASE_MEM_IMPORT_SHARED) + shared_zone = true; + #ifdef CONFIG_64BIT if (!kctx->is_compat) { - /* 64-bit tasks must MMAP anyway, but not expose this address to clients */ + /* + * 64-bit tasks require us to reserve VA on the CPU that we use + * on the GPU. + */ + shared_zone = true; + } +#endif + + if (shared_zone) { *flags |= BASE_MEM_NEED_MMAP; reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_SAME_VA); } else { -#else - if (1) { -#endif reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_CUSTOM_VA); } @@ -989,6 +1115,7 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( struct kbase_va_region *reg; long faulted_pages; int zone = KBASE_REG_ZONE_CUSTOM_VA; + bool shared_zone = false; *va_pages = (PAGE_ALIGN(address + size) >> PAGE_SHIFT) - PFN_DOWN(address); @@ -1002,14 +1129,24 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( /* SAME_VA generally not supported with imported memory (no known use cases) */ *flags &= ~BASE_MEM_SAME_VA; + if (*flags & BASE_MEM_IMPORT_SHARED) + shared_zone = true; + #ifdef CONFIG_64BIT if (!kctx->is_compat) { - /* 64-bit tasks must MMAP anyway, but not expose this address to - * clients */ + /* + * 64-bit tasks require us to reserve VA on the CPU that we use + * on the GPU. 
+ */ + shared_zone = true; + } +#endif + + if (shared_zone) { *flags |= BASE_MEM_NEED_MMAP; zone = KBASE_REG_ZONE_SAME_VA; } -#endif + reg = kbase_alloc_free_region(kctx, 0, *va_pages, zone); if (!reg) @@ -1059,7 +1196,8 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( reg->gpu_alloc->imported.user_buf.nr_pages = faulted_pages; reg->gpu_alloc->imported.user_buf.pages = kmalloc_array(faulted_pages, sizeof(struct page *), GFP_KERNEL); - reg->gpu_alloc->imported.user_buf.owner = current; + reg->gpu_alloc->imported.user_buf.mm = current->mm; + atomic_inc(¤t->mm->mm_count); if (!reg->gpu_alloc->imported.user_buf.pages) goto no_page_array; @@ -1576,7 +1714,7 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages, en goto out_unlock; } /* can't grow regions which are ephemeral */ - if (reg->flags & BASE_MEM_DONT_NEED) { + if (reg->flags & KBASE_REG_DONT_NEED) { *failure_reason = BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE; goto out_unlock; } diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c b/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c index c0f47be362f3..957061893b00 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c +++ b/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c @@ -264,6 +264,8 @@ static size_t kbase_mem_pool_grow(struct kbase_mem_pool *pool, for (i = 0; i < nr_to_grow && !kbase_mem_pool_is_full(pool); i++) { p = kbase_mem_pool_alloc_page(pool); + if (!p) + break; kbase_mem_pool_add(pool, p); } diff --git a/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c b/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c index 5bbd6d485638..1a44957fe44a 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c +++ b/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2014, 2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -23,9 +23,6 @@ #include #include -#ifdef CONFIG_MACH_MANTA -#include -#endif /* * This file is included only for type definitions and functions belonging to @@ -62,6 +59,7 @@ static void kbasep_config_parse_io_resources(const struct kbase_io_resources *io linux_resources[0].start = io_resources->io_memory_region.start; linux_resources[0].end = io_resources->io_memory_region.end; linux_resources[0].flags = IORESOURCE_MEM; + linux_resources[1].start = io_resources->job_irq_number; linux_resources[1].end = io_resources->job_irq_number; linux_resources[1].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL; diff --git a/drivers/gpu/arm/midgard/mali_kbase_replay.c b/drivers/gpu/arm/midgard/mali_kbase_replay.c index 6ac49df3f2be..170b9390a773 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_replay.c +++ b/drivers/gpu/arm/midgard/mali_kbase_replay.c @@ -799,13 +799,33 @@ static int kbasep_replay_parse_payload(struct kbase_context *kctx, f_atom->core_req = payload->fragment_core_req | BASEP_JD_REQ_EVENT_NEVER; /* Sanity check core requirements*/ - if ((t_atom->core_req & BASEP_JD_REQ_ATOM_TYPE & + if (unlikely((t_atom->core_req & BASEP_JD_REQ_ATOM_TYPE & ~BASE_JD_REQ_COHERENT_GROUP) != BASE_JD_REQ_T || (f_atom->core_req & BASEP_JD_REQ_ATOM_TYPE & - ~BASE_JD_REQ_COHERENT_GROUP) != BASE_JD_REQ_FS || + ~BASE_JD_REQ_COHERENT_GROUP & ~BASE_JD_REQ_FS_AFBC) != BASE_JD_REQ_FS || t_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES || - f_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) { - dev_err(kctx->kbdev->dev, "Invalid core requirements\n"); + f_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES)) { + + int t_atom_type = t_atom->core_req & BASEP_JD_REQ_ATOM_TYPE & ~BASE_JD_REQ_COHERENT_GROUP; + int f_atom_type = f_atom->core_req & BASEP_JD_REQ_ATOM_TYPE & ~BASE_JD_REQ_COHERENT_GROUP & ~BASE_JD_REQ_FS_AFBC; + int t_has_ex_res = t_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES; + int f_has_ex_res = f_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES; + + if (t_atom_type != BASE_JD_REQ_T) { + dev_err(kctx->kbdev->dev, "Invalid core requirement: Tiler atom not a tiler job. Was: 0x%x\n Expected: 0x%x", + t_atom_type, BASE_JD_REQ_T); + } + if (f_atom_type != BASE_JD_REQ_FS) { + dev_err(kctx->kbdev->dev, "Invalid core requirement: Fragment shader atom not a fragment shader. 
Was: 0x%x Expected: 0x%x\n", + f_atom_type, BASE_JD_REQ_FS); + } + if (t_has_ex_res) { + dev_err(kctx->kbdev->dev, "Invalid core requirement: Tiler atom has external resources.\n"); + } + if (f_has_ex_res) { + dev_err(kctx->kbdev->dev, "Invalid core requirement: Fragment shader atom has external resources.\n"); + } + goto out; } diff --git a/drivers/gpu/arm/midgard/mali_kbase_softjobs.c b/drivers/gpu/arm/midgard/mali_kbase_softjobs.c index 57904ff223d7..0fe489031cde 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_softjobs.c +++ b/drivers/gpu/arm/midgard/mali_kbase_softjobs.c @@ -19,6 +19,10 @@ #include <mali_kbase.h> +#if defined(CONFIG_DMA_SHARED_BUFFER) +#include <linux/dma-buf.h> +#include +#endif /* defined(CONFIG_DMA_SHARED_BUFFER) */ #include #ifdef CONFIG_SYNC #include "sync.h" #endif @@ -528,12 +532,35 @@ static void kbasep_soft_event_cancel_job(struct kbase_jd_atom *katom) } struct kbase_debug_copy_buffer { - u64 size; + size_t size; struct page **pages; int nr_pages; - u64 offset; + size_t offset; + /* To find memory region */ + u64 gpu_addr; + + struct page **extres_pages; + int nr_extres_pages; }; +static inline void free_user_buffer(struct kbase_debug_copy_buffer *buffer) +{ + struct page **pages = buffer->extres_pages; + int nr_pages = buffer->nr_extres_pages; + + if (pages) { + int i; + + for (i = 0; i < nr_pages; i++) { + struct page *pg = pages[i]; + + if (pg) + put_page(pg); + } + kfree(pages); + } +} + static void kbase_debug_copy_finish(struct kbase_jd_atom *katom) { struct kbase_debug_copy_buffer *buffers = @@ -544,8 +571,13 @@ static void kbase_debug_copy_finish(struct kbase_jd_atom *katom) if (!buffers) return; + kbase_gpu_vm_lock(katom->kctx); for (i = 0; i < nr; i++) { int p; + struct kbase_va_region *reg; + + reg = kbase_region_tracker_find_region_enclosing_address( + katom->kctx, buffers[i].gpu_addr); if (!buffers[i].pages) break; @@ -556,7 +588,21 @@ static void kbase_debug_copy_finish(struct kbase_jd_atom *katom) put_page(pg); } kfree(buffers[i].pages); + if (reg && reg->gpu_alloc) { + switch (reg->gpu_alloc->type) { + case BASE_MEM_IMPORT_TYPE_USER_BUFFER: + { + free_user_buffer(&buffers[i]); + break; + } + default: + /* Nothing to be done.
*/ + break; + } + kbase_mem_phy_alloc_put(reg->gpu_alloc); + } } + kbase_gpu_vm_unlock(katom->kctx); kfree(buffers); katom->jc = 0; @@ -574,7 +620,7 @@ static int kbase_debug_copy_prepare(struct kbase_jd_atom *katom) if (!user_structs) return -EINVAL; - buffers = kmalloc_array(nr, sizeof(*buffers), GFP_KERNEL); + buffers = kcalloc(nr, sizeof(*buffers), GFP_KERNEL); if (!buffers) { ret = -ENOMEM; katom->jc = 0; @@ -589,13 +635,11 @@ static int kbase_debug_copy_prepare(struct kbase_jd_atom *katom) goto out_cleanup; } - if (copy_from_user(user_buffers, user_structs, - sizeof(*user_buffers)*nr)) { - ret = -EINVAL; + ret = copy_from_user(user_buffers, user_structs, + sizeof(*user_buffers)*nr); + if (ret) goto out_cleanup; - } - down_read(&current->mm->mmap_sem); for (i = 0; i < nr; i++) { u64 addr = user_buffers[i].address; u64 page_addr = addr & PAGE_MASK; @@ -603,47 +647,109 @@ static int kbase_debug_copy_prepare(struct kbase_jd_atom *katom) u64 last_page_addr = end_page_addr & PAGE_MASK; int nr_pages = (last_page_addr-page_addr)/PAGE_SIZE+1; int pinned_pages; + struct kbase_va_region *reg; + struct base_external_resource user_extres; - if (!user_buffers[i].address) { - memset(&buffers[i], 0, - sizeof(struct kbase_debug_copy_buffer)); + if (!addr) continue; - } buffers[i].nr_pages = nr_pages; buffers[i].offset = addr & ~PAGE_MASK; + if (buffers[i].offset >= PAGE_SIZE) { + ret = -EINVAL; + goto out_cleanup; + } buffers[i].size = user_buffers[i].size; buffers[i].pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL); if (!buffers[i].pages) { ret = -ENOMEM; - goto out_unlock; + goto out_cleanup; } - pinned_pages = get_user_pages(current, current->mm, page_addr, + pinned_pages = get_user_pages_fast(page_addr, nr_pages, 1, /* Write */ - 0, /* No force */ - buffers[i].pages, - NULL); + buffers[i].pages); if (pinned_pages < 0) { ret = pinned_pages; - goto out_unlock; + goto out_cleanup; } if (pinned_pages != nr_pages) { ret = -EINVAL; + goto out_cleanup; + } + + user_extres = user_buffers[i].extres; + if (user_extres.ext_resource == 0ULL) { + ret = -EINVAL; + goto out_cleanup; + } + + buffers[i].gpu_addr = user_extres.ext_resource & + ~BASE_EXT_RES_ACCESS_EXCLUSIVE; + kbase_gpu_vm_lock(katom->kctx); + reg = kbase_region_tracker_find_region_enclosing_address( + katom->kctx, buffers[i].gpu_addr); + + if (NULL == reg || NULL == reg->cpu_alloc || + (reg->flags & KBASE_REG_FREE)) { + ret = -EINVAL; goto out_unlock; } - } - up_read(&current->mm->mmap_sem); + kbase_mem_phy_alloc_get(reg->gpu_alloc); + buffers[i].nr_extres_pages = reg->nr_pages; + if (reg->nr_pages*PAGE_SIZE != buffers[i].size) + dev_warn(katom->kctx->kbdev->dev, "Copy buffer is not of the same size as the external resource to copy.\n"); + + switch (reg->gpu_alloc->type) { + case BASE_MEM_IMPORT_TYPE_USER_BUFFER: + { + struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc; + unsigned long nr_pages = + alloc->imported.user_buf.nr_pages; + + if (alloc->imported.user_buf.mm != current->mm) { + ret = -EINVAL; + goto out_unlock; + } + buffers[i].extres_pages = kcalloc(nr_pages, + sizeof(struct page *), GFP_KERNEL); + if (!buffers[i].extres_pages) { + ret = -ENOMEM; + goto out_unlock; + } + + ret = get_user_pages_fast( + alloc->imported.user_buf.address, + nr_pages, 0, + buffers[i].extres_pages); + if (ret != nr_pages) + goto out_unlock; + ret = 0; + break; + } + case BASE_MEM_IMPORT_TYPE_UMP: + { + dev_warn(katom->kctx->kbdev->dev, + "UMP is not supported for debug_copy jobs\n"); + ret = -EINVAL; + goto out_unlock; + } + default: + /* Nothing to be done.
*/ + break; + } + kbase_gpu_vm_unlock(katom->kctx); + } kfree(user_buffers); return ret; out_unlock: - up_read(&current->mm->mmap_sem); + kbase_gpu_vm_unlock(katom->kctx); out_cleanup: kfree(buffers); @@ -652,43 +758,145 @@ out_cleanup: /* Frees allocated memory for kbase_debug_copy_job struct, including * members, and sets jc to 0 */ kbase_debug_copy_finish(katom); - return ret; } -static int kbase_debug_copy(struct kbase_jd_atom *katom) +static void kbase_mem_copy_from_extres_page(struct kbase_context *kctx, + void *extres_page, struct page **pages, unsigned int nr_pages, + unsigned int *target_page_nr, size_t offset, size_t *to_copy) +{ + void *target_page = kmap(pages[*target_page_nr]); + size_t chunk = PAGE_SIZE-offset; + + if (!target_page) { + *target_page_nr += 1; + dev_warn(kctx->kbdev->dev, "kmap failed in debug_copy job."); + return; + } + + chunk = min(chunk, *to_copy); + + memcpy(target_page + offset, extres_page, chunk); + *to_copy -= chunk; + + kunmap(pages[*target_page_nr]); + + *target_page_nr += 1; + if (*target_page_nr >= nr_pages) + return; + + target_page = kmap(pages[*target_page_nr]); + if (!target_page) { + *target_page_nr += 1; + dev_warn(kctx->kbdev->dev, "kmap failed in debug_copy job."); + return; + } + + KBASE_DEBUG_ASSERT(target_page); + + chunk = min(offset, *to_copy); + memcpy(target_page, extres_page + PAGE_SIZE-offset, chunk); + *to_copy -= chunk; + + kunmap(pages[*target_page_nr]); +} + +static int kbase_mem_copy_from_extres(struct kbase_context *kctx, + struct kbase_debug_copy_buffer *buf_data) { - struct kbase_debug_copy_buffer *buffers = - (struct kbase_debug_copy_buffer *)(uintptr_t)katom->jc; unsigned int i; + unsigned int target_page_nr = 0; + struct kbase_va_region *reg; + struct page **pages = buf_data->pages; + u64 offset = buf_data->offset; + size_t extres_size = buf_data->nr_extres_pages*PAGE_SIZE; + size_t to_copy = min(extres_size, buf_data->size); + int ret = 0; - for (i = 0; i < katom->nr_extres; i++) { - u64 offset = buffers[i].offset; - u64 buffer_space = buffers[i].size; - int p; + KBASE_DEBUG_ASSERT(pages != NULL); - for (p = 0; p < buffers[i].nr_pages; p++) { - struct page *pg = buffers[i].pages[p]; - void *kpage = kmap(pg); - u64 page_space = PAGE_SIZE-offset; - u64 space; + kbase_gpu_vm_lock(kctx); + reg = kbase_region_tracker_find_region_enclosing_address( + kctx, buf_data->gpu_addr); - if (page_space <= buffer_space) - space = page_space; - else - space = buffer_space; + if (!reg) { + ret = -EINVAL; + goto out_unlock; + } - /* Temporary - GPUCORE-1843 covers the implementation - * of the actual copying.
*/ - memset(kpage+offset, 0x4B, space); + switch (reg->gpu_alloc->type) { + case BASE_MEM_IMPORT_TYPE_USER_BUFFER: + { + for (i = 0; i < buf_data->nr_extres_pages; i++) { + struct page *pg = buf_data->extres_pages[i]; + void *extres_page = kmap(pg); - if (!PageReserved(pg)) - SetPageDirty(pg); + if (extres_page) + kbase_mem_copy_from_extres_page(kctx, + extres_page, pages, + buf_data->nr_pages, + &target_page_nr, + offset, &to_copy); kunmap(pg); - offset = 0; - buffer_space -= space; + if (target_page_nr >= buf_data->nr_pages) + break; } + break; + } +#ifdef CONFIG_DMA_SHARED_BUFFER + case BASE_MEM_IMPORT_TYPE_UMM: { + struct dma_buf *dma_buf = reg->gpu_alloc->imported.umm.dma_buf; + + KBASE_DEBUG_ASSERT(dma_buf != NULL); + + ret = dma_buf_begin_cpu_access(dma_buf, 0, + buf_data->nr_extres_pages*PAGE_SIZE, + DMA_FROM_DEVICE); + if (ret) + goto out_unlock; + + for (i = 0; i < buf_data->nr_extres_pages; i++) { + + void *extres_page = dma_buf_kmap(dma_buf, i); + + if (extres_page) + kbase_mem_copy_from_extres_page(kctx, + extres_page, pages, + buf_data->nr_pages, + &target_page_nr, + offset, &to_copy); + + dma_buf_kunmap(dma_buf, i, extres_page); + if (target_page_nr >= buf_data->nr_pages) + break; + } + dma_buf_end_cpu_access(dma_buf, 0, + buf_data->nr_extres_pages*PAGE_SIZE, + DMA_FROM_DEVICE); + break; + } +#endif + default: + ret = -EINVAL; + } +out_unlock: + kbase_gpu_vm_unlock(kctx); + return ret; + +} + +static int kbase_debug_copy(struct kbase_jd_atom *katom) +{ + struct kbase_debug_copy_buffer *buffers = + (struct kbase_debug_copy_buffer *)(uintptr_t)katom->jc; + unsigned int i; + + for (i = 0; i < katom->nr_extres; i++) { + int res = kbase_mem_copy_from_extres(katom->kctx, &buffers[i]); + + if (res) + return res; } return 0; @@ -698,7 +907,6 @@ static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom) { __user void *data = (__user void *)(uintptr_t) katom->jc; struct base_jit_alloc_info *info; - struct kbase_context *kctx = katom->kctx; int ret; /* Fail the job if there is no info structure */ @@ -719,15 +927,12 @@ static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom) goto free_info; } - /* If the ID is zero or is in use then fail the job */ - if ((info->id == 0) || (kctx->jit_alloc[info->id])) { + /* If the ID is zero then fail the job */ + if (info->id == 0) { ret = -EINVAL; goto free_info; } - /* Set the jit_alloc to a non-zero value so we know the ID is in use */ - kctx->jit_alloc[info->id] = (struct kbase_va_region *) -1; - /* Sanity check that the PA fits within the VA */ if (info->va_pages < info->commit_pages) { ret = -EINVAL; goto free_info; } @@ -773,6 +978,18 @@ static void kbase_jit_allocate_process(struct kbase_jd_atom *katom) info = (struct base_jit_alloc_info *) (uintptr_t) katom->jc; + /* The JIT ID is still in use so fail the allocation */ + if (kctx->jit_alloc[info->id]) { + katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED; + return; + } + + /* + * Mark the allocation so we know it's in use even if the + * allocation itself fails.
+ */ + kctx->jit_alloc[info->id] = (struct kbase_va_region *) -1; + /* Create a JIT allocation */ reg = kbase_jit_allocate(kctx, info); if (!reg) { @@ -924,7 +1141,7 @@ static void kbase_ext_res_process(struct kbase_jd_atom *katom, bool map) goto failed_loop; } else if (!kbase_sticky_resource_release(katom->kctx, NULL, - gpu_addr, false)) + gpu_addr)) failed = true; } @@ -949,8 +1166,7 @@ failed_loop: gpu_addr = ext_res->ext_res[i].ext_resource & ~BASE_EXT_RES_ACCESS_EXCLUSIVE; - kbase_sticky_resource_release(katom->kctx, NULL, gpu_addr, - false); + kbase_sticky_resource_release(katom->kctx, NULL, gpu_addr); } katom->event_code = BASE_JD_EVENT_JOB_INVALID; @@ -996,7 +1212,13 @@ int kbase_process_soft_job(struct kbase_jd_atom *katom) kbasep_soft_event_update(katom, BASE_JD_SOFT_EVENT_RESET); break; case BASE_JD_REQ_SOFT_DEBUG_COPY: - return kbase_debug_copy(katom); + { + int res = kbase_debug_copy(katom); + + if (res) + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + break; + } case BASE_JD_REQ_SOFT_JIT_ALLOC: kbase_jit_allocate_process(katom); break; diff --git a/drivers/gpu/arm/midgard/mali_kbase_strings.c b/drivers/gpu/arm/midgard/mali_kbase_strings.c new file mode 100644 index 000000000000..c98762cec244 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_strings.c @@ -0,0 +1,23 @@ + /* + * + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + +#include "mali_kbase_strings.h" + +#define KBASE_DRV_NAME "mali" +#define KBASE_TIMELINE_NAME KBASE_DRV_NAME ".timeline" + +const char kbase_drv_name[] = KBASE_DRV_NAME; +const char kbase_timeline_name[] = KBASE_TIMELINE_NAME; diff --git a/drivers/gpu/arm/midgard/mali_kbase_strings.h b/drivers/gpu/arm/midgard/mali_kbase_strings.h new file mode 100644 index 000000000000..41b8fdbec6a4 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_strings.h @@ -0,0 +1,19 @@ +/* + * + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + + +extern const char kbase_drv_name[]; +extern const char kbase_timeline_name[]; diff --git a/drivers/gpu/arm/midgard/mali_kbase_tlstream.c b/drivers/gpu/arm/midgard/mali_kbase_tlstream.c index e41efb81b217..962b19cf380c 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_tlstream.c +++ b/drivers/gpu/arm/midgard/mali_kbase_tlstream.c @@ -140,6 +140,8 @@ enum tl_msg_id_obj { KBASE_TL_RET_ATOM_AS, KBASE_TL_NRET_ATOM_AS, KBASE_TL_DEP_ATOM_ATOM, + KBASE_TL_NDEP_ATOM_ATOM, + KBASE_TL_RDEP_ATOM_ATOM, KBASE_TL_ATTRIB_ATOM_CONFIG, KBASE_TL_ATTRIB_AS_CONFIG, @@ -407,6 +409,20 @@ static const struct tp_desc tp_desc_obj[] = { "@pp", "atom1,atom2" }, + { + KBASE_TL_NDEP_ATOM_ATOM, + __stringify(KBASE_TL_NDEP_ATOM_ATOM), + "atom2 no longer depends on atom1", + "@pp", + "atom1,atom2" + }, + { + KBASE_TL_RDEP_ATOM_ATOM, + __stringify(KBASE_TL_RDEP_ATOM_ATOM), + "resolved dependency of atom2 depending on atom1", + "@pp", + "atom1,atom2" + }, { KBASE_TL_ATTRIB_ATOM_CONFIG, __stringify(KBASE_TL_ATTRIB_ATOM_CONFIG), @@ -1790,6 +1806,56 @@ void __kbase_tlstream_tl_dep_atom_atom(void *atom1, void *atom2) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } +void __kbase_tlstream_tl_ndep_atom_atom(void *atom1, void *atom2) +{ + const u32 msg_id = KBASE_TL_NDEP_ATOM_ATOM; + const size_t msg_size = + sizeof(msg_id) + sizeof(u64) + sizeof(atom1) + sizeof(atom2); + unsigned long flags; + char *buffer; + size_t pos = 0; + + buffer = kbasep_tlstream_msgbuf_acquire( + TL_STREAM_TYPE_OBJ, + msg_size, &flags); + KBASE_DEBUG_ASSERT(buffer); + + pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_tlstream_write_timestamp(buffer, pos); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &atom1, sizeof(atom1)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &atom2, sizeof(atom2)); + KBASE_DEBUG_ASSERT(msg_size == pos); + + kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); +} + +void __kbase_tlstream_tl_rdep_atom_atom(void *atom1, void *atom2) +{ + const u32 msg_id = KBASE_TL_RDEP_ATOM_ATOM; + const size_t msg_size = + sizeof(msg_id) + sizeof(u64) + sizeof(atom1) + sizeof(atom2); + unsigned long flags; + char *buffer; + size_t pos = 0; + + buffer = kbasep_tlstream_msgbuf_acquire( + TL_STREAM_TYPE_OBJ, + msg_size, &flags); + KBASE_DEBUG_ASSERT(buffer); + + pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_tlstream_write_timestamp(buffer, pos); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &atom1, sizeof(atom1)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &atom2, sizeof(atom2)); + KBASE_DEBUG_ASSERT(msg_size == pos); + + kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); +} + void __kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu) { const u32 msg_id = KBASE_TL_NRET_ATOM_LPU; diff --git a/drivers/gpu/arm/midgard/mali_kbase_tlstream.h b/drivers/gpu/arm/midgard/mali_kbase_tlstream.h index 6c5c59616b78..22a0d96f9a72 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_tlstream.h +++ b/drivers/gpu/arm/midgard/mali_kbase_tlstream.h @@ -124,6 +124,8 @@ void __kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx); void __kbase_tlstream_tl_ret_atom_as(void *atom, void *as); void __kbase_tlstream_tl_nret_atom_as(void *atom, void *as); void __kbase_tlstream_tl_dep_atom_atom(void *atom1, void *atom2); +void __kbase_tlstream_tl_ndep_atom_atom(void *atom1, void *atom2); +void __kbase_tlstream_tl_rdep_atom_atom(void *atom1, void *atom2); void __kbase_tlstream_tl_attrib_atom_config( void *atom, u64 jd, u64 affinity, u32
config); void __kbase_tlstream_tl_attrib_as_config( @@ -393,6 +395,28 @@ extern atomic_t kbase_tlstream_enabled; #define kbase_tlstream_tl_dep_atom_atom(atom1, atom2) \ __TRACE_IF_ENABLED(tl_dep_atom_atom, atom1, atom2) +/** + * kbase_tlstream_tl_ndep_atom_atom - dependency between atoms removed + * @atom1: name of the child atom object + * @atom2: name of the parent atom object that depended on child atom + * + * Function emits a timeline message informing that parent atom no longer + * depends on child atom (the execution dependency has been removed). + */ +#define kbase_tlstream_tl_ndep_atom_atom(atom1, atom2) \ + __TRACE_IF_ENABLED(tl_ndep_atom_atom, atom1, atom2) + +/** + * kbase_tlstream_tl_rdep_atom_atom - information about already resolved dependency between atoms + * @atom1: name of the child atom object + * @atom2: name of the parent atom object that depended on child atom + * + * Function emits a timeline message informing that parent atom execution + * dependency on child atom has been resolved. + */ +#define kbase_tlstream_tl_rdep_atom_atom(atom1, atom2) \ + __TRACE_IF_ENABLED(tl_rdep_atom_atom, atom1, atom2) + /** * kbase_tlstream_tl_attrib_atom_config - atom job slot attributes * @atom: name of the atom object diff --git a/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h index d92caf054804..22a36494e72e 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h +++ b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -213,7 +213,6 @@ void kbasep_trace_timeline_debugfs_init(struct kbase_device *kbdev); js, _producerof_atom_number_completed); \ } while (0) - /** Trace beginning/end of a call to kbase_pm_check_transitions_nolock from a * certin caller */ #define KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code) \ @@ -319,7 +318,6 @@ void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev); #define KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, count) CSTD_NOP() - static inline void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom, int js) { diff --git a/drivers/gpu/arm/midgard/mali_kbase_vinstr.c b/drivers/gpu/arm/midgard/mali_kbase_vinstr.c index 371122f188b2..45de4e020dd6 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_vinstr.c +++ b/drivers/gpu/arm/midgard/mali_kbase_vinstr.c @@ -441,9 +441,10 @@ static struct kbase_vinstr_client *kbasep_vinstr_attach_client( struct kbase_vinstr_client *cli; KBASE_DEBUG_ASSERT(vinstr_ctx); - KBASE_DEBUG_ASSERT(buffer_count >= 0); - KBASE_DEBUG_ASSERT(buffer_count <= MAX_BUFFER_COUNT); - KBASE_DEBUG_ASSERT(!(buffer_count & (buffer_count - 1))); + + if (buffer_count > MAX_BUFFER_COUNT + || (buffer_count & (buffer_count - 1))) + return NULL; cli = kzalloc(sizeof(*cli), GFP_KERNEL); if (!cli) @@ -497,7 +498,7 @@ static struct kbase_vinstr_client *kbasep_vinstr_attach_client( /* Allocate required number of dumping buffers.
*/ cli->dump_buffers = (char *)__get_free_pages( - GFP_KERNEL, + GFP_KERNEL | __GFP_ZERO, get_order(cli->dump_size * cli->buffer_count)); if (!cli->dump_buffers) goto error; @@ -1517,7 +1518,8 @@ static int kbasep_vinstr_hwcnt_reader_mmap(struct file *filp, struct vm_area_struct *vma) { struct kbase_vinstr_client *cli; - size_t size; + unsigned long size, addr, pfn, offset; + unsigned long vm_size = vma->vm_end - vma->vm_start; KBASE_DEBUG_ASSERT(filp); KBASE_DEBUG_ASSERT(vma); @@ -1526,14 +1528,24 @@ static int kbasep_vinstr_hwcnt_reader_mmap(struct file *filp, KBASE_DEBUG_ASSERT(cli); size = cli->buffer_count * cli->dump_size; - if (vma->vm_end - vma->vm_start > size) - return -ENOMEM; + + if (vma->vm_pgoff > (size >> PAGE_SHIFT)) + return -EINVAL; + if (vm_size > size) + return -EINVAL; + + offset = vma->vm_pgoff << PAGE_SHIFT; + if ((vm_size + offset) > size) + return -EINVAL; + + addr = __pa((unsigned long)cli->dump_buffers + offset); + pfn = addr >> PAGE_SHIFT; return remap_pfn_range( vma, vma->vm_start, - __pa((unsigned long)cli->dump_buffers) >> PAGE_SHIFT, - size, + pfn, + vm_size, vma->vm_page_prot); } diff --git a/drivers/gpu/arm/midgard/mali_midg_regmap.h b/drivers/gpu/arm/midgard/mali_midg_regmap.h index 6b1d67d63720..7498407e6372 100644 --- a/drivers/gpu/arm/midgard/mali_midg_regmap.h +++ b/drivers/gpu/arm/midgard/mali_midg_regmap.h @@ -567,5 +567,4 @@ /* End TILER_CONFIG register */ - #endif /* _MIDGARD_REGMAP_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_timeline.h b/drivers/gpu/arm/midgard/mali_timeline.h index c3563723cb63..bd5f6614b6bb 100644 --- a/drivers/gpu/arm/midgard/mali_timeline.h +++ b/drivers/gpu/arm/midgard/mali_timeline.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -386,7 +386,6 @@ TRACE_EVENT(mali_timeline_context_active, __entry->count) ); - #endif /* _MALI_TIMELINE_H */ #undef TRACE_INCLUDE_PATH diff --git a/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c index 687b1a8c0431..15ce2bc5eea5 100644 --- a/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c +++ b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software diff --git a/drivers/gpu/arm/midgard/sconscript b/drivers/gpu/arm/midgard/sconscript index 643b1f3f39e9..c36e92dd0d7a 100644 --- a/drivers/gpu/arm/midgard/sconscript +++ b/drivers/gpu/arm/midgard/sconscript @@ -44,8 +44,7 @@ kbase_src = [Glob('#kernel/drivers/gpu/arm/midgard/*.c'), Glob('#kernel/drivers/gpu/arm/midgard/K*')) ] -kbase_src += [Glob('#kernel/drivers/gpu/arm/midgard/internal/*/*.c'), - Glob('#kernel/drivers/gpu/arm/midgard/internal/*/*/*.c')] +kbase_src += [Glob('#kernel/drivers/gpu/arm/midgard/internal/*/*.c')] if Glob('#kernel/drivers/gpu/arm/midgard/tests/internal/src/mock') and env['unit'] == '1': kbase_src += [Glob('#kernel/drivers/gpu/arm/midgard/tests/internal/src/mock/*.c')] @@ -123,10 +122,6 @@ if env['os'] != 'android': if int(env['ump']) == 1: env.Depends('$STATIC_LIB_PATH/mali_kbase.ko', '$STATIC_LIB_PATH/ump.ko') -if Glob('internal/sconsfrag'): - execfile('internal/sconsfrag') - get_internal(env) - env.KernelObjTarget('kbase', cmd) env.AppendUnique(BASE=['cutils_linked_list']) -- 2.34.1
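
A reader's companion to the two timeline tracepoints introduced in mali_kbase_tlstream.h above (kbase_tlstream_tl_ndep_atom_atom and kbase_tlstream_tl_rdep_atom_atom): the following is a minimal, illustrative sketch of the call pattern their kerneldoc implies, not part of the patch. The helper name, its arguments, and the call site are assumptions; only the two macros come from the patch, and they expand to nothing unless timeline tracing is enabled.

#include <linux/types.h>
#include "mali_kbase_tlstream.h"

struct kbase_jd_atom;	/* opaque here; defined in mali_kbase_defs.h */

/*
 * Hypothetical helper (not in the patch): report that "parent" has
 * dropped its dependency edge on "child". Both macros take the child
 * atom first, per their kerneldoc.
 */
static void sketch_report_dep_drop(struct kbase_jd_atom *child,
				   struct kbase_jd_atom *parent,
				   bool already_resolved)
{
	if (already_resolved)
		/* The dependency was already satisfied when recorded. */
		kbase_tlstream_tl_rdep_atom_atom(child, parent);
	else
		/* The parent simply no longer depends on the child. */
		kbase_tlstream_tl_ndep_atom_atom(child, parent);
}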