From: chenzhen Date: Wed, 31 Aug 2016 07:15:20 +0000 (+0800) Subject: MALI: rockchip: upgrade midgard DDK to r13p0-00rel0 X-Git-Tag: firefly_0821_release~1547 X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=4fe5f7e659d7db140ae0bb5df16b444acaffe833;p=firefly-linux-kernel-4.4.55.git MALI: rockchip: upgrade midgard DDK to r13p0-00rel0 Conflicts: drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.c drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.h drivers/gpu/arm/midgard/mali_kbase_defs.h Change-Id: Ia7b8004b09ce31a5af6414c27b8ec776c247835a Signed-off-by: chenzhen --- diff --git a/drivers/gpu/arm/midgard/Kbuild b/drivers/gpu/arm/midgard/Kbuild index e25b7d944e98..8a47cddf7936 100644 --- a/drivers/gpu/arm/midgard/Kbuild +++ b/drivers/gpu/arm/midgard/Kbuild @@ -15,7 +15,7 @@ # Driver version string which is returned to userspace via an ioctl -MALI_RELEASE_NAME ?= "r12p0-04rel0" +MALI_RELEASE_NAME ?= "r13p0-00rel0" # Paths required for build KBASE_PATH = $(src) @@ -88,7 +88,6 @@ SRC := \ mali_kbase_context.c \ mali_kbase_pm.c \ mali_kbase_config.c \ - mali_kbase_instr.c \ mali_kbase_vinstr.c \ mali_kbase_softjobs.c \ mali_kbase_10969_workaround.c \ @@ -104,6 +103,7 @@ SRC := \ mali_kbase_replay.c \ mali_kbase_mem_profile_debugfs.c \ mali_kbase_mmu_mode_lpae.c \ + mali_kbase_mmu_mode_aarch64.c \ mali_kbase_disjoint_events.c \ mali_kbase_gator_api.c \ mali_kbase_debug_mem_view.c \ @@ -112,7 +112,8 @@ SRC := \ mali_kbase_mem_pool.c \ mali_kbase_mem_pool_debugfs.c \ mali_kbase_tlstream.c \ - mali_kbase_strings.c + mali_kbase_strings.c \ + mali_kbase_as_fault_debugfs.c ifeq ($(MALI_UNIT_TEST),1) SRC += mali_kbase_tlstream_test.c @@ -202,13 +203,6 @@ mali_kbase-y := $(SRC:.c=.o) mali_kbase-$(CONFIG_MALI_DMA_FENCE) += mali_kbase_dma_fence.o -ifneq ($(wildcard $(src)/internal/Kbuild),) -ifeq ($(MALI_CUSTOMER_RELEASE),0) -include $(src)/internal/Kbuild -mali_kbase-y += $(INTERNAL:.c=.o) -endif -endif - MALI_BACKEND_PATH ?= backend CONFIG_MALI_BACKEND ?= gpu CONFIG_MALI_BACKEND_REAL ?= $(CONFIG_MALI_BACKEND) diff --git a/drivers/gpu/arm/midgard/Kconfig b/drivers/gpu/arm/midgard/Kconfig index 367f0c787531..201832b262d4 100644 --- a/drivers/gpu/arm/midgard/Kconfig +++ b/drivers/gpu/arm/midgard/Kconfig @@ -60,7 +60,7 @@ config MALI_DEVFREQ available OPPs. config MALI_DMA_FENCE - bool "DMA_BUF fence support for Mali (EXPERIMENTAL)" + bool "DMA_BUF fence support for Mali" depends on MALI_MIDGARD && !KDS default n help @@ -92,14 +92,6 @@ config MALI_PRFCNT_SET_SECONDARY If unsure, say N. -config MALI_DEBUG_SHADER_SPLIT_FS - bool "Allow mapping of shader cores via sysfs" - depends on MALI_MIDGARD && MALI_EXPERT - default n - help - Select this option to provide a sysfs entry for runtime configuration of shader - core affinity masks. - config MALI_PLATFORM_FAKE bool "Enable fake platform device support" depends on MALI_MIDGARD && MALI_EXPERT @@ -162,6 +154,23 @@ config MALI_DEBUG help Select this option for increased checking and reporting of errors. +config MALI_FENCE_DEBUG + bool "Debug sync fence usage" + depends on MALI_MIDGARD && MALI_EXPERT && SYNC + default y if MALI_DEBUG + help + Select this option to enable additional checking and reporting on the + use of sync fences in the Mali driver. 
+ + This will add a 3s timeout to all sync fence waits in the Mali + driver, so that when work for Mali has been waiting on a sync fence + for a long time a debug message will be printed, detailing what fence + is causing the block, and which dependent Mali atoms are blocked as a + result of this. + + The timeout can be changed at runtime through the js_soft_timeout + device attribute, where the timeout is specified in milliseconds. + config MALI_NO_MALI bool "No Mali" depends on MALI_MIDGARD && MALI_EXPERT @@ -198,4 +207,19 @@ config MALI_SYSTEM_TRACE minimal overhead when not in use. Enable only if you know what you are doing. +config MALI_GPU_MMU_AARCH64 + bool "Use AArch64 page tables" + depends on MALI_MIDGARD && MALI_EXPERT + default n + help + Use AArch64 format page tables for the GPU instead of LPAE-style. + The two formats have the same functionality and performance but a + future GPU may deprecate or remove the legacy LPAE-style format. + + The LPAE-style format is supported on all Midgard and current Bifrost + GPUs. Enabling AArch64 format restricts the driver to only supporting + Bifrost GPUs. + + If in doubt, say N. + source "drivers/gpu/arm/midgard/platform/Kconfig" diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c index 2f3c41a55807..c6862539c8dd 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c @@ -16,7 +16,6 @@ #include "backend/gpu/mali_kbase_cache_policy_backend.h" -#include #include void kbase_cache_set_coherency_mode(struct kbase_device *kbdev, diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c index 61d3eac6e230..ad05fe5bea8d 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -135,6 +135,14 @@ kbase_devfreq_status(struct device *dev, struct devfreq_dev_status *stat) stat->private_data = NULL; +#ifdef CONFIG_DEVFREQ_THERMAL +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) + if (kbdev->devfreq_cooling) + memcpy(&kbdev->devfreq_cooling->last_status, stat, + sizeof(*stat)); +#endif +#endif + return 0; } diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c index 033caf9fa210..d578fd78e825 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c @@ -20,7 +20,6 @@ * Register-based HW access backend APIs */ #include -#include #include #include #include diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c index 4e70b34ffaa6..3f06a10f7fed 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c @@ -22,8 +22,8 @@ */ #include -#include #include +#include #include #include #include @@ -41,14 +41,6 @@ static void kbasep_instr_hwcnt_cacheclean(struct kbase_device *kbdev) u32 irq_mask; spin_lock_irqsave(&kbdev->hwcnt.lock, flags); - /* Wait for any reset to complete */ - while (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_RESETTING) { - spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); - wait_event(kbdev->hwcnt.backend.cache_clean_wait, - kbdev->hwcnt.backend.state != - KBASE_INSTR_STATE_RESETTING); - spin_lock_irqsave(&kbdev->hwcnt.lock, flags); - } KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_REQUEST_CLEAN); @@ -75,19 +67,14 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, { unsigned long flags, pm_flags; int err = -EINVAL; - struct kbasep_js_device_data *js_devdata; u32 irq_mask; int ret; u64 shader_cores_needed; u32 prfcnt_config; - KBASE_DEBUG_ASSERT(NULL == kbdev->hwcnt.suspended_kctx); - shader_cores_needed = kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER); - js_devdata = &kbdev->js_data; - /* alignment failure */ if ((setup->dump_buffer == 0ULL) || (setup->dump_buffer & (2048 - 1))) goto out_err; @@ -102,14 +89,6 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, spin_lock_irqsave(&kbdev->hwcnt.lock, flags); - if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_RESETTING) { - /* GPU is being reset */ - spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); - wait_event(kbdev->hwcnt.backend.wait, - kbdev->hwcnt.backend.triggered != 0); - spin_lock_irqsave(&kbdev->hwcnt.lock, flags); - } - if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) { /* Instrumentation is already enabled */ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); @@ -127,10 +106,6 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, kbdev->hwcnt.kctx = kctx; /* Remember the dump address so we can reprogram it later */ kbdev->hwcnt.addr = setup->dump_buffer; - /* Remember all the settings for suspend/resume */ - if (&kbdev->hwcnt.suspended_state != setup) - memcpy(&kbdev->hwcnt.suspended_state, setup, - sizeof(kbdev->hwcnt.suspended_state)); /* Request the clean */ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN; @@ -199,14 +174,6 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, spin_lock_irqsave(&kbdev->hwcnt.lock, 
flags); - if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_RESETTING) { - /* GPU is being reset */ - spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); - wait_event(kbdev->hwcnt.backend.wait, - kbdev->hwcnt.backend.triggered != 0); - spin_lock_irqsave(&kbdev->hwcnt.lock, flags); - } - kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; kbdev->hwcnt.backend.triggered = 1; wake_up(&kbdev->hwcnt.backend.wait); @@ -373,15 +340,11 @@ void kbasep_cache_clean_worker(struct work_struct *data) spin_lock_irqsave(&kbdev->hwcnt.lock, flags); /* Wait for our condition, and any reset to complete */ - while (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_RESETTING || - kbdev->hwcnt.backend.state == - KBASE_INSTR_STATE_CLEANING) { + while (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) { spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); wait_event(kbdev->hwcnt.backend.cache_clean_wait, - (kbdev->hwcnt.backend.state != - KBASE_INSTR_STATE_RESETTING && kbdev->hwcnt.backend.state != - KBASE_INSTR_STATE_CLEANING)); + KBASE_INSTR_STATE_CLEANING); spin_lock_irqsave(&kbdev->hwcnt.lock, flags); } KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state == @@ -414,9 +377,6 @@ void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev) &kbdev->hwcnt.backend.cache_clean_work); KBASE_DEBUG_ASSERT(ret); } - /* NOTE: In the state KBASE_INSTR_STATE_RESETTING, We're in a reset, - * and the instrumentation state hasn't been restored yet - - * kbasep_reset_timeout_worker() will do the rest of the work */ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); } @@ -444,10 +404,6 @@ void kbase_clean_caches_done(struct kbase_device *kbdev) kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANED; wake_up(&kbdev->hwcnt.backend.cache_clean_wait); } - /* NOTE: In the state KBASE_INSTR_STATE_RESETTING, We're in a - * reset, and the instrumentation state hasn't been restored yet - * - kbasep_reset_timeout_worker() will do the rest of the work - */ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); } @@ -465,14 +421,6 @@ int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx) spin_lock_irqsave(&kbdev->hwcnt.lock, flags); - if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_RESETTING) { - /* GPU is being reset */ - spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); - wait_event(kbdev->hwcnt.backend.wait, - kbdev->hwcnt.backend.triggered != 0); - spin_lock_irqsave(&kbdev->hwcnt.lock, flags); - } - if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) { err = -EINVAL; kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; @@ -496,14 +444,6 @@ int kbase_instr_hwcnt_clear(struct kbase_context *kctx) spin_lock_irqsave(&kbdev->hwcnt.lock, flags); - if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_RESETTING) { - /* GPU is being reset */ - spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); - wait_event(kbdev->hwcnt.backend.wait, - kbdev->hwcnt.backend.triggered != 0); - spin_lock_irqsave(&kbdev->hwcnt.lock, flags); - } - /* Check it's the context previously set up and we're not already * dumping */ if (kbdev->hwcnt.kctx != kctx || kbdev->hwcnt.backend.state != diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h index 23bd80a5a150..4794672da8f0 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014, 2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -39,10 +39,6 @@ enum kbase_instr_state { /* Cache clean completed, and either a) a dump is complete, or * b) instrumentation can now be setup. */ KBASE_INSTR_STATE_CLEANED, - /* kbasep_reset_timeout_worker() has started (but not compelted) a - * reset. This generally indicates the current action should be aborted, - * and kbasep_reset_timeout_worker() will handle the cleanup */ - KBASE_INSTR_STATE_RESETTING, /* An error has occured during DUMPING (page fault). */ KBASE_INSTR_STATE_FAULT }; diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h index 8ccc440171a2..83d477898c5e 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -71,6 +71,7 @@ struct slot_rb { * @reset_work: Work item for performing the reset * @reset_wait: Wait event signalled when the reset is complete * @reset_timer: Timeout for soft-stops before the reset + * @timeouts_updated: Have timeout values just been updated? * * The kbasep_js_device_data::runpool_irq::lock (a spinlock) must be held when * accessing this structure @@ -97,11 +98,15 @@ struct kbase_backend_data { /* The GPU reset process is currently occuring (timeout has expired or * kbasep_try_reset_gpu_early was called) */ #define KBASE_RESET_GPU_HAPPENING 3 - +/* Reset the GPU silently, used when resetting the GPU as part of normal + * behavior (e.g. when exiting protected mode). 
*/ +#define KBASE_RESET_GPU_SILENT 4 struct workqueue_struct *reset_workq; struct work_struct reset_work; wait_queue_head_t reset_wait; struct hrtimer reset_timer; + + bool timeouts_updated; }; /** diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c index 657717579c1c..00900a99a898 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c @@ -26,8 +26,8 @@ #include #endif #include +#include #include -#include #include #include #include @@ -82,19 +82,30 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, /* start MMU, medium priority, cache clean/flush on end, clean/flush on * start */ cfg = kctx->as_nr; + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION)) cfg |= JS_CONFIG_ENABLE_FLUSH_REDUCTION; #ifndef CONFIG_MALI_COH_GPU - cfg |= JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE; - cfg |= JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE; -#endif + if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_START)) + cfg |= JS_CONFIG_START_FLUSH_NO_ACTION; + else + cfg |= JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE; + + if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_END)) + cfg |= JS_CONFIG_END_FLUSH_NO_ACTION; + else + cfg |= JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE; +#endif /* CONFIG_MALI_COH_GPU */ + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10649) || + !kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3982)) + cfg |= JS_CONFIG_START_MMU; - cfg |= JS_CONFIG_START_MMU; cfg |= JS_CONFIG_THREAD_PRI(8); if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE) && - (katom->atom_flags & KBASE_KATOM_FLAG_SECURE)) + (katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED)) cfg |= JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK; if (kbase_hw_has_feature(kbdev, @@ -465,7 +476,7 @@ static bool kbasep_soft_stop_allowed(struct kbase_device *kbdev, { bool soft_stops_allowed = true; - if (kbase_jd_katom_is_secure(katom)) { + if (kbase_jd_katom_is_protected(katom)) { soft_stops_allowed = false; } else if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408)) { if ((katom->core_req & BASE_JD_REQ_T) != 0) @@ -475,7 +486,7 @@ static bool kbasep_soft_stop_allowed(struct kbase_device *kbdev, } static bool kbasep_hard_stop_allowed(struct kbase_device *kbdev, - u16 core_reqs) + base_jd_core_req core_reqs) { bool hard_stops_allowed = true; @@ -489,7 +500,7 @@ static bool kbasep_hard_stop_allowed(struct kbase_device *kbdev, void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, int js, u32 action, - u16 core_reqs, + base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom) { struct kbase_context *kctx = target_katom->kctx; @@ -734,7 +745,6 @@ void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx, struct kbase_jd_atom *target_katom) { struct kbase_device *kbdev; - struct kbasep_js_device_data *js_devdata; int js = target_katom->slot_nr; int priority = target_katom->sched_priority; int i; @@ -742,7 +752,6 @@ void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx, KBASE_DEBUG_ASSERT(kctx != NULL); kbdev = kctx->kbdev; KBASE_DEBUG_ASSERT(kbdev != NULL); - js_devdata = &kbdev->js_data; lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); @@ -1074,7 +1083,7 @@ void kbase_job_slot_hardstop(struct kbase_context *kctx, int js, * state when the soft/hard-stop action is complete */ void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action, - u16 core_reqs, struct kbase_jd_atom *target_katom) + base_jd_core_req core_reqs, struct 
kbase_jd_atom *target_katom) { u32 hw_action = action & JS_COMMAND_MASK; @@ -1151,26 +1160,6 @@ static void kbase_debug_dump_registers(struct kbase_device *kbdev) kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG), NULL)); } -static void kbasep_save_hwcnt_setup(struct kbase_device *kbdev, - struct kbase_context *kctx, - struct kbase_uk_hwcnt_setup *hwcnt_setup) -{ - hwcnt_setup->dump_buffer = - kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), kctx) & - 0xffffffff; - hwcnt_setup->dump_buffer |= (u64) - kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI), kctx) << - 32; - hwcnt_setup->jm_bm = - kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN), kctx); - hwcnt_setup->shader_bm = - kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN), kctx); - hwcnt_setup->tiler_bm = - kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), kctx); - hwcnt_setup->mmu_l2_bm = - kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN), kctx); -} - static void kbasep_reset_timeout_worker(struct work_struct *data) { unsigned long flags, mmu_flags; @@ -1178,10 +1167,8 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) int i; ktime_t end_timestamp = ktime_get(); struct kbasep_js_device_data *js_devdata; - struct kbase_uk_hwcnt_setup hwcnt_setup = { {0} }; - enum kbase_instr_state bckp_state; bool try_schedule = false; - bool restore_hwc = false; + bool silent = false; KBASE_DEBUG_ASSERT(data); @@ -1191,8 +1178,16 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) KBASE_DEBUG_ASSERT(kbdev); js_devdata = &kbdev->js_data; + if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) == + KBASE_RESET_GPU_SILENT) + silent = true; + KBASE_TRACE_ADD(kbdev, JM_BEGIN_RESET_WORKER, NULL, NULL, 0u, 0); + /* Suspend vinstr. + * This call will block until vinstr is suspended. */ + kbase_vinstr_suspend(kbdev->vinstr_ctx); + /* Make sure the timer has completed - this cannot be done from * interrupt context, so this cannot be done within * kbasep_try_reset_gpu_early. 
*/ @@ -1242,39 +1237,14 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) * assume that anything that is still left on the GPU is stuck there and * we'll kill it when we reset the GPU */ - dev_err(kbdev->dev, "Resetting GPU (allowing up to %d ms)", + if (!silent) + dev_err(kbdev->dev, "Resetting GPU (allowing up to %d ms)", RESET_TIMEOUT); - spin_lock_irqsave(&kbdev->hwcnt.lock, flags); - - if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_RESETTING) { - /* the same interrupt handler preempted itself */ - /* GPU is being reset */ - spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); - wait_event(kbdev->hwcnt.backend.wait, - kbdev->hwcnt.backend.triggered != 0); - spin_lock_irqsave(&kbdev->hwcnt.lock, flags); - } - /* Save the HW counters setup */ - if (kbdev->hwcnt.kctx != NULL) { - struct kbase_context *kctx = kbdev->hwcnt.kctx; - - if (kctx->jctx.sched_info.ctx.is_scheduled) { - kbasep_save_hwcnt_setup(kbdev, kctx, &hwcnt_setup); - - restore_hwc = true; - } - } - /* Output the state of some interesting registers to help in the * debugging of GPU resets */ - kbase_debug_dump_registers(kbdev); - - bckp_state = kbdev->hwcnt.backend.state; - kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_RESETTING; - kbdev->hwcnt.backend.triggered = 0; - - spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + if (!silent) + kbase_debug_dump_registers(kbdev); /* Reset the GPU */ kbase_pm_init_hw(kbdev, 0); @@ -1314,112 +1284,14 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) kbase_disjoint_state_down(kbdev); wake_up(&kbdev->hwaccess.backend.reset_wait); - dev_err(kbdev->dev, "Reset complete"); + if (!silent) + dev_err(kbdev->dev, "Reset complete"); if (js_devdata->nr_contexts_pullable > 0 && !kbdev->poweroff_pending) try_schedule = true; mutex_unlock(&js_devdata->runpool_mutex); - spin_lock_irqsave(&kbdev->hwcnt.lock, flags); - /* Restore the HW counters setup */ - if (restore_hwc) { - struct kbase_context *kctx = kbdev->hwcnt.kctx; - u32 prfcnt_config = kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT; - -#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY - u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; - u32 product_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) - >> GPU_ID_VERSION_PRODUCT_ID_SHIFT; - int arch_v6 = GPU_ID_IS_NEW_FORMAT(product_id); - - if (arch_v6) - prfcnt_config |= 1 << PRFCNT_CONFIG_SETSELECT_SHIFT; -#endif - - kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), - prfcnt_config | PRFCNT_CONFIG_MODE_OFF, kctx); - - kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), - hwcnt_setup.dump_buffer & 0xFFFFFFFF, kctx); - kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI), - hwcnt_setup.dump_buffer >> 32, kctx); - kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN), - hwcnt_setup.jm_bm, kctx); - kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN), - hwcnt_setup.shader_bm, kctx); - kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN), - hwcnt_setup.mmu_l2_bm, kctx); - - /* Due to PRLAM-8186 we need to disable the Tiler before we - * enable the HW counter dump. 
*/ - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186)) - kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), - 0, kctx); - else - kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), - hwcnt_setup.tiler_bm, kctx); - - kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), - prfcnt_config | PRFCNT_CONFIG_MODE_MANUAL, - kctx); - - /* If HW has PRLAM-8186 we can now re-enable the tiler HW - * counters dump */ - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186)) - kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), - hwcnt_setup.tiler_bm, kctx); - } - kbdev->hwcnt.backend.state = bckp_state; - switch (kbdev->hwcnt.backend.state) { - /* Cases for waking kbasep_cache_clean_worker worker */ - case KBASE_INSTR_STATE_CLEANED: - /* Cache-clean IRQ occurred, but we reset: - * Wakeup incase the waiter saw RESETTING */ - case KBASE_INSTR_STATE_REQUEST_CLEAN: - /* After a clean was requested, but before the regs were - * written: - * Wakeup incase the waiter saw RESETTING */ - wake_up(&kbdev->hwcnt.backend.cache_clean_wait); - break; - case KBASE_INSTR_STATE_CLEANING: - /* Either: - * 1) We've not got the Cache-clean IRQ yet: it was lost, or: - * 2) We got it whilst resetting: it was voluntarily lost - * - * So, move to the next state and wakeup: */ - kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANED; - wake_up(&kbdev->hwcnt.backend.cache_clean_wait); - break; - - /* Cases for waking anyone else */ - case KBASE_INSTR_STATE_DUMPING: - /* If dumping, abort the dump, because we may've lost the IRQ */ - kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; - kbdev->hwcnt.backend.triggered = 1; - wake_up(&kbdev->hwcnt.backend.wait); - break; - case KBASE_INSTR_STATE_DISABLED: - case KBASE_INSTR_STATE_IDLE: - case KBASE_INSTR_STATE_FAULT: - /* Every other reason: wakeup in that state */ - kbdev->hwcnt.backend.triggered = 1; - wake_up(&kbdev->hwcnt.backend.wait); - break; - - /* Unhandled cases */ - case KBASE_INSTR_STATE_RESETTING: - default: - BUG(); - break; - } - spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); - - /* Resume the vinstr core */ - kbase_vinstr_hwc_resume(kbdev->vinstr_ctx); - - /* Note: counter dumping may now resume */ - mutex_lock(&kbdev->pm.lock); /* Find out what cores are required now */ @@ -1439,6 +1311,10 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) } kbase_pm_context_idle(kbdev); + + /* Release vinstr */ + kbase_vinstr_resume(kbdev->vinstr_ctx); + KBASE_TRACE_ADD(kbdev, JM_END_RESET_WORKER, NULL, NULL, 0u, 0); } @@ -1520,7 +1396,7 @@ static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev) * * Return: * The function returns a boolean which should be interpreted as follows: - * true - Prepared for reset, kbase_reset_gpu should be called. + * true - Prepared for reset, kbase_reset_gpu_locked should be called. * false - Another thread is performing a reset, kbase_reset_gpu should * not be called. 
*/ @@ -1614,4 +1490,29 @@ void kbase_reset_gpu_locked(struct kbase_device *kbdev) /* Try resetting early */ kbasep_try_reset_gpu_early_locked(kbdev); } + +void kbase_reset_gpu_silent(struct kbase_device *kbdev) +{ + if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu, + KBASE_RESET_GPU_NOT_PENDING, + KBASE_RESET_GPU_SILENT) != + KBASE_RESET_GPU_NOT_PENDING) { + /* Some other thread is already resetting the GPU */ + return; + } + + kbase_disjoint_state_up(kbdev); + + queue_work(kbdev->hwaccess.backend.reset_workq, + &kbdev->hwaccess.backend.reset_work); +} + +bool kbase_reset_gpu_active(struct kbase_device *kbdev) +{ + if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) == + KBASE_RESET_GPU_NOT_PENDING) + return false; + + return true; +} #endif /* KBASE_GPU_RESET_EN */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h index eb068d40283b..8f1e5615ea43 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -96,7 +96,7 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, int js, u32 action, - u16 core_reqs, + base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom); /** diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c index af6cddcdb043..da7c4df7d277 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c @@ -24,11 +24,11 @@ #include #include #include +#include #include #include #include #include -#include #include /* Return whether the specified ringbuffer is empty. 
HW access lock must be @@ -592,7 +592,7 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev, case KBASE_ATOM_GPU_RB_READY: /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ - case KBASE_ATOM_GPU_RB_WAITING_SECURE_MODE: + case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_ENTRY: /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ case KBASE_ATOM_GPU_RB_WAITING_AFFINITY: @@ -603,6 +603,9 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev, case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE: break; + case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_EXIT: + /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ + case KBASE_ATOM_GPU_RB_WAITING_BLOCKED: /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ @@ -654,53 +657,145 @@ static inline bool kbase_gpu_rmu_workaround(struct kbase_device *kbdev, int js) return true; } -static bool kbase_gpu_in_secure_mode(struct kbase_device *kbdev) +static inline bool kbase_gpu_in_protected_mode(struct kbase_device *kbdev) { - return kbdev->secure_mode; + return kbdev->protected_mode; } -static int kbase_gpu_secure_mode_enable(struct kbase_device *kbdev) +static int kbase_gpu_protected_mode_enter(struct kbase_device *kbdev) { int err = -EINVAL; lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); - WARN_ONCE(!kbdev->secure_ops, - "Cannot enable secure mode: secure callbacks not specified.\n"); + WARN_ONCE(!kbdev->protected_ops, + "Cannot enter protected mode: protected callbacks not specified.\n"); - if (kbdev->secure_ops) { - /* Switch GPU to secure mode */ - err = kbdev->secure_ops->secure_mode_enable(kbdev); + if (kbdev->protected_ops) { + /* Switch GPU to protected mode */ + err = kbdev->protected_ops->protected_mode_enter(kbdev); if (err) - dev_warn(kbdev->dev, "Failed to enable secure mode: %d\n", err); + dev_warn(kbdev->dev, "Failed to enable protected mode: %d\n", + err); else - kbdev->secure_mode = true; + kbdev->protected_mode = true; } return err; } -static int kbase_gpu_secure_mode_disable(struct kbase_device *kbdev) +static int kbase_gpu_protected_mode_reset(struct kbase_device *kbdev) { - int err = -EINVAL; - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); - WARN_ONCE(!kbdev->secure_ops, - "Cannot disable secure mode: secure callbacks not specified.\n"); + WARN_ONCE(!kbdev->protected_ops, + "Cannot exit protected mode: protected callbacks not specified.\n"); - if (kbdev->secure_ops) { - /* Switch GPU to non-secure mode */ - err = kbdev->secure_ops->secure_mode_disable(kbdev); + if (!kbdev->protected_ops) + return -EINVAL; - if (err) - dev_warn(kbdev->dev, "Failed to disable secure mode: %d\n", err); - else - kbdev->secure_mode = false; + kbdev->protected_mode_transition = true; + kbase_reset_gpu_silent(kbdev); + + return 0; +} + +static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, + struct kbase_jd_atom **katom, int idx, int js) +{ + int err = 0; + + switch (katom[idx]->exit_protected_state) { + case KBASE_ATOM_EXIT_PROTECTED_CHECK: + /* + * If the atom ahead of this one hasn't got to being + * submitted yet then bail. + */ + if (idx == 1 && + (katom[0]->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED && + katom[0]->gpu_rb_state != KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB)) + return -EAGAIN; + + /* If we're not exiting protected mode then we're done here. */ + if (!(kbase_gpu_in_protected_mode(kbdev) && + !kbase_jd_katom_is_protected(katom[idx]))) + return 0; + + /* + * If there is a transition in progress, or work still + * on the GPU try again later. 
+ */ + if (kbdev->protected_mode_transition || + kbase_gpu_atoms_submitted_any(kbdev)) + return -EAGAIN; + + /* + * Exiting protected mode requires a reset, but first the L2 + * needs to be powered down to ensure it's not active when the + * reset is issued. + */ + katom[idx]->exit_protected_state = + KBASE_ATOM_EXIT_PROTECTED_IDLE_L2; + + /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ + + case KBASE_ATOM_EXIT_PROTECTED_IDLE_L2: + if (kbase_pm_get_active_cores(kbdev, KBASE_PM_CORE_L2) || + kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_L2)) { + /* + * The L2 is still powered, wait for all the users to + * finish with it before doing the actual reset. + */ + return -EAGAIN; + } + katom[idx]->exit_protected_state = + KBASE_ATOM_EXIT_PROTECTED_RESET; + + /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ + + case KBASE_ATOM_EXIT_PROTECTED_RESET: + /* Issue the reset to the GPU */ + err = kbase_gpu_protected_mode_reset(kbdev); + if (err) { + /* Failed to exit protected mode, fail atom */ + katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID; + kbase_gpu_mark_atom_for_return(kbdev, katom[idx]); + /* Only return if head atom or previous atom + * already removed - as atoms must be returned + * in order */ + if (idx == 0 || katom[0]->gpu_rb_state == + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { + kbase_gpu_dequeue_atom(kbdev, js, NULL); + kbase_jm_return_atom_to_js(kbdev, katom[idx]); + } + + kbase_vinstr_resume(kbdev->vinstr_ctx); + + return -EINVAL; + } + + katom[idx]->exit_protected_state = + KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT; + + /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ + + case KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT: + if (kbase_reset_gpu_active(kbdev)) + return -EAGAIN; + + /* protected mode sanity checks */ + KBASE_DEBUG_ASSERT_MSG( + kbase_jd_katom_is_protected(katom[idx]) == kbase_gpu_in_protected_mode(kbdev), + "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)", + kbase_jd_katom_is_protected(katom[idx]), kbase_gpu_in_protected_mode(kbdev)); + KBASE_DEBUG_ASSERT_MSG( + (kbase_jd_katom_is_protected(katom[idx]) && js == 0) || + !kbase_jd_katom_is_protected(katom[idx]), + "Protected atom on JS%d not supported", js); } - return err; + return 0; } void kbase_gpu_slot_update(struct kbase_device *kbdev) @@ -719,6 +814,7 @@ void kbase_gpu_slot_update(struct kbase_device *kbdev) for (idx = 0; idx < SLOT_RB_SIZE; idx++) { bool cores_ready; + int ret; if (!katom[idx]) continue; @@ -735,11 +831,29 @@ void kbase_gpu_slot_update(struct kbase_device *kbdev) break; katom[idx]->gpu_rb_state = - KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE; + KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_EXIT; + + /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ + + case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_EXIT: + /* + * Exiting protected mode must be done before + * the references on the cores are taken as + * a power down the L2 is required which + * can't happen after the references for this + * atom are taken. 
+ */ + ret = kbase_jm_exit_protected_mode(kbdev, + katom, idx, js); + if (ret) + break; + + katom[idx]->gpu_rb_state = + KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE; /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ - case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE: + case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE: if (katom[idx]->will_fail_event_code) { kbase_gpu_mark_atom_for_return(kbdev, katom[idx]); @@ -785,11 +899,12 @@ void kbase_gpu_slot_update(struct kbase_device *kbdev) break; katom[idx]->gpu_rb_state = - KBASE_ATOM_GPU_RB_WAITING_SECURE_MODE; + KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_ENTRY; /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ - case KBASE_ATOM_GPU_RB_WAITING_SECURE_MODE: + case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_ENTRY: + /* Only submit if head atom or previous atom * already submitted */ if (idx == 1 && @@ -797,7 +912,15 @@ void kbase_gpu_slot_update(struct kbase_device *kbdev) katom[0]->gpu_rb_state != KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB)) break; - if (kbase_gpu_in_secure_mode(kbdev) != kbase_jd_katom_is_secure(katom[idx])) { + /* + * If the GPU is transitioning protected mode + * then bail now and we'll be called when the + * new state has settled. + */ + if (kbdev->protected_mode_transition) + break; + + if (!kbase_gpu_in_protected_mode(kbdev) && kbase_jd_katom_is_protected(katom[idx])) { int err = 0; /* Not in correct mode, take action */ @@ -811,16 +934,26 @@ void kbase_gpu_slot_update(struct kbase_device *kbdev) */ break; } - - /* No jobs running, so we can switch GPU mode right now */ - if (kbase_jd_katom_is_secure(katom[idx])) { - err = kbase_gpu_secure_mode_enable(kbdev); - } else { - err = kbase_gpu_secure_mode_disable(kbdev); + if (kbase_vinstr_try_suspend(kbdev->vinstr_ctx) < 0) { + /* + * We can't switch now because + * the vinstr core state switch + * is not done yet. + */ + break; } + /* Once reaching this point GPU must be + * switched to protected mode or vinstr + * re-enabled. */ + /* No jobs running, so we can switch GPU mode right now */ + err = kbase_gpu_protected_mode_enter(kbdev); if (err) { - /* Failed to switch secure mode, fail atom */ + /* + * Failed to switch into protected mode, resume + * vinstr core and fail atom. 
+ */ + kbase_vinstr_resume(kbdev->vinstr_ctx); katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID; kbase_gpu_mark_atom_for_return(kbdev, katom[idx]); /* Only return if head atom or previous atom @@ -835,17 +968,18 @@ void kbase_gpu_slot_update(struct kbase_device *kbdev) } } - /* Secure mode sanity checks */ + /* Protected mode sanity checks */ KBASE_DEBUG_ASSERT_MSG( - kbase_jd_katom_is_secure(katom[idx]) == kbase_gpu_in_secure_mode(kbdev), - "Secure mode of atom (%d) doesn't match secure mode of GPU (%d)", - kbase_jd_katom_is_secure(katom[idx]), kbase_gpu_in_secure_mode(kbdev)); + kbase_jd_katom_is_protected(katom[idx]) == kbase_gpu_in_protected_mode(kbdev), + "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)", + kbase_jd_katom_is_protected(katom[idx]), kbase_gpu_in_protected_mode(kbdev)); katom[idx]->gpu_rb_state = KBASE_ATOM_GPU_RB_READY; /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ case KBASE_ATOM_GPU_RB_READY: + /* Only submit if head atom or previous atom * already submitted */ if (idx == 1 && @@ -966,8 +1100,16 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, } katom = kbase_gpu_dequeue_atom(kbdev, js, end_timestamp); - kbase_timeline_job_slot_done(kbdev, katom->kctx, katom, js, 0); + kbase_tlstream_tl_nret_atom_lpu( + katom, + &kbdev->gpu_props.props.raw_props.js_features[ + katom->slot_nr]); + kbase_tlstream_tl_nret_atom_as(katom, &kbdev->as[kctx->as_nr]); + kbase_tlstream_tl_nret_ctx_lpu( + kctx, + &kbdev->gpu_props.props.raw_props.js_features[ + katom->slot_nr]); if (completion_code == BASE_JD_EVENT_STOPPED) { struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js, @@ -1120,13 +1262,34 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp) for (idx = 0; idx < 2; idx++) { struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0); + bool keep_in_jm_rb = false; - if (katom) { - kbase_gpu_release_atom(kbdev, katom, NULL); - kbase_gpu_dequeue_atom(kbdev, js, NULL); - katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; - kbase_jm_complete(kbdev, katom, end_timestamp); + if (!katom) + continue; + + if (katom->gpu_rb_state < KBASE_ATOM_GPU_RB_SUBMITTED) + keep_in_jm_rb = true; + + kbase_gpu_release_atom(kbdev, katom, NULL); + + /* + * If the atom wasn't on HW when the reset was issued + * then leave it in the RB and next time we're kicked + * it will be processed again from the starting state. + */ + if (keep_in_jm_rb) { + katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED; + katom->exit_protected_state = KBASE_ATOM_EXIT_PROTECTED_CHECK; + continue; } + + /* + * The atom was on the HW when the reset was issued + * all we can do is fail the atom. 
+ */ + kbase_gpu_dequeue_atom(kbdev, js, NULL); + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + kbase_jm_complete(kbdev, katom, end_timestamp); } } } diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c index 5459281aa968..d665420ab380 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c @@ -23,6 +23,7 @@ #include #include "mali_kbase_js_affinity.h" +#include "mali_kbase_hw.h" #include @@ -114,9 +115,14 @@ bool kbase_js_choose_affinity(u64 * const affinity, if ((core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) == BASE_JD_REQ_T) { spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); - /* Tiler only job, bit 0 needed to enable tiler but no shader - * cores required */ - *affinity = 1; + /* If the hardware supports XAFFINITY then we'll only enable + * the tiler (which is the default so this is a no-op), + * otherwise enable shader core 0. */ + if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY)) + *affinity = 1; + else + *affinity = 0; + return true; } @@ -172,9 +178,12 @@ bool kbase_js_choose_affinity(u64 * const affinity, if (*affinity == 0) return false; - /* Enable core 0 if tiler required */ - if (core_req & BASE_JD_REQ_T) - *affinity = *affinity | 1; + /* Enable core 0 if tiler required for hardware without XAFFINITY + * support (notes above) */ + if (core_req & BASE_JD_REQ_T) { + if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY)) + *affinity = *affinity | 1; + } return true; } diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h index 3026e6a58303..fbffa3b40962 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -24,14 +24,6 @@ #ifndef _KBASE_JS_AFFINITY_H_ #define _KBASE_JS_AFFINITY_H_ -#ifdef CONFIG_MALI_DEBUG_SHADER_SPLIT_FS -/* Import the external affinity mask variables */ -extern u64 mali_js0_affinity_mask; -extern u64 mali_js1_affinity_mask; -extern u64 mali_js2_affinity_mask; -#endif /* CONFIG_MALI_DEBUG_SHADER_SPLIT_FS */ - - /** * kbase_js_can_run_job_on_slot_no_lock - Decide whether it is possible to * submit a job to a particular job slot in the current status diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c index 1e9a7e4c466d..a23deb4ca20c 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -138,6 +138,17 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer) js_devdata->gpu_reset_ticks_ss; } + /* If timeouts have been changed then ensure + * that atom tick count is not greater than the + * new soft_stop timeout. 
This ensures that + * atoms do not miss any of the timeouts due to + * races between this worker and the thread + * changing the timeouts. */ + if (backend->timeouts_updated && + ticks > soft_stop_ticks) + ticks = atom->sched_info.cfs.ticks = + soft_stop_ticks; + /* Job is Soft-Stoppable */ if (ticks == soft_stop_ticks) { int disjoint_threshold = @@ -257,6 +268,8 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer) HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns), HRTIMER_MODE_REL); + backend->timeouts_updated = false; + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); return HRTIMER_NORESTART; @@ -335,3 +348,10 @@ void kbase_backend_timer_resume(struct kbase_device *kbdev) kbase_backend_ctx_count_changed(kbdev); } +void kbase_backend_timeouts_changed(struct kbase_device *kbdev) +{ + struct kbase_backend_data *backend = &kbdev->hwaccess.backend; + + backend->timeouts_updated = true; +} + diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c index c6c7b89712da..4a3572d971a6 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c @@ -21,8 +21,8 @@ #include #include #include -#include #include +#include static inline u64 lock_region(struct kbase_device *kbdev, u64 pfn, u32 num_pages) @@ -152,6 +152,9 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) AS_FAULTADDRESS_LO), kctx); + /* report the fault to debugfs */ + kbase_as_fault_debugfs_new(kbdev, as_no); + /* record the fault status */ as->fault_status = kbase_reg_read(kbdev, MMU_AS_REG(as_no, diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c index 5805efea1125..711e44c7f80a 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c @@ -168,6 +168,7 @@ bool kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend) /* Force all cores off */ kbdev->pm.backend.desired_shader_state = 0; + kbdev->pm.backend.desired_tiler_state = 0; /* Force all cores to be unavailable, in the situation where * transitions are in progress for some cores but not others, diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c index 487391168e25..f891fa225a89 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -35,7 +35,8 @@ static u64 coarse_demand_get_core_mask(struct kbase_device *kbdev) static bool coarse_demand_get_core_active(struct kbase_device *kbdev) { if (0 == kbdev->pm.active_count && !(kbdev->shader_needed_bitmap | - kbdev->shader_inuse_bitmap)) + kbdev->shader_inuse_bitmap) && !kbdev->tiler_needed_cnt + && !kbdev->tiler_inuse_cnt) return false; return true; diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h index 6eb1b1c2bf0d..e8f96fe6c514 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h @@ -192,12 +192,14 @@ union kbase_pm_ca_policy_data { * @gpu_poweroff_pending: number of poweroff timer ticks until the GPU is * powered off * @shader_poweroff_pending_time: number of poweroff timer ticks until shaders - * are powered off + * and/or timers are powered off * @gpu_poweroff_timer: Timer for powering off GPU * @gpu_poweroff_wq: Workqueue to power off GPU on when timer fires * @gpu_poweroff_work: Workitem used on @gpu_poweroff_wq * @shader_poweroff_pending: Bit mask of shaders to be powered off on next * timer callback + * @tiler_poweroff_pending: Bit mask of tilers to be powered off on next timer + * callback * @poweroff_timer_needed: true if the poweroff timer is currently required, * false otherwise * @poweroff_timer_running: true if the poweroff timer is currently running, @@ -274,6 +276,7 @@ struct kbase_pm_backend_data { struct work_struct gpu_poweroff_work; u64 shader_poweroff_pending; + u64 tiler_poweroff_pending; bool poweroff_timer_needed; bool poweroff_timer_running; diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c index 9dac2303bd00..81322fd0dd17 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -37,7 +37,8 @@ static u64 demand_get_core_mask(struct kbase_device *kbdev) static bool demand_get_core_active(struct kbase_device *kbdev) { if (0 == kbdev->pm.active_count && !(kbdev->shader_needed_bitmap | - kbdev->shader_inuse_bitmap)) + kbdev->shader_inuse_bitmap) && !kbdev->tiler_needed_cnt + && !kbdev->tiler_inuse_cnt) return false; return true; diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c index 2c27ad93f2be..046ebcb7b508 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c @@ -29,7 +29,6 @@ #endif #include #include -#include #include #include #include @@ -510,10 +509,12 @@ KBASE_EXPORT_TEST_API(kbase_pm_transition_core_type); * @present: The bit mask of present caches * @cores_powered: A bit mask of cores (or L2 caches) that are desired to * be powered + * @tilers_powered: The bit mask of tilers that are desired to be powered * * Return: A bit mask of the caches that should be turned on */ -static u64 get_desired_cache_status(u64 present, u64 cores_powered) +static u64 get_desired_cache_status(u64 present, u64 cores_powered, + u64 tilers_powered) { u64 desired = 0; @@ -536,6 +537,10 @@ static u64 get_desired_cache_status(u64 present, u64 cores_powered) present &= ~bit_mask; } + /* Power up the required L2(s) for the tiler */ + if (tilers_powered) + desired |= 1; + return desired; } @@ -548,6 +553,7 @@ MOCKABLE(kbase_pm_check_transitions_nolock) (struct kbase_device *kbdev) bool in_desired_state = true; u64 desired_l2_state; u64 cores_powered; + u64 tilers_powered; u64 tiler_available_bitmap; u64 shader_available_bitmap; u64 shader_ready_bitmap; @@ -581,6 +587,10 @@ MOCKABLE(kbase_pm_check_transitions_nolock) (struct kbase_device *kbdev) cores_powered |= kbdev->pm.backend.desired_shader_state; + /* Work out which tilers want to be powered */ + tilers_powered = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_TILER); + tilers_powered |= kbdev->pm.backend.desired_tiler_state; + /* If there are l2 cache users registered, keep all l2s powered even if * all other cores are off. 
*/ if (kbdev->l2_users_count > 0) @@ -588,17 +598,11 @@ MOCKABLE(kbase_pm_check_transitions_nolock) (struct kbase_device *kbdev) desired_l2_state = get_desired_cache_status( kbdev->gpu_props.props.raw_props.l2_present, - cores_powered); + cores_powered, tilers_powered); /* If any l2 cache is on, then enable l2 #0, for use by job manager */ - if (0 != desired_l2_state) { + if (0 != desired_l2_state) desired_l2_state |= 1; - /* Also enable tiler if l2 cache is powered */ - kbdev->pm.backend.desired_tiler_state = - kbdev->gpu_props.props.raw_props.tiler_present; - } else { - kbdev->pm.backend.desired_tiler_state = 0; - } prev_l2_available_bitmap = kbdev->l2_available_bitmap; in_desired_state &= kbase_pm_transition_core_type(kbdev, @@ -1108,9 +1112,9 @@ static void kbase_pm_hw_issues_detect(struct kbase_device *kbdev) kbdev->hw_quirks_sc |= SC_ENABLE_TEXGRD_FLAGS; if (!kbase_hw_has_issue(kbdev, GPUCORE_1619)) { - if (prod_id < 0x760 || prod_id == 0x6956) /* T60x, T62x, T72x */ + if (prod_id < 0x750 || prod_id == 0x6956) /* T60x, T62x, T72x */ kbdev->hw_quirks_sc |= SC_LS_ATTR_CHECK_DISABLE; - else if (prod_id >= 0x760 && prod_id <= 0x880) /* T76x, T8xx */ + else if (prod_id >= 0x750 && prod_id <= 0x880) /* T76x, T8xx */ kbdev->hw_quirks_sc |= SC_LS_ALLOW_ATTR_TYPES; } @@ -1233,48 +1237,10 @@ void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev) } } -int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) +static int kbase_pm_reset_do_normal(struct kbase_device *kbdev) { - unsigned long irq_flags; struct kbasep_reset_timeout_data rtdata; - KBASE_DEBUG_ASSERT(NULL != kbdev); - lockdep_assert_held(&kbdev->pm.lock); - - /* Ensure the clock is on before attempting to access the hardware */ - if (!kbdev->pm.backend.gpu_powered) { - if (kbdev->pm.backend.callback_power_on) - kbdev->pm.backend.callback_power_on(kbdev); - - spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, - irq_flags); - kbdev->pm.backend.gpu_powered = true; - spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, - irq_flags); - } - - /* Ensure interrupts are off to begin with, this also clears any - * outstanding interrupts */ - kbase_pm_disable_interrupts(kbdev); - /* Ensure cache snoops are disabled before reset. 
*/ - kbase_pm_cache_snoop_disable(kbdev); - /* Prepare for the soft-reset */ - kbdev->pm.backend.reset_done = false; - - /* The cores should be made unavailable due to the reset */ - spin_lock_irqsave(&kbdev->pm.power_change_lock, irq_flags); - if (kbdev->shader_available_bitmap != 0u) - KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL, - NULL, 0u, (u32)0u); - if (kbdev->tiler_available_bitmap != 0u) - KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, - NULL, NULL, 0u, (u32)0u); - kbdev->shader_available_bitmap = 0u; - kbdev->tiler_available_bitmap = 0u; - kbdev->l2_available_bitmap = 0u; - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, irq_flags); - - /* Soft reset the GPU */ KBASE_TRACE_ADD(kbdev, CORE_GPU_SOFT_RESET, NULL, NULL, 0u, 0); kbase_tlstream_jd_gpu_soft_reset(kbdev); @@ -1304,7 +1270,7 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) /* GPU has been reset */ hrtimer_cancel(&rtdata.timer); destroy_hrtimer_on_stack(&rtdata.timer); - goto out; + return 0; } /* No interrupt has been received - check if the RAWSTAT register says @@ -1340,7 +1306,7 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) /* GPU has been reset */ hrtimer_cancel(&rtdata.timer); destroy_hrtimer_on_stack(&rtdata.timer); - goto out; + return 0; } destroy_hrtimer_on_stack(&rtdata.timer); @@ -1348,10 +1314,75 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) dev_err(kbdev->dev, "Failed to hard-reset the GPU (timed out after %d ms)\n", RESET_TIMEOUT); - /* The GPU still hasn't reset, give up */ return -EINVAL; +} + +static int kbase_pm_reset_do_protected(struct kbase_device *kbdev) +{ + KBASE_TRACE_ADD(kbdev, CORE_GPU_SOFT_RESET, NULL, NULL, 0u, 0); + kbase_tlstream_jd_gpu_soft_reset(kbdev); -out: + return kbdev->protected_ops->protected_mode_reset(kbdev); +} + +int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) +{ + unsigned long irq_flags; + int err; + bool resume_vinstr = false; + + KBASE_DEBUG_ASSERT(NULL != kbdev); + lockdep_assert_held(&kbdev->pm.lock); + + /* Ensure the clock is on before attempting to access the hardware */ + if (!kbdev->pm.backend.gpu_powered) { + if (kbdev->pm.backend.callback_power_on) + kbdev->pm.backend.callback_power_on(kbdev); + + spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, + irq_flags); + kbdev->pm.backend.gpu_powered = true; + spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, + irq_flags); + } + + /* Ensure interrupts are off to begin with, this also clears any + * outstanding interrupts */ + kbase_pm_disable_interrupts(kbdev); + /* Ensure cache snoops are disabled before reset. 
*/ + kbase_pm_cache_snoop_disable(kbdev); + /* Prepare for the soft-reset */ + kbdev->pm.backend.reset_done = false; + + /* The cores should be made unavailable due to the reset */ + spin_lock_irqsave(&kbdev->pm.power_change_lock, irq_flags); + if (kbdev->shader_available_bitmap != 0u) + KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL, + NULL, 0u, (u32)0u); + if (kbdev->tiler_available_bitmap != 0u) + KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, + NULL, NULL, 0u, (u32)0u); + kbdev->shader_available_bitmap = 0u; + kbdev->tiler_available_bitmap = 0u; + kbdev->l2_available_bitmap = 0u; + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, irq_flags); + + /* Soft reset the GPU */ + if (kbdev->protected_mode_support && + kbdev->protected_ops->protected_mode_reset) + err = kbase_pm_reset_do_protected(kbdev); + else + err = kbase_pm_reset_do_normal(kbdev); + + spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, irq_flags); + if (kbdev->protected_mode) + resume_vinstr = true; + kbdev->protected_mode_transition = false; + kbdev->protected_mode = false; + spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, irq_flags); + + if (err) + goto exit; if (flags & PM_HW_ISSUES_DETECT) kbase_pm_hw_issues_detect(kbdev); @@ -1360,12 +1391,12 @@ out: kbase_cache_set_coherency_mode(kbdev, kbdev->system_coherency); + /* Sanity check protected mode was left after reset */ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) { u32 gpu_status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS), NULL); - kbdev->secure_mode = (gpu_status & - GPU_STATUS_PROTECTED_MODE_ACTIVE) != 0; + WARN_ON(gpu_status & GPU_STATUS_PROTECTED_MODE_ACTIVE); } /* If cycle counter was in use re-enable it, enable_irqs will only be @@ -1394,7 +1425,12 @@ out: if (flags & PM_ENABLE_IRQS) kbase_pm_enable_interrupts(kbdev); - return 0; +exit: + /* If GPU is leaving protected mode resume vinstr operation. 
*/ + if (kbdev->vinstr_ctx && resume_vinstr) + kbase_vinstr_resume(kbdev->vinstr_ctx); + + return err; } /** diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c index ad0e98be4571..4d006028089a 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c @@ -21,7 +21,6 @@ #include #include -#include #include #include #include @@ -155,16 +154,22 @@ static inline void kbase_timeline_pm_cores_func(struct kbase_device *kbdev, static void kbasep_pm_do_poweroff_cores(struct kbase_device *kbdev) { u64 prev_shader_state = kbdev->pm.backend.desired_shader_state; + u64 prev_tiler_state = kbdev->pm.backend.desired_tiler_state; lockdep_assert_held(&kbdev->pm.power_change_lock); kbdev->pm.backend.desired_shader_state &= ~kbdev->pm.backend.shader_poweroff_pending; + kbdev->pm.backend.desired_tiler_state &= + ~kbdev->pm.backend.tiler_poweroff_pending; kbdev->pm.backend.shader_poweroff_pending = 0; + kbdev->pm.backend.tiler_poweroff_pending = 0; - if (prev_shader_state != kbdev->pm.backend.desired_shader_state - || kbdev->pm.backend.ca_in_transition) { + if (prev_shader_state != kbdev->pm.backend.desired_shader_state || + prev_tiler_state != + kbdev->pm.backend.desired_tiler_state || + kbdev->pm.backend.ca_in_transition) { bool cores_are_available; KBASE_TIMELINE_PM_CHECKTRANS(kbdev, @@ -202,7 +207,8 @@ kbasep_pm_do_gpu_poweroff_callback(struct hrtimer *timer) queue_work(kbdev->pm.backend.gpu_poweroff_wq, &kbdev->pm.backend.gpu_poweroff_work); - if (kbdev->pm.backend.shader_poweroff_pending) { + if (kbdev->pm.backend.shader_poweroff_pending || + kbdev->pm.backend.tiler_poweroff_pending) { kbdev->pm.backend.shader_poweroff_pending_time--; KBASE_DEBUG_ASSERT( @@ -327,6 +333,7 @@ void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev) kbdev->pm.backend.gpu_poweroff_pending = 0; kbdev->pm.backend.shader_poweroff_pending = 0; + kbdev->pm.backend.tiler_poweroff_pending = 0; kbdev->pm.backend.shader_poweroff_pending_time = 0; spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); @@ -381,8 +388,10 @@ void kbase_pm_update_active(struct kbase_device *kbdev) * when there are contexts active */ KBASE_DEBUG_ASSERT(pm->active_count == 0); - if (backend->shader_poweroff_pending) { + if (backend->shader_poweroff_pending || + backend->tiler_poweroff_pending) { backend->shader_poweroff_pending = 0; + backend->tiler_poweroff_pending = 0; backend->shader_poweroff_pending_time = 0; } @@ -441,6 +450,7 @@ void kbase_pm_update_active(struct kbase_device *kbdev) void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev) { u64 desired_bitmap; + u64 desired_tiler_bitmap; bool cores_are_available; bool do_poweroff = false; @@ -453,23 +463,37 @@ void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev) kbdev->pm.backend.pm_current_policy->get_core_mask(kbdev); desired_bitmap &= kbase_pm_ca_get_core_mask(kbdev); - /* Enable core 0 if tiler required, regardless of core availability */ if (kbdev->tiler_needed_cnt > 0 || kbdev->tiler_inuse_cnt > 0) - desired_bitmap |= 1; + desired_tiler_bitmap = 1; + else + desired_tiler_bitmap = 0; + + if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY)) { + /* Unless XAFFINITY is supported, enable core 0 if tiler + * required, regardless of core availability */ + if (kbdev->tiler_needed_cnt > 0 || kbdev->tiler_inuse_cnt > 0) + desired_bitmap |= 1; + } if (kbdev->pm.backend.desired_shader_state != 
desired_bitmap) KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_DESIRED, NULL, NULL, 0u, (u32)desired_bitmap); /* Are any cores being powered on? */ if (~kbdev->pm.backend.desired_shader_state & desired_bitmap || + ~kbdev->pm.backend.desired_tiler_state & desired_tiler_bitmap || kbdev->pm.backend.ca_in_transition) { /* Check if we are powering off any cores before updating shader * state */ - if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap) { + if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap || + kbdev->pm.backend.desired_tiler_state & + ~desired_tiler_bitmap) { /* Start timer to power off cores */ kbdev->pm.backend.shader_poweroff_pending |= (kbdev->pm.backend.desired_shader_state & ~desired_bitmap); + kbdev->pm.backend.tiler_poweroff_pending |= + (kbdev->pm.backend.desired_tiler_state & + ~desired_tiler_bitmap); if (kbdev->pm.poweroff_shader_ticks) kbdev->pm.backend.shader_poweroff_pending_time = @@ -479,21 +503,28 @@ void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev) } kbdev->pm.backend.desired_shader_state = desired_bitmap; + kbdev->pm.backend.desired_tiler_state = desired_tiler_bitmap; /* If any cores are being powered on, transition immediately */ cores_are_available = kbase_pm_check_transitions_nolock(kbdev); - } else if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap) { + } else if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap || + kbdev->pm.backend.desired_tiler_state & + ~desired_tiler_bitmap) { /* Start timer to power off cores */ kbdev->pm.backend.shader_poweroff_pending |= (kbdev->pm.backend.desired_shader_state & ~desired_bitmap); + kbdev->pm.backend.tiler_poweroff_pending |= + (kbdev->pm.backend.desired_tiler_state & + ~desired_tiler_bitmap); if (kbdev->pm.poweroff_shader_ticks) kbdev->pm.backend.shader_poweroff_pending_time = kbdev->pm.poweroff_shader_ticks; else kbasep_pm_do_poweroff_cores(kbdev); } else if (kbdev->pm.active_count == 0 && desired_bitmap != 0 && - kbdev->pm.backend.poweroff_timer_needed) { + desired_tiler_bitmap != 0 && + kbdev->pm.backend.poweroff_timer_needed) { /* If power policy is keeping cores on despite there being no * active contexts then disable poweroff timer as it isn't * required. @@ -504,11 +535,17 @@ void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev) /* Ensure timer does not power off wanted cores and make sure to power * off unwanted cores */ - if (kbdev->pm.backend.shader_poweroff_pending != 0) { + if (kbdev->pm.backend.shader_poweroff_pending || + kbdev->pm.backend.tiler_poweroff_pending) { kbdev->pm.backend.shader_poweroff_pending &= ~(kbdev->pm.backend.desired_shader_state & desired_bitmap); - if (kbdev->pm.backend.shader_poweroff_pending == 0) + kbdev->pm.backend.tiler_poweroff_pending &= + ~(kbdev->pm.backend.desired_tiler_state & + desired_tiler_bitmap); + + if (!kbdev->pm.backend.shader_poweroff_pending && + !kbdev->pm.backend.tiler_poweroff_pending) kbdev->pm.backend.shader_poweroff_pending_time = 0; } diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.c index 9d3eb10bd3c9..d965033905ca 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -36,7 +36,12 @@ static struct thermal_zone_device *gpu_tz; static unsigned long model_static_power(unsigned long voltage) { - int temperature, temp; +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) + unsigned long temperature; +#else + int temperature; +#endif + unsigned long temp; unsigned long temp_squared, temp_cubed, temp_scaling_factor; const unsigned long voltage_cubed = (voltage * voltage * voltage) >> 10; @@ -85,7 +90,11 @@ static unsigned long model_dynamic_power(unsigned long freq, return (dynamic_coefficient * v2 * f_mhz) / 1000000; /* mW */ } +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) +struct devfreq_cooling_ops power_model_simple_ops = { +#else struct devfreq_cooling_power power_model_simple_ops = { +#endif .get_static_power = model_static_power, .get_dynamic_power = model_dynamic_power, }; @@ -150,7 +159,7 @@ int kbase_power_model_simple_init(struct kbase_device *kbdev) dynamic_coefficient = (((dynamic_power * 1000) / voltage_squared) * 1000) / frequency; - if (of_property_read_u32_array(power_model_node, "ts", ts, 4)) { + if (of_property_read_u32_array(power_model_node, "ts", (u32 *)ts, 4)) { dev_err(kbdev->dev, "ts in power_model not available\n"); return -EINVAL; } diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.h index 17eede4d917c..9b5e69a9323b 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.h +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -38,6 +38,10 @@ */ int kbase_power_model_simple_init(struct kbase_device *kbdev); +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) +extern struct devfreq_cooling_ops power_model_simple_ops; +#else extern struct devfreq_cooling_power power_model_simple_ops; +#endif #endif /* _BASE_POWER_MODEL_SIMPLE_H_ */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c index 4bcde85f3ee1..d992989123e8 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -74,9 +74,10 @@ void kbase_wait_write_flush(struct kbase_context *kctx) { u32 base_count = 0; - /* A suspend won't happen here, because we're in a syscall from a - * userspace thread */ - + /* + * The caller must be holding onto the kctx or the call is from + * userspace. 
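The power-model hunk above hinges on two kernel-version gates: before 4.3 the thermal framework reported temperatures through an unsigned long rather than an int, and this driver expects the devfreq cooling callbacks to be collected in struct devfreq_cooling_ops on kernels before 4.4 and in struct devfreq_cooling_power from 4.4 onwards. A minimal standalone sketch of the same compatibility pattern follows; the callback bodies and coefficient values are placeholders, not taken from this patch.

#include <linux/version.h>
#include <linux/devfreq_cooling.h>

/* Illustrative static power callback: a crude V^3 model, placeholder only. */
static unsigned long example_static_power(unsigned long voltage)
{
        return (voltage * voltage * voltage) >> 20;
}

/* Illustrative dynamic power callback: P ~ C * V^2 * f, placeholder only. */
static unsigned long example_dynamic_power(unsigned long freq,
                                           unsigned long voltage)
{
        const unsigned long coefficient = 3600; /* assumed units */
        unsigned long v2 = (voltage * voltage) / 1000;
        unsigned long f_mhz = freq / 1000000;

        return (coefficient * v2 * f_mhz) / 1000000; /* mW */
}

/* Same pair of callbacks, two structure names, chosen at build time. */
#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)
struct devfreq_cooling_ops example_power_ops = {
#else
struct devfreq_cooling_power example_power_ops = {
#endif
        .get_static_power = example_static_power,
        .get_dynamic_power = example_dynamic_power,
};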
+ */ kbase_pm_context_active(kctx->kbdev); kbase_pm_request_gpu_cycle_counter(kctx->kbdev); diff --git a/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h b/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h index a6e08da7035c..f7c0ff674906 100644 --- a/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h +++ b/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h @@ -27,6 +27,7 @@ enum base_hw_feature { BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, BASE_HW_FEATURE_33BIT_VA, + BASE_HW_FEATURE_XAFFINITY, BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, BASE_HW_FEATURE_MRT, BASE_HW_FEATURE_BRNDOUT_CC, @@ -87,6 +88,7 @@ static const enum base_hw_feature base_hw_features_t72x[] = { static const enum base_hw_feature base_hw_features_t76x[] = { BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_XAFFINITY, BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, BASE_HW_FEATURE_BRNDOUT_CC, @@ -104,6 +106,7 @@ static const enum base_hw_feature base_hw_features_t76x[] = { static const enum base_hw_feature base_hw_features_tFxx[] = { BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_XAFFINITY, BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, BASE_HW_FEATURE_BRNDOUT_CC, @@ -124,6 +127,7 @@ static const enum base_hw_feature base_hw_features_t83x[] = { BASE_HW_FEATURE_33BIT_VA, BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_XAFFINITY, BASE_HW_FEATURE_WARPING, BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, @@ -145,6 +149,7 @@ static const enum base_hw_feature base_hw_features_t82x[] = { BASE_HW_FEATURE_33BIT_VA, BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_XAFFINITY, BASE_HW_FEATURE_WARPING, BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, @@ -165,6 +170,7 @@ static const enum base_hw_feature base_hw_features_t82x[] = { static const enum base_hw_feature base_hw_features_tMIx[] = { BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_XAFFINITY, BASE_HW_FEATURE_WARPING, BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, diff --git a/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h b/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h index 55e5e5a4f5d2..149f44cb8674 100644 --- a/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h +++ b/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h @@ -71,6 +71,7 @@ enum base_hw_issue { BASE_HW_ISSUE_10487, BASE_HW_ISSUE_10607, BASE_HW_ISSUE_10632, + BASE_HW_ISSUE_10649, BASE_HW_ISSUE_10676, BASE_HW_ISSUE_10682, BASE_HW_ISSUE_10684, @@ -104,12 +105,14 @@ enum base_hw_issue { BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3966, BASE_HW_ISSUE_T76X_3979, + BASE_HW_ISSUE_T76X_3982, BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_TMIX_7940, BASE_HW_ISSUE_TMIX_8042, BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TMIX_8138, BASE_HW_ISSUE_TMIX_8206, + BASE_HW_ISSUE_TMIX_8343, GPUCORE_1619, BASE_HW_ISSUE_END }; @@ -162,6 +165,7 @@ static const enum base_hw_issue base_hw_issues_t60x_r0p0_15dev0[] = { BASE_HW_ISSUE_10487, BASE_HW_ISSUE_10607, BASE_HW_ISSUE_10632, + BASE_HW_ISSUE_10649, BASE_HW_ISSUE_10676, BASE_HW_ISSUE_10682, BASE_HW_ISSUE_10684, @@ -203,6 +207,7 @@ static const enum base_hw_issue base_hw_issues_t60x_r0p0_eac[] = { BASE_HW_ISSUE_10487, 
BASE_HW_ISSUE_10607, BASE_HW_ISSUE_10632, + BASE_HW_ISSUE_10649, BASE_HW_ISSUE_10676, BASE_HW_ISSUE_10682, BASE_HW_ISSUE_10684, @@ -239,6 +244,7 @@ static const enum base_hw_issue base_hw_issues_t60x_r0p1[] = { BASE_HW_ISSUE_10487, BASE_HW_ISSUE_10607, BASE_HW_ISSUE_10632, + BASE_HW_ISSUE_10649, BASE_HW_ISSUE_10676, BASE_HW_ISSUE_10682, BASE_HW_ISSUE_10684, @@ -267,6 +273,7 @@ static const enum base_hw_issue base_hw_issues_t62x_r0p1[] = { BASE_HW_ISSUE_10487, BASE_HW_ISSUE_10607, BASE_HW_ISSUE_10632, + BASE_HW_ISSUE_10649, BASE_HW_ISSUE_10676, BASE_HW_ISSUE_10682, BASE_HW_ISSUE_10684, @@ -293,6 +300,7 @@ static const enum base_hw_issue base_hw_issues_t62x_r1p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10471, BASE_HW_ISSUE_10472, + BASE_HW_ISSUE_10649, BASE_HW_ISSUE_10684, BASE_HW_ISSUE_10821, BASE_HW_ISSUE_10883, @@ -316,6 +324,7 @@ static const enum base_hw_issue base_hw_issues_t62x_r1p1[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10471, BASE_HW_ISSUE_10472, + BASE_HW_ISSUE_10649, BASE_HW_ISSUE_10684, BASE_HW_ISSUE_10821, BASE_HW_ISSUE_10883, @@ -354,6 +363,7 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p0[] = { BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3966, BASE_HW_ISSUE_T76X_3979, + BASE_HW_ISSUE_T76X_3982, BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -381,6 +391,7 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p1[] = { BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3966, BASE_HW_ISSUE_T76X_3979, + BASE_HW_ISSUE_T76X_3982, BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -406,6 +417,7 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p1_50rel0[] = { BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3966, BASE_HW_ISSUE_T76X_3979, + BASE_HW_ISSUE_T76X_3982, BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -433,6 +445,7 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p2[] = { BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3966, BASE_HW_ISSUE_T76X_3979, + BASE_HW_ISSUE_T76X_3982, BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -458,6 +471,7 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p3[] = { BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3966, BASE_HW_ISSUE_T76X_3979, + BASE_HW_ISSUE_T76X_3982, BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -480,6 +494,7 @@ static const enum base_hw_issue base_hw_issues_t76x_r1p0[] = { BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3966, BASE_HW_ISSUE_T76X_3979, + BASE_HW_ISSUE_T76X_3982, BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -488,6 +503,7 @@ static const enum base_hw_issue base_hw_issues_t72x_r0p0[] = { BASE_HW_ISSUE_6402, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10471, + BASE_HW_ISSUE_10649, BASE_HW_ISSUE_10684, BASE_HW_ISSUE_10797, BASE_HW_ISSUE_10821, @@ -506,6 +522,7 @@ static const enum base_hw_issue base_hw_issues_t72x_r1p0[] = { BASE_HW_ISSUE_6402, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10471, + BASE_HW_ISSUE_10649, BASE_HW_ISSUE_10684, BASE_HW_ISSUE_10797, BASE_HW_ISSUE_10821, @@ -524,6 +541,7 @@ static const enum base_hw_issue base_hw_issues_t72x_r1p1[] = { BASE_HW_ISSUE_6402, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10471, + BASE_HW_ISSUE_10649, BASE_HW_ISSUE_10684, BASE_HW_ISSUE_10797, BASE_HW_ISSUE_10821, @@ -543,6 +561,7 @@ static const enum base_hw_issue base_hw_issues_model_t72x[] = { BASE_HW_ISSUE_6402, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10471, + BASE_HW_ISSUE_10649, BASE_HW_ISSUE_10797, BASE_HW_ISSUE_11042, BASE_HW_ISSUE_11051, @@ -567,6 +586,7 @@ static const enum base_hw_issue base_hw_issues_model_t76x[] = { BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3979, + BASE_HW_ISSUE_T76X_3982, 
BASE_HW_ISSUE_TMIX_7891, GPUCORE_1619, BASE_HW_ISSUE_END @@ -578,6 +598,7 @@ static const enum base_hw_issue base_hw_issues_model_t60x[] = { BASE_HW_ISSUE_8778, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10472, + BASE_HW_ISSUE_10649, BASE_HW_ISSUE_10931, BASE_HW_ISSUE_11012, BASE_HW_ISSUE_11020, @@ -595,6 +616,7 @@ static const enum base_hw_issue base_hw_issues_model_t62x[] = { BASE_HW_ISSUE_6402, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10472, + BASE_HW_ISSUE_10649, BASE_HW_ISSUE_10931, BASE_HW_ISSUE_11012, BASE_HW_ISSUE_11020, @@ -625,6 +647,7 @@ static const enum base_hw_issue base_hw_issues_tFRx_r0p1[] = { BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3966, BASE_HW_ISSUE_T76X_3979, + BASE_HW_ISSUE_T76X_3982, BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -645,6 +668,7 @@ static const enum base_hw_issue base_hw_issues_tFRx_r0p2[] = { BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3966, BASE_HW_ISSUE_T76X_3979, + BASE_HW_ISSUE_T76X_3982, BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -663,6 +687,7 @@ static const enum base_hw_issue base_hw_issues_tFRx_r1p0[] = { BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3966, BASE_HW_ISSUE_T76X_3979, + BASE_HW_ISSUE_T76X_3982, BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -681,6 +706,7 @@ static const enum base_hw_issue base_hw_issues_tFRx_r2p0[] = { BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3966, BASE_HW_ISSUE_T76X_3979, + BASE_HW_ISSUE_T76X_3982, BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -695,6 +721,7 @@ static const enum base_hw_issue base_hw_issues_model_tFRx[] = { BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3979, + BASE_HW_ISSUE_T76X_3982, BASE_HW_ISSUE_TMIX_7891, GPUCORE_1619, BASE_HW_ISSUE_END @@ -716,6 +743,7 @@ static const enum base_hw_issue base_hw_issues_t86x_r0p2[] = { BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3966, BASE_HW_ISSUE_T76X_3979, + BASE_HW_ISSUE_T76X_3982, BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -734,6 +762,7 @@ static const enum base_hw_issue base_hw_issues_t86x_r1p0[] = { BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3966, BASE_HW_ISSUE_T76X_3979, + BASE_HW_ISSUE_T76X_3982, BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -752,6 +781,7 @@ static const enum base_hw_issue base_hw_issues_t86x_r2p0[] = { BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3966, BASE_HW_ISSUE_T76X_3979, + BASE_HW_ISSUE_T76X_3982, BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -766,6 +796,7 @@ static const enum base_hw_issue base_hw_issues_model_t86x[] = { BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3979, BASE_HW_ISSUE_TMIX_7891, + BASE_HW_ISSUE_T76X_3982, GPUCORE_1619, BASE_HW_ISSUE_END }; @@ -785,6 +816,7 @@ static const enum base_hw_issue base_hw_issues_t83x_r0p1[] = { BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3960, BASE_HW_ISSUE_T76X_3979, + BASE_HW_ISSUE_T76X_3982, BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -803,6 +835,7 @@ static const enum base_hw_issue base_hw_issues_t83x_r1p0[] = { BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3960, BASE_HW_ISSUE_T76X_3979, + BASE_HW_ISSUE_T76X_3982, BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -811,13 +844,13 @@ static const enum base_hw_issue base_hw_issues_model_t83x[] = { BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, BASE_HW_ISSUE_T76X_3700, BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3979, + BASE_HW_ISSUE_T76X_3982, BASE_HW_ISSUE_TMIX_7891, GPUCORE_1619, BASE_HW_ISSUE_END @@ -839,6 +872,7 @@ static const enum base_hw_issue base_hw_issues_t82x_r0p0[] = { 
BASE_HW_ISSUE_T76X_3960, BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3979, + BASE_HW_ISSUE_T76X_3982, BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -858,6 +892,7 @@ static const enum base_hw_issue base_hw_issues_t82x_r0p1[] = { BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3960, BASE_HW_ISSUE_T76X_3979, + BASE_HW_ISSUE_T76X_3982, BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -876,6 +911,7 @@ static const enum base_hw_issue base_hw_issues_t82x_r1p0[] = { BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3960, BASE_HW_ISSUE_T76X_3979, + BASE_HW_ISSUE_T76X_3982, BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -884,12 +920,12 @@ static const enum base_hw_issue base_hw_issues_model_t82x[] = { BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, BASE_HW_ISSUE_T76X_3700, BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3979, + BASE_HW_ISSUE_T76X_3982, BASE_HW_ISSUE_TMIX_7891, GPUCORE_1619, BASE_HW_ISSUE_END @@ -902,10 +938,12 @@ static const enum base_hw_issue base_hw_issues_tMIx_r0p0_05dev0[] = { BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_3700, BASE_HW_ISSUE_T76X_3953, + BASE_HW_ISSUE_T76X_3982, BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_TMIX_8042, BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TMIX_8138, + BASE_HW_ISSUE_TMIX_8343, BASE_HW_ISSUE_END }; @@ -915,12 +953,14 @@ static const enum base_hw_issue base_hw_issues_tMIx_r0p0[] = { BASE_HW_ISSUE_10821, BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_3700, + BASE_HW_ISSUE_T76X_3982, BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_TMIX_7940, BASE_HW_ISSUE_TMIX_8042, BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TMIX_8138, BASE_HW_ISSUE_TMIX_8206, + BASE_HW_ISSUE_TMIX_8343, BASE_HW_ISSUE_END }; @@ -928,12 +968,14 @@ static const enum base_hw_issue base_hw_issues_model_tMIx[] = { BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_T76X_3700, + BASE_HW_ISSUE_T76X_3982, BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_TMIX_7940, BASE_HW_ISSUE_TMIX_8042, BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TMIX_8138, BASE_HW_ISSUE_TMIX_8206, + BASE_HW_ISSUE_TMIX_8343, GPUCORE_1619, BASE_HW_ISSUE_END }; diff --git a/drivers/gpu/arm/midgard/mali_base_kernel.h b/drivers/gpu/arm/midgard/mali_base_kernel.h index 873cd929dc78..749dd9a1cc9e 100644 --- a/drivers/gpu/arm/midgard/mali_base_kernel.h +++ b/drivers/gpu/arm/midgard/mali_base_kernel.h @@ -42,6 +42,9 @@ /* Support UK9 IOCTLS */ #define BASE_LEGACY_UK9_SUPPORT 1 +/* Support UK10_2 IOCTLS */ +#define BASE_LEGACY_UK10_2_SUPPORT 1 + typedef struct base_mem_handle { struct { u64 handle; @@ -290,7 +293,6 @@ typedef enum base_backing_threshold_status { BASE_BACKING_THRESHOLD_OK = 0, /**< Resize successful */ BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE = -1, /**< Not a growable tmem object */ BASE_BACKING_THRESHOLD_ERROR_OOM = -2, /**< Increase failed due to an out-of-memory condition */ - BASE_BACKING_THRESHOLD_ERROR_MAPPED = -3, /**< Resize attempted on buffer while it was mapped, which is not permitted */ BASE_BACKING_THRESHOLD_ERROR_INVALID_ARGUMENTS = -4 /**< Invalid arguments (not tmem, illegal size request, etc.) */ } base_backing_threshold_status; @@ -442,11 +444,20 @@ typedef u8 base_jd_dep_type; * Special case is ::BASE_JD_REQ_DEP, which is used to express complex * dependencies, and that doesn't execute anything on the hardware. 
*/ -typedef u16 base_jd_core_req; +typedef u32 base_jd_core_req; /* Requirements that come from the HW */ -#define BASE_JD_REQ_DEP 0 /**< No requirement, dependency only */ -#define BASE_JD_REQ_FS (1U << 0) /**< Requires fragment shaders */ + +/** + * No requirement, dependency only + */ +#define BASE_JD_REQ_DEP ((base_jd_core_req)0) + +/** + * Requires fragment shaders + */ +#define BASE_JD_REQ_FS ((base_jd_core_req)1 << 0) + /** * Requires compute shaders * This covers any of the following Midgard Job types: @@ -458,15 +469,15 @@ typedef u16 base_jd_core_req; * job is specifically just the "Compute Shader" job type, and not the "Vertex * Shader" nor the "Geometry Shader" job type. */ -#define BASE_JD_REQ_CS (1U << 1) -#define BASE_JD_REQ_T (1U << 2) /**< Requires tiling */ -#define BASE_JD_REQ_CF (1U << 3) /**< Requires cache flushes */ -#define BASE_JD_REQ_V (1U << 4) /**< Requires value writeback */ +#define BASE_JD_REQ_CS ((base_jd_core_req)1 << 1) +#define BASE_JD_REQ_T ((base_jd_core_req)1 << 2) /**< Requires tiling */ +#define BASE_JD_REQ_CF ((base_jd_core_req)1 << 3) /**< Requires cache flushes */ +#define BASE_JD_REQ_V ((base_jd_core_req)1 << 4) /**< Requires value writeback */ /* SW-only requirements - the HW does not expose these as part of the job slot capabilities */ /* Requires fragment job with AFBC encoding */ -#define BASE_JD_REQ_FS_AFBC (1U << 13) +#define BASE_JD_REQ_FS_AFBC ((base_jd_core_req)1 << 13) /** * SW-only requirement: coalesce completion events. @@ -476,20 +487,20 @@ typedef u16 base_jd_core_req; * * This bit may not be used in combination with BASE_JD_REQ_EXTERNAL_RESOURCES. */ -#define BASE_JD_REQ_EVENT_COALESCE (1U << 5) +#define BASE_JD_REQ_EVENT_COALESCE ((base_jd_core_req)1 << 5) /** * SW Only requirement: the job chain requires a coherent core group. We don't * mind which coherent core group is used. */ -#define BASE_JD_REQ_COHERENT_GROUP (1U << 6) +#define BASE_JD_REQ_COHERENT_GROUP ((base_jd_core_req)1 << 6) /** * SW Only requirement: The performance counters should be enabled only when * they are needed, to reduce power consumption. */ -#define BASE_JD_REQ_PERMON (1U << 7) +#define BASE_JD_REQ_PERMON ((base_jd_core_req)1 << 7) /** * SW Only requirement: External resources are referenced by this atom. @@ -500,13 +511,13 @@ typedef u16 base_jd_core_req; * * This bit may not be used in combination with BASE_JD_REQ_EVENT_COALESCE. */ -#define BASE_JD_REQ_EXTERNAL_RESOURCES (1U << 8) +#define BASE_JD_REQ_EXTERNAL_RESOURCES ((base_jd_core_req)1 << 8) /** * SW Only requirement: Software defined job. Jobs with this bit set will not be submitted * to the hardware but will cause some action to happen within the driver */ -#define BASE_JD_REQ_SOFT_JOB (1U << 9) +#define BASE_JD_REQ_SOFT_JOB ((base_jd_core_req)1 << 9) #define BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME (BASE_JD_REQ_SOFT_JOB | 0x1) #define BASE_JD_REQ_SOFT_FENCE_TRIGGER (BASE_JD_REQ_SOFT_JOB | 0x2) @@ -618,49 +629,79 @@ typedef u16 base_jd_core_req; * In contrast to @ref BASE_JD_REQ_CS, this does \b not indicate that the Job * Chain contains 'Geometry Shader' or 'Vertex Shader' jobs. 
*/ -#define BASE_JD_REQ_ONLY_COMPUTE (1U << 10) +#define BASE_JD_REQ_ONLY_COMPUTE ((base_jd_core_req)1 << 10) /** * HW Requirement: Use the base_jd_atom::device_nr field to specify a * particular core group * - * If both BASE_JD_REQ_COHERENT_GROUP and this flag are set, this flag takes priority + * If both @ref BASE_JD_REQ_COHERENT_GROUP and this flag are set, this flag takes priority * - * This is only guaranteed to work for BASE_JD_REQ_ONLY_COMPUTE atoms. + * This is only guaranteed to work for @ref BASE_JD_REQ_ONLY_COMPUTE atoms. * * If the core availability policy is keeping the required core group turned off, then - * the job will fail with a BASE_JD_EVENT_PM_EVENT error code. + * the job will fail with a @ref BASE_JD_EVENT_PM_EVENT error code. */ -#define BASE_JD_REQ_SPECIFIC_COHERENT_GROUP (1U << 11) +#define BASE_JD_REQ_SPECIFIC_COHERENT_GROUP ((base_jd_core_req)1 << 11) /** * SW Flag: If this bit is set then the successful completion of this atom * will not cause an event to be sent to userspace */ -#define BASE_JD_REQ_EVENT_ONLY_ON_FAILURE (1U << 12) +#define BASE_JD_REQ_EVENT_ONLY_ON_FAILURE ((base_jd_core_req)1 << 12) /** * SW Flag: If this bit is set then completion of this atom will not cause an * event to be sent to userspace, whether successful or not. */ -#define BASEP_JD_REQ_EVENT_NEVER (1U << 14) +#define BASEP_JD_REQ_EVENT_NEVER ((base_jd_core_req)1 << 14) /** - * These requirement bits are currently unused in base_jd_core_req (currently a u16) + * SW Flag: Skip GPU cache clean and invalidation before starting a GPU job. + * + * If this bit is set then the GPU's cache will not be cleaned and invalidated + * until a GPU job starts which does not have this bit set or a job completes + * which does not have the @ref BASE_JD_REQ_SKIP_CACHE_END bit set. Do not use if + * the CPU may have written to memory addressed by the job since the last job + * without this bit set was submitted. + */ +#define BASE_JD_REQ_SKIP_CACHE_START ((base_jd_core_req)1 << 15) + +/** + * SW Flag: Skip GPU cache clean and invalidation after a GPU job completes. + * + * If this bit is set then the GPU's cache will not be cleaned and invalidated + * until a GPU job completes which does not have this bit set or a job starts + * which does not have the @ref BASE_JD_REQ_SKIP_CACHE_START bti set. Do not use if + * the CPU may read from or partially overwrite memory addressed by the job + * before the next job without this bit set completes. */ +#define BASE_JD_REQ_SKIP_CACHE_END ((base_jd_core_req)1 << 16) -#define BASEP_JD_REQ_RESERVED (1U << 15) +/** + * These requirement bits are currently unused in base_jd_core_req + */ +#define BASEP_JD_REQ_RESERVED \ + (~(BASE_JD_REQ_ATOM_TYPE | BASE_JD_REQ_EXTERNAL_RESOURCES | \ + BASE_JD_REQ_EVENT_ONLY_ON_FAILURE | BASEP_JD_REQ_EVENT_NEVER | \ + BASE_JD_REQ_EVENT_COALESCE | \ + BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP | \ + BASE_JD_REQ_FS_AFBC | BASE_JD_REQ_PERMON | \ + BASE_JD_REQ_SKIP_CACHE_START | BASE_JD_REQ_SKIP_CACHE_END)) /** * Mask of all bits in base_jd_core_req that control the type of the atom. 
* * This allows dependency only atoms to have flags set */ -#define BASEP_JD_REQ_ATOM_TYPE (~(BASEP_JD_REQ_RESERVED |\ - BASE_JD_REQ_EVENT_ONLY_ON_FAILURE |\ - BASE_JD_REQ_EXTERNAL_RESOURCES |\ - BASEP_JD_REQ_EVENT_NEVER |\ - BASE_JD_REQ_EVENT_COALESCE)) +#define BASE_JD_REQ_ATOM_TYPE \ + (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T | BASE_JD_REQ_CF | \ + BASE_JD_REQ_V | BASE_JD_REQ_SOFT_JOB | BASE_JD_REQ_ONLY_COMPUTE) + +/** + * Mask of all bits in base_jd_core_req that control the type of a soft job. + */ +#define BASE_JD_REQ_SOFT_JOB_TYPE (BASE_JD_REQ_SOFT_JOB | 0x1f) /** * @brief States to model state machine processed by kbasep_js_job_check_ref_cores(), which @@ -764,18 +805,26 @@ struct base_dependency { base_jd_dep_type dependency_type; /**< Dependency type */ }; +/* This structure has changed since UK 10.2 for which base_jd_core_req was a u16 value. + * In order to keep the size of the structure same, padding field has been adjusted + * accordingly and core_req field of a u32 type (to which UK 10.3 base_jd_core_req defines) + * is added at the end of the structure. Place in the structure previously occupied by u16 core_req + * is kept but renamed to compat_core_req and as such it can be used in ioctl call for job submission + * as long as UK 10.2 legacy is supported. Once when this support ends, this field can be left + * for possible future use. */ typedef struct base_jd_atom_v2 { u64 jc; /**< job-chain GPU address */ struct base_jd_udata udata; /**< user data */ kbase_pointer extres_list; /**< list of external resources */ u16 nr_extres; /**< nr of external resources */ - base_jd_core_req core_req; /**< core requirements */ + u16 compat_core_req; /**< core requirements which correspond to the legacy support for UK 10.2 */ struct base_dependency pre_dep[2]; /**< pre-dependencies, one need to use SETTER function to assign this field, this is done in order to reduce possibility of improper assigment of a dependency field */ base_atom_id atom_number; /**< unique number to identify the atom */ base_jd_prio prio; /**< Atom priority. Refer to @ref base_jd_prio for more details */ u8 device_nr; /**< coregroup when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP specified */ - u8 padding[5]; + u8 padding[1]; + base_jd_core_req core_req; /**< core requirements */ } base_jd_atom_v2; #ifdef BASE_LEGACY_UK6_SUPPORT @@ -784,14 +833,14 @@ struct base_jd_atom_v2_uk6 { struct base_jd_udata udata; /**< user data */ kbase_pointer extres_list; /**< list of external resources */ u16 nr_extres; /**< nr of external resources */ - base_jd_core_req core_req; /**< core requirements */ + u16 core_req; /**< core requirements */ base_atom_id pre_dep[2]; /**< pre-dependencies */ base_atom_id atom_number; /**< unique number to identify the atom */ base_jd_prio prio; /**< priority - smaller is higher priority */ u8 device_nr; /**< coregroup when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP specified */ u8 padding[7]; }; -#endif +#endif /* BASE_LEGACY_UK6_SUPPORT */ typedef enum base_external_resource_access { BASE_EXT_RES_ACCESS_SHARED, @@ -1603,7 +1652,7 @@ typedef struct mali_base_gpu_props { * Flags to pass to ::base_context_init. * Flags can be ORed together to enable multiple things. * - * These share the same space as @ref basep_context_private_flags, and so must + * These share the same space as BASEP_CONTEXT_FLAG_*, and so must * not collide with them. 
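Since base_jd_core_req is now a u32 with an explicit reserved mask, and base_jd_atom_v2 carries both the legacy u16 compat_core_req and the new 32-bit core_req, a caller-side sanity check could look like the sketch below. This is only one plausible way of combining the two fields; the driver's actual UK 10.2 fallback logic is not shown in this hunk.

/* Pick the 32-bit requirements, falling back to the legacy field if a
 * UK 10.2 client left the new one at zero. Dependency-only atoms are zero
 * in either field, so the fallback is harmless for them. */
static inline base_jd_core_req
example_effective_core_req(const struct base_jd_atom_v2 *atom)
{
        if (atom->core_req != 0)
                return atom->core_req;
        return (base_jd_core_req)atom->compat_core_req;
}

/* Reject anything outside the documented requirement bits. */
static inline bool example_core_req_is_valid(base_jd_core_req core_req)
{
        return (core_req & BASEP_JD_REQ_RESERVED) == 0;
}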
*/ enum base_context_create_flags { @@ -1632,7 +1681,7 @@ enum base_context_create_flags { #define BASE_CONTEXT_CREATE_KERNEL_FLAGS \ ((u32)BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) -/** +/* * Private flags used on the base context * * These start at bit 31, and run down to zero. @@ -1640,10 +1689,8 @@ enum base_context_create_flags { * They share the same space as @ref base_context_create_flags, and so must * not collide with them. */ -enum basep_context_private_flags { - /** Private flag tracking whether job descriptor dumping is disabled */ - BASEP_CONTEXT_FLAG_JOB_DUMP_DISABLED = (1 << 31) -}; +/** Private flag tracking whether job descriptor dumping is disabled */ +#define BASEP_CONTEXT_FLAG_JOB_DUMP_DISABLED ((u32)(1 << 31)) /** @} end group base_user_api_core */ @@ -1716,9 +1763,21 @@ typedef struct base_jd_replay_payload { * Core requirements for the fragment job chain */ base_jd_core_req fragment_core_req; +} base_jd_replay_payload; +#ifdef BASE_LEGACY_UK10_2_SUPPORT +typedef struct base_jd_replay_payload_uk10_2 { + u64 tiler_jc_list; + u64 fragment_jc; + u64 tiler_heap_free; + u16 fragment_hierarchy_mask; + u16 tiler_hierarchy_mask; + u32 hierarchy_default_weight; + u16 tiler_core_req; + u16 fragment_core_req; u8 padding[4]; -} base_jd_replay_payload; +} base_jd_replay_payload_uk10_2; +#endif /* BASE_LEGACY_UK10_2_SUPPORT */ /** * @brief An entry in the linked list of job chains to be replayed. This must diff --git a/drivers/gpu/arm/midgard/mali_kbase.h b/drivers/gpu/arm/midgard/mali_kbase.h index f09b30a3dff3..b6d28fea9987 100644 --- a/drivers/gpu/arm/midgard/mali_kbase.h +++ b/drivers/gpu/arm/midgard/mali_kbase.h @@ -175,7 +175,7 @@ void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js, void kbase_job_slot_hardstop(struct kbase_context *kctx, int js, struct kbase_jd_atom *target_katom); void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action, - u16 core_reqs, struct kbase_jd_atom *target_katom); + base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom); void kbase_job_check_leave_disjoint(struct kbase_device *kbdev, struct kbase_jd_atom *target_katom); @@ -193,15 +193,15 @@ void kbase_finish_soft_job(struct kbase_jd_atom *katom); void kbase_cancel_soft_job(struct kbase_jd_atom *katom); void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev); void kbasep_add_waiting_soft_job(struct kbase_jd_atom *katom); +void kbasep_remove_waiting_soft_job(struct kbase_jd_atom *katom); +int kbase_soft_event_update(struct kbase_context *kctx, + u64 event, + unsigned char new_status); bool kbase_replay_process(struct kbase_jd_atom *katom); -enum hrtimer_restart kbasep_soft_event_timeout_worker(struct hrtimer *timer); +void kbasep_soft_job_timeout_worker(unsigned long data); void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt); -int kbasep_read_soft_event_status( - struct kbase_context *kctx, u64 evt, unsigned char *status); -int kbasep_write_soft_event_status( - struct kbase_context *kctx, u64 evt, unsigned char new_status); /* api used internally for register access. 
Contains validation and tracing */ void kbase_device_trace_register_access(struct kbase_context *kctx, enum kbase_reg_access_type type, u16 reg_offset, u32 reg_value); diff --git a/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.c b/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.c new file mode 100644 index 000000000000..f910fe970feb --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.c @@ -0,0 +1,102 @@ +/* + * + * (C) COPYRIGHT 2016 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +#include + +#include +#include + +#ifdef CONFIG_DEBUG_FS +#ifdef CONFIG_MALI_DEBUG + +static int kbase_as_fault_read(struct seq_file *sfile, void *data) +{ + uintptr_t as_no = (uintptr_t) sfile->private; + + struct list_head *entry; + const struct list_head *kbdev_list; + struct kbase_device *kbdev = NULL; + + kbdev_list = kbase_dev_list_get(); + + list_for_each(entry, kbdev_list) { + kbdev = list_entry(entry, struct kbase_device, entry); + + if(kbdev->debugfs_as_read_bitmap & (1ULL << as_no)) { + + /* don't show this one again until another fault occors */ + kbdev->debugfs_as_read_bitmap &= ~(1ULL << as_no); + + /* output the last page fault addr */ + seq_printf(sfile, "%llu\n", (u64) kbdev->as[as_no].fault_addr); + } + + } + + kbase_dev_list_put(kbdev_list); + + return 0; +} + +static int kbase_as_fault_debugfs_open(struct inode *in, struct file *file) +{ + return single_open(file, kbase_as_fault_read , in->i_private); +} + +static const struct file_operations as_fault_fops = { + .open = kbase_as_fault_debugfs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +#endif /* CONFIG_MALI_DEBUG */ +#endif /* CONFIG_DEBUG_FS */ + +/* + * Initialize debugfs entry for each address space + */ +void kbase_as_fault_debugfs_init(struct kbase_device *kbdev) +{ +#ifdef CONFIG_DEBUG_FS +#ifdef CONFIG_MALI_DEBUG + uint i; + char as_name[64]; + struct dentry *debugfs_directory; + + kbdev->debugfs_as_read_bitmap = 0ULL; + + KBASE_DEBUG_ASSERT(kbdev->nr_hw_address_spaces); + KBASE_DEBUG_ASSERT(sizeof(kbdev->as[0].fault_addr) == sizeof(u64)); + + debugfs_directory = debugfs_create_dir("address_spaces", + kbdev->mali_debugfs_directory); + + if(debugfs_directory) { + for(i = 0; i < kbdev->nr_hw_address_spaces; i++) { + snprintf(as_name, ARRAY_SIZE(as_name), "as%u", i); + debugfs_create_file(as_name, S_IRUGO, + debugfs_directory, (void*) ((uintptr_t) i), &as_fault_fops); + } + } + else + dev_warn(kbdev->dev, "unable to create address_spaces debugfs directory"); + +#endif /* CONFIG_MALI_DEBUG */ +#endif /* CONFIG_DEBUG_FS */ + return; +} diff --git a/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.h b/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.h new file mode 100644 index 000000000000..3ed2248897fc --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.h @@ -0,0 +1,45 @@ +/* + * + * (C) COPYRIGHT 2016 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +#ifndef _KBASE_AS_FAULT_DEBUG_FS_H +#define _KBASE_AS_FAULT_DEBUG_FS_H + +/** + * kbase_as_fault_debugfs_init() - Add debugfs files for reporting page faults + * + * @kbdev: Pointer to kbase_device + */ +void kbase_as_fault_debugfs_init(struct kbase_device *kbdev); + +/** + * kbase_as_fault_debugfs_new() - make the last fault available on debugfs + * + * @kbdev: Pointer to kbase_device + * @as_no: The address space the fault occurred on + */ +static inline void +kbase_as_fault_debugfs_new(struct kbase_device *kbdev, int as_no) +{ +#ifdef CONFIG_DEBUG_FS +#ifdef CONFIG_MALI_DEBUG + kbdev->debugfs_as_read_bitmap |= (1ULL << as_no); +#endif /* CONFIG_DEBUG_FS */ +#endif /* CONFIG_MALI_DEBUG */ + return; +} + +#endif /*_KBASE_AS_FAULT_DEBUG_FS_H*/ diff --git a/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h b/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h index 7cabbecea23f..9b00cce9b2b3 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h +++ b/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2013-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -216,10 +216,10 @@ enum { #define DEFAULT_JS_HARD_STOP_TICKS_DUMPING (15000) /* 1500s */ /* - * Default timeout for software event jobs, after which these jobs will be - * cancelled. + * Default timeout for some software jobs, after which the software event wait + * jobs will be cancelled. 
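The new address-space fault files are purely a reporting aid: the fault path records which AS faulted, and the matching debugfs entry under address_spaces/ prints kbdev->as[n].fault_addr exactly once per fault. A sketch of the expected call site (presumably the MMU fault handler, which is not part of this hunk):

static void example_note_page_fault(struct kbase_device *kbdev, int as_no,
                                    u64 fault_addr)
{
        /* The real fault handler already stores the faulting address. */
        kbdev->as[as_no].fault_addr = fault_addr;

        /* Mark this AS as having an unread fault; the as<n> debugfs file
         * will report the address once, then stay quiet until the next
         * fault on the same address space. */
        kbase_as_fault_debugfs_new(kbdev, as_no);
}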
*/ -#define DEFAULT_JS_SOFT_EVENT_TIMEOUT ((u32)3000) /* 3s */ +#define DEFAULT_JS_SOFT_JOB_TIMEOUT ((u32)3000) /* 3s */ /* * Default minimum number of scheduling ticks before the GPU is reset to clear a diff --git a/drivers/gpu/arm/midgard/mali_kbase_context.c b/drivers/gpu/arm/midgard/mali_kbase_context.c index 0548d2b124ac..344a1f16de8a 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_context.c +++ b/drivers/gpu/arm/midgard/mali_kbase_context.c @@ -23,7 +23,6 @@ #include #include -#include #include /** @@ -92,6 +91,8 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat) if (err) goto free_jd; + atomic_set(&kctx->drain_pending, 0); + mutex_init(&kctx->reg_lock); INIT_LIST_HEAD(&kctx->waiting_soft_jobs); @@ -142,9 +143,9 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat) mutex_init(&kctx->vinstr_cli_lock); - hrtimer_init(&kctx->soft_event_timeout, CLOCK_MONOTONIC, - HRTIMER_MODE_REL); - kctx->soft_event_timeout.function = &kbasep_soft_event_timeout_worker; + setup_timer(&kctx->soft_job_timeout, + kbasep_soft_job_timeout_worker, + (uintptr_t)kctx); return kctx; diff --git a/drivers/gpu/arm/midgard/mali_kbase_core_linux.c b/drivers/gpu/arm/midgard/mali_kbase_core_linux.c index e17708c9cde7..ee59504cd4e8 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_core_linux.c +++ b/drivers/gpu/arm/midgard/mali_kbase_core_linux.c @@ -17,13 +17,10 @@ #include "platform/rk/custom_log.h" #include -#include #include #include #include -#include #include -#include #include #ifdef CONFIG_MALI_DEVFREQ #include @@ -62,12 +59,10 @@ #include #include #include -#include #include #include /* is_compat_task */ #include #include -#include #ifdef CONFIG_MALI_PLATFORM_DEVICETREE #include #endif /* CONFIG_MALI_PLATFORM_DEVICETREE */ @@ -96,6 +91,8 @@ #include +#include + /* GPU IRQ Tags */ #define JOB_IRQ_TAG 0 #define MMU_IRQ_TAG 1 @@ -151,201 +148,6 @@ static int kds_resource_release(struct inode *inode, struct file *file) } return 0; } - -static int kbasep_kds_allocate_resource_list_data(struct kbase_context *kctx, struct base_external_resource *ext_res, int num_elems, struct kbase_kds_resource_list_data *resources_list) -{ - struct base_external_resource *res = ext_res; - int res_id; - - /* assume we have to wait for all */ - - KBASE_DEBUG_ASSERT(0 != num_elems); - resources_list->kds_resources = kmalloc_array(num_elems, - sizeof(struct kds_resource *), GFP_KERNEL); - - if (NULL == resources_list->kds_resources) - return -ENOMEM; - - KBASE_DEBUG_ASSERT(0 != num_elems); - resources_list->kds_access_bitmap = kzalloc( - sizeof(unsigned long) * - ((num_elems + BITS_PER_LONG - 1) / BITS_PER_LONG), - GFP_KERNEL); - - if (NULL == resources_list->kds_access_bitmap) { - kfree(resources_list->kds_access_bitmap); - return -ENOMEM; - } - - kbase_gpu_vm_lock(kctx); - for (res_id = 0; res_id < num_elems; res_id++, res++) { - int exclusive; - struct kbase_va_region *reg; - struct kds_resource *kds_res = NULL; - - exclusive = res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE; - reg = kbase_region_tracker_find_region_enclosing_address(kctx, res->ext_resource & ~BASE_EXT_RES_ACCESS_EXCLUSIVE); - - /* did we find a matching region object? 
*/ - if (NULL == reg || (reg->flags & KBASE_REG_FREE)) - break; - - /* no need to check reg->alloc as only regions with an alloc has - * a size, and kbase_region_tracker_find_region_enclosing_address - * only returns regions with size > 0 */ - switch (reg->gpu_alloc->type) { -#if defined(CONFIG_UMP) && defined(CONFIG_KDS) - case KBASE_MEM_TYPE_IMPORTED_UMP: - kds_res = ump_dd_kds_resource_get(reg->gpu_alloc->imported.ump_handle); - break; -#endif /* defined(CONFIG_UMP) && defined(CONFIG_KDS) */ - default: - break; - } - - /* no kds resource for the region ? */ - if (!kds_res) - break; - - resources_list->kds_resources[res_id] = kds_res; - - if (exclusive) - set_bit(res_id, resources_list->kds_access_bitmap); - } - kbase_gpu_vm_unlock(kctx); - - /* did the loop run to completion? */ - if (res_id == num_elems) - return 0; - - /* Clean up as the resource list is not valid. */ - kfree(resources_list->kds_resources); - kfree(resources_list->kds_access_bitmap); - - return -EINVAL; -} - -static bool kbasep_validate_kbase_pointer( - struct kbase_context *kctx, union kbase_pointer *p) -{ - if (kctx->is_compat) { - if (p->compat_value == 0) - return false; - } else { - if (NULL == p->value) - return false; - } - return true; -} - -static int kbase_external_buffer_lock(struct kbase_context *kctx, - struct kbase_uk_ext_buff_kds_data *args, u32 args_size) -{ - struct base_external_resource *ext_res_copy; - size_t ext_resource_size; - int ret = -EINVAL; - int fd = -EBADF; - struct base_external_resource __user *ext_res_user; - int __user *file_desc_usr; - struct kbasep_kds_resource_set_file_data *fdata; - struct kbase_kds_resource_list_data resource_list_data; - - if (args_size != sizeof(struct kbase_uk_ext_buff_kds_data)) - return -EINVAL; - - /* Check user space has provided valid data */ - if (!kbasep_validate_kbase_pointer(kctx, &args->external_resource) || - !kbasep_validate_kbase_pointer(kctx, &args->file_descriptor) || - (0 == args->num_res) || - (args->num_res > KBASE_MAXIMUM_EXT_RESOURCES)) - return -EINVAL; - - ext_resource_size = sizeof(struct base_external_resource) * args->num_res; - - KBASE_DEBUG_ASSERT(0 != ext_resource_size); - ext_res_copy = kmalloc(ext_resource_size, GFP_KERNEL); - - if (!ext_res_copy) - return -EINVAL; -#ifdef CONFIG_COMPAT - if (kctx->is_compat) { - ext_res_user = compat_ptr(args->external_resource.compat_value); - file_desc_usr = compat_ptr(args->file_descriptor.compat_value); - } else { -#endif /* CONFIG_COMPAT */ - ext_res_user = args->external_resource.value; - file_desc_usr = args->file_descriptor.value; -#ifdef CONFIG_COMPAT - } -#endif /* CONFIG_COMPAT */ - - /* Copy the external resources to lock from user space */ - if (copy_from_user(ext_res_copy, ext_res_user, ext_resource_size)) - goto out; - - /* Allocate data to be stored in the file */ - fdata = kmalloc(sizeof(*fdata), GFP_KERNEL); - - if (!fdata) { - ret = -ENOMEM; - goto out; - } - - /* Parse given elements and create resource and access lists */ - ret = kbasep_kds_allocate_resource_list_data(kctx, - ext_res_copy, args->num_res, &resource_list_data); - if (!ret) { - long err; - - fdata->lock = NULL; - - fd = anon_inode_getfd("kds_ext", &kds_resource_fops, fdata, 0); - - err = copy_to_user(file_desc_usr, &fd, sizeof(fd)); - - /* If the file descriptor was valid and we successfully copied - * it to user space, then we can try and lock the requested - * kds resources. 
- */ - if ((fd >= 0) && (0 == err)) { - struct kds_resource_set *lock; - - lock = kds_waitall(args->num_res, - resource_list_data.kds_access_bitmap, - resource_list_data.kds_resources, - KDS_WAIT_BLOCKING); - - if (!lock) { - ret = -EINVAL; - } else if (IS_ERR(lock)) { - ret = PTR_ERR(lock); - } else { - ret = 0; - fdata->lock = lock; - } - } else { - ret = -EINVAL; - } - - kfree(resource_list_data.kds_resources); - kfree(resource_list_data.kds_access_bitmap); - } - - if (ret) { - /* If the file was opened successfully then close it which will - * clean up the file data, otherwise we clean up the file data - * ourself. - */ - if (fd >= 0) - sys_close(fd); - else - kfree(fdata); - } -out: - kfree(ext_res_copy); - - return ret; -} #endif /* CONFIG_KDS */ static void kbase_create_timeline_objects(struct kbase_context *kctx) @@ -632,10 +434,13 @@ static int kbase_dispatch(struct kbase_context *kctx, void * const args, u32 arg break; } - if (kbase_mem_import(kctx, mem_import->type, phandle, - &mem_import->gpu_va, - &mem_import->va_pages, - &mem_import->flags)) { + if (kbase_mem_import(kctx, + (enum base_mem_import_type) + mem_import->type, + phandle, + &mem_import->gpu_va, + &mem_import->va_pages, + &mem_import->flags)) { mem_import->type = BASE_MEM_IMPORT_TYPE_INVALID; ukh->ret = MALI_ERROR_FUNCTION_FAILED; } @@ -991,26 +796,6 @@ copy_failed: break; } - case KBASE_FUNC_EXT_BUFFER_LOCK: - { -#ifdef CONFIG_KDS - ret = kbase_external_buffer_lock(kctx, - (struct kbase_uk_ext_buff_kds_data *)args, - args_size); - switch (ret) { - case 0: - ukh->ret = MALI_ERROR_NONE; - break; - case -ENOMEM: - ukh->ret = MALI_ERROR_OUT_OF_MEMORY; - break; - default: - ukh->ret = MALI_ERROR_FUNCTION_FAILED; - } -#endif /* CONFIG_KDS */ - break; - } - case KBASE_FUNC_SET_TEST_DATA: { #if MALI_UNIT_TEST @@ -1236,16 +1021,9 @@ copy_failed: (update->flags != 0)) goto out_bad; - if (kbasep_write_soft_event_status( - kctx, update->evt, - update->new_status) != 0) { + if (kbase_soft_event_update(kctx, update->evt, + update->new_status)) ukh->ret = MALI_ERROR_FUNCTION_FAILED; - break; - } - - if (update->new_status == BASE_JD_SOFT_EVENT_SET) - kbasep_complete_triggered_soft_events( - kctx, update->evt); break; } @@ -1412,7 +1190,7 @@ static int kbase_open(struct inode *inode, struct file *filp) kbase_mem_pool_debugfs_add(kctx->kctx_dentry, &kctx->mem_pool); kbase_jit_debugfs_add(kctx); -#endif /* CONFIG_DEBUGFS */ +#endif /* CONFIG_DEBUG_FS */ dev_dbg(kbdev->dev, "created base context\n"); @@ -2085,7 +1863,7 @@ static ssize_t set_core_mask(struct device *dev, struct device_attribute *attr, static DEVICE_ATTR(core_mask, S_IRUGO | S_IWUSR, show_core_mask, set_core_mask); /** - * set_soft_event_timeout() - Store callback for the soft_event_timeout sysfs + * set_soft_job_timeout() - Store callback for the soft_job_timeout sysfs * file. * * @dev: The device this sysfs file is for. @@ -2093,37 +1871,40 @@ static DEVICE_ATTR(core_mask, S_IRUGO | S_IWUSR, show_core_mask, set_core_mask); * @buf: The value written to the sysfs file. * @count: The number of bytes written to the sysfs file. * - * This allows setting the timeout for software event jobs. Waiting jobs will - * be cancelled after this period expires. This is expressed in milliseconds. + * This allows setting the timeout for software jobs. Waiting soft event wait + * jobs will be cancelled after this period expires, while soft fence wait jobs + * will print debug information if the fence debug feature is enabled. + * + * This is expressed in milliseconds. 
* * Return: count if the function succeeded. An error code on failure. */ -static ssize_t set_soft_event_timeout(struct device *dev, +static ssize_t set_soft_job_timeout(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct kbase_device *kbdev; - int soft_event_timeout_ms; + int soft_job_timeout_ms; kbdev = to_kbase_device(dev); if (!kbdev) return -ENODEV; - if ((kstrtoint(buf, 0, &soft_event_timeout_ms) != 0) || - (soft_event_timeout_ms <= 0)) + if ((kstrtoint(buf, 0, &soft_job_timeout_ms) != 0) || + (soft_job_timeout_ms <= 0)) return -EINVAL; - atomic_set(&kbdev->js_data.soft_event_timeout_ms, - soft_event_timeout_ms); + atomic_set(&kbdev->js_data.soft_job_timeout_ms, + soft_job_timeout_ms); return count; } /** - * show_soft_event_timeout() - Show callback for the soft_event_timeout sysfs + * show_soft_job_timeout() - Show callback for the soft_job_timeout sysfs * file. * - * This will return the timeout for the software event jobs. + * This will return the timeout for the software jobs. * * @dev: The device this sysfs file is for. * @attr: The attributes of the sysfs file. @@ -2131,7 +1912,7 @@ static ssize_t set_soft_event_timeout(struct device *dev, * * Return: The number of bytes output to buf. */ -static ssize_t show_soft_event_timeout(struct device *dev, +static ssize_t show_soft_job_timeout(struct device *dev, struct device_attribute *attr, char * const buf) { @@ -2142,11 +1923,27 @@ static ssize_t show_soft_event_timeout(struct device *dev, return -ENODEV; return scnprintf(buf, PAGE_SIZE, "%i\n", - atomic_read(&kbdev->js_data.soft_event_timeout_ms)); + atomic_read(&kbdev->js_data.soft_job_timeout_ms)); } -static DEVICE_ATTR(soft_event_timeout, S_IRUGO | S_IWUSR, - show_soft_event_timeout, set_soft_event_timeout); +static DEVICE_ATTR(soft_job_timeout, S_IRUGO | S_IWUSR, + show_soft_job_timeout, set_soft_job_timeout); + +static u32 timeout_ms_to_ticks(struct kbase_device *kbdev, long timeout_ms, + int default_ticks, u32 old_ticks) +{ + if (timeout_ms > 0) { + u64 ticks = timeout_ms * 1000000ULL; + do_div(ticks, kbdev->js_data.scheduling_period_ns); + if (!ticks) + return 1; + return ticks; + } else if (timeout_ms < 0) { + return default_ticks; + } else { + return old_ticks; + } +} /** Store callback for the @c js_timeouts sysfs file. 
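timeout_ms_to_ticks() keeps the sysfs semantics compact: a positive value is converted to scheduling ticks (never less than one), zero keeps the current tick count, and a negative value restores the supplied default. Assuming the default 100 ms scheduling period, 3000 ms therefore becomes 30 ticks. A usage sketch follows; the real handler below additionally takes runpool_irq.lock and calls kbase_js_set_timeouts().

static void example_set_soft_stop_ms(struct kbase_device *kbdev, long new_ms)
{
        struct kbasep_js_device_data *js_data = &kbdev->js_data;

        /* new_ms > 0: convert to ticks; == 0: keep old; < 0: use default */
        js_data->soft_stop_ticks = timeout_ms_to_ticks(kbdev, new_ms,
                        DEFAULT_JS_SOFT_STOP_TICKS,
                        js_data->soft_stop_ticks);
}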
* @@ -2192,99 +1989,45 @@ static ssize_t set_js_timeouts(struct device *dev, struct device_attribute *attr &js_reset_ms_cl, &js_reset_ms_dumping); if (items == 8) { - u64 ticks; - - if (js_soft_stop_ms >= 0) { - ticks = js_soft_stop_ms * 1000000ULL; - do_div(ticks, kbdev->js_data.scheduling_period_ns); - kbdev->js_soft_stop_ticks = ticks; - } else { - kbdev->js_soft_stop_ticks = -1; - } - - if (js_soft_stop_ms_cl >= 0) { - ticks = js_soft_stop_ms_cl * 1000000ULL; - do_div(ticks, kbdev->js_data.scheduling_period_ns); - kbdev->js_soft_stop_ticks_cl = ticks; - } else { - kbdev->js_soft_stop_ticks_cl = -1; - } - - if (js_hard_stop_ms_ss >= 0) { - ticks = js_hard_stop_ms_ss * 1000000ULL; - do_div(ticks, kbdev->js_data.scheduling_period_ns); - kbdev->js_hard_stop_ticks_ss = ticks; - } else { - kbdev->js_hard_stop_ticks_ss = -1; - } - - if (js_hard_stop_ms_cl >= 0) { - ticks = js_hard_stop_ms_cl * 1000000ULL; - do_div(ticks, kbdev->js_data.scheduling_period_ns); - kbdev->js_hard_stop_ticks_cl = ticks; - } else { - kbdev->js_hard_stop_ticks_cl = -1; - } - - if (js_hard_stop_ms_dumping >= 0) { - ticks = js_hard_stop_ms_dumping * 1000000ULL; - do_div(ticks, kbdev->js_data.scheduling_period_ns); - kbdev->js_hard_stop_ticks_dumping = ticks; - } else { - kbdev->js_hard_stop_ticks_dumping = -1; - } - - if (js_reset_ms_ss >= 0) { - ticks = js_reset_ms_ss * 1000000ULL; - do_div(ticks, kbdev->js_data.scheduling_period_ns); - kbdev->js_reset_ticks_ss = ticks; - } else { - kbdev->js_reset_ticks_ss = -1; - } - - if (js_reset_ms_cl >= 0) { - ticks = js_reset_ms_cl * 1000000ULL; - do_div(ticks, kbdev->js_data.scheduling_period_ns); - kbdev->js_reset_ticks_cl = ticks; - } else { - kbdev->js_reset_ticks_cl = -1; - } - - if (js_reset_ms_dumping >= 0) { - ticks = js_reset_ms_dumping * 1000000ULL; - do_div(ticks, kbdev->js_data.scheduling_period_ns); - kbdev->js_reset_ticks_dumping = ticks; - } else { - kbdev->js_reset_ticks_dumping = -1; - } - - kbdev->js_timeouts_updated = true; - - dev_dbg(kbdev->dev, "Overriding JS_SOFT_STOP_TICKS with %lu ticks (%lu ms)\n", - (unsigned long)kbdev->js_soft_stop_ticks, - js_soft_stop_ms); - dev_dbg(kbdev->dev, "Overriding JS_SOFT_STOP_TICKS_CL with %lu ticks (%lu ms)\n", - (unsigned long)kbdev->js_soft_stop_ticks_cl, - js_soft_stop_ms_cl); - dev_dbg(kbdev->dev, "Overriding JS_HARD_STOP_TICKS_SS with %lu ticks (%lu ms)\n", - (unsigned long)kbdev->js_hard_stop_ticks_ss, - js_hard_stop_ms_ss); - dev_dbg(kbdev->dev, "Overriding JS_HARD_STOP_TICKS_CL with %lu ticks (%lu ms)\n", - (unsigned long)kbdev->js_hard_stop_ticks_cl, - js_hard_stop_ms_cl); - dev_dbg(kbdev->dev, "Overriding JS_HARD_STOP_TICKS_DUMPING with %lu ticks (%lu ms)\n", - (unsigned long) - kbdev->js_hard_stop_ticks_dumping, - js_hard_stop_ms_dumping); - dev_dbg(kbdev->dev, "Overriding JS_RESET_TICKS_SS with %lu ticks (%lu ms)\n", - (unsigned long)kbdev->js_reset_ticks_ss, - js_reset_ms_ss); - dev_dbg(kbdev->dev, "Overriding JS_RESET_TICKS_CL with %lu ticks (%lu ms)\n", - (unsigned long)kbdev->js_reset_ticks_cl, - js_reset_ms_cl); - dev_dbg(kbdev->dev, "Overriding JS_RESET_TICKS_DUMPING with %lu ticks (%lu ms)\n", - (unsigned long)kbdev->js_reset_ticks_dumping, - js_reset_ms_dumping); + struct kbasep_js_device_data *js_data = &kbdev->js_data; + unsigned long flags; + + spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags); + +#define UPDATE_TIMEOUT(ticks_name, ms_name, default) do {\ + js_data->ticks_name = timeout_ms_to_ticks(kbdev, ms_name, \ + default, js_data->ticks_name); \ + dev_dbg(kbdev->dev, "Overriding " 
#ticks_name \ + " with %lu ticks (%lu ms)\n", \ + (unsigned long)js_data->ticks_name, \ + ms_name); \ + } while (0) + + UPDATE_TIMEOUT(soft_stop_ticks, js_soft_stop_ms, + DEFAULT_JS_SOFT_STOP_TICKS); + UPDATE_TIMEOUT(soft_stop_ticks_cl, js_soft_stop_ms_cl, + DEFAULT_JS_SOFT_STOP_TICKS_CL); + UPDATE_TIMEOUT(hard_stop_ticks_ss, js_hard_stop_ms_ss, + kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408) ? + DEFAULT_JS_HARD_STOP_TICKS_SS_8408 : + DEFAULT_JS_HARD_STOP_TICKS_SS); + UPDATE_TIMEOUT(hard_stop_ticks_cl, js_hard_stop_ms_cl, + DEFAULT_JS_HARD_STOP_TICKS_CL); + UPDATE_TIMEOUT(hard_stop_ticks_dumping, + js_hard_stop_ms_dumping, + DEFAULT_JS_HARD_STOP_TICKS_DUMPING); + UPDATE_TIMEOUT(gpu_reset_ticks_ss, js_reset_ms_ss, + kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408) ? + DEFAULT_JS_RESET_TICKS_SS_8408 : + DEFAULT_JS_RESET_TICKS_SS); + UPDATE_TIMEOUT(gpu_reset_ticks_cl, js_reset_ms_cl, + DEFAULT_JS_RESET_TICKS_CL); + UPDATE_TIMEOUT(gpu_reset_ticks_dumping, js_reset_ms_dumping, + DEFAULT_JS_RESET_TICKS_DUMPING); + + kbase_js_set_timeouts(kbdev); + + spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags); return count; } @@ -2295,6 +2038,16 @@ static ssize_t set_js_timeouts(struct device *dev, struct device_attribute *attr return -EINVAL; } +static unsigned long get_js_timeout_in_ms( + u32 scheduling_period_ns, + u32 ticks) +{ + u64 ms = (u64)ticks * scheduling_period_ns; + + do_div(ms, 1000000UL); + return ms; +} + /** Show callback for the @c js_timeouts sysfs file. * * This function is called to get the contents of the @c js_timeouts sysfs @@ -2311,7 +2064,6 @@ static ssize_t show_js_timeouts(struct device *dev, struct device_attribute *att { struct kbase_device *kbdev; ssize_t ret; - u64 ms; unsigned long js_soft_stop_ms; unsigned long js_soft_stop_ms_cl; unsigned long js_hard_stop_ms_ss; @@ -2320,84 +2072,28 @@ static ssize_t show_js_timeouts(struct device *dev, struct device_attribute *att unsigned long js_reset_ms_ss; unsigned long js_reset_ms_cl; unsigned long js_reset_ms_dumping; - unsigned long ticks; u32 scheduling_period_ns; kbdev = to_kbase_device(dev); if (!kbdev) return -ENODEV; - /* If no contexts have been scheduled since js_timeouts was last written - * to, the new timeouts might not have been latched yet. So check if an - * update is pending and use the new values if necessary. 
*/ - if (kbdev->js_timeouts_updated && kbdev->js_scheduling_period_ns > 0) - scheduling_period_ns = kbdev->js_scheduling_period_ns; - else - scheduling_period_ns = kbdev->js_data.scheduling_period_ns; - - if (kbdev->js_timeouts_updated && kbdev->js_soft_stop_ticks > 0) - ticks = kbdev->js_soft_stop_ticks; - else - ticks = kbdev->js_data.soft_stop_ticks; - ms = (u64)ticks * scheduling_period_ns; - do_div(ms, 1000000UL); - js_soft_stop_ms = (unsigned long)ms; - - if (kbdev->js_timeouts_updated && kbdev->js_soft_stop_ticks_cl > 0) - ticks = kbdev->js_soft_stop_ticks_cl; - else - ticks = kbdev->js_data.soft_stop_ticks_cl; - ms = (u64)ticks * scheduling_period_ns; - do_div(ms, 1000000UL); - js_soft_stop_ms_cl = (unsigned long)ms; - - if (kbdev->js_timeouts_updated && kbdev->js_hard_stop_ticks_ss > 0) - ticks = kbdev->js_hard_stop_ticks_ss; - else - ticks = kbdev->js_data.hard_stop_ticks_ss; - ms = (u64)ticks * scheduling_period_ns; - do_div(ms, 1000000UL); - js_hard_stop_ms_ss = (unsigned long)ms; - - if (kbdev->js_timeouts_updated && kbdev->js_hard_stop_ticks_cl > 0) - ticks = kbdev->js_hard_stop_ticks_cl; - else - ticks = kbdev->js_data.hard_stop_ticks_cl; - ms = (u64)ticks * scheduling_period_ns; - do_div(ms, 1000000UL); - js_hard_stop_ms_cl = (unsigned long)ms; - - if (kbdev->js_timeouts_updated && kbdev->js_hard_stop_ticks_dumping > 0) - ticks = kbdev->js_hard_stop_ticks_dumping; - else - ticks = kbdev->js_data.hard_stop_ticks_dumping; - ms = (u64)ticks * scheduling_period_ns; - do_div(ms, 1000000UL); - js_hard_stop_ms_dumping = (unsigned long)ms; + scheduling_period_ns = kbdev->js_data.scheduling_period_ns; - if (kbdev->js_timeouts_updated && kbdev->js_reset_ticks_ss > 0) - ticks = kbdev->js_reset_ticks_ss; - else - ticks = kbdev->js_data.gpu_reset_ticks_ss; - ms = (u64)ticks * scheduling_period_ns; - do_div(ms, 1000000UL); - js_reset_ms_ss = (unsigned long)ms; +#define GET_TIMEOUT(name) get_js_timeout_in_ms(\ + scheduling_period_ns, \ + kbdev->js_data.name) - if (kbdev->js_timeouts_updated && kbdev->js_reset_ticks_cl > 0) - ticks = kbdev->js_reset_ticks_cl; - else - ticks = kbdev->js_data.gpu_reset_ticks_cl; - ms = (u64)ticks * scheduling_period_ns; - do_div(ms, 1000000UL); - js_reset_ms_cl = (unsigned long)ms; + js_soft_stop_ms = GET_TIMEOUT(soft_stop_ticks); + js_soft_stop_ms_cl = GET_TIMEOUT(soft_stop_ticks_cl); + js_hard_stop_ms_ss = GET_TIMEOUT(hard_stop_ticks_ss); + js_hard_stop_ms_cl = GET_TIMEOUT(hard_stop_ticks_cl); + js_hard_stop_ms_dumping = GET_TIMEOUT(hard_stop_ticks_dumping); + js_reset_ms_ss = GET_TIMEOUT(gpu_reset_ticks_ss); + js_reset_ms_cl = GET_TIMEOUT(gpu_reset_ticks_cl); + js_reset_ms_dumping = GET_TIMEOUT(gpu_reset_ticks_dumping); - if (kbdev->js_timeouts_updated && kbdev->js_reset_ticks_dumping > 0) - ticks = kbdev->js_reset_ticks_dumping; - else - ticks = kbdev->js_data.gpu_reset_ticks_dumping; - ms = (u64)ticks * scheduling_period_ns; - do_div(ms, 1000000UL); - js_reset_ms_dumping = (unsigned long)ms; +#undef GET_TIMEOUT ret = scnprintf(buf, PAGE_SIZE, "%lu %lu %lu %lu %lu %lu %lu %lu\n", js_soft_stop_ms, js_soft_stop_ms_cl, @@ -2428,6 +2124,16 @@ static ssize_t show_js_timeouts(struct device *dev, struct device_attribute *att */ static DEVICE_ATTR(js_timeouts, S_IRUGO | S_IWUSR, show_js_timeouts, set_js_timeouts); +static u32 get_new_js_timeout( + u32 old_period, + u32 old_ticks, + u32 new_scheduling_period_ns) +{ + u64 ticks = (u64)old_period * (u64)old_ticks; + do_div(ticks, new_scheduling_period_ns); + return ticks?ticks:1; +} + /** * set_js_scheduling_period 
- Store callback for the js_scheduling_period sysfs * file @@ -2450,12 +2156,15 @@ static ssize_t set_js_scheduling_period(struct device *dev, unsigned int js_scheduling_period; u32 new_scheduling_period_ns; u32 old_period; - u64 ticks; + struct kbasep_js_device_data *js_data; + unsigned long flags; kbdev = to_kbase_device(dev); if (!kbdev) return -ENODEV; + js_data = &kbdev->js_data; + ret = kstrtouint(buf, 0, &js_scheduling_period); if (ret || !js_scheduling_period) { dev_err(kbdev->dev, "Couldn't process js_scheduling_period write operation.\n" @@ -2466,86 +2175,39 @@ static ssize_t set_js_scheduling_period(struct device *dev, new_scheduling_period_ns = js_scheduling_period * 1000000; /* Update scheduling timeouts */ - mutex_lock(&kbdev->js_data.runpool_mutex); + mutex_lock(&js_data->runpool_mutex); + spin_lock_irqsave(&js_data->runpool_irq.lock, flags); /* If no contexts have been scheduled since js_timeouts was last written * to, the new timeouts might not have been latched yet. So check if an * update is pending and use the new values if necessary. */ /* Use previous 'new' scheduling period as a base if present. */ - if (kbdev->js_timeouts_updated && kbdev->js_scheduling_period_ns) - old_period = kbdev->js_scheduling_period_ns; - else - old_period = kbdev->js_data.scheduling_period_ns; - - if (kbdev->js_timeouts_updated && kbdev->js_soft_stop_ticks > 0) - ticks = (u64)kbdev->js_soft_stop_ticks * old_period; - else - ticks = (u64)kbdev->js_data.soft_stop_ticks * - kbdev->js_data.scheduling_period_ns; - do_div(ticks, new_scheduling_period_ns); - kbdev->js_soft_stop_ticks = ticks ? ticks : 1; - - if (kbdev->js_timeouts_updated && kbdev->js_soft_stop_ticks_cl > 0) - ticks = (u64)kbdev->js_soft_stop_ticks_cl * old_period; - else - ticks = (u64)kbdev->js_data.soft_stop_ticks_cl * - kbdev->js_data.scheduling_period_ns; - do_div(ticks, new_scheduling_period_ns); - kbdev->js_soft_stop_ticks_cl = ticks ? ticks : 1; - - if (kbdev->js_timeouts_updated && kbdev->js_hard_stop_ticks_ss > 0) - ticks = (u64)kbdev->js_hard_stop_ticks_ss * old_period; - else - ticks = (u64)kbdev->js_data.hard_stop_ticks_ss * - kbdev->js_data.scheduling_period_ns; - do_div(ticks, new_scheduling_period_ns); - kbdev->js_hard_stop_ticks_ss = ticks ? ticks : 1; - - if (kbdev->js_timeouts_updated && kbdev->js_hard_stop_ticks_cl > 0) - ticks = (u64)kbdev->js_hard_stop_ticks_cl * old_period; - else - ticks = (u64)kbdev->js_data.hard_stop_ticks_cl * - kbdev->js_data.scheduling_period_ns; - do_div(ticks, new_scheduling_period_ns); - kbdev->js_hard_stop_ticks_cl = ticks ? ticks : 1; + old_period = js_data->scheduling_period_ns; - if (kbdev->js_timeouts_updated && kbdev->js_hard_stop_ticks_dumping > 0) - ticks = (u64)kbdev->js_hard_stop_ticks_dumping * old_period; - else - ticks = (u64)kbdev->js_data.hard_stop_ticks_dumping * - kbdev->js_data.scheduling_period_ns; - do_div(ticks, new_scheduling_period_ns); - kbdev->js_hard_stop_ticks_dumping = ticks ? ticks : 1; +#define SET_TIMEOUT(name) \ + (js_data->name = get_new_js_timeout(\ + old_period, \ + kbdev->js_data.name, \ + new_scheduling_period_ns)) - if (kbdev->js_timeouts_updated && kbdev->js_reset_ticks_ss > 0) - ticks = (u64)kbdev->js_reset_ticks_ss * old_period; - else - ticks = (u64)kbdev->js_data.gpu_reset_ticks_ss * - kbdev->js_data.scheduling_period_ns; - do_div(ticks, new_scheduling_period_ns); - kbdev->js_reset_ticks_ss = ticks ? 
ticks : 1; + SET_TIMEOUT(soft_stop_ticks); + SET_TIMEOUT(soft_stop_ticks_cl); + SET_TIMEOUT(hard_stop_ticks_ss); + SET_TIMEOUT(hard_stop_ticks_cl); + SET_TIMEOUT(hard_stop_ticks_dumping); + SET_TIMEOUT(gpu_reset_ticks_ss); + SET_TIMEOUT(gpu_reset_ticks_cl); + SET_TIMEOUT(gpu_reset_ticks_dumping); - if (kbdev->js_timeouts_updated && kbdev->js_reset_ticks_cl > 0) - ticks = (u64)kbdev->js_reset_ticks_cl * old_period; - else - ticks = (u64)kbdev->js_data.gpu_reset_ticks_cl * - kbdev->js_data.scheduling_period_ns; - do_div(ticks, new_scheduling_period_ns); - kbdev->js_reset_ticks_cl = ticks ? ticks : 1; +#undef SET_TIMEOUT - if (kbdev->js_timeouts_updated && kbdev->js_reset_ticks_dumping > 0) - ticks = (u64)kbdev->js_reset_ticks_dumping * old_period; - else - ticks = (u64)kbdev->js_data.gpu_reset_ticks_dumping * - kbdev->js_data.scheduling_period_ns; - do_div(ticks, new_scheduling_period_ns); - kbdev->js_reset_ticks_dumping = ticks ? ticks : 1; + js_data->scheduling_period_ns = new_scheduling_period_ns; - kbdev->js_scheduling_period_ns = new_scheduling_period_ns; - kbdev->js_timeouts_updated = true; + kbase_js_set_timeouts(kbdev); - mutex_unlock(&kbdev->js_data.runpool_mutex); + spin_unlock_irqrestore(&js_data->runpool_irq.lock, flags); + mutex_unlock(&js_data->runpool_mutex); dev_dbg(kbdev->dev, "JS scheduling period: %dms\n", js_scheduling_period); @@ -2576,10 +2238,7 @@ static ssize_t show_js_scheduling_period(struct device *dev, if (!kbdev) return -ENODEV; - if (kbdev->js_timeouts_updated && kbdev->js_scheduling_period_ns > 0) - period = kbdev->js_scheduling_period_ns; - else - period = kbdev->js_data.scheduling_period_ns; + period = kbdev->js_data.scheduling_period_ns; ret = scnprintf(buf, PAGE_SIZE, "%d\n", period / 1000000); @@ -3208,53 +2867,42 @@ static DEVICE_ATTR(mem_pool_max_size, S_IRUGO | S_IWUSR, show_mem_pool_max_size, set_mem_pool_max_size); -static int kbasep_secure_mode_enable(struct kbase_device *kbdev) +static int kbasep_protected_mode_enter(struct kbase_device *kbdev) { kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_SET_PROTECTED_MODE, NULL); return 0; } -static int kbasep_secure_mode_disable(struct kbase_device *kbdev) +static bool kbasep_protected_mode_supported(struct kbase_device *kbdev) { - if (!kbase_prepare_to_reset_gpu_locked(kbdev)) - return -EBUSY; - - kbase_reset_gpu_locked(kbdev); - - return 0; + return true; } -static struct kbase_secure_ops kbasep_secure_ops = { - .secure_mode_enable = kbasep_secure_mode_enable, - .secure_mode_disable = kbasep_secure_mode_disable, +static struct kbase_protected_ops kbasep_protected_ops = { + .protected_mode_enter = kbasep_protected_mode_enter, + .protected_mode_reset = NULL, + .protected_mode_supported = kbasep_protected_mode_supported, }; -static void kbasep_secure_mode_init(struct kbase_device *kbdev) +static void kbasep_protected_mode_init(struct kbase_device *kbdev) { - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) { - /* Use native secure ops */ - kbdev->secure_ops = &kbasep_secure_ops; - kbdev->secure_mode_support = true; - } -#ifdef SECURE_CALLBACKS - else { - kbdev->secure_ops = SECURE_CALLBACKS; - kbdev->secure_mode_support = false; - - if (kbdev->secure_ops) { - int err; + kbdev->protected_ops = NULL; - /* Make sure secure mode is disabled on startup */ - err = kbdev->secure_ops->secure_mode_disable(kbdev); - - /* secure_mode_disable() returns -EINVAL if not - * supported - */ - kbdev->secure_mode_support = (err != -EINVAL); - } + if (kbase_hw_has_feature(kbdev, 
BASE_HW_FEATURE_PROTECTED_MODE)) { + /* Use native protected ops */ + kbdev->protected_ops = &kbasep_protected_ops; } +#ifdef PROTECTED_CALLBACKS + else + kbdev->protected_ops = PROTECTED_CALLBACKS; #endif + + if (kbdev->protected_ops) + kbdev->protected_mode_support = + kbdev->protected_ops->protected_mode_supported(kbdev); + else + kbdev->protected_mode_support = false; } #ifdef CONFIG_MALI_NO_MALI @@ -3508,6 +3156,7 @@ static int kbase_device_debugfs_init(struct kbase_device *kbdev) kbase_debug_job_fault_debugfs_init(kbdev); kbasep_gpu_memory_debugfs_init(kbdev); + kbase_as_fault_debugfs_init(kbdev); #if KBASE_GPU_RESET_EN debugfs_create_file("quirks_sc", 0644, kbdev->mali_debugfs_directory, kbdev, @@ -3625,7 +3274,7 @@ static struct attribute *kbase_attrs[] = { &dev_attr_force_replay.attr, #endif &dev_attr_js_timeouts.attr, - &dev_attr_soft_event_timeout.attr, + &dev_attr_soft_job_timeout.attr, &dev_attr_gpuinfo.attr, &dev_attr_dvfs_period.attr, &dev_attr_pm_poweroff.attr, @@ -3898,7 +3547,7 @@ static int kbase_platform_device_probe(struct platform_device *pdev) kbase_device_coherency_init(kbdev, gpu_id); - kbasep_secure_mode_init(kbdev); + kbasep_protected_mode_init(kbdev); err = kbasep_js_devdata_init(kbdev); if (err) { @@ -4263,7 +3912,6 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(mali_page_fault_insert_pages); EXPORT_TRACEPOINT_SYMBOL_GPL(mali_mmu_as_in_use); EXPORT_TRACEPOINT_SYMBOL_GPL(mali_mmu_as_released); EXPORT_TRACEPOINT_SYMBOL_GPL(mali_total_alloc_pages_change); -EXPORT_TRACEPOINT_SYMBOL_GPL(mali_sw_counter); void kbase_trace_mali_pm_status(u32 event, u64 value) { diff --git a/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c b/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c index bcd2569549c9..83c5c37942bd 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c +++ b/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c @@ -15,8 +15,8 @@ +#include #include -#include "mali_kbase_debug_job_fault.h" #ifdef CONFIG_DEBUG_FS diff --git a/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h b/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h index 0930f905e4ef..a2bf8983c37c 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h +++ b/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -18,7 +18,6 @@ #ifndef _KBASE_DEBUG_JOB_FAULT_H #define _KBASE_DEBUG_JOB_FAULT_H -#include #include #include diff --git a/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c b/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c index 28d93e30794f..a98355e33d07 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c +++ b/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c @@ -221,6 +221,7 @@ out: kfree(mapping); } fput(kctx_file); + kfree(mem_data); } seq_release(i, file); return ret; diff --git a/drivers/gpu/arm/midgard/mali_kbase_defs.h b/drivers/gpu/arm/midgard/mali_kbase_defs.h index d0eae8e3486b..4bb8c2c7aec2 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_defs.h +++ b/drivers/gpu/arm/midgard/mali_kbase_defs.h @@ -33,7 +33,7 @@ #include #include #include -#include +#include #include #include @@ -177,20 +177,18 @@ #define KBASE_KATOM_FLAG_BEEN_HARD_STOPPED (1<<4) /** Atom has caused us to enter disjoint state */ #define KBASE_KATOM_FLAG_IN_DISJOINT (1<<5) -/* Atom has fail dependency on same-slot dependency */ -#define KBASE_KATOM_FLAG_FAIL_PREV (1<<6) /* Atom blocked on cross-slot dependency */ #define KBASE_KATOM_FLAG_X_DEP_BLOCKED (1<<7) /* Atom has fail dependency on cross-slot dependency */ #define KBASE_KATOM_FLAG_FAIL_BLOCKER (1<<8) -/* Atom has been submitted to JSCTX ringbuffers */ -#define KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED (1<<9) +/* Atom is currently in the list of atoms blocked on cross-slot dependencies */ +#define KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST (1<<9) /* Atom is currently holding a context reference */ #define KBASE_KATOM_FLAG_HOLDING_CTX_REF (1<<10) -/* Atom requires GPU to be in secure mode */ -#define KBASE_KATOM_FLAG_SECURE (1<<11) -/* Atom has been stored in linked list */ -#define KBASE_KATOM_FLAG_JSCTX_IN_LL (1<<12) +/* Atom requires GPU to be in protected mode */ +#define KBASE_KATOM_FLAG_PROTECTED (1<<11) +/* Atom has been stored in runnable_tree */ +#define KBASE_KATOM_FLAG_JSCTX_IN_TREE (1<<12) /* SW related flags about types of JS_COMMAND action * NOTE: These must be masked off by JS_COMMAND_MASK */ @@ -237,11 +235,11 @@ struct kbase_jd_atom_dependency { * * @return readonly reference to dependent ATOM. */ -static inline const struct kbase_jd_atom *const kbase_jd_katom_dep_atom(const struct kbase_jd_atom_dependency *dep) +static inline const struct kbase_jd_atom * kbase_jd_katom_dep_atom(const struct kbase_jd_atom_dependency *dep) { LOCAL_ASSERT(dep != NULL); - return (const struct kbase_jd_atom * const)(dep->atom); + return (const struct kbase_jd_atom *)(dep->atom); } /** @@ -252,7 +250,7 @@ static inline const struct kbase_jd_atom *const kbase_jd_katom_dep_atom(const st * * @return A dependency type value. 
*/ -static inline const u8 kbase_jd_katom_dep_type(const struct kbase_jd_atom_dependency *dep) +static inline u8 kbase_jd_katom_dep_type(const struct kbase_jd_atom_dependency *dep) { LOCAL_ASSERT(dep != NULL); @@ -303,13 +301,15 @@ enum kbase_atom_gpu_rb_state { KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB, /* Atom is in slot ringbuffer but is blocked on a previous atom */ KBASE_ATOM_GPU_RB_WAITING_BLOCKED, + /* Atom is in slot ringbuffer but is waiting for protected mode exit */ + KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_EXIT, /* Atom is in slot ringbuffer but is waiting for cores to become * available */ KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE, /* Atom is in slot ringbuffer but is blocked on affinity */ KBASE_ATOM_GPU_RB_WAITING_AFFINITY, - /* Atom is in slot ringbuffer but is waiting for secure mode switch */ - KBASE_ATOM_GPU_RB_WAITING_SECURE_MODE, + /* Atom is in slot ringbuffer but is waiting for protected mode entry */ + KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_ENTRY, /* Atom is in slot ringbuffer and ready to run */ KBASE_ATOM_GPU_RB_READY, /* Atom is in slot ringbuffer and has been submitted to the GPU */ @@ -319,6 +319,23 @@ enum kbase_atom_gpu_rb_state { KBASE_ATOM_GPU_RB_RETURN_TO_JS }; +enum kbase_atom_exit_protected_state { + /* + * Starting state: + * Check if a transition out of protected mode is required. + */ + KBASE_ATOM_EXIT_PROTECTED_CHECK, + /* Wait for the L2 to become idle in preparation for the reset. */ + KBASE_ATOM_EXIT_PROTECTED_IDLE_L2, + /* Issue the protected reset. */ + KBASE_ATOM_EXIT_PROTECTED_RESET, + /* + * End state; + * Wait for the reset to complete. + */ + KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT, +}; + struct kbase_ext_res { u64 gpu_address; struct kbase_mem_phy_alloc *alloc; @@ -335,6 +352,13 @@ struct kbase_jd_atom { struct list_head dep_head[2]; struct list_head dep_item[2]; const struct kbase_jd_atom_dependency dep[2]; + /* List head used during job dispatch job_done processing - as + * dependencies may not be entirely resolved at this point, we need to + * use a separate list head. */ + struct list_head jd_item; + /* true if atom's jd_item is currently on a list. Prevents atom being + * processed twice. */ + bool in_jd_list; u16 nr_extres; struct kbase_ext_res *extres; @@ -440,6 +464,11 @@ struct kbase_jd_atom { atomic_t blocked; + /* Pointer to atom that this atom has same-slot dependency on */ + struct kbase_jd_atom *pre_dep; + /* Pointer to atom that has same-slot dependency on this atom */ + struct kbase_jd_atom *post_dep; + /* Pointer to atom that this atom has cross-slot dependency on */ struct kbase_jd_atom *x_pre_dep; /* Pointer to atom that has cross-slot dependency on this atom */ @@ -454,6 +483,12 @@ struct kbase_jd_atom { struct base_job_fault_event fault_event; #endif + /* List head used for two different purposes: + * 1. Overflow list for JS ring buffers. If an atom is ready to run, + * but there is no room in the JS ring buffer, then the atom is put + * on the ring buffer's overflow list using this list node. + * 2. List of waiting soft jobs. + */ struct list_head queue; struct kbase_va_region *jit_addr_reg; @@ -461,11 +496,18 @@ struct kbase_jd_atom { /* If non-zero, this indicates that the atom will fail with the set * event_code when the atom is processed. */ enum base_jd_event_code will_fail_event_code; + + enum kbase_atom_exit_protected_state exit_protected_state; + + struct rb_node runnable_tree_node; + + /* 'Age' of atom relative to other atoms in the context. 
*/ + u32 age; }; -static inline bool kbase_jd_katom_is_secure(const struct kbase_jd_atom *katom) +static inline bool kbase_jd_katom_is_protected(const struct kbase_jd_atom *katom) { - return (bool)(katom->atom_flags & KBASE_KATOM_FLAG_SECURE); + return (bool)(katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED); } /* @@ -787,27 +829,36 @@ struct kbase_pm_device_data { }; /** - * struct kbase_secure_ops - Platform specific functions for GPU secure mode - * operations - * @secure_mode_enable: Callback to enable secure mode on the GPU - * @secure_mode_disable: Callback to disable secure mode on the GPU + * struct kbase_protected_ops - Platform specific functions for GPU protected + * mode operations + * @protected_mode_enter: Callback to enter protected mode on the GPU + * @protected_mode_reset: Callback to reset the GPU and exit protected mode. + * @protected_mode_supported: Callback to check if protected mode is supported. */ -struct kbase_secure_ops { +struct kbase_protected_ops { /** - * secure_mode_enable() - Enable secure mode on the GPU + * protected_mode_enter() - Enter protected mode on the GPU * @kbdev: The kbase device * * Return: 0 on success, non-zero on error */ - int (*secure_mode_enable)(struct kbase_device *kbdev); + int (*protected_mode_enter)(struct kbase_device *kbdev); /** - * secure_mode_disable() - Disable secure mode on the GPU + * protected_mode_reset() - Reset the GPU and exit protected mode * @kbdev: The kbase device * * Return: 0 on success, non-zero on error */ - int (*secure_mode_disable)(struct kbase_device *kbdev); + int (*protected_mode_reset)(struct kbase_device *kbdev); + + /** + * protected_mode_supported() - Check if protected mode is supported + * @kbdev: The kbase device + * + * Return: true if protected mode is supported, false otherwise + */ + bool (*protected_mode_supported)(struct kbase_device *kbdev); }; @@ -953,9 +1004,6 @@ struct kbase_device { struct kbase_context *kctx; u64 addr; - struct kbase_context *suspended_kctx; - struct kbase_uk_hwcnt_setup suspended_state; - struct kbase_instr_backend backend; } hwcnt; @@ -971,30 +1019,6 @@ struct kbase_device { struct kbase_trace *trace_rbuf; #endif - /* This is used to override the current job scheduler values for - * JS_SCHEDULING_PERIOD_NS - * JS_SOFT_STOP_TICKS - * JS_SOFT_STOP_TICKS_CL - * JS_HARD_STOP_TICKS_SS - * JS_HARD_STOP_TICKS_CL - * JS_HARD_STOP_TICKS_DUMPING - * JS_RESET_TICKS_SS - * JS_RESET_TICKS_CL - * JS_RESET_TICKS_DUMPING. - * - * These values are set via the js_timeouts sysfs file. 
- */ - u32 js_scheduling_period_ns; - int js_soft_stop_ticks; - int js_soft_stop_ticks_cl; - int js_hard_stop_ticks_ss; - int js_hard_stop_ticks_cl; - int js_hard_stop_ticks_dumping; - int js_reset_ticks_ss; - int js_reset_ticks_cl; - int js_reset_ticks_dumping; - bool js_timeouts_updated; - u32 reset_timeout_ms; struct mutex cacheclean_lock; @@ -1012,8 +1036,12 @@ struct kbase_device { unsigned long current_freq; unsigned long current_voltage; #ifdef CONFIG_DEVFREQ_THERMAL +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) + struct devfreq_cooling_device *devfreq_cooling; +#else struct thermal_cooling_device *devfreq_cooling; #endif +#endif #endif struct kbase_ipa_context *ipa_ctx; @@ -1034,6 +1062,11 @@ struct kbase_device { /* Root directory for per context entry */ struct dentry *debugfs_ctx_directory; +#ifdef CONFIG_MALI_DEBUG + /* bit for each as, set if there is new data to report */ + u64 debugfs_as_read_bitmap; +#endif /* CONFIG_MALI_DEBUG */ + /* failed job dump, used for separate debug process */ wait_queue_head_t job_fault_wq; wait_queue_head_t job_fault_resume_wq; @@ -1103,18 +1136,23 @@ struct kbase_device { u32 snoop_enable_smc; u32 snoop_disable_smc; - /* Secure operations */ - struct kbase_secure_ops *secure_ops; + /* Protected operations */ + struct kbase_protected_ops *protected_ops; + + /* + * true when GPU is put into protected mode + */ + bool protected_mode; /* - * true when GPU is put into secure mode + * true when GPU is transitioning into or out of protected mode */ - bool secure_mode; + bool protected_mode_transition; /* - * true if secure mode is supported + * true if protected mode is supported */ - bool secure_mode_support; + bool protected_mode_support; #ifdef CONFIG_MALI_DEBUG @@ -1135,79 +1173,22 @@ struct kbase_device { u32 inited_subsys; }; -/* JSCTX ringbuffer size will always be a power of 2. The idx shift must be: - - >=2 (buffer size -> 4) - - <= 9 (buffer size 2^(9-1)=256) (technically, 10 works for the ringbuffer - but this is unnecessary as max atoms is 256) - */ -#define JSCTX_RB_IDX_SHIFT (8U) -#if ((JSCTX_RB_IDX_SHIFT < 2) || ((3 * JSCTX_RB_IDX_SHIFT) >= 32)) -#error "Invalid ring buffer size for 32bit atomic." -#endif -#define JSCTX_RB_SIZE (1U << (JSCTX_RB_IDX_SHIFT - 1U)) /* 1 bit for overflow */ -#define JSCTX_RB_SIZE_STORE (1U << JSCTX_RB_IDX_SHIFT) -#define JSCTX_RB_MASK (JSCTX_RB_SIZE - 1U) -#define JSCTX_RB_MASK_STORE (JSCTX_RB_SIZE_STORE - 1U) - -#define JSCTX_WR_OFFSET (0U) -#define JSCTX_RN_OFFSET (JSCTX_WR_OFFSET + JSCTX_RB_IDX_SHIFT) -#define JSCTX_RD_OFFSET (JSCTX_RN_OFFSET + JSCTX_RB_IDX_SHIFT) - /** - * struct jsctx_rb_entry - Ringbuffer entry in &struct jsctx_queue. - * @atom_id: Atom ID - */ -struct jsctx_rb_entry { - u16 atom_id; -}; - -/** - * struct jsctx_queue - JS context atom queue, containing both ring buffer and linked list. - * @entries: Array of size %JSCTX_RB_SIZE which holds the &struct - * kbase_jd_atom pointers which make up the contents of the ring - * buffer. - * @indicies: An atomic variable containing indicies for the ring buffer. - * Indicies are of size JSCTX_RB_IDX_SHIFT. - * The following are contained: - * - WR_IDX - Write index. Index of the NEXT slot to be written. - * - RN_IDX - Running index. Index of the tail of the list. - * This is the atom that has been running the longest. - * - RD_IDX - Read index. Index of the next atom to be pulled. - * @queue_head: Head item of the linked list queue. - * - * Locking: - * The linked list assumes jctx.lock is held. 
- * The ringbuffer serves as an intermediary between irq context and non-irq - * context, without the need for the two to share any lock. irq context can - * pull (and unpull) and only requires the runpool_irq.lock. While non-irq - * context can add and remove and only requires holding only jctx.lock. - * Error handling affecting both, or the whole ringbuffer in general, must - * hold both locks or otherwise ensure (f.ex deschedule/kill) only that thread - * is accessing the buffer. - * This means that RD_IDX is updated by irq-context (pull and unpull) and must - * hold runpool_irq.lock. While WR_IDX (add) and RN_IDX (remove) is updated by - * non-irq context and must hold jctx.lock. - * Note that pull (or sister function peek) must also access WR_IDX to ensure - * there is free space in the buffer, this is ok as WR_IDX is only increased. - * A similar situation is apparent with unpull and RN_IDX, but only one atom - * (already pulled) can cause either remove or unpull, so this will never - * conflict. + * struct jsctx_queue - JS context atom queue + * @runnable_tree: Root of RB-tree containing currently runnable atoms on this + * job slot. + * @x_dep_head: Head item of the linked list of atoms blocked on cross-slot + * dependencies. Atoms on this list will be moved to the + * runnable_tree when the blocking atom completes. * - * &struct jsctx_queue is a queue of &struct kbase_jd_atom, - * part ringbuffer and part linked list. + * runpool_irq.lock must be held when accessing this structure. */ struct jsctx_queue { - struct jsctx_rb_entry entries[JSCTX_RB_SIZE]; - - atomic_t indicies; - - struct list_head queue_head; + struct rb_root runnable_tree; + struct list_head x_dep_head; }; - - - #define KBASE_API_VERSION(major, minor) ((((major) & 0xFFF) << 20) | \ (((minor) & 0xFFF) << 8) | \ ((0 & 0xFF) << 0)) @@ -1334,9 +1315,6 @@ struct kbase_context { /* Bitmask of slots that can be pulled from */ u32 slots_pullable; - /* true if address space assignment is pending */ - bool as_pending; - /* Backend specific data */ struct kbase_context_backend backend; @@ -1359,10 +1337,10 @@ struct kbase_context { bool ctx_runnable_ref; /* Waiting soft-jobs will fail when this timer expires */ - struct hrtimer soft_event_timeout; + struct timer_list soft_job_timeout; /* JIT allocation management */ - struct kbase_va_region *jit_alloc[255]; + struct kbase_va_region *jit_alloc[256]; struct list_head jit_active_head; struct list_head jit_pool_head; struct list_head jit_destroy_head; @@ -1371,6 +1349,12 @@ struct kbase_context { /* External sticky resource management */ struct list_head ext_res_meta_head; + + /* Used to record that a drain was requested from atomic context */ + atomic_t drain_pending; + + /* Current age count, used to determine age for newly submitted atoms */ + u32 age_count; }; /** @@ -1426,7 +1410,7 @@ static inline bool kbase_device_is_cpu_coherent(struct kbase_device *kbdev) } /* Conversion helpers for setting up high resolution timers */ -#define HR_TIMER_DELAY_MSEC(x) (ns_to_ktime((x)*1000000U)) +#define HR_TIMER_DELAY_MSEC(x) (ns_to_ktime(((u64)(x))*1000000U)) #define HR_TIMER_DELAY_NSEC(x) (ns_to_ktime(x)) /* Maximum number of loops polling the GPU for a cache flush before we assume it must have completed */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_device.c b/drivers/gpu/arm/midgard/mali_kbase_device.c index c55779cbad4d..62ab0caf9858 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_device.c +++ b/drivers/gpu/arm/midgard/mali_kbase_device.c @@ -30,6 +30,7 @@ #include #include 
+#include #include diff --git a/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c b/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c index 29f3324a9ddd..4d3836a1d4ab 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c +++ b/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c @@ -115,7 +115,7 @@ kbase_dma_fence_lock_reservations(struct kbase_dma_fence_resv_info *info, struct reservation_object *content_res = NULL; unsigned int content_res_idx = 0; unsigned int r; - int err; + int err = 0; ww_acquire_init(ctx, &reservation_ww_class); @@ -138,7 +138,7 @@ error: content_res_idx = r; /* Unlock the locked one ones */ - for (r--; r >= 0; r--) + while (r--) ww_mutex_unlock(&info->resv_objs[r]->lock); if (content_res) @@ -197,6 +197,10 @@ kbase_dma_fence_free_callbacks(struct kbase_jd_atom *katom) atomic_dec(&katom->dma_fence.dep_count); } + /* + * Release the reference taken in + * kbase_dma_fence_add_callback(). + */ fence_put(cb->fence); list_del(&cb->node); kfree(cb); @@ -268,6 +272,21 @@ out: mutex_unlock(&ctx->lock); } +/** + * kbase_dma_fence_add_callback() - Add callback on @fence to block @katom + * @katom: Pointer to katom that will be blocked by @fence + * @fence: Pointer to fence on which to set up the callback + * @callback: Pointer to function to be called when fence is signaled + * + * Caller needs to hold a reference to @fence when calling this function, and + * the caller is responsible for releasing that reference. An additional + * reference to @fence will be taken when the callback was successfully set up + * and @fence needs to be kept valid until the callback has been called and + * cleanup has been done. + * + * Return: 0 on success: fence was either already signalled, or callback was + * set up. Negative error code is returned on error. + */ static int kbase_dma_fence_add_callback(struct kbase_jd_atom *katom, struct fence *fence, @@ -280,8 +299,6 @@ kbase_dma_fence_add_callback(struct kbase_jd_atom *katom, if (!kbase_fence_cb) return -ENOMEM; - fence_get(fence); - kbase_fence_cb->fence = fence; kbase_fence_cb->katom = katom; INIT_LIST_HEAD(&kbase_fence_cb->node); @@ -291,16 +308,18 @@ kbase_dma_fence_add_callback(struct kbase_jd_atom *katom, /* Fence signaled, clear the error and return */ err = 0; kbase_fence_cb->fence = NULL; - fence_put(fence); kfree(kbase_fence_cb); } else if (err) { - /* Do nothing, just return the error */ - fence_put(fence); kfree(kbase_fence_cb); } else { + /* + * Get reference to fence that will be kept until callback gets + * cleaned up in kbase_dma_fence_free_callbacks(). + */ + fence_get(fence); atomic_inc(&katom->dma_fence.dep_count); /* Add callback to katom's list of callbacks */ - list_add(&katom->dma_fence.callbacks, &kbase_fence_cb->node); + list_add(&kbase_fence_cb->node, &katom->dma_fence.callbacks); } return err; @@ -350,8 +369,16 @@ kbase_dma_fence_add_reservation_callback(struct kbase_jd_atom *katom, err = kbase_dma_fence_add_callback(katom, excl_fence, kbase_dma_fence_cb); + + /* Release our reference, taken by reservation_object_get_fences_rcu(), + * to the fence. We have set up our callback (if that was possible), + * and the fence's owner is responsible for signaling the fence + * before allowing it to disappear. 
+ */ + fence_put(excl_fence); + if (err) - goto error; + goto out; } if (exclusive) { @@ -360,19 +387,28 @@ kbase_dma_fence_add_reservation_callback(struct kbase_jd_atom *katom, shared_fences[i], kbase_dma_fence_cb); if (err) - goto error; + goto out; } } - kfree(shared_fences); - return err; - -error: - /* Cancel and clean up all callbacks that was set up before the error. + /* Release all our references to the shared fences, taken by + * reservation_object_get_fences_rcu(). We have set up our callback (if + * that was possible), and the fence's owner is responsible for + * signaling the fence before allowing it to disappear. */ - kbase_dma_fence_free_callbacks(katom); +out: + for (i = 0; i < shared_count; i++) + fence_put(shared_fences[i]); kfree(shared_fences); + if (err) { + /* + * On error, cancel and clean up all callbacks that were set up + * before the error. + */ + kbase_dma_fence_free_callbacks(katom); + } + return err; } @@ -404,7 +440,6 @@ int kbase_dma_fence_wait(struct kbase_jd_atom *katom, lockdep_assert_held(&katom->kctx->jctx.lock); - atomic_set(&katom->dma_fence.dep_count, 1); fence = kbase_dma_fence_new(katom->dma_fence.context, atomic_inc_return(&katom->dma_fence.seqno)); if (!fence) { @@ -415,11 +450,14 @@ int kbase_dma_fence_wait(struct kbase_jd_atom *katom, } katom->dma_fence.fence = fence; + atomic_set(&katom->dma_fence.dep_count, 1); err = kbase_dma_fence_lock_reservations(info, &ww_ctx); if (err) { dev_err(katom->kctx->kbdev->dev, "Error %d locking reservations.\n", err); + atomic_set(&katom->dma_fence.dep_count, -1); + fence_put(fence); return err; } @@ -457,7 +495,7 @@ int kbase_dma_fence_wait(struct kbase_jd_atom *katom, end: kbase_dma_fence_unlock_reservations(info, &ww_ctx); - if (!err) { + if (likely(!err)) { /* Test if the callbacks are already triggered */ if (atomic_dec_and_test(&katom->dma_fence.dep_count)) { atomic_set(&katom->dma_fence.dep_count, -1); @@ -468,6 +506,15 @@ end: */ kbase_dma_fence_waiters_add(katom); } + } else { + /* There was an error, cancel callbacks, set dep_count to -1 to + * indicate that the atom has been handled (the caller will + * kill it for us), signal the fence, free callbacks and the * fence. 
+ */ + kbase_dma_fence_free_callbacks(katom); + atomic_set(&katom->dma_fence.dep_count, -1); + kbase_dma_fence_signal(katom); } return err; diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_api.c b/drivers/gpu/arm/midgard/mali_kbase_gator_api.c index b9077ea78e5d..4af3e4815e95 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_gator_api.c +++ b/drivers/gpu/arm/midgard/mali_kbase_gator_api.c @@ -20,7 +20,6 @@ #include "mali_kbase_mem_linux.h" #include "mali_kbase_gator_api.h" #include "mali_kbase_gator_hwcnt_names.h" -#include "mali_kbase_instr.h" #define MALI_MAX_CORES_PER_GROUP 4 #define MALI_MAX_NUM_BLOCKS_PER_GROUP 8 diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tmix.h b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tmix.h index 682f98efcd13..5ea06770fdb2 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tmix.h +++ b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_tmix.h @@ -206,19 +206,19 @@ static const char * const hardware_counters_mali_tMIx[] = { "TMIx_LS_MEM_WRITE_SHORT", "TMIx_LS_MEM_ATOMIC", "TMIx_VARY_INSTR", - "", - "", + "TMIx_VARY_SLOT_32", + "TMIx_VARY_SLOT_16", "TMIx_ATTR_INSTR", "TMIx_ARITH_INSTR_FP_MUL", - "", - "", - "", - "", - "", - "", - "", - "", - "", + "TMIx_BEATS_RD_FTC", + "TMIx_BEATS_RD_FTC_EXT", + "TMIx_BEATS_RD_LSC", + "TMIx_BEATS_RD_LSC_EXT", + "TMIx_BEATS_RD_TEX", + "TMIx_BEATS_RD_TEX_EXT", + "TMIx_BEATS_RD_OTHER", + "TMIx_BEATS_WR_LSC", + "TMIx_BEATS_WR_TIB", "", /* Performance counters for the Memory System */ @@ -259,11 +259,11 @@ static const char * const hardware_counters_mali_tMIx[] = { "TMIx_L2_EXT_AR_CNT_Q1", "TMIx_L2_EXT_AR_CNT_Q2", "TMIx_L2_EXT_AR_CNT_Q3", - "", - "", - "", - "", - "", + "TMIx_L2_EXT_RRESP_0_127", + "TMIx_L2_EXT_RRESP_128_191", + "TMIx_L2_EXT_RRESP_192_255", + "TMIx_L2_EXT_RRESP_256_319", + "TMIx_L2_EXT_RRESP_320_383", "TMIx_L2_EXT_WRITE", "TMIx_L2_EXT_WRITE_NOSNP_FULL", "TMIx_L2_EXT_WRITE_NOSNP_PTL", diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c b/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c index 82f4c36d509e..6df0a1cb1264 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c +++ b/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -15,7 +15,7 @@ -#include +#include #ifdef CONFIG_DEBUG_FS /** Show callback for the @c gpu_memory debugfs file. diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h b/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h index 3cf30a4e767e..7045693eb910 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h +++ b/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2014, 2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -23,10 +23,9 @@ * */ -#ifndef _KBASE_GPU_MEMORY_H -#define _KBASE_GPU_MEMORY_H +#ifndef _KBASE_GPU_MEMORY_DEBUGFS_H +#define _KBASE_GPU_MEMORY_DEBUGFS_H -#include #include #include @@ -35,4 +34,4 @@ */ void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev); -#endif /*_KBASE_GPU_MEMORY_H*/ +#endif /*_KBASE_GPU_MEMORY_DEBUGFS_H*/ diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h index 2efa293088a1..abe66078029f 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h +++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -71,9 +71,7 @@ void kbase_backend_release_free_address_space(struct kbase_device *kbdev, * * kbase_gpu_next_job() will pull atoms from the active context. * - * Return: true if successful, false if ASID not assigned. If kctx->as_pending - * is true then ASID assignment will complete at some point in the - * future and will re-start scheduling, otherwise no ASIDs are available + * Return: true if successful, false if ASID not assigned. */ bool kbase_backend_use_ctx(struct kbase_device *kbdev, struct kbase_context *kctx, @@ -213,6 +211,15 @@ int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js); */ void kbase_backend_ctx_count_changed(struct kbase_device *kbdev); +/** + * kbase_backend_timeouts_changed() - Job Scheduler timeouts have changed. + * @kbdev: Device pointer + * + * Perform any required backend-specific actions (eg updating timeouts of + * currently running atoms). + */ +void kbase_backend_timeouts_changed(struct kbase_device *kbdev); + /** * kbase_backend_slot_free() - Return the number of jobs that can be currently * submitted to slot @js. @@ -319,6 +326,28 @@ bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev); * signalled to know when the reset has completed. */ void kbase_reset_gpu_locked(struct kbase_device *kbdev); + +/** + * kbase_reset_gpu_silent - Reset the GPU silently + * @kbdev: Device pointer + * + * Reset the GPU without trying to cancel jobs and don't emit messages into + * the kernel log while doing the reset. + * + * This function should be used in cases where we are doing a controlled reset + * of the GPU as part of normal processing (e.g. exiting protected mode) where + * the driver will have ensured the scheduler has been idled and all other + * users of the GPU (e.g. instrumentation) have been suspended. + */ +void kbase_reset_gpu_silent(struct kbase_device *kbdev); + +/** + * kbase_reset_gpu_active - Reports if the GPU is being reset + * @kbdev: Device pointer + * + * Return: True if the GPU is in the process of being reset. + */ +bool kbase_reset_gpu_active(struct kbase_device *kbdev); #endif /** diff --git a/drivers/gpu/arm/midgard/mali_kbase_instr.c b/drivers/gpu/arm/midgard/mali_kbase_instr.c deleted file mode 100644 index fda317b90176..000000000000 --- a/drivers/gpu/arm/midgard/mali_kbase_instr.c +++ /dev/null @@ -1,129 +0,0 @@ -/* - * - * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. 
- * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * A copy of the licence is included with the program, and can also be obtained - * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. - * - */ - - - - - -/* - * Base kernel instrumentation APIs. - */ - -#include -#include - -void kbase_instr_hwcnt_suspend(struct kbase_device *kbdev) -{ - struct kbase_context *kctx; - - KBASE_DEBUG_ASSERT(kbdev); - KBASE_DEBUG_ASSERT(!kbdev->hwcnt.suspended_kctx); - - kctx = kbdev->hwcnt.kctx; - kbdev->hwcnt.suspended_kctx = kctx; - - /* Relevant state was saved into hwcnt.suspended_state when enabling the - * counters */ - - if (kctx) { - KBASE_DEBUG_ASSERT(kctx->jctx.sched_info.ctx.flags & - KBASE_CTX_FLAG_PRIVILEGED); - kbase_instr_hwcnt_disable(kctx); - } -} - -void kbase_instr_hwcnt_resume(struct kbase_device *kbdev) -{ - struct kbase_context *kctx; - - KBASE_DEBUG_ASSERT(kbdev); - - kctx = kbdev->hwcnt.suspended_kctx; - kbdev->hwcnt.suspended_kctx = NULL; - - if (kctx) { - int err; - - err = kbase_instr_hwcnt_enable_internal(kbdev, kctx, - &kbdev->hwcnt.suspended_state); - WARN(err, "Failed to restore instrumented hardware counters on resume\n"); - } -} - -int kbase_instr_hwcnt_enable(struct kbase_context *kctx, - struct kbase_uk_hwcnt_setup *setup) -{ - struct kbase_device *kbdev; - int err; - - kbdev = kctx->kbdev; - - /* Mark the context as active so the GPU is kept turned on */ - /* A suspend won't happen here, because we're in a syscall from a - * userspace thread. */ - kbase_pm_context_active(kbdev); - - /* Schedule the context in */ - kbasep_js_schedule_privileged_ctx(kbdev, kctx); - err = kbase_instr_hwcnt_enable_internal(kbdev, kctx, setup); - if (err) { - /* Release the context. This had its own Power Manager Active - * reference */ - kbasep_js_release_privileged_ctx(kbdev, kctx); - - /* Also release our Power Manager Active reference */ - kbase_pm_context_idle(kbdev); - } - - return err; -} -KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_enable); - -int kbase_instr_hwcnt_disable(struct kbase_context *kctx) -{ - int err = -EINVAL; - struct kbase_device *kbdev = kctx->kbdev; - - err = kbase_instr_hwcnt_disable_internal(kctx); - if (err) - goto out; - - /* Release the context. This had its own Power Manager Active reference - */ - kbasep_js_release_privileged_ctx(kbdev, kctx); - - /* Also release our Power Manager Active reference */ - kbase_pm_context_idle(kbdev); - - dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p", - kctx); -out: - return err; -} -KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_disable); - -int kbase_instr_hwcnt_dump(struct kbase_context *kctx) -{ - int err; - - err = kbase_instr_hwcnt_request_dump(kctx); - if (err) - return err; - - err = kbase_instr_hwcnt_wait_for_dump(kctx); - return err; -} -KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_dump); - diff --git a/drivers/gpu/arm/midgard/mali_kbase_instr.h b/drivers/gpu/arm/midgard/mali_kbase_instr.h deleted file mode 100644 index ac3355e53634..000000000000 --- a/drivers/gpu/arm/midgard/mali_kbase_instr.h +++ /dev/null @@ -1,75 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. 
- * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * A copy of the licence is included with the program, and can also be obtained - * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. - * - */ - - - - - -/* - * Instrumentation API definitions - */ - -#ifndef _KBASE_INSTR_H_ -#define _KBASE_INSTR_H_ - -#include - -/** - * kbase_instr_hwcnt_enable() - Enable HW counters collection - * @kctx: Kbase context - * @setup: &struct kbase_uk_hwcnt_setup containing configuration - * - * Return: 0 on success - */ -int kbase_instr_hwcnt_enable(struct kbase_context *kctx, - struct kbase_uk_hwcnt_setup *setup); - -/** - * kbase_instr_hwcnt_disable() - Disable HW counters collection - * @kctx: Kbase context - * - * Return: 0 on success - */ -int kbase_instr_hwcnt_disable(struct kbase_context *kctx); - -/** - * kbase_instr_hwcnt_dump() - Trigger dump of HW counters and wait for - * completion - * @kctx: Kbase context - * - * Context: might sleep, waiting for dump to complete - * - * Return: 0 on success - */ -int kbase_instr_hwcnt_dump(struct kbase_context *kctx); - -/** - * kbase_instr_hwcnt_suspend() - GPU is suspending, stop HW counter collection - * @kbdev: Kbase device - * - * It's assumed that there's only one privileged context. - * - * Safe to do this without lock when doing an OS suspend, because it only - * changes in response to user-space IOCTLs - */ -void kbase_instr_hwcnt_suspend(struct kbase_device *kbdev); - -/** - * kbase_instr_hwcnt_resume() - GPU is resuming, resume HW counter collection - * @kbdev: Kbase device - */ -void kbase_instr_hwcnt_resume(struct kbase_device *kbdev); - -#endif /* _KBASE_INSTR_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_jd.c b/drivers/gpu/arm/midgard/mali_kbase_jd.c index 0ffd8beb29fe..3e0a5892cc7a 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_jd.c +++ b/drivers/gpu/arm/midgard/mali_kbase_jd.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include @@ -46,7 +45,7 @@ /* Return whether katom will run on the GPU or not. Currently only soft jobs and * dependency-only atoms do not run on the GPU */ #define IS_GPU_ATOM(katom) (!((katom->core_req & BASE_JD_REQ_SOFT_JOB) || \ - ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) == \ + ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) == \ BASE_JD_REQ_DEP))) /* * This is the kernel side of the API. 
Only entry points are: @@ -81,7 +80,7 @@ static int jd_run_atom(struct kbase_jd_atom *katom) KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED); - if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP) { + if ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP) { /* Dependency only atom */ katom->status = KBASE_JD_ATOM_STATE_COMPLETED; return 0; @@ -91,7 +90,7 @@ static int jd_run_atom(struct kbase_jd_atom *katom) katom->status = KBASE_JD_ATOM_STATE_COMPLETED; return 0; } - if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) + if ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_REPLAY) { if (!kbase_replay_process(katom)) katom->status = KBASE_JD_ATOM_STATE_COMPLETED; @@ -385,7 +384,7 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st if (!(katom->core_req & BASE_JD_REQ_SOFT_JOB) && (reg->flags & KBASE_REG_SECURE)) { - katom->atom_flags |= KBASE_KATOM_FLAG_SECURE; + katom->atom_flags |= KBASE_KATOM_FLAG_PROTECTED; } alloc = kbase_map_external_resource(katom->kctx, reg, @@ -524,17 +523,17 @@ failed_kds_setup: static inline void jd_resolve_dep(struct list_head *out_list, struct kbase_jd_atom *katom, - u8 d) + u8 d, bool ctx_is_dying) { u8 other_d = !d; while (!list_empty(&katom->dep_head[d])) { struct kbase_jd_atom *dep_atom; + struct kbase_jd_atom *other_dep_atom; u8 dep_type; dep_atom = list_entry(katom->dep_head[d].next, struct kbase_jd_atom, dep_item[d]); - list_del(katom->dep_head[d].next); dep_type = kbase_jd_katom_dep_type(&dep_atom->dep[d]); @@ -568,7 +567,13 @@ static inline void jd_resolve_dep(struct list_head *out_list, KBASE_JD_ATOM_STATE_COMPLETED; } } - if (!kbase_jd_katom_dep_atom(&dep_atom->dep[other_d])) { + other_dep_atom = (struct kbase_jd_atom *) + kbase_jd_katom_dep_atom(&dep_atom->dep[other_d]); + + if (!dep_atom->in_jd_list && (!other_dep_atom || + (IS_GPU_ATOM(dep_atom) && !ctx_is_dying && + !dep_atom->will_fail_event_code && + !other_dep_atom->will_fail_event_code))) { bool dep_satisfied = true; #ifdef CONFIG_MALI_DMA_FENCE int dep_count; @@ -602,8 +607,10 @@ static inline void jd_resolve_dep(struct list_head *out_list, dep_satisfied = dep_satisfied && dep_atom->kds_dep_satisfied; #endif - if (dep_satisfied) - list_add_tail(&dep_atom->dep_item[0], out_list); + if (dep_satisfied) { + dep_atom->in_jd_list = true; + list_add_tail(&dep_atom->jd_item, out_list); + } } } } @@ -646,7 +653,7 @@ static void jd_check_force_failure(struct kbase_jd_atom *katom) kbase_jd_katom_dep_atom(&kctx->jctx.atoms[i].dep[1]) == katom) { struct kbase_jd_atom *dep_atom = &kctx->jctx.atoms[i]; - if ((dep_atom->core_req & BASEP_JD_REQ_ATOM_TYPE) == + if ((dep_atom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_REPLAY && (dep_atom->core_req & kbdev->force_replay_core_req) == kbdev->force_replay_core_req) { @@ -658,6 +665,36 @@ static void jd_check_force_failure(struct kbase_jd_atom *katom) } #endif +static void jd_try_submitting_deps(struct list_head *out_list, + struct kbase_jd_atom *node) +{ + int i; + + for (i = 0; i < 2; i++) { + struct list_head *pos; + + list_for_each(pos, &node->dep_head[i]) { + struct kbase_jd_atom *dep_atom = list_entry(pos, + struct kbase_jd_atom, dep_item[i]); + + if (IS_GPU_ATOM(dep_atom) && !dep_atom->in_jd_list) { + /*Check if atom deps look sane*/ + bool dep0_valid = !dep_atom->dep[0].atom || + (dep_atom->dep[0].atom->status + >= KBASE_JD_ATOM_STATE_IN_JS); + bool dep1_valid = !dep_atom->dep[1].atom || + (dep_atom->dep[1].atom->status + >= KBASE_JD_ATOM_STATE_IN_JS); + + if 
(dep0_valid && dep1_valid) { + dep_atom->in_jd_list = true; + list_add(&dep_atom->jd_item, out_list); + } + } + } + } +} + /* * Perform the necessary handling of an atom that has finished running * on the GPU. @@ -712,16 +749,16 @@ bool jd_done_nolock(struct kbase_jd_atom *katom, } katom->status = KBASE_JD_ATOM_STATE_COMPLETED; - list_add_tail(&katom->dep_item[0], &completed_jobs); + list_add_tail(&katom->jd_item, &completed_jobs); while (!list_empty(&completed_jobs)) { - katom = list_entry(completed_jobs.prev, struct kbase_jd_atom, dep_item[0]); + katom = list_entry(completed_jobs.prev, struct kbase_jd_atom, jd_item); list_del(completed_jobs.prev); - KBASE_DEBUG_ASSERT(katom->status == KBASE_JD_ATOM_STATE_COMPLETED); for (i = 0; i < 2; i++) - jd_resolve_dep(&runnable_jobs, katom, i); + jd_resolve_dep(&runnable_jobs, katom, i, + kctx->jctx.sched_info.ctx.is_dying); if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) kbase_jd_post_external_resources(katom); @@ -730,9 +767,9 @@ bool jd_done_nolock(struct kbase_jd_atom *katom, struct kbase_jd_atom *node; node = list_entry(runnable_jobs.next, - struct kbase_jd_atom, dep_item[0]); - + struct kbase_jd_atom, jd_item); list_del(runnable_jobs.next); + node->in_jd_list = false; KBASE_DEBUG_ASSERT(node->status != KBASE_JD_ATOM_STATE_UNUSED); @@ -742,38 +779,43 @@ bool jd_done_nolock(struct kbase_jd_atom *katom, } else { node->event_code = katom->event_code; - if ((node->core_req & BASEP_JD_REQ_ATOM_TYPE) - == BASE_JD_REQ_SOFT_REPLAY) { + if ((node->core_req & + BASE_JD_REQ_SOFT_JOB_TYPE) == + BASE_JD_REQ_SOFT_REPLAY) { if (kbase_replay_process(node)) /* Don't complete this atom */ continue; } else if (node->core_req & BASE_JD_REQ_SOFT_JOB) { - /* If this is a fence wait then remove it from the list of sync waiters. */ + /* If this is a fence wait soft job + * then remove it from the list of sync + * waiters. + */ if (BASE_JD_REQ_SOFT_FENCE_WAIT == node->core_req) - list_del(&node->dep_item[0]); + kbasep_remove_waiting_soft_job(node); kbase_finish_soft_job(node); } node->status = KBASE_JD_ATOM_STATE_COMPLETED; } - if (node->status == KBASE_JD_ATOM_STATE_COMPLETED) - list_add_tail(&node->dep_item[0], &completed_jobs); + if (node->status == KBASE_JD_ATOM_STATE_COMPLETED) { + list_add_tail(&node->jd_item, &completed_jobs); + } else if (node->status == KBASE_JD_ATOM_STATE_IN_JS && + !node->will_fail_event_code) { + /* Node successfully submitted, try submitting + * dependencies as they may now be representable + * in JS */ + jd_try_submitting_deps(&runnable_jobs, node); + } } - /* Completing an atom might have freed up space - * in the ringbuffer, but only on that slot. */ - jsctx_ll_flush_to_rb(kctx, - katom->sched_priority, - katom->slot_nr); - /* Register a completed job as a disjoint event when the GPU * is in a disjoint state (ie. being reset or replaying jobs). 
*/ kbase_disjoint_event_potential(kctx->kbdev); if (completed_jobs_ctx) - list_add_tail(&katom->dep_item[0], completed_jobs_ctx); + list_add_tail(&katom->jd_item, completed_jobs_ctx); else kbase_event_post(kctx, katom); @@ -845,12 +887,9 @@ static const char *kbasep_map_core_reqs_to_string(base_jd_core_req core_req) } #endif -bool jd_submit_atom(struct kbase_context *kctx, - const struct base_jd_atom_v2 *user_atom, - struct kbase_jd_atom *katom) +bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *user_atom, struct kbase_jd_atom *katom) { struct kbase_jd_context *jctx = &kctx->jctx; - base_jd_core_req core_req; int queued = 0; int i; int sched_prio; @@ -861,8 +900,6 @@ bool jd_submit_atom(struct kbase_context *kctx, * the scheduler: 'not ready to run' and 'dependency-only' jobs. */ jctx->job_nr++; - core_req = user_atom->core_req; - katom->start_timestamp.tv64 = 0; katom->time_spent_us = 0; katom->udata = user_atom->udata; @@ -873,13 +910,19 @@ bool jd_submit_atom(struct kbase_context *kctx, katom->affinity = 0; katom->jc = user_atom->jc; katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED; - katom->core_req = core_req; + katom->core_req = user_atom->core_req; katom->atom_flags = 0; katom->retry_count = 0; katom->need_cache_flush_cores_retained = 0; + katom->pre_dep = NULL; + katom->post_dep = NULL; katom->x_pre_dep = NULL; katom->x_post_dep = NULL; - katom->will_fail_event_code = 0; + katom->will_fail_event_code = BASE_JD_EVENT_NOT_STARTED; + katom->exit_protected_state = KBASE_ATOM_EXIT_PROTECTED_CHECK; + katom->age = kctx->age_count++; + + INIT_LIST_HEAD(&katom->jd_item); #ifdef CONFIG_KDS /* Start by assuming that the KDS dependencies are satisfied, * kbase_jd_pre_external_resources will correct this if there are dependencies */ @@ -958,7 +1001,7 @@ bool jd_submit_atom(struct kbase_context *kctx, kbase_jd_atom_id(kctx, katom)); kbase_tlstream_tl_ret_atom_ctx(katom, kctx); - if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) + if ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_REPLAY) { if (kbase_replay_process(katom)) { ret = false; @@ -1020,7 +1063,7 @@ bool jd_submit_atom(struct kbase_context *kctx, } /* Reject atoms with job chain = NULL, as these cause issues with soft-stop */ - if (!katom->jc && (katom->core_req & BASEP_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) { + if (!katom->jc && (katom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) { dev_warn(kctx->kbdev->dev, "Rejecting atom with jc = NULL"); katom->event_code = BASE_JD_EVENT_JOB_INVALID; ret = jd_done_nolock(katom, NULL); @@ -1105,6 +1148,7 @@ bool jd_submit_atom(struct kbase_context *kctx, } #endif /* CONFIG_KDS */ + #ifdef CONFIG_MALI_DMA_FENCE if (atomic_read(&katom->dma_fence.dep_count) != -1) { ret = false; @@ -1112,7 +1156,7 @@ bool jd_submit_atom(struct kbase_context *kctx, } #endif /* CONFIG_MALI_DMA_FENCE */ - if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) + if ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_REPLAY) { if (kbase_replay_process(katom)) ret = false; @@ -1128,7 +1172,7 @@ bool jd_submit_atom(struct kbase_context *kctx, } ret = false; - } else if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) { + } else if ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) { katom->status = KBASE_JD_ATOM_STATE_IN_JS; ret = kbasep_js_add_job(kctx, katom); /* If job was cancelled then resolve immediately */ @@ -1214,7 +1258,7 @@ int kbase_jd_submit(struct kbase_context *kctx, user_atom.udata = user_atom_v6.udata; 
user_atom.extres_list = user_atom_v6.extres_list; user_atom.nr_extres = user_atom_v6.nr_extres; - user_atom.core_req = user_atom_v6.core_req; + user_atom.core_req = (u32)(user_atom_v6.core_req & 0x7fff); /* atom number 0 is used for no dependency atoms */ if (!user_atom_v6.pre_dep[0]) @@ -1246,6 +1290,12 @@ int kbase_jd_submit(struct kbase_context *kctx, } #endif /* BASE_LEGACY_UK6_SUPPORT */ +#ifdef BASE_LEGACY_UK10_2_SUPPORT + if (KBASE_API_VERSION(10, 3) > kctx->api_version) + user_atom.core_req = (u32)(user_atom.compat_core_req + & 0x7fff); +#endif /* BASE_LEGACY_UK10_2_SUPPORT */ + user_addr = (void __user *)((uintptr_t) user_addr + submit_data->stride); mutex_lock(&jctx->lock); @@ -1320,7 +1370,6 @@ void kbase_jd_done_worker(struct work_struct *data) struct kbase_jd_context *jctx; struct kbase_context *kctx; struct kbasep_js_kctx_info *js_kctx_info; - union kbasep_js_policy *js_policy; struct kbase_device *kbdev; struct kbasep_js_device_data *js_devdata; u64 cache_jc = katom->jc; @@ -1339,7 +1388,6 @@ void kbase_jd_done_worker(struct work_struct *data) kbdev = kctx->kbdev; js_kctx_info = &kctx->jctx.sched_info; js_devdata = &kbdev->js_data; - js_policy = &kbdev->js_data.policy; KBASE_TRACE_ADD(kbdev, JD_DONE_WORKER, kctx, katom, katom->jc, 0); @@ -1481,7 +1529,7 @@ void kbase_jd_done_worker(struct work_struct *data) while (!list_empty(&kctx->completed_jobs)) { struct kbase_jd_atom *atom = list_entry( kctx->completed_jobs.next, - struct kbase_jd_atom, dep_item[0]); + struct kbase_jd_atom, jd_item); list_del(kctx->completed_jobs.next); kbase_event_post(kctx, atom); @@ -1660,9 +1708,9 @@ void kbase_jd_zap_context(struct kbase_context *kctx) * queued outside the job scheduler. */ - hrtimer_cancel(&kctx->soft_event_timeout); + del_timer_sync(&kctx->soft_job_timeout); list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) { - katom = list_entry(entry, struct kbase_jd_atom, dep_item[0]); + katom = list_entry(entry, struct kbase_jd_atom, queue); kbase_cancel_soft_job(katom); } diff --git a/drivers/gpu/arm/midgard/mali_kbase_js.c b/drivers/gpu/arm/midgard/mali_kbase_js.c index c1a8e9ceb2fa..ac6c3ce333ed 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_js.c +++ b/drivers/gpu/arm/midgard/mali_kbase_js.c @@ -230,32 +230,6 @@ bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev, return result; } -/* Helper macros to access and modify jsctx_queue.indicies */ -#define JSCTX_GET(offset, var, mask) \ - ((var >> offset) & mask) - -/* This wraps around to correct integer size automatically. */ -#define JSCTX_SET(var, offset, value, mask) \ - (var = ((var & ~(mask << offset)) /*Clear old bits */ \ - | (((value) & mask) << offset))) /* Set (after masking) new bits */ - -#define JSCTX_GET_WR_IDX(var) \ - JSCTX_GET(JSCTX_WR_OFFSET, var, JSCTX_RB_MASK_STORE) -#define JSCTX_GET_RN_IDX(var) \ - JSCTX_GET(JSCTX_RN_OFFSET, var, JSCTX_RB_MASK_STORE) -#define JSCTX_GET_RD_IDX(var) \ - JSCTX_GET(JSCTX_RD_OFFSET, var, JSCTX_RB_MASK_STORE) - -#define JSCTX_GET_IDX_DIFF(lower, upper) \ - ((upper >= lower) ? (upper - lower) : (upper+JSCTX_RB_SIZE_STORE-lower)) - -#define JSCTX_SET_WR_IDX(var, value) \ - JSCTX_SET(var, JSCTX_WR_OFFSET, value, JSCTX_RB_MASK_STORE) -#define JSCTX_SET_RN_IDX(var, value) \ - JSCTX_SET(var, JSCTX_RN_OFFSET, value, JSCTX_RB_MASK_STORE) -#define JSCTX_SET_RD_IDX(var, value) \ - JSCTX_SET(var, JSCTX_RD_OFFSET, value, JSCTX_RB_MASK_STORE) - /** * jsctx_rb_none_to_pull_prio(): - Check if there are no pullable atoms * @kctx: Pointer to kbase context with ring buffer. 
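
The JSCTX_GET()/JSCTX_SET() helpers removed above packed the ring buffer's running, read and write indices into a single atomic word. A minimal standalone sketch of that pack/unpack scheme follows; the 8-bit field widths and offsets are assumptions chosen only for illustration, since the real JSCTX_*_OFFSET and JSCTX_RB_MASK_STORE values live in a header that is not part of this hunk.

#include <stdio.h>

/* Same shape as the removed helpers; offsets and masks below are assumed. */
#define JSCTX_GET(offset, var, mask)  (((var) >> (offset)) & (mask))
#define JSCTX_SET(var, offset, value, mask) \
	((var) = (((var) & ~((mask) << (offset))) | (((value) & (mask)) << (offset))))

int main(void)
{
	unsigned int indicies = 0;          /* all three indices in one word */

	JSCTX_SET(indicies, 0,  5, 0xffu);  /* running index */
	JSCTX_SET(indicies, 8,  7, 0xffu);  /* read index    */
	JSCTX_SET(indicies, 16, 9, 0xffu);  /* write index   */

	printf("rn=%u rd=%u wr=%u\n",
	       JSCTX_GET(0,  indicies, 0xffu),
	       JSCTX_GET(8,  indicies, 0xffu),
	       JSCTX_GET(16, indicies, 0xffu));
	return 0;
}
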
@@ -273,9 +247,10 @@ static inline bool jsctx_rb_none_to_pull_prio(struct kbase_context *kctx, int js, int prio) { struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; - unsigned int var = atomic_read(&rb->indicies); - return JSCTX_GET_RD_IDX(var) == JSCTX_GET_WR_IDX(var); + lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); + + return RB_EMPTY_ROOT(&rb->runnable_tree); } /** @@ -304,27 +279,6 @@ jsctx_rb_none_to_pull(struct kbase_context *kctx, int js) return true; } -/** - * jsctx_rb_is_full(): - Check if the given ringbuffer is full. - * @queue: Pointer to the queue containing the ringbuffer. - * - * No locks explicitly required, result will always be consistent. - * But depending on usage, the caller should consider jctx.lock, - * for the result to remain correct. - * - * Return: true if the ringbuffer is full, false otherwise. - */ -static inline bool -jsctx_rb_is_full(struct jsctx_queue *queue) -{ - unsigned int var = atomic_read(&queue->indicies); - u16 rn_idx = JSCTX_GET_RN_IDX(var); - u16 wr_idx = JSCTX_GET_WR_IDX(var); - - return JSCTX_GET_IDX_DIFF(rn_idx, wr_idx) >= JSCTX_RB_SIZE; -} - - /** * jsctx_queue_foreach_prio(): - Execute callback for each entry in the queue. * @kctx: Pointer to kbase context with the queue. @@ -332,13 +286,13 @@ jsctx_rb_is_full(struct jsctx_queue *queue) * @prio: Priority id to iterate. * @callback: Function pointer to callback. * - * Iterate over a ring buffer and invoke @callback for each entry in buffer, and - * remove the entry from the buffer. + * Iterate over a queue and invoke @callback for each entry in the queue, and + * remove the entry from the queue. * - * If entries are added to the ring buffer while this is running those entries - * may, or may not be covered. To ensure that all entries in the buffer have - * been enumerated when this function returns jsctx->lock must be held when - * calling this function. + * If entries are added to the queue while this is running those entries may, or + * may not be covered. To ensure that all entries in the buffer have been + * enumerated when this function returns jsctx->lock must be held when calling + * this function. * * The HW access lock, js_data.runpool_irq.lock, must always be held when * calling this function. 
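
The jsctx_rb_is_full() helper removed above used JSCTX_GET_IDX_DIFF() to measure ring-buffer occupancy across index wrap-around. Below is a hedged sketch of that distance calculation; JSCTX_RB_SIZE_STORE and JSCTX_RB_SIZE are picked purely for illustration, as the driver's real constants are defined elsewhere.

#include <stdio.h>

/* Assumed sizes; the driver's actual constants are not shown in this hunk. */
#define JSCTX_RB_SIZE_STORE 256u   /* index space, i.e. the wrap modulus   */
#define JSCTX_RB_SIZE       128u   /* number of usable ring buffer entries */

/* Same shape as the removed JSCTX_GET_IDX_DIFF() macro. */
#define JSCTX_GET_IDX_DIFF(lower, upper) \
	(((upper) >= (lower)) ? ((upper) - (lower)) : ((upper) + JSCTX_RB_SIZE_STORE - (lower)))

int main(void)
{
	unsigned int rn_idx = 250, wr_idx = 10;   /* write index has wrapped past 255 */
	unsigned int count = JSCTX_GET_IDX_DIFF(rn_idx, wr_idx);

	printf("entries in flight: %u, full: %s\n",
	       count, count >= JSCTX_RB_SIZE ? "yes" : "no");
	return 0;
}
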
@@ -348,38 +302,24 @@ jsctx_queue_foreach_prio(struct kbase_context *kctx, int js, int prio, kbasep_js_policy_ctx_job_cb callback) { struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js]; - struct kbase_jd_atom *katom; - - struct list_head *pos, *q; - - unsigned int var = atomic_read(&queue->indicies); - u16 running_idx = JSCTX_GET_RN_IDX(var); - u16 read_idx = JSCTX_GET_RD_IDX(var); - u16 wr_idx = JSCTX_GET_WR_IDX(var); - u16 i; - const u16 count = JSCTX_GET_IDX_DIFF(running_idx, wr_idx); lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); - /* There must be no jobs currently in HW access */ - WARN_ON(read_idx != JSCTX_GET_RN_IDX(var)); - - /* Invoke callback on all kbase_jd_atoms in the ring buffer, and - * removes them from the buffer */ - for (i = 0; i < count; i++) { - int id = queue->entries[read_idx & JSCTX_RB_MASK].atom_id; + while (!RB_EMPTY_ROOT(&queue->runnable_tree)) { + struct rb_node *node = rb_first(&queue->runnable_tree); + struct kbase_jd_atom *entry = rb_entry(node, + struct kbase_jd_atom, runnable_tree_node); - katom = kbase_jd_atom_from_id(kctx, id); - read_idx++; - callback(kctx->kbdev, katom); + rb_erase(node, &queue->runnable_tree); + callback(kctx->kbdev, entry); } - atomic_set(&queue->indicies, 0); - list_for_each_safe(pos, q, &queue->queue_head) { - struct kbase_jd_atom *entry; + while (!list_empty(&queue->x_dep_head)) { + struct kbase_jd_atom *entry = list_entry(queue->x_dep_head.next, + struct kbase_jd_atom, queue); + + list_del(queue->x_dep_head.next); - entry = list_entry(pos, struct kbase_jd_atom, queue); - list_del(pos); callback(kctx->kbdev, entry); } } @@ -419,16 +359,15 @@ static inline struct kbase_jd_atom * jsctx_rb_peek_prio(struct kbase_context *kctx, int js, int prio) { struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; - int id; - unsigned int var = atomic_read(&rb->indicies); + struct rb_node *node; lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); - if (JSCTX_GET_RD_IDX(var) == JSCTX_GET_WR_IDX(var)) + node = rb_first(&rb->runnable_tree); + if (!node) return NULL; - id = rb->entries[JSCTX_GET_RD_IDX(var) & JSCTX_RB_MASK].atom_id; - return kbase_jd_atom_from_id(kctx, id); + return rb_entry(node, struct kbase_jd_atom, runnable_tree_node); } /** @@ -462,40 +401,6 @@ jsctx_rb_peek(struct kbase_context *kctx, int js) return NULL; } -/** - * jsctx_rb_peek_last(): - Check a ring buffer and get the last atom - * @kctx: Pointer to kbase context with ring buffer. - * @js: Job slot id to check. - * @prio: Priority id to check. - * - * Check the ring buffer for the specified @js and @prio and return a - * pointer to the last atom, unless all the priority's ring buffers are empty. - * - * The last atom is the atom that was added using jsctx_rb_add() most recently. - * - * Return: Pointer to last atom in buffer, or NULL if there is no atom. - */ -static inline struct kbase_jd_atom * -jsctx_rb_peek_last(struct kbase_context *kctx, int js, int prio) -{ - struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; - unsigned int var = atomic_read(&rb->indicies); - int id; - - lockdep_assert_held(&kctx->jctx.lock); - - if (!list_empty(&rb->queue_head)) { - return list_entry(rb->queue_head.prev, - struct kbase_jd_atom, queue); - } - - if (JSCTX_GET_RN_IDX(var) == JSCTX_GET_WR_IDX(var)) - return NULL; - - id = rb->entries[(JSCTX_GET_WR_IDX(var) - 1) & JSCTX_RB_MASK].atom_id; - return kbase_jd_atom_from_id(kctx, id); -} - /** * jsctx_rb_pull(): - Mark atom in list as running * @kctx: Pointer to kbase context with ring buffer. 
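
The per-priority ring buffers are replaced here by an rb-tree of runnable atoms; later in this patch the tree is kept ordered by each atom's wrapping age counter through LESS_THAN_WRAP(). A minimal sketch of why that comparison is done on the signed difference rather than with a plain '<', assuming 32-bit ages (s32 in the kernel, int32_t here):

#include <stdio.h>
#include <stdint.h>

/* Same comparison as the LESS_THAN_WRAP() macro introduced by this patch. */
#define LESS_THAN_WRAP(a, b) ((int32_t)((a) - (b)) < 0)

int main(void)
{
	uint32_t older = 0xfffffffeu;  /* atom submitted just before age_count wrapped */
	uint32_t newer = 0x00000001u;  /* atom submitted just after the wrap           */

	/* A plain comparison would sort the post-wrap atom first, which is wrong. */
	printf("plain <        : %d\n", older < newer);
	/* The signed-difference form still treats 'older' as the earlier atom.    */
	printf("LESS_THAN_WRAP : %d\n", LESS_THAN_WRAP(older, newer));
	return 0;
}
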
@@ -510,7 +415,6 @@ jsctx_rb_pull(struct kbase_context *kctx, struct kbase_jd_atom *katom) { int prio = katom->sched_priority; int js = katom->slot_nr; - unsigned int oldvar, var; struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); @@ -518,246 +422,65 @@ jsctx_rb_pull(struct kbase_context *kctx, struct kbase_jd_atom *katom) /* Atoms must be pulled in the correct order. */ WARN_ON(katom != jsctx_rb_peek_prio(kctx, js, prio)); - do { - u16 rd_idx; - - oldvar = atomic_read(&rb->indicies); - var = oldvar; - rd_idx = JSCTX_GET_RD_IDX(var); - - JSCTX_SET_RD_IDX(var, rd_idx+1); - } while (atomic_cmpxchg(&rb->indicies, oldvar, var) != oldvar); + rb_erase(&katom->runnable_tree_node, &rb->runnable_tree); } -/** - * jsctx_rb_unpull(): - Undo marking of atom in list as running - * @kctx: Pointer to kbase context with ring buffer. - * @katom: Pointer to katom to unpull. - * - * Undo jsctx_rb_pull() and put @katom back in the queue. - * - * jsctx_rb_unpull() must be called on atoms in the same order the atoms were - * pulled. - */ -static inline void -jsctx_rb_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom) -{ - int prio = katom->sched_priority; - int js = katom->slot_nr; - unsigned int oldvar, var; - struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; - - - lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); - - do { - u16 rd_idx; - - oldvar = atomic_read(&rb->indicies); - var = oldvar; - - - rd_idx = JSCTX_GET_RD_IDX(var)-1; - - /* Atoms must be unpulled in correct order. */ - WARN_ON(rb->entries[rd_idx & JSCTX_RB_MASK].atom_id != - kbase_jd_atom_id(kctx, katom)); - - JSCTX_SET_RD_IDX(var, rd_idx); - } while (atomic_cmpxchg(&rb->indicies, oldvar, var) != oldvar); -} +#define LESS_THAN_WRAP(a, b) ((s32)(a - b) < 0) -/** - * jsctx_rb_add(): - Add atom to ring buffer - * @kctx: Pointer to kbase context with ring buffer. - * @katom: Pointer to katom to add. - * - * Add @katom to the ring buffer determined by the atom's priority and job slot - * number. - * - * If the ring buffer is full -EBUSY will be returned. - * - * Return: On success 0 is returned, on failure a negative error code. 
- */ -static int -jsctx_rb_add_atom(struct kbase_context *kctx, struct kbase_jd_atom *katom) +static void +jsctx_tree_add(struct kbase_context *kctx, struct kbase_jd_atom *katom) { int prio = katom->sched_priority; int js = katom->slot_nr; - struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; - unsigned int oldvar, var; - u16 wr_idx, running_idx, count; - - lockdep_assert_held(&kctx->jctx.lock); - - oldvar = atomic_read(&rb->indicies); - var = oldvar; - - running_idx = JSCTX_GET_RN_IDX(var); - wr_idx = JSCTX_GET_WR_IDX(var); - count = JSCTX_GET_IDX_DIFF(running_idx, wr_idx); - - /* Check if the ring buffer is full */ - if (count >= JSCTX_RB_SIZE) - return -EBUSY; - - rb->entries[wr_idx & JSCTX_RB_MASK].atom_id = - kbase_jd_atom_id(kctx, katom); + struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js]; + struct rb_node **new = &(queue->runnable_tree.rb_node), *parent = NULL; - wr_idx++; - JSCTX_SET_WR_IDX(var, wr_idx); + lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); - while (atomic_cmpxchg(&rb->indicies, oldvar, var) != oldvar) { - oldvar = atomic_read(&rb->indicies); - var = oldvar; - wr_idx = JSCTX_GET_WR_IDX(var)+1; + while (*new) { + struct kbase_jd_atom *entry = container_of(*new, + struct kbase_jd_atom, runnable_tree_node); - JSCTX_SET_WR_IDX(var, wr_idx); + parent = *new; + if (LESS_THAN_WRAP(katom->age, entry->age)) + new = &((*new)->rb_left); + else + new = &((*new)->rb_right); } - return 0; + + /* Add new node and rebalance tree. */ + rb_link_node(&katom->runnable_tree_node, parent, new); + rb_insert_color(&katom->runnable_tree_node, &queue->runnable_tree); } /** - * jsctx_rb_remove(): - Remove atom from ring buffer + * jsctx_rb_unpull(): - Undo marking of atom in list as running * @kctx: Pointer to kbase context with ring buffer. - * @katom: Pointer to katom to remove. + * @katom: Pointer to katom to unpull. * - * Remove @katom from the ring buffer. + * Undo jsctx_rb_pull() and put @katom back in the queue. * - * @katom must have been pulled from the buffer earlier by jsctx_rb_pull(), and - * atoms must be removed in the same order they were pulled from the ring - * buffer. + * jsctx_rb_unpull() must be called on atoms in the same order the atoms were + * pulled. 
*/ static inline void -jsctx_rb_remove(struct kbase_context *kctx, struct kbase_jd_atom *katom) -{ - int prio = katom->sched_priority; - int js = katom->slot_nr; - unsigned int oldvar, var; - struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; - - lockdep_assert_held(&kctx->jctx.lock); - - - do { - unsigned int rn_idx; - - oldvar = atomic_read(&rb->indicies); - var = oldvar; - - rn_idx = JSCTX_GET_RN_IDX(var); - - JSCTX_SET_RN_IDX(var, rn_idx+1); - } while (atomic_cmpxchg(&rb->indicies, oldvar, var) != oldvar); -} - - -static void -jsctx_ll_add(struct kbase_context *kctx, struct kbase_jd_atom *katom) +jsctx_rb_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom) { - int prio = katom->sched_priority; - int js = katom->slot_nr; - struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js]; - - lockdep_assert_held(&kctx->jctx.lock); + lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); - list_add_tail(&katom->queue, &queue->queue_head); + jsctx_tree_add(kctx, katom); } static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js, bool is_scheduled); -static bool kbase_js_ctx_list_add_pullable(struct kbase_device *kbdev, +static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev, struct kbase_context *kctx, int js); -static bool kbase_js_ctx_list_add_unpullable(struct kbase_device *kbdev, +static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev, struct kbase_context *kctx, int js); -void -jsctx_ll_flush_to_rb(struct kbase_context *kctx, int prio, int js) -{ - unsigned long flags; - struct list_head *pos, *q; - struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js]; - bool flushed_any = false; - struct kbasep_js_device_data *js_devdata = &kctx->kbdev->js_data; - bool enqueue_required = false; - - lockdep_assert_held(&kctx->jctx.lock); - - - /* Early out for common case */ - if (list_empty(&queue->queue_head) || jsctx_rb_is_full(queue)) - return; - - - mutex_lock(&js_devdata->queue_mutex); - mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex); - mutex_lock(&js_devdata->runpool_mutex); - - - spin_lock_irqsave(&kctx->kbdev->js_data.runpool_irq.lock, flags); - /* If slot will transition from unpullable to pullable then add to - * pullable list */ - if (jsctx_rb_none_to_pull(kctx, js)) - enqueue_required = true; - else - enqueue_required = false; - - list_for_each_safe(pos, q, &queue->queue_head) { - struct kbase_jd_atom *katom; - - katom = list_entry(pos, struct kbase_jd_atom, queue); - - KBASE_DEBUG_ASSERT(katom); - - if (jsctx_rb_add_atom(kctx, katom)) - break; - - katom->atom_flags &= ~KBASE_KATOM_FLAG_JSCTX_IN_LL; - katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED; - flushed_any = true; - - list_del(pos); - } - - - if (flushed_any) { - bool timer_sync = false; - - if (enqueue_required) { - if (kbase_js_ctx_pullable(kctx, js, false)) - timer_sync = kbase_js_ctx_list_add_pullable( - kctx->kbdev, kctx, js); - else - timer_sync = kbase_js_ctx_list_add_unpullable( - kctx->kbdev, kctx, js); - /* If this context is active and the atom is the first - * on its slot, kick the job manager to attempt to - * fast-start the atom */ - if (kctx == kctx->kbdev->hwaccess.active_kctx) - kbase_jm_try_kick(kctx->kbdev, 1 << js); - - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, - flags); - - if (timer_sync) - kbase_backend_ctx_count_changed(kctx->kbdev); - - } else { - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, - flags); - } - } else { - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); - } - 
mutex_unlock(&js_devdata->runpool_mutex); - mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex); - mutex_unlock(&js_devdata->queue_mutex); - -} - /* * Functions private to KBase ('Protected' functions) */ @@ -829,7 +552,7 @@ int kbasep_js_devdata_init(struct kbase_device * const kbdev) DEFAULT_JS_CFS_CTX_RUNTIME_INIT_SLICES; jsdd->cfs_ctx_runtime_min_slices = DEFAULT_JS_CFS_CTX_RUNTIME_MIN_SLICES; - atomic_set(&jsdd->soft_event_timeout_ms, DEFAULT_JS_SOFT_EVENT_TIMEOUT); + atomic_set(&jsdd->soft_job_timeout_ms, DEFAULT_JS_SOFT_JOB_TIMEOUT); dev_dbg(kbdev->dev, "JS Config Attribs: "); dev_dbg(kbdev->dev, "\tscheduling_period_ns:%u", @@ -856,8 +579,8 @@ int kbasep_js_devdata_init(struct kbase_device * const kbdev) jsdd->cfs_ctx_runtime_init_slices); dev_dbg(kbdev->dev, "\tcfs_ctx_runtime_min_slices:%u", jsdd->cfs_ctx_runtime_min_slices); - dev_dbg(kbdev->dev, "\tsoft_event_timeout:%i", - atomic_read(&jsdd->soft_event_timeout_ms)); + dev_dbg(kbdev->dev, "\tsoft_job_timeout:%i", + atomic_read(&jsdd->soft_job_timeout_ms)); if (!(jsdd->soft_stop_ticks < jsdd->hard_stop_ticks_ss && jsdd->hard_stop_ticks_ss < jsdd->gpu_reset_ticks_ss && @@ -1004,8 +727,8 @@ int kbasep_js_kctx_init(struct kbase_context * const kctx) for (i = 0; i < KBASE_JS_ATOM_SCHED_PRIO_COUNT; i++) { for (j = 0; j < BASE_JM_MAX_NR_SLOTS; j++) { - INIT_LIST_HEAD(&kctx->jsctx_queue[i][j].queue_head); - atomic_set(&kctx->jsctx_queue[i][j].indicies, 0); + INIT_LIST_HEAD(&kctx->jsctx_queue[i][j].x_dep_head); + kctx->jsctx_queue[i][j].runnable_tree = RB_ROOT; } } @@ -1063,30 +786,25 @@ void kbasep_js_kctx_term(struct kbase_context *kctx) } /** - * kbase_js_ctx_list_add_pullable - Add context to the tail of the per-slot - * pullable context queue + * kbase_js_ctx_list_add_pullable_nolock - Variant of + * kbase_jd_ctx_list_add_pullable() + * where the caller must hold + * runpool_irq.lock * @kbdev: Device pointer * @kctx: Context to add to queue * @js: Job slot to use * - * If the context is on either the pullable or unpullable queues, then it is - * removed before being added to the tail. - * - * This function should be used when queueing a context for the first time, or - * re-queueing a context that has been pulled from. - * - * Caller must hold kbasep_jd_device_data.queue_mutex + * Caller must hold runpool_irq.lock * * Return: true if caller should call kbase_backend_ctx_count_changed() */ -static bool kbase_js_ctx_list_add_pullable(struct kbase_device *kbdev, +static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev, struct kbase_context *kctx, int js) { bool ret = false; - lockdep_assert_held(&kbdev->js_data.queue_mutex); - lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex); + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js])) list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); @@ -1109,30 +827,24 @@ static bool kbase_js_ctx_list_add_pullable(struct kbase_device *kbdev, } /** - * kbase_js_ctx_list_add_pullable_head - Add context to the head of the - * per-slot pullable context queue + * kbase_js_ctx_list_add_pullable_head_nolock - Variant of + * kbase_js_ctx_list_add_pullable_head() + * where the caller must hold + * runpool_irq.lock * @kbdev: Device pointer * @kctx: Context to add to queue * @js: Job slot to use * - * If the context is on either the pullable or unpullable queues, then it is - * removed before being added to the head. 
- * - * This function should be used when a context has been scheduled, but no jobs - * can currently be pulled from it. - * - * Caller must hold kbasep_jd_device_data.queue_mutex + * Caller must hold runpool_irq.lock * * Return: true if caller should call kbase_backend_ctx_count_changed() */ -static bool kbase_js_ctx_list_add_pullable_head(struct kbase_device *kbdev, - struct kbase_context *kctx, - int js) +static bool kbase_js_ctx_list_add_pullable_head_nolock( + struct kbase_device *kbdev, struct kbase_context *kctx, int js) { bool ret = false; - lockdep_assert_held(&kbdev->js_data.queue_mutex); - lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex); + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js])) list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); @@ -1155,8 +867,37 @@ static bool kbase_js_ctx_list_add_pullable_head(struct kbase_device *kbdev, } /** - * kbase_js_ctx_list_add_unpullable - Add context to the tail of the per-slot - * unpullable context queue + * kbase_js_ctx_list_add_pullable_head - Add context to the head of the + * per-slot pullable context queue + * @kbdev: Device pointer + * @kctx: Context to add to queue + * @js: Job slot to use + * + * If the context is on either the pullable or unpullable queues, then it is + * removed before being added to the head. + * + * This function should be used when a context has been scheduled, but no jobs + * can currently be pulled from it. + * + * Return: true if caller should call kbase_backend_ctx_count_changed() + */ +static bool kbase_js_ctx_list_add_pullable_head(struct kbase_device *kbdev, + struct kbase_context *kctx, + int js) +{ + bool ret; + unsigned long flags; + + spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags); + ret = kbase_js_ctx_list_add_pullable_head_nolock(kbdev, kctx, js); + spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags); + + return ret; +} + +/** + * kbase_js_ctx_list_add_unpullable_nolock - Add context to the tail of the + * per-slot unpullable context queue * @kbdev: Device pointer * @kctx: Context to add to queue * @js: Job slot to use @@ -1167,18 +908,17 @@ static bool kbase_js_ctx_list_add_pullable_head(struct kbase_device *kbdev, * This function should be used when a context has been pulled from, and there * are no jobs remaining on the specified slot. 
* - * Caller must hold kbasep_jd_device_data.queue_mutex + * Caller must hold runpool_irq.lock * * Return: true if caller should call kbase_backend_ctx_count_changed() */ -static bool kbase_js_ctx_list_add_unpullable(struct kbase_device *kbdev, +static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev, struct kbase_context *kctx, int js) { bool ret = false; - lockdep_assert_held(&kbdev->js_data.queue_mutex); - lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex); + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js], &kbdev->js_data.ctx_list_unpullable[js]); @@ -1198,8 +938,8 @@ static bool kbase_js_ctx_list_add_unpullable(struct kbase_device *kbdev, } /** - * kbase_js_ctx_list_remove - Remove context from the per-slot pullable or - * unpullable context queues + * kbase_js_ctx_list_remove_nolock - Remove context from the per-slot pullable + * or unpullable context queues * @kbdev: Device pointer * @kctx: Context to remove from queue * @js: Job slot to use @@ -1209,18 +949,17 @@ static bool kbase_js_ctx_list_add_unpullable(struct kbase_device *kbdev, * This function should be used when a context has no jobs on the GPU, and no * jobs remaining for the specified slot. * - * Caller must hold kbasep_jd_device_data.queue_mutex + * Caller must hold runpool_irq.lock * * Return: true if caller should call kbase_backend_ctx_count_changed() */ -static bool kbase_js_ctx_list_remove(struct kbase_device *kbdev, +static bool kbase_js_ctx_list_remove_nolock(struct kbase_device *kbdev, struct kbase_context *kctx, int js) { bool ret = false; - lockdep_assert_held(&kbdev->js_data.queue_mutex); - lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex); + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); WARN_ON(list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js])); @@ -1241,23 +980,24 @@ static bool kbase_js_ctx_list_remove(struct kbase_device *kbdev, } /** - * kbase_js_ctx_list_pop_head - Pop the head context off the per-slot pullable - * queue. + * kbase_js_ctx_list_pop_head_nolock - Variant of kbase_js_ctx_list_pop_head() + * where the caller must hold + * runpool_irq.lock * @kbdev: Device pointer * @js: Job slot to use * - * Caller must hold kbasep_jd_device_data::queue_mutex + * Caller must hold runpool_irq.lock * * Return: Context to use for specified slot. * NULL if no contexts present for specified slot */ -static struct kbase_context *kbase_js_ctx_list_pop_head( +static struct kbase_context *kbase_js_ctx_list_pop_head_nolock( struct kbase_device *kbdev, int js) { struct kbase_context *kctx; - lockdep_assert_held(&kbdev->js_data.queue_mutex); + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); if (list_empty(&kbdev->js_data.ctx_list_pullable[js])) return NULL; @@ -1271,6 +1011,28 @@ static struct kbase_context *kbase_js_ctx_list_pop_head( return kctx; } +/** + * kbase_js_ctx_list_pop_head - Pop the head context off the per-slot pullable + * queue. + * @kbdev: Device pointer + * @js: Job slot to use + * + * Return: Context to use for specified slot. 
+ * NULL if no contexts present for specified slot + */ +static struct kbase_context *kbase_js_ctx_list_pop_head( + struct kbase_device *kbdev, int js) +{ + struct kbase_context *kctx; + unsigned long flags; + + spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags); + kctx = kbase_js_ctx_list_pop_head_nolock(kbdev, js); + spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags); + + return kctx; +} + /** * kbase_js_ctx_pullable - Return if a context can be pulled from on the * specified slot @@ -1334,8 +1096,7 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, /* Dependent atom must already have been submitted */ if (!(dep_atom->atom_flags & - (KBASE_KATOM_FLAG_JSCTX_IN_LL | - KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED))){ + KBASE_KATOM_FLAG_JSCTX_IN_TREE)) { ret = false; break; } @@ -1354,6 +1115,12 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, ret = false; break; } + /* Each dependee atom can only have one + * same-slot dependency */ + if (dep_atom->post_dep) { + ret = false; + break; + } has_dep = true; } else { /* Only one cross-slot dependency can be @@ -1393,21 +1160,6 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, has_x_dep = true; } - if (kbase_jd_katom_dep_type(&katom->dep[i]) == - BASE_JD_DEP_TYPE_DATA && - js == dep_js) { - struct kbase_jd_atom *last_atom = - jsctx_rb_peek_last(kctx, js, - prio); - - /* Last atom on slot must be pre-dep for this - * atom */ - if (last_atom != dep_atom) { - ret = false; - break; - } - } - /* Dependency can be represented in ringbuffers */ } } @@ -1441,9 +1193,10 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, } if ((kbase_jd_katom_dep_type(&katom->dep[i]) == BASE_JD_DEP_TYPE_DATA) && - (js == dep_js)) - katom->atom_flags |= - KBASE_KATOM_FLAG_FAIL_PREV; + (js == dep_js)) { + katom->pre_dep = dep_atom; + dep_atom->post_dep = katom; + } list_del(&katom->dep_item[i]); kbase_jd_katom_dep_clear(&katom->dep[i]); @@ -1519,11 +1272,11 @@ bool kbasep_js_add_job(struct kbase_context *kctx, if (enqueue_required) { if (kbase_js_ctx_pullable(kctx, atom->slot_nr, false)) - timer_sync = kbase_js_ctx_list_add_pullable(kbdev, kctx, - atom->slot_nr); + timer_sync = kbase_js_ctx_list_add_pullable_nolock( + kbdev, kctx, atom->slot_nr); else - timer_sync = kbase_js_ctx_list_add_unpullable(kbdev, - kctx, atom->slot_nr); + timer_sync = kbase_js_ctx_list_add_unpullable_nolock( + kbdev, kctx, atom->slot_nr); } /* If this context is active and the atom is the first on its slot, * kick the job manager to attempt to fast-start the atom */ @@ -1904,7 +1657,8 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( wake_up(&js_kctx_info->ctx.is_scheduled_wait); /* Queue an action to occur after we've dropped the lock */ - release_result |= KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED; + release_result |= KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED | + KBASEP_JS_RELEASE_RESULT_SCHED_ALL; } else { kbasep_js_run_jobs_after_ctx_and_atom_release(kbdev, kctx, katom_retained_state, runpool_ctx_attr_change); @@ -1962,14 +1716,12 @@ void kbasep_js_runpool_release_ctx_and_katom_retained_state( { struct kbasep_js_device_data *js_devdata; struct kbasep_js_kctx_info *js_kctx_info; - base_jd_event_code event_code; kbasep_js_release_result release_result; KBASE_DEBUG_ASSERT(kbdev != NULL); KBASE_DEBUG_ASSERT(kctx != NULL); js_kctx_info = &kctx->jctx.sched_info; js_devdata = &kbdev->js_data; - event_code = katom_retained_state->event_code; mutex_lock(&js_devdata->queue_mutex); 
mutex_lock(&js_kctx_info->ctx.jsctx_mutex); @@ -2041,77 +1793,11 @@ static void kbasep_js_runpool_release_ctx_no_schedule( */ } -/** - * kbase_js_set_timeouts - update all JS timeouts with user specified data - * @kbdev: Device pointer - * - * Timeouts are specified through the 'js_timeouts' sysfs file. If a timeout is - * set to a positive number then that becomes the new value used, if a timeout - * is negative then the default is set. - */ -static void kbase_js_set_timeouts(struct kbase_device *kbdev) +void kbase_js_set_timeouts(struct kbase_device *kbdev) { - struct kbasep_js_device_data *js_data = &kbdev->js_data; - - if (kbdev->js_scheduling_period_ns < 0) - js_data->scheduling_period_ns = DEFAULT_JS_SCHEDULING_PERIOD_NS; - else if (kbdev->js_scheduling_period_ns > 0) - js_data->scheduling_period_ns = kbdev->js_scheduling_period_ns; - - if (kbdev->js_soft_stop_ticks < 0) - js_data->soft_stop_ticks = DEFAULT_JS_SOFT_STOP_TICKS; - else if (kbdev->js_soft_stop_ticks > 0) - js_data->soft_stop_ticks = kbdev->js_soft_stop_ticks; - - if (kbdev->js_soft_stop_ticks_cl < 0) - js_data->soft_stop_ticks_cl = DEFAULT_JS_SOFT_STOP_TICKS_CL; - else if (kbdev->js_soft_stop_ticks_cl > 0) - js_data->soft_stop_ticks_cl = kbdev->js_soft_stop_ticks_cl; - - if (kbdev->js_hard_stop_ticks_ss < 0) { - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408)) - js_data->hard_stop_ticks_ss = - DEFAULT_JS_HARD_STOP_TICKS_SS_8408; - else - js_data->hard_stop_ticks_ss = - DEFAULT_JS_HARD_STOP_TICKS_SS; - } else if (kbdev->js_hard_stop_ticks_ss > 0) { - js_data->hard_stop_ticks_ss = kbdev->js_hard_stop_ticks_ss; - } - - if (kbdev->js_hard_stop_ticks_cl < 0) - js_data->hard_stop_ticks_cl = DEFAULT_JS_HARD_STOP_TICKS_CL; - else if (kbdev->js_hard_stop_ticks_cl > 0) - js_data->hard_stop_ticks_cl = kbdev->js_hard_stop_ticks_cl; - - if (kbdev->js_hard_stop_ticks_dumping < 0) - js_data->hard_stop_ticks_dumping = - DEFAULT_JS_HARD_STOP_TICKS_DUMPING; - else if (kbdev->js_hard_stop_ticks_dumping > 0) - js_data->hard_stop_ticks_dumping = - kbdev->js_hard_stop_ticks_dumping; - - if (kbdev->js_reset_ticks_ss < 0) { - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408)) - js_data->gpu_reset_ticks_ss = - DEFAULT_JS_RESET_TICKS_SS_8408; - else - js_data->gpu_reset_ticks_ss = DEFAULT_JS_RESET_TICKS_SS; - } else if (kbdev->js_reset_ticks_ss > 0) { - js_data->gpu_reset_ticks_ss = kbdev->js_reset_ticks_ss; - } - - if (kbdev->js_reset_ticks_cl < 0) - js_data->gpu_reset_ticks_cl = DEFAULT_JS_RESET_TICKS_CL; - else if (kbdev->js_reset_ticks_cl > 0) - js_data->gpu_reset_ticks_cl = kbdev->js_reset_ticks_cl; + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); - if (kbdev->js_reset_ticks_dumping < 0) - js_data->gpu_reset_ticks_dumping = - DEFAULT_JS_RESET_TICKS_DUMPING; - else if (kbdev->js_reset_ticks_dumping > 0) - js_data->gpu_reset_ticks_dumping = - kbdev->js_reset_ticks_dumping; + kbase_backend_timeouts_changed(kbdev); } static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, @@ -2158,16 +1844,6 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, 0u, kbasep_js_trace_get_refcnt(kbdev, kctx)); - if (js_devdata->nr_user_contexts_running == 0 && - kbdev->js_timeouts_updated) { - /* Only when there are no other contexts submitting jobs: - * Latch in run-time job scheduler timeouts that were set - * through js_timeouts sysfs file */ - kbase_js_set_timeouts(kbdev); - - kbdev->js_timeouts_updated = false; - } - js_kctx_info->ctx.is_scheduled = true; mutex_lock(&new_address_space->transaction_mutex); @@ -2177,13 +1853,10 @@ 
static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, if (!kbase_backend_use_ctx(kbdev, kctx, as_nr)) { spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); mutex_unlock(&new_address_space->transaction_mutex); - /* If address space is not pending, then kbase_backend_use_ctx() - * failed. Roll back the transaction so far and return */ - if (!kctx->as_pending) { - js_kctx_info->ctx.is_scheduled = false; + /* Roll back the transaction so far and return */ + js_kctx_info->ctx.is_scheduled = false; - kbase_backend_release_free_address_space(kbdev, as_nr); - } + kbase_backend_release_free_address_space(kbdev, as_nr); mutex_unlock(&js_devdata->runpool_mutex); @@ -2254,11 +1927,6 @@ static bool kbase_js_use_ctx(struct kbase_device *kbdev, unsigned long flags; spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); - if (kctx->as_pending) { - /* Context waiting for AS to be assigned */ - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); - return false; - } if (kbase_backend_use_ctx_sched(kbdev, kctx)) { /* Context already has ASID - mark as active */ kbdev->hwaccess.active_kctx = kctx; @@ -2297,7 +1965,7 @@ void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev, is_scheduled = js_kctx_info->ctx.is_scheduled; if (!is_scheduled) { /* Add the context to the pullable list */ - if (kbase_js_ctx_list_add_pullable(kbdev, kctx, 0)) + if (kbase_js_ctx_list_add_pullable_head(kbdev, kctx, 0)) kbase_js_sync_timers(kbdev); /* Fast-starting requires the jsctx_mutex to be dropped, @@ -2325,7 +1993,6 @@ void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev, struct kbase_context *kctx) { struct kbasep_js_kctx_info *js_kctx_info; - bool pending; KBASE_DEBUG_ASSERT(kctx != NULL); js_kctx_info = &kctx->jctx.sched_info; @@ -2333,13 +2000,10 @@ void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev, /* We don't need to use the address space anymore */ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); js_kctx_info->ctx.flags &= (~KBASE_CTX_FLAG_PRIVILEGED); - pending = kctx->as_pending; mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); - /* Release the context - it will be scheduled out if there is no - * pending job */ - if (!pending) - kbasep_js_runpool_release_ctx(kbdev, kctx); + /* Release the context - it will be scheduled out */ + kbasep_js_runpool_release_ctx(kbdev, kctx); kbase_js_sched_all(kbdev); } @@ -2378,8 +2042,10 @@ void kbasep_js_suspend(struct kbase_device *kbdev) * the instrumented context. It'll be suspended by * disabling instrumentation */ if (kctx->jctx.sched_info.ctx.flags & - KBASE_CTX_FLAG_PRIVILEGED) - KBASE_DEBUG_ASSERT(++nr_privileged_ctx == 1); + KBASE_CTX_FLAG_PRIVILEGED) { + ++nr_privileged_ctx; + WARN_ON(nr_privileged_ctx != 1); + } } } CSTD_UNUSED(nr_privileged_ctx); @@ -2430,7 +2096,8 @@ void kbasep_js_resume(struct kbase_device *kbdev) if (!js_kctx_info->ctx.is_scheduled && kbase_js_ctx_pullable(kctx, js, false)) - timer_sync = kbase_js_ctx_list_add_pullable( + timer_sync = + kbase_js_ctx_list_add_pullable_nolock( kbdev, kctx, js); spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, @@ -2502,17 +2169,57 @@ bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, /* Check if there are lower priority jobs to soft stop */ kbase_job_slot_ctx_priority_check_locked(kctx, katom); - /* Add atom to ring buffer. 
*/ - if (jsctx_rb_add_atom(kctx, katom)) { - jsctx_ll_add(kctx, katom); + if ((katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) || + (katom->pre_dep && (katom->pre_dep->atom_flags & + KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST))) { + int prio = katom->sched_priority; + int js = katom->slot_nr; + struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js]; + + list_add_tail(&katom->queue, &queue->x_dep_head); + katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST; enqueue_required = false; - katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_LL; } else { - katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED; + /* Add atom to ring buffer. */ + jsctx_tree_add(kctx, katom); + katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_TREE; } + return enqueue_required; } +/** + * kbase_js_move_to_tree - Move atom (and any dependent atoms) to the + * runnable_tree, ready for execution + * @katom: Atom to submit + * + * It is assumed that @katom does not have KBASE_KATOM_FLAG_X_DEP_BLOCKED set, + * but is still present in the x_dep list. If @katom has a same-slot dependent + * atom then that atom (and any dependents) will also be moved. + */ +static void kbase_js_move_to_tree(struct kbase_jd_atom *katom) +{ + lockdep_assert_held(&katom->kctx->kbdev->js_data.runpool_irq.lock); + + while (katom) { + WARN_ON(!(katom->atom_flags & + KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST)); + + if (!(katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED)) { + list_del(&katom->queue); + katom->atom_flags &= + ~KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST; + jsctx_tree_add(katom->kctx, katom); + katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_TREE; + } else { + break; + } + + katom = katom->post_dep; + } +} + + /** * kbase_js_evict_deps - Evict dependencies of a failed atom. * @kctx: Context pointer @@ -2530,22 +2237,20 @@ static void kbase_js_evict_deps(struct kbase_context *kctx, struct kbase_jd_atom *katom, int js, int prio) { struct kbase_jd_atom *x_dep = katom->x_post_dep; - struct kbase_jd_atom *next_katom = jsctx_rb_peek_prio(kctx, js, prio); + struct kbase_jd_atom *next_katom = katom->post_dep; lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); - if (next_katom && - (next_katom->atom_flags & KBASE_KATOM_FLAG_FAIL_PREV)) { + if (next_katom) { KBASE_DEBUG_ASSERT(next_katom->status != KBASE_JD_ATOM_STATE_HW_COMPLETED); - next_katom->will_fail_event_code = katom->event_code; } /* Has cross slot depenency. 
*/ - if (x_dep && (x_dep->atom_flags & (KBASE_KATOM_FLAG_JSCTX_IN_LL | - KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED))) { + if (x_dep && (x_dep->atom_flags & (KBASE_KATOM_FLAG_JSCTX_IN_TREE | + KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST))) { /* Remove dependency.*/ x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED; @@ -2553,6 +2258,8 @@ static void kbase_js_evict_deps(struct kbase_context *kctx, if (x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) { x_dep->will_fail_event_code = katom->event_code; } + if (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST) + kbase_js_move_to_tree(x_dep); } } @@ -2582,8 +2289,7 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js) /* Due to ordering restrictions when unpulling atoms on failure, we do * not allow multiple runs of fail-dep atoms from the same context to be * present on the same slot */ - if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_PREV) && - atomic_read(&kctx->atoms_pulled_slot[js])) { + if (katom->pre_dep && atomic_read(&kctx->atoms_pulled_slot[js])) { struct kbase_jd_atom *prev_atom = kbase_backend_inspect_tail(kctx->kbdev, js); @@ -2658,7 +2364,7 @@ static void js_return_worker(struct work_struct *data) if (!atomic_read(&kctx->atoms_pulled_slot[js]) && jsctx_rb_none_to_pull(kctx, js)) - timer_sync |= kbase_js_ctx_list_remove(kbdev, kctx, js); + timer_sync |= kbase_js_ctx_list_remove_nolock(kbdev, kctx, js); if (!atomic_read(&kctx->atoms_pulled)) { if (!kctx->slots_pullable) { @@ -2679,7 +2385,7 @@ static void js_return_worker(struct work_struct *data) for (slot = 0; slot < num_slots; slot++) { if (kbase_js_ctx_pullable(kctx, slot, true)) timer_sync |= - kbase_js_ctx_list_add_pullable( + kbase_js_ctx_list_add_pullable_nolock( kbdev, kctx, slot); } } @@ -2753,9 +2459,7 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, mutex_lock(&js_devdata->runpool_mutex); spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); - if (katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED) { - jsctx_rb_remove(kctx, katom); - + if (katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE) { context_idle = !atomic_dec_return(&kctx->atoms_pulled); atomic_dec(&kctx->atoms_pulled_slot[atom_slot]); @@ -2767,12 +2471,15 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, timer_sync = true; } } - WARN_ON(katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_LL); + WARN_ON(!(katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE)); if (!atomic_read(&kctx->atoms_pulled_slot[atom_slot]) && - jsctx_rb_none_to_pull(kctx, atom_slot)) - timer_sync |= kbase_js_ctx_list_remove(kctx->kbdev, kctx, - atom_slot); + jsctx_rb_none_to_pull(kctx, atom_slot)) { + if (!list_empty( + &kctx->jctx.sched_info.ctx.ctx_list_entry[atom_slot])) + timer_sync |= kbase_js_ctx_list_remove_nolock( + kctx->kbdev, kctx, atom_slot); + } /* * If submission is disabled on this context (most likely due to an @@ -2788,7 +2495,8 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { if (kbase_js_ctx_pullable(kctx, js, true)) - timer_sync |= kbase_js_ctx_list_add_pullable( + timer_sync |= + kbase_js_ctx_list_add_pullable_nolock( kbdev, kctx, js); } } else if (katom->x_post_dep && @@ -2797,7 +2505,8 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { if (kbase_js_ctx_pullable(kctx, js, true)) - timer_sync |= kbase_js_ctx_list_add_pullable( + timer_sync |= + kbase_js_ctx_list_add_pullable_nolock( kbdev, kctx, js); } } @@ -2822,12 +2531,11 @@ void 
kbase_js_complete_atom(struct kbase_jd_atom *katom, ktime_t *end_timestamp) struct kbase_device *kbdev; struct kbase_context *kctx = katom->kctx; union kbasep_js_policy *js_policy; - struct kbasep_js_device_data *js_devdata; + struct kbase_jd_atom *x_dep = katom->x_post_dep; kbdev = kctx->kbdev; js_policy = &kbdev->js_data.policy; - js_devdata = &kbdev->js_data; lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); @@ -2846,16 +2554,6 @@ void kbase_js_complete_atom(struct kbase_jd_atom *katom, ktime_t *end_timestamp) katom->slot_nr), NULL, 0); #endif - kbase_tlstream_tl_nret_atom_lpu( - katom, - &kbdev->gpu_props.props.raw_props.js_features[ - katom->slot_nr]); - kbase_tlstream_tl_nret_atom_as(katom, &kbdev->as[kctx->as_nr]); - kbase_tlstream_tl_nret_ctx_lpu( - kctx, - &kbdev->gpu_props.props.raw_props.js_features[ - katom->slot_nr]); - /* Calculate the job's time used */ if (end_timestamp != NULL) { /* Only calculating it for jobs that really run on the HW (e.g. @@ -2879,21 +2577,26 @@ void kbase_js_complete_atom(struct kbase_jd_atom *katom, ktime_t *end_timestamp) kbase_jd_done(katom, katom->slot_nr, end_timestamp, 0); /* Unblock cross dependency if present */ - if (katom->x_post_dep && (katom->event_code == BASE_JD_EVENT_DONE || - !(katom->x_post_dep->atom_flags & - KBASE_KATOM_FLAG_FAIL_BLOCKER))) - katom->x_post_dep->atom_flags &= - ~KBASE_KATOM_FLAG_X_DEP_BLOCKED; + if (x_dep && (katom->event_code == BASE_JD_EVENT_DONE || + !(x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER)) && + (x_dep->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED)) { + bool was_pullable = kbase_js_ctx_pullable(kctx, x_dep->slot_nr, + false); + x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED; + kbase_js_move_to_tree(x_dep); + if (!was_pullable && kbase_js_ctx_pullable(kctx, x_dep->slot_nr, + false)) + kbase_js_ctx_list_add_pullable_nolock(kbdev, kctx, + x_dep->slot_nr); + } } void kbase_js_sched(struct kbase_device *kbdev, int js_mask) { struct kbasep_js_device_data *js_devdata; - union kbasep_js_policy *js_policy; bool timer_sync = false; js_devdata = &kbdev->js_data; - js_policy = &js_devdata->policy; down(&js_devdata->schedule_sem); mutex_lock(&js_devdata->queue_mutex); @@ -2943,16 +2646,15 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) /* Context can not be used at this time */ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); - if (kctx->as_pending || - kbase_js_ctx_pullable(kctx, js, false) + if (kbase_js_ctx_pullable(kctx, js, false) || (kctx->jctx.sched_info.ctx.flags & KBASE_CTX_FLAG_PRIVILEGED)) timer_sync |= - kbase_js_ctx_list_add_pullable_head( + kbase_js_ctx_list_add_pullable_head_nolock( kctx->kbdev, kctx, js); else timer_sync |= - kbase_js_ctx_list_add_unpullable( + kbase_js_ctx_list_add_unpullable_nolock( kctx->kbdev, kctx, js); spin_unlock_irqrestore( &js_devdata->runpool_irq.lock, flags); @@ -2981,12 +2683,12 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) /* Failed to pull jobs - push to head of list */ if (kbase_js_ctx_pullable(kctx, js, true)) timer_sync |= - kbase_js_ctx_list_add_pullable_head( + kbase_js_ctx_list_add_pullable_head_nolock( kctx->kbdev, kctx, js); else timer_sync |= - kbase_js_ctx_list_add_unpullable( + kbase_js_ctx_list_add_unpullable_nolock( kctx->kbdev, kctx, js); @@ -3012,10 +2714,12 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) /* Push to back of list */ if (kbase_js_ctx_pullable(kctx, js, true)) - timer_sync |= kbase_js_ctx_list_add_pullable( + timer_sync |= + kbase_js_ctx_list_add_pullable_nolock( 
kctx->kbdev, kctx, js); else - timer_sync |= kbase_js_ctx_list_add_unpullable( + timer_sync |= + kbase_js_ctx_list_add_unpullable_nolock( kctx->kbdev, kctx, js); spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); @@ -3046,6 +2750,7 @@ void kbase_js_zap_context(struct kbase_context *kctx) /* First, atomically do the following: * - mark the context as dying * - try to evict it from the policy queue */ + mutex_lock(&kctx->jctx.lock); mutex_lock(&js_devdata->queue_mutex); mutex_lock(&js_kctx_info->ctx.jsctx_mutex); js_kctx_info->ctx.is_dying = true; @@ -3121,6 +2826,7 @@ void kbase_js_zap_context(struct kbase_context *kctx) mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); mutex_unlock(&js_devdata->queue_mutex); + mutex_unlock(&kctx->jctx.lock); } else { unsigned long flags; bool was_retained; @@ -3156,6 +2862,7 @@ void kbase_js_zap_context(struct kbase_context *kctx) spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); mutex_unlock(&js_devdata->queue_mutex); + mutex_unlock(&kctx->jctx.lock); dev_dbg(kbdev->dev, "Zap: Ctx %p Release (may or may not schedule out immediately)", kctx); diff --git a/drivers/gpu/arm/midgard/mali_kbase_js.h b/drivers/gpu/arm/midgard/mali_kbase_js.h index bdb820a7e952..66b213293016 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_js.h +++ b/drivers/gpu/arm/midgard/mali_kbase_js.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -608,6 +608,16 @@ void kbase_js_zap_context(struct kbase_context *kctx); bool kbase_js_is_atom_valid(struct kbase_device *kbdev, struct kbase_jd_atom *katom); +/** + * kbase_js_set_timeouts - update all JS timeouts with user specified data + * @kbdev: Device pointer + * + * Timeouts are specified through the 'js_timeouts' sysfs file. If a timeout is + * set to a positive number then that becomes the new value used, if a timeout + * is negative then the default is set. + */ +void kbase_js_set_timeouts(struct kbase_device *kbdev); + /* * Helpers follow */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_defs.h b/drivers/gpu/arm/midgard/mali_kbase_js_defs.h index 75d4b98b24a5..e1342045b394 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_js_defs.h +++ b/drivers/gpu/arm/midgard/mali_kbase_js_defs.h @@ -348,8 +348,8 @@ struct kbasep_js_device_data { u32 cfs_ctx_runtime_init_slices; /**< Value for DEFAULT_JS_CFS_CTX_RUNTIME_INIT_SLICES */ u32 cfs_ctx_runtime_min_slices; /**< Value for DEFAULT_JS_CFS_CTX_RUNTIME_MIN_SLICES */ - /**< Value for JS_SOFT_EVENT_TIMEOUT */ - atomic_t soft_event_timeout_ms; + /**< Value for JS_SOFT_JOB_TIMEOUT */ + atomic_t soft_job_timeout_ms; /** List of suspended soft jobs */ struct list_head suspended_soft_jobs_list; diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.c b/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.c index 692460710ce0..90c13458ec7c 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.c +++ b/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -235,16 +235,11 @@ int kbasep_js_policy_init_ctx(struct kbase_device *kbdev, struct kbase_context * void kbasep_js_policy_term_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx) { - struct kbasep_js_policy_cfs_ctx *ctx_info; - struct kbasep_js_policy_cfs *policy_info; struct kbase_device *kbdev; KBASE_DEBUG_ASSERT(js_policy != NULL); KBASE_DEBUG_ASSERT(kctx != NULL); - policy_info = &js_policy->cfs; - ctx_info = &kctx->jctx.sched_info.runpool.policy_ctx.cfs; - kbdev = container_of(js_policy, struct kbase_device, js_data.policy); KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_POLICY_TERM_CTX, kctx, NULL, 0u, kbasep_js_policy_trace_get_refcnt(kbdev, kctx)); diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem.c b/drivers/gpu/arm/midgard/mali_kbase_mem.c index 0c69bf4d5c62..c1851caa95a0 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_mem.c +++ b/drivers/gpu/arm/midgard/mali_kbase_mem.c @@ -30,13 +30,13 @@ #include #include #include +#include #include #include #include #include #include -#include #include #include @@ -610,6 +610,12 @@ int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages) goto fail_unlock; } + if (same_va->nr_pages < jit_va_pages || + kctx->same_va_end < jit_va_pages) { + err = -ENOMEM; + goto fail_unlock; + } + /* It's safe to adjust the same VA zone now */ same_va->nr_pages -= jit_va_pages; kctx->same_va_end -= jit_va_pages; @@ -789,41 +795,6 @@ void kbase_free_alloced_region(struct kbase_va_region *reg) KBASE_EXPORT_TEST_API(kbase_free_alloced_region); -void kbase_mmu_update(struct kbase_context *kctx) -{ - KBASE_DEBUG_ASSERT(NULL != kctx); - lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); - /* ASSERT that the context has a valid as_nr, which is only the case - * when it's scheduled in. - * - * as_nr won't change because the caller has the runpool_irq lock */ - KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); - lockdep_assert_held(&kctx->kbdev->as[kctx->as_nr].transaction_mutex); - - kctx->kbdev->mmu_mode->update(kctx); -} - -KBASE_EXPORT_TEST_API(kbase_mmu_update); - -void kbase_mmu_disable(struct kbase_context *kctx) -{ - KBASE_DEBUG_ASSERT(NULL != kctx); - /* ASSERT that the context has a valid as_nr, which is only the case - * when it's scheduled in. 
- * - * as_nr won't change because the caller has the runpool_irq lock */ - KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); - - kctx->kbdev->mmu_mode->disable_as(kctx->kbdev, kctx->as_nr); -} - -KBASE_EXPORT_TEST_API(kbase_mmu_disable); - -void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr) -{ - kbdev->mmu_mode->disable_as(kbdev, as_nr); -} - int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align) { int err; @@ -1180,12 +1151,7 @@ int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *re dev_warn(reg->kctx->kbdev->dev, "Could not unmap from the GPU...\n"); goto out; } -#ifndef CONFIG_MALI_NO_MALI - if (kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_6367)) { - /* Wait for GPU to flush write buffer before freeing physical pages */ - kbase_wait_write_flush(kctx); - } -#endif + /* This will also free the physical pages */ kbase_free_alloced_region(reg); @@ -1607,6 +1573,7 @@ void kbase_gpu_vm_unlock(struct kbase_context *kctx) KBASE_EXPORT_TEST_API(kbase_gpu_vm_unlock); +#ifdef CONFIG_DEBUG_FS struct kbase_jit_debugfs_data { int (*func)(struct kbase_jit_debugfs_data *); struct mutex lock; @@ -1783,6 +1750,7 @@ void kbase_jit_debugfs_add(struct kbase_context *kctx) debugfs_create_file("mem_jit_phys", S_IRUGO, kctx->kctx_dentry, kctx, &kbase_jit_debugfs_phys_fops); } +#endif /* CONFIG_DEBUG_FS */ /** * kbase_jit_destroy_worker - Deferred worker which frees JIT allocations @@ -2069,11 +2037,19 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx, pages = alloc->imported.user_buf.pages; +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) pinned_pages = get_user_pages(NULL, mm, address, alloc->imported.user_buf.nr_pages, reg->flags & KBASE_REG_GPU_WR, 0, pages, NULL); +#else + pinned_pages = get_user_pages_remote(NULL, mm, + address, + alloc->imported.user_buf.nr_pages, + reg->flags & KBASE_REG_GPU_WR, + 0, pages, NULL); +#endif if (pinned_pages <= 0) return pinned_pages; @@ -2279,7 +2255,7 @@ struct kbase_mem_phy_alloc *kbase_map_external_resource( /* decide what needs to happen for this resource */ switch (reg->gpu_alloc->type) { - case BASE_MEM_IMPORT_TYPE_USER_BUFFER: { + case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { if (reg->gpu_alloc->imported.user_buf.mm != locked_mm) goto exit; @@ -2293,7 +2269,7 @@ struct kbase_mem_phy_alloc *kbase_map_external_resource( } } break; - case BASE_MEM_IMPORT_TYPE_UMP: { + case KBASE_MEM_TYPE_IMPORTED_UMP: { #if defined(CONFIG_KDS) && defined(CONFIG_UMP) if (kds_res_count) { struct kds_resource *kds_res; @@ -2309,7 +2285,7 @@ struct kbase_mem_phy_alloc *kbase_map_external_resource( break; } #ifdef CONFIG_DMA_SHARED_BUFFER - case BASE_MEM_IMPORT_TYPE_UMM: { + case KBASE_MEM_TYPE_IMPORTED_UMM: { #ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS if (kds_res_count) { struct kds_resource *kds_res; diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem.h b/drivers/gpu/arm/midgard/mali_kbase_mem.h index dea3aba0475e..7b2433e868bd 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_mem.h +++ b/drivers/gpu/arm/midgard/mali_kbase_mem.h @@ -618,6 +618,9 @@ void kbase_mmu_term(struct kbase_context *kctx); phys_addr_t kbase_mmu_alloc_pgd(struct kbase_context *kctx); void kbase_mmu_free_pgd(struct kbase_context *kctx); +int kbase_mmu_insert_pages_no_flush(struct kbase_context *kctx, u64 vpfn, + phys_addr_t *phys, size_t nr, + unsigned long flags); int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn, phys_addr_t *phys, size_t nr, unsigned long flags); @@ -650,6 +653,12 @@ 
int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg); void kbase_mmu_update(struct kbase_context *kctx); /** + * kbase_mmu_disable() - Disable the MMU for a previously active kbase context. + * @kctx: Kbase context + * + * Disable and perform the required cache maintenance to remove the all + * data from provided kbase context from the GPU caches. + * * The caller has the following locking conditions: * - It must hold kbase_as::transaction_mutex on kctx's address space * - It must hold the kbasep_js_device_data::runpool_irq::lock @@ -657,11 +666,13 @@ void kbase_mmu_update(struct kbase_context *kctx); void kbase_mmu_disable(struct kbase_context *kctx); /** - * kbase_mmu_disable_as() - set the MMU in unmapped mode for an address space. - * + * kbase_mmu_disable_as() - Set the MMU to unmapped mode for the specified + * address space. * @kbdev: Kbase device - * @as_nr: Number of the address space for which the MMU - * should be set in unmapped mode. + * @as_nr: The address space number to set to unmapped. + * + * This function must only be called during reset/power-up and it used to + * ensure the registers are in a known state. * * The caller must hold kbdev->as[as_nr].transaction_mutex. */ @@ -881,11 +892,13 @@ void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle, void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle, size_t size, enum dma_data_direction dir); +#ifdef CONFIG_DEBUG_FS /** * kbase_jit_debugfs_add - Add per context debugfs entry for JIT. * @kctx: kbase context */ void kbase_jit_debugfs_add(struct kbase_context *kctx); +#endif /* CONFIG_DEBUG_FS */ /** * kbase_jit_init - Initialize the JIT memory pool management diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c index 0fb570ccc721..f91d3c916355 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c +++ b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c @@ -1086,7 +1086,7 @@ static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx, in /* no read or write permission given on import, only on run do we give the right permissions */ - reg->gpu_alloc->type = BASE_MEM_IMPORT_TYPE_UMM; + reg->gpu_alloc->type = KBASE_MEM_TYPE_IMPORTED_UMM; reg->gpu_alloc->imported.umm.sgt = NULL; reg->gpu_alloc->imported.umm.dma_buf = dma_buf; reg->gpu_alloc->imported.umm.dma_attachment = dma_attachment; @@ -1184,8 +1184,13 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( /* We can't really store the page list because that would involve */ /* keeping the pages pinned - instead we pin/unpin around the job */ /* (as part of the external resources handling code) */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) faulted_pages = get_user_pages(current, current->mm, address, *va_pages, reg->flags & KBASE_REG_GPU_WR, 0, NULL, NULL); +#else + faulted_pages = get_user_pages(address, *va_pages, + reg->flags & KBASE_REG_GPU_WR, 0, NULL, NULL); +#endif up_read(¤t->mm->mmap_sem); if (faulted_pages != *va_pages) @@ -1651,18 +1656,6 @@ static int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx, ret = kbase_mmu_teardown_pages(kctx, reg->start_pfn + new_pages, delta); - if (ret) - return ret; - -#ifndef CONFIG_MALI_NO_MALI - if (kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_6367)) { - /* - * Wait for GPU to flush write buffer before freeing - * physical pages. 
- */ - kbase_wait_write_flush(kctx); - } -#endif return ret; } @@ -2450,8 +2443,8 @@ out: KBASE_EXPORT_TEST_API(kbase_mmap); -void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size, - struct kbase_vmap_struct *map) +void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size, + unsigned long prot_request, struct kbase_vmap_struct *map) { struct kbase_va_region *reg; unsigned long page_index; @@ -2489,6 +2482,11 @@ void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size, if (reg->flags & KBASE_REG_DONT_NEED) goto out_unlock; + /* check access permissions can be satisfied + * Intended only for checking KBASE_REG_{CPU,GPU}_{RD,WR} */ + if ((reg->flags & prot_request) != prot_request) + goto out_unlock; + page_array = kbase_get_cpu_phy_pages(reg); if (!page_array) goto out_unlock; @@ -2505,6 +2503,9 @@ void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size, /* Map uncached */ prot = pgprot_writecombine(prot); } + /* Note: enforcing a RO prot_request onto prot is not done, since: + * - CPU-arch-specific integration required + * - kbase_vmap() requires no access checks to be made/enforced */ cpu_addr = vmap(pages, page_count, VM_MAP, prot); @@ -2563,6 +2564,17 @@ out_unlock: kbase_gpu_vm_unlock(kctx); return NULL; } + +void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size, + struct kbase_vmap_struct *map) +{ + /* 0 is specified for prot_request to indicate no access checks should + * be made. + * + * As mentioned in kbase_vmap_prot() this means that a kernel-side + * CPU-RO mapping is not enforced to allow this to work */ + return kbase_vmap_prot(kctx, gpu_addr, size, 0u, map); +} KBASE_EXPORT_TEST_API(kbase_vmap); void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map) diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h index 6c0fb5648514..6471747a7dc5 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h +++ b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h @@ -118,8 +118,83 @@ struct kbase_vmap_struct { size_t size; bool is_cached; }; + + +/** + * kbase_vmap_prot - Map a GPU VA range into the kernel safely, only if the + * requested access permissions are supported + * @kctx: Context the VA range belongs to + * @gpu_addr: Start address of VA range + * @size: Size of VA range + * @prot_request: Flags indicating how the caller will then access the memory + * @map: Structure to be given to kbase_vunmap() on freeing + * + * Return: Kernel-accessible CPU pointer to the VA range, or NULL on error + * + * Map a GPU VA Range into the kernel. The VA range must be contained within a + * GPU memory region. Appropriate CPU cache-flushing operations are made as + * required, dependent on the CPU mapping for the memory region. + * + * This is safer than using kmap() on the pages directly, + * because the pages here are refcounted to prevent freeing (and hence reuse + * elsewhere in the system) until an kbase_vunmap() + * + * The flags in @prot_request should use KBASE_REG_{CPU,GPU}_{RD,WR}, to check + * whether the region should allow the intended access, and return an error if + * disallowed. This is essential for security of imported memory, particularly + * a user buf from SHM mapped into the process as RO. In that case, write + * access must be checked if the intention is for kernel to write to the + * memory. 
+ * + * The checks are also there to help catch access errors on memory where + * security is not a concern: imported memory that is always RW, and memory + * that was allocated and owned by the process attached to @kctx. In this case, + * it helps to identify memory that was was mapped with the wrong access type. + * + * Note: KBASE_REG_GPU_{RD,WR} flags are currently supported for legacy cases + * where either the security of memory is solely dependent on those flags, or + * when userspace code was expecting only the GPU to access the memory (e.g. HW + * workarounds). + * + */ +void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size, + unsigned long prot_request, struct kbase_vmap_struct *map); + +/** + * kbase_vmap - Map a GPU VA range into the kernel safely + * @kctx: Context the VA range belongs to + * @gpu_addr: Start address of VA range + * @size: Size of VA range + * @map: Structure to be given to kbase_vunmap() on freeing + * + * Return: Kernel-accessible CPU pointer to the VA range, or NULL on error + * + * Map a GPU VA Range into the kernel. The VA range must be contained within a + * GPU memory region. Appropriate CPU cache-flushing operations are made as + * required, dependent on the CPU mapping for the memory region. + * + * This is safer than using kmap() on the pages directly, + * because the pages here are refcounted to prevent freeing (and hence reuse + * elsewhere in the system) until an kbase_vunmap() + * + * kbase_vmap_prot() should be used in preference, since kbase_vmap() makes no + * checks to ensure the security of e.g. imported user bufs from RO SHM. + */ void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size, struct kbase_vmap_struct *map); + +/** + * kbase_vunmap - Unmap a GPU VA range from the kernel + * @kctx: Context the VA range belongs to + * @map: Structure describing the mapping from the corresponding kbase_vmap() + * call + * + * Unmaps a GPU VA range from the kernel, given its @map structure obtained + * from kbase_vmap(). Appropriate CPU cache-flushing operations are made as + * required, dependent on the CPU mapping for the memory region. + * + * The reference taken on pages during kbase_vmap() is released. + */ void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map); /** @brief Allocate memory from kernel space and map it onto the GPU diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c index 0b19d05c46e8..03594102f7ef 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c +++ b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -15,7 +15,7 @@ -#include +#include #ifdef CONFIG_DEBUG_FS diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h index 9555197f305c..a1dc2e0b165b 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h +++ b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. 
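To make the kbase_vmap_prot() contract documented above concrete, here is a minimal caller sketch modelled on the kbase_dump_cpu_gpu_time() change later in this patch. The function name, the 'data' payload and the error code are placeholders; the KBASE_REG_GPU_WR check and the kbase_vmap_prot()/kbase_vunmap() pairing come from the patch itself.

static int example_write_result(struct kbase_context *kctx, u64 gpu_addr,
		const void *data, size_t size)
{
	struct kbase_vmap_struct map;
	void *cpu_addr;

	/* Refuse to write unless the region was mapped with GPU_WR */
	cpu_addr = kbase_vmap_prot(kctx, gpu_addr, size, KBASE_REG_GPU_WR,
			&map);
	if (!cpu_addr)
		return -EINVAL;

	memcpy(cpu_addr, data, size);

	/* Releases the page references taken by kbase_vmap_prot() */
	kbase_vunmap(kctx, &map);

	return 0;
}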
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -26,7 +26,6 @@ #ifndef _KBASE_MEM_PROFILE_DEBUGFS_H #define _KBASE_MEM_PROFILE_DEBUGFS_H -#include #include #include diff --git a/drivers/gpu/arm/midgard/mali_kbase_mmu.c b/drivers/gpu/arm/midgard/mali_kbase_mmu.c index ad4e572771fa..48d53723a9b4 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_mmu.c +++ b/drivers/gpu/arm/midgard/mali_kbase_mmu.c @@ -31,6 +31,7 @@ #include #endif #include +#include #include #define beenthere(kctx, f, a...) dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a) @@ -39,9 +40,31 @@ #include #include #include +#include #define KBASE_MMU_PAGE_ENTRIES 512 +/** + * kbase_mmu_flush_invalidate() - Flush and invalidate the GPU caches. + * @kctx: The KBase context. + * @vpfn: The virtual page frame number to start the flush on. + * @nr: The number of pages to flush. + * @sync: Set if the operation should be synchronous or not. + * + * Issue a cache flush + invalidate to the GPU caches and invalidate the TLBs. + * + * If sync is not set then transactions still in flight when the flush is issued + * may use the old page tables and the data they write will not be written out + * to memory, this function returns after the flush has been issued but + * before all accesses which might effect the flushed region have completed. + * + * If sync is set then accesses in the flushed region will be drained + * before data is flush and invalidated through L1, L2 and into memory, + * after which point this function will return. + */ +static void kbase_mmu_flush_invalidate(struct kbase_context *kctx, + u64 vpfn, size_t nr, bool sync); + /** * kbase_mmu_sync_pgd - sync page directory to memory * @kbdev: Device pointer. @@ -254,13 +277,26 @@ void page_fault_worker(struct work_struct *data) if (grown) { + u64 pfn_offset; u32 op; /* alloc success */ KBASE_DEBUG_ASSERT(kbase_reg_current_backed_size(region) <= region->nr_pages); /* set up the new pages */ - err = kbase_mmu_insert_pages(kctx, region->start_pfn + kbase_reg_current_backed_size(region) - new_pages, &kbase_get_gpu_phy_pages(region)[kbase_reg_current_backed_size(region) - new_pages], new_pages, region->flags); + pfn_offset = kbase_reg_current_backed_size(region) - new_pages; + /* + * Note: + * Issuing an MMU operation will unlock the MMU and cause the + * translation to be replayed. If the page insertion fails then + * rather then trying to continue the context should be killed + * so the no_flush version of insert_pages is used which allows + * us to unlock the MMU as we see fit. 
+ */ + err = kbase_mmu_insert_pages_no_flush(kctx, + region->start_pfn + pfn_offset, + &kbase_get_gpu_phy_pages(region)[pfn_offset], + new_pages, region->flags); if (err) { kbase_free_phy_pages_helper(region->gpu_alloc, new_pages); if (region->gpu_alloc != region->cpu_alloc) @@ -542,6 +578,7 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, bool recover_required = false; u64 recover_vpfn = vpfn; size_t recover_count = 0; + size_t remain = nr; int err; KBASE_DEBUG_ASSERT(NULL != kctx); @@ -549,16 +586,20 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, /* 64-bit address range is the max */ KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE)); + /* Early out if there is nothing to do */ + if (nr == 0) + return 0; + mutex_lock(&kctx->mmu_lock); - while (nr) { + while (remain) { unsigned int i; unsigned int index = vpfn & 0x1FF; unsigned int count = KBASE_MMU_PAGE_ENTRIES - index; struct page *p; - if (count > nr) - count = nr; + if (count > remain) + count = remain; /* * Repeatedly calling mmu_get_bottom_pte() is clearly @@ -605,7 +646,7 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, } vpfn += count; - nr -= count; + remain -= count; kbase_mmu_sync_pgd(kctx->kbdev, kbase_dma_addr(p) + (index * sizeof(u64)), @@ -619,17 +660,16 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, recover_count += count; } mutex_unlock(&kctx->mmu_lock); + kbase_mmu_flush_invalidate(kctx, vpfn, nr, false); return 0; fail_unlock: mutex_unlock(&kctx->mmu_lock); + kbase_mmu_flush_invalidate(kctx, vpfn, nr, false); return err; } -/* - * Map 'nr' pages pointed to by 'phys' at GPU PFN 'vpfn' - */ -int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn, +int kbase_mmu_insert_pages_no_flush(struct kbase_context *kctx, u64 vpfn, phys_addr_t *phys, size_t nr, unsigned long flags) { @@ -640,6 +680,7 @@ int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn, bool recover_required = false; u64 recover_vpfn = vpfn; size_t recover_count = 0; + size_t remain = nr; int err; KBASE_DEBUG_ASSERT(NULL != kctx); @@ -647,16 +688,20 @@ int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn, /* 64-bit address range is the max */ KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE)); + /* Early out if there is nothing to do */ + if (nr == 0) + return 0; + mutex_lock(&kctx->mmu_lock); - while (nr) { + while (remain) { unsigned int i; unsigned int index = vpfn & 0x1FF; unsigned int count = KBASE_MMU_PAGE_ENTRIES - index; struct page *p; - if (count > nr) - count = nr; + if (count > remain) + count = remain; /* * Repeatedly calling mmu_get_bottom_pte() is clearly @@ -704,7 +749,7 @@ int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn, phys += count; vpfn += count; - nr -= count; + remain -= count; kbase_mmu_sync_pgd(kctx->kbdev, kbase_dma_addr(p) + (index * sizeof(u64)), @@ -726,78 +771,200 @@ fail_unlock: return err; } +/* + * Map 'nr' pages pointed to by 'phys' at GPU PFN 'vpfn' + */ +int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn, + phys_addr_t *phys, size_t nr, + unsigned long flags) +{ + int err; + + err = kbase_mmu_insert_pages_no_flush(kctx, vpfn, phys, nr, flags); + kbase_mmu_flush_invalidate(kctx, vpfn, nr, false); + return err; +} + KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages); /** - * This function is responsible for validating the MMU PTs - * triggering reguired flushes. 
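A small sketch of the sync flag documented for kbase_mmu_flush_invalidate() above, reflecting how the callers added elsewhere in mali_kbase_mmu.c by this patch use it; the wrapper below is hypothetical and exists only to show the two cases side by side.

static void example_flush_range(struct kbase_context *kctx, u64 vpfn,
		size_t nr, bool pages_may_be_freed)
{
	/*
	 * sync == true drains in-flight GPU accesses to the range before
	 * returning, which the teardown/update paths need before backing
	 * pages are freed or reused; sync == false is sufficient after
	 * inserting brand-new mappings.
	 */
	kbase_mmu_flush_invalidate(kctx, vpfn, nr, pages_may_be_freed);
}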
+ * kbase_mmu_flush_invalidate_noretain() - Flush and invalidate the GPU caches + * without retaining the kbase context. + * @kctx: The KBase context. + * @vpfn: The virtual page frame number to start the flush on. + * @nr: The number of pages to flush. + * @sync: Set if the operation should be synchronous or not. * - * * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is - * currently scheduled into the runpool, and so potentially uses a lot of locks. - * These locks must be taken in the correct order with respect to others - * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more - * information. + * As per kbase_mmu_flush_invalidate but doesn't retain the kctx or do any + * other locking. */ -static void kbase_mmu_flush(struct kbase_context *kctx, u64 vpfn, size_t nr) +static void kbase_mmu_flush_invalidate_noretain(struct kbase_context *kctx, + u64 vpfn, size_t nr, bool sync) +{ + struct kbase_device *kbdev = kctx->kbdev; + int err; + u32 op; + + /* Early out if there is nothing to do */ + if (nr == 0) + return; + + if (sync) + op = AS_COMMAND_FLUSH_MEM; + else + op = AS_COMMAND_FLUSH_PT; + + err = kbase_mmu_hw_do_operation(kbdev, + &kbdev->as[kctx->as_nr], + kctx, vpfn, nr, op, 0); +#if KBASE_GPU_RESET_EN + if (err) { + /* Flush failed to complete, assume the + * GPU has hung and perform a reset to + * recover */ + dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover\n"); + + if (kbase_prepare_to_reset_gpu_locked(kbdev)) + kbase_reset_gpu_locked(kbdev); + } +#endif /* KBASE_GPU_RESET_EN */ + +#ifndef CONFIG_MALI_NO_MALI + /* + * As this function could be called in interrupt context the sync + * request can't block. Instead log the request and the next flush + * request will pick it up. + */ + if ((!err) && sync && + kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_6367)) + atomic_set(&kctx->drain_pending, 1); +#endif /* !CONFIG_MALI_NO_MALI */ +} + +static void kbase_mmu_flush_invalidate(struct kbase_context *kctx, + u64 vpfn, size_t nr, bool sync) { struct kbase_device *kbdev; bool ctx_is_in_runpool; +#ifndef CONFIG_MALI_NO_MALI + bool drain_pending = false; - KBASE_DEBUG_ASSERT(NULL != kctx); + if (atomic_xchg(&kctx->drain_pending, 0)) + drain_pending = true; +#endif /* !CONFIG_MALI_NO_MALI */ - kbdev = kctx->kbdev; + /* Early out if there is nothing to do */ + if (nr == 0) + return; - /* We must flush if we're currently running jobs. At the very least, we need to retain the - * context to ensure it doesn't schedule out whilst we're trying to flush it */ + kbdev = kctx->kbdev; ctx_is_in_runpool = kbasep_js_runpool_retain_ctx(kbdev, kctx); if (ctx_is_in_runpool) { KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); - /* Second level check is to try to only do this when jobs are running. The refcount is - * a heuristic for this. 
*/ - if (kbdev->js_data.runpool_irq.per_as_data[kctx->as_nr].as_busy_refcount >= 2) { - if (!kbase_pm_context_active_handle_suspend(kbdev, - KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) { - int ret; - u32 op; - - /* AS transaction begin */ - mutex_lock(&kbdev->as[ - kctx->as_nr].transaction_mutex); - - if (kbase_hw_has_issue(kbdev, - BASE_HW_ISSUE_6367)) - op = AS_COMMAND_FLUSH; - else - op = AS_COMMAND_FLUSH_MEM; - - ret = kbase_mmu_hw_do_operation(kbdev, - &kbdev->as[kctx->as_nr], - kctx, vpfn, nr, - op, 0); + if (!kbase_pm_context_active_handle_suspend(kbdev, + KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) { + int err; + u32 op; + + /* AS transaction begin */ + mutex_lock(&kbdev->as[ + kctx->as_nr].transaction_mutex); + + if (sync) + op = AS_COMMAND_FLUSH_MEM; + else + op = AS_COMMAND_FLUSH_PT; + + err = kbase_mmu_hw_do_operation(kbdev, + &kbdev->as[kctx->as_nr], + kctx, vpfn, nr, op, 0); + #if KBASE_GPU_RESET_EN - if (ret) { - /* Flush failed to complete, assume the - * GPU has hung and perform a reset to - * recover */ - dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issueing GPU soft-reset to recover\n"); - if (kbase_prepare_to_reset_gpu(kbdev)) - kbase_reset_gpu(kbdev); - } + if (err) { + /* Flush failed to complete, assume the + * GPU has hung and perform a reset to + * recover */ + dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issueing GPU soft-reset to recover\n"); + + if (kbase_prepare_to_reset_gpu(kbdev)) + kbase_reset_gpu(kbdev); + } #endif /* KBASE_GPU_RESET_EN */ - mutex_unlock(&kbdev->as[ - kctx->as_nr].transaction_mutex); - /* AS transaction end */ + mutex_unlock(&kbdev->as[ + kctx->as_nr].transaction_mutex); + /* AS transaction end */ - kbase_pm_context_idle(kbdev); +#ifndef CONFIG_MALI_NO_MALI + /* + * The transaction lock must be dropped before here + * as kbase_wait_write_flush could take it if + * the GPU was powered down (static analysis doesn't + * know this can't happen). + */ + drain_pending |= (!err) && sync && + kbase_hw_has_issue(kctx->kbdev, + BASE_HW_ISSUE_6367); + if (drain_pending) { + /* Wait for GPU to flush write buffer */ + kbase_wait_write_flush(kctx); } +#endif /* !CONFIG_MALI_NO_MALI */ + + kbase_pm_context_idle(kbdev); } kbasep_js_runpool_release_ctx(kbdev, kctx); } } +void kbase_mmu_update(struct kbase_context *kctx) +{ + lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); + /* ASSERT that the context has a valid as_nr, which is only the case + * when it's scheduled in. + * + * as_nr won't change because the caller has the runpool_irq lock */ + KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); + lockdep_assert_held(&kctx->kbdev->as[kctx->as_nr].transaction_mutex); + + kctx->kbdev->mmu_mode->update(kctx); +} +KBASE_EXPORT_TEST_API(kbase_mmu_update); + +void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr) +{ + lockdep_assert_held(&kbdev->as[as_nr].transaction_mutex); + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + + kbdev->mmu_mode->disable_as(kbdev, as_nr); +} + +void kbase_mmu_disable(struct kbase_context *kctx) +{ + /* ASSERT that the context has a valid as_nr, which is only the case + * when it's scheduled in. 
+ * + * as_nr won't change because the caller has the runpool_irq lock */ + KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); + + lockdep_assert_held(&kctx->kbdev->as[kctx->as_nr].transaction_mutex); + lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); + + /* + * The address space is being disabled, drain all knowledge of it out + * from the caches as pages and page tables might be freed after this. + * + * The job scheduler code will already be holding the locks and context + * so just do the flush. + */ + kbase_mmu_flush_invalidate_noretain(kctx, 0, ~0, true); + + kctx->kbdev->mmu_mode->disable_as(kctx->kbdev, kctx->as_nr); +} +KBASE_EXPORT_TEST_API(kbase_mmu_disable); + /* * We actually only discard the ATE, and not the page table * pages. There is a potential DoS here, as we'll leak memory by @@ -870,11 +1037,12 @@ int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr) } mutex_unlock(&kctx->mmu_lock); - kbase_mmu_flush(kctx, vpfn, requested_nr); + kbase_mmu_flush_invalidate(kctx, vpfn, requested_nr, true); return 0; fail_unlock: mutex_unlock(&kctx->mmu_lock); + kbase_mmu_flush_invalidate(kctx, vpfn, requested_nr, true); return err; } @@ -904,6 +1072,10 @@ int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, phys_addr_t *ph KBASE_DEBUG_ASSERT(0 != vpfn); KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE)); + /* Early out if there is nothing to do */ + if (nr == 0) + return 0; + mutex_lock(&kctx->mmu_lock); mmu_mode = kctx->kbdev->mmu_mode; @@ -951,11 +1123,12 @@ int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, phys_addr_t *ph } mutex_unlock(&kctx->mmu_lock); - kbase_mmu_flush(kctx, vpfn, requested_nr); + kbase_mmu_flush_invalidate(kctx, vpfn, requested_nr, true); return 0; fail_unlock: mutex_unlock(&kctx->mmu_lock); + kbase_mmu_flush_invalidate(kctx, vpfn, requested_nr, true); return err; } @@ -1246,13 +1419,17 @@ void bus_fault_worker(struct work_struct *data) #endif /* KBASE_GPU_RESET_EN */ /* NOTE: If GPU already powered off for suspend, we don't need to switch to unmapped */ if (!kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) { + unsigned long flags; /* switch to UNMAPPED mode, will abort all jobs and stop any hw counter dumping */ /* AS transaction begin */ mutex_lock(&kbdev->as[as_no].transaction_mutex); /* Set the MMU into unmapped mode */ - kbase_mmu_disable_as(kbdev, as_no); + spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags); + kbase_mmu_disable(kctx); + spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, + flags); mutex_unlock(&kbdev->as[as_no].transaction_mutex); /* AS transaction end */ @@ -1538,7 +1715,9 @@ static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, } #endif /* KBASE_GPU_RESET_EN */ /* switch to UNMAPPED mode, will abort all jobs and stop any hw counter dumping */ - kbase_mmu_disable_as(kbdev, as_no); + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + kbase_mmu_disable(kctx); + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); mutex_unlock(&as->transaction_mutex); /* AS transaction end */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_aarch64.c b/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_aarch64.c new file mode 100644 index 000000000000..791f3edf448d --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_aarch64.c @@ -0,0 +1,200 @@ +/* + * + * (C) COPYRIGHT 2010-2014, 2016 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +#include "mali_kbase_mmu_mode.h" + +#include "mali_kbase.h" +#include "mali_midg_regmap.h" + +#define ENTRY_TYPE_MASK 3ULL +/* For valid ATEs bit 1 = (level == 3) ? 1 : 0. + * The MMU is only ever configured by the driver so that ATEs + * are at level 3, so bit 1 should always be set + */ +#define ENTRY_IS_ATE 3ULL +#define ENTRY_IS_INVAL 2ULL +#define ENTRY_IS_PTE 3ULL + +#define ENTRY_ATTR_BITS (7ULL << 2) /* bits 4:2 */ +#define ENTRY_ACCESS_RW (1ULL << 6) /* bits 6:7 */ +#define ENTRY_ACCESS_RO (3ULL << 6) +#define ENTRY_SHARE_BITS (3ULL << 8) /* bits 9:8 */ +#define ENTRY_ACCESS_BIT (1ULL << 10) +#define ENTRY_NX_BIT (1ULL << 54) + +/* Helper Function to perform assignment of page table entries, to + * ensure the use of strd, which is required on LPAE systems. + */ +static inline void page_table_entry_set(u64 *pte, u64 phy) +{ +#ifdef CONFIG_64BIT + *pte = phy; +#elif defined(CONFIG_ARM) + /* + * In order to prevent the compiler keeping cached copies of + * memory, we have to explicitly say that we have updated memory. + * + * Note: We could manually move the data ourselves into R0 and + * R1 by specifying register variables that are explicitly + * given registers assignments, the down side of this is that + * we have to assume cpu endianness. To avoid this we can use + * the ldrd to read the data from memory into R0 and R1 which + * will respect the cpu endianness, we then use strd to make + * the 64 bit assignment to the page table entry. + */ + asm volatile("ldrd r0, r1, [%[ptemp]]\n\t" + "strd r0, r1, [%[pte]]\n\t" + : "=m" (*pte) + : [ptemp] "r" (&phy), [pte] "r" (pte), "m" (phy) + : "r0", "r1"); +#else +#error "64-bit atomic write must be implemented for your architecture" +#endif +} + +static void mmu_get_as_setup(struct kbase_context *kctx, + struct kbase_mmu_setup * const setup) +{ + /* Set up the required caching policies at the correct indices + * in the memattr register. 
+ */ + setup->memattr = + (AS_MEMATTR_IMPL_DEF_CACHE_POLICY << + (AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY * 8)) | + (AS_MEMATTR_FORCE_TO_CACHE_ALL << + (AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8)) | + (AS_MEMATTR_WRITE_ALLOC << + (AS_MEMATTR_INDEX_WRITE_ALLOC * 8)) | + (AS_MEMATTR_AARCH64_OUTER_IMPL_DEF << + (AS_MEMATTR_INDEX_OUTER_IMPL_DEF * 8)) | + (AS_MEMATTR_AARCH64_OUTER_WA << + (AS_MEMATTR_INDEX_OUTER_WA * 8)); + + setup->transtab = (u64)kctx->pgd & AS_TRANSTAB_BASE_MASK; + setup->transcfg = AS_TRANSCFG_ADRMODE_AARCH64_4K; +} + +static void mmu_update(struct kbase_context *kctx) +{ + struct kbase_device * const kbdev = kctx->kbdev; + struct kbase_as * const as = &kbdev->as[kctx->as_nr]; + struct kbase_mmu_setup * const current_setup = &as->current_setup; + + mmu_get_as_setup(kctx, current_setup); + + /* Apply the address space setting */ + kbase_mmu_hw_configure(kbdev, as, kctx); +} + +static void mmu_disable_as(struct kbase_device *kbdev, int as_nr) +{ + struct kbase_as * const as = &kbdev->as[as_nr]; + struct kbase_mmu_setup * const current_setup = &as->current_setup; + + current_setup->transtab = 0ULL; + current_setup->transcfg = AS_TRANSCFG_ADRMODE_UNMAPPED; + + /* Apply the address space setting */ + kbase_mmu_hw_configure(kbdev, as, NULL); +} + +static phys_addr_t pte_to_phy_addr(u64 entry) +{ + if (!(entry & 1)) + return 0; + + return entry & ~0xFFF; +} + +static int ate_is_valid(u64 ate) +{ + return ((ate & ENTRY_TYPE_MASK) == ENTRY_IS_ATE); +} + +static int pte_is_valid(u64 pte) +{ + return ((pte & ENTRY_TYPE_MASK) == ENTRY_IS_PTE); +} + +/* + * Map KBASE_REG flags to MMU flags + */ +static u64 get_mmu_flags(unsigned long flags) +{ + u64 mmu_flags; + + /* store mem_attr index as 4:2 (macro called ensures 3 bits already) */ + mmu_flags = KBASE_REG_MEMATTR_VALUE(flags) << 2; + + /* Set access flags - note that AArch64 stage 1 does not support + * write-only access, so we use read/write instead + */ + if (flags & KBASE_REG_GPU_WR) + mmu_flags |= ENTRY_ACCESS_RW; + else if (flags & KBASE_REG_GPU_RD) + mmu_flags |= ENTRY_ACCESS_RO; + + /* nx if requested */ + mmu_flags |= (flags & KBASE_REG_GPU_NX) ? 
ENTRY_NX_BIT : 0; + + if (flags & KBASE_REG_SHARE_BOTH) { + /* inner and outer shareable */ + mmu_flags |= SHARE_BOTH_BITS; + } else if (flags & KBASE_REG_SHARE_IN) { + /* inner shareable coherency */ + mmu_flags |= SHARE_INNER_BITS; + } + + return mmu_flags; +} + +static void entry_set_ate(u64 *entry, phys_addr_t phy, unsigned long flags) +{ + page_table_entry_set(entry, (phy & ~0xFFF) | + get_mmu_flags(flags) | + ENTRY_ACCESS_BIT | ENTRY_IS_ATE); +} + +static void entry_set_pte(u64 *entry, phys_addr_t phy) +{ + page_table_entry_set(entry, (phy & ~0xFFF) | + ENTRY_ACCESS_BIT | ENTRY_IS_PTE); +} + +static void entry_invalidate(u64 *entry) +{ + page_table_entry_set(entry, ENTRY_IS_INVAL); +} + +static struct kbase_mmu_mode const aarch64_mode = { + .update = mmu_update, + .get_as_setup = mmu_get_as_setup, + .disable_as = mmu_disable_as, + .pte_to_phy_addr = pte_to_phy_addr, + .ate_is_valid = ate_is_valid, + .pte_is_valid = pte_is_valid, + .entry_set_ate = entry_set_ate, + .entry_set_pte = entry_set_pte, + .entry_invalidate = entry_invalidate +}; + +struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void) +{ + return &aarch64_mode; +} diff --git a/drivers/gpu/arm/midgard/mali_kbase_pm.c b/drivers/gpu/arm/midgard/mali_kbase_pm.c index 261441fa145b..97d543464c28 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_pm.c +++ b/drivers/gpu/arm/midgard/mali_kbase_pm.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -21,10 +21,10 @@ * @file mali_kbase_pm.c * Base kernel power management APIs */ + #include #include -#include -#include +#include #include @@ -151,6 +151,10 @@ void kbase_pm_suspend(struct kbase_device *kbdev) { KBASE_DEBUG_ASSERT(kbdev); + /* Suspend vinstr. + * This call will block until vinstr is suspended. */ + kbase_vinstr_suspend(kbdev->vinstr_ctx); + mutex_lock(&kbdev->pm.lock); KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev)); kbdev->pm.suspending = true; @@ -164,9 +168,6 @@ void kbase_pm_suspend(struct kbase_device *kbdev) * the PM active count references */ kbasep_js_suspend(kbdev); - /* Suspend any counter collection that might be happening */ - kbase_instr_hwcnt_suspend(kbdev); - /* Wait for the active count to reach zero. This is not the same as * waiting for a power down, since not all policies power down when this * reaches zero. 
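Looking back at the new mali_kbase_mmu_mode_aarch64.c above, a worked example may help make the ATE bit layout concrete. Everything below is derived from the definitions in that file; the physical address is arbitrary, and memattr index 0 with no shareability flags is assumed, which is why no attribute or share bits appear.

static u64 example_compose_aarch64_ate(void)
{
	/* A GPU-writable, no-execute 4kB page at physical 0x80001000 */
	u64 ate = (0x80001000ULL & ~0xFFFULL)
			| ENTRY_ACCESS_RW	/* bit 6: read/write */
			| ENTRY_NX_BIT		/* bit 54: no-execute */
			| ENTRY_ACCESS_BIT	/* bit 10: access flag */
			| ENTRY_IS_ATE;		/* bits 1:0: level-3 ATE */

	/* Same value entry_set_ate() would store: 0x0040000080001443 */
	return ate;
}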
*/ @@ -186,9 +187,6 @@ void kbase_pm_resume(struct kbase_device *kbdev) /* Initial active call, to power on the GPU/cores if needed */ kbase_pm_context_active(kbdev); - /* Re-enable instrumentation, if it was previously disabled */ - kbase_instr_hwcnt_resume(kbdev); - /* Resume any blocked atoms (which may cause contexts to be scheduled in * and dependent atoms to run) */ kbase_resume_suspended_soft_jobs(kbdev); @@ -200,5 +198,8 @@ void kbase_pm_resume(struct kbase_device *kbdev) /* Matching idle call, to power off the GPU/cores if we didn't actually * need it and the policy doesn't want it on */ kbase_pm_context_idle(kbdev); + + /* Resume vinstr operation */ + kbase_vinstr_resume(kbdev->vinstr_ctx); } diff --git a/drivers/gpu/arm/midgard/mali_kbase_replay.c b/drivers/gpu/arm/midgard/mali_kbase_replay.c index 170b9390a773..d3a3dbfa5241 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_replay.c +++ b/drivers/gpu/arm/midgard/mali_kbase_replay.c @@ -756,7 +756,7 @@ static int kbasep_replay_parse_payload(struct kbase_context *kctx, struct base_jd_atom_v2 *t_atom, struct base_jd_atom_v2 *f_atom) { - base_jd_replay_payload *payload; + base_jd_replay_payload *payload = NULL; u64 next; u64 prev_jc = 0; u16 hw_job_id_offset = 0; @@ -767,12 +767,27 @@ static int kbasep_replay_parse_payload(struct kbase_context *kctx, replay_atom->jc, sizeof(payload)); payload = kbase_vmap(kctx, replay_atom->jc, sizeof(*payload), &map); - if (!payload) { dev_err(kctx->kbdev->dev, "kbasep_replay_parse_payload: failed to map payload into kernel space\n"); return -EINVAL; } +#ifdef BASE_LEGACY_UK10_2_SUPPORT + if (KBASE_API_VERSION(10, 3) > replay_atom->kctx->api_version) { + base_jd_replay_payload_uk10_2 *payload_uk10_2; + u16 tiler_core_req; + u16 fragment_core_req; + + payload_uk10_2 = (base_jd_replay_payload_uk10_2 *) payload; + memcpy(&tiler_core_req, &payload_uk10_2->tiler_core_req, + sizeof(tiler_core_req)); + memcpy(&fragment_core_req, &payload_uk10_2->fragment_core_req, + sizeof(fragment_core_req)); + payload->tiler_core_req = (u32)(tiler_core_req & 0x7fff); + payload->fragment_core_req = (u32)(fragment_core_req & 0x7fff); + } +#endif /* BASE_LEGACY_UK10_2_SUPPORT */ + #ifdef CONFIG_MALI_DEBUG dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_payload: payload=%p\n", payload); dev_dbg(kctx->kbdev->dev, "Payload structure:\n" @@ -794,20 +809,17 @@ static int kbasep_replay_parse_payload(struct kbase_context *kctx, payload->fragment_core_req); payload_dump(kctx, payload); #endif - t_atom->core_req = payload->tiler_core_req | BASEP_JD_REQ_EVENT_NEVER; f_atom->core_req = payload->fragment_core_req | BASEP_JD_REQ_EVENT_NEVER; /* Sanity check core requirements*/ - if (unlikely((t_atom->core_req & BASEP_JD_REQ_ATOM_TYPE & - ~BASE_JD_REQ_COHERENT_GROUP) != BASE_JD_REQ_T || - (f_atom->core_req & BASEP_JD_REQ_ATOM_TYPE & - ~BASE_JD_REQ_COHERENT_GROUP & ~BASE_JD_REQ_FS_AFBC) != BASE_JD_REQ_FS || + if ((t_atom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_T || + (f_atom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_FS || t_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES || - f_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES)) { + f_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) { - int t_atom_type = t_atom->core_req & BASEP_JD_REQ_ATOM_TYPE & ~BASE_JD_REQ_COHERENT_GROUP; - int f_atom_type = f_atom->core_req & BASEP_JD_REQ_ATOM_TYPE & ~BASE_JD_REQ_COHERENT_GROUP & ~BASE_JD_REQ_FS_AFBC; + int t_atom_type = t_atom->core_req & BASE_JD_REQ_ATOM_TYPE & ~BASE_JD_REQ_COHERENT_GROUP; + int f_atom_type = f_atom->core_req & 
BASE_JD_REQ_ATOM_TYPE & ~BASE_JD_REQ_COHERENT_GROUP & ~BASE_JD_REQ_FS_AFBC; int t_has_ex_res = t_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES; int f_has_ex_res = f_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES; diff --git a/drivers/gpu/arm/midgard/mali_kbase_softjobs.c b/drivers/gpu/arm/midgard/mali_kbase_softjobs.c index 0fe489031cde..07b862546f80 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_softjobs.c +++ b/drivers/gpu/arm/midgard/mali_kbase_softjobs.c @@ -53,90 +53,85 @@ void kbasep_add_waiting_soft_job(struct kbase_jd_atom *katom) unsigned long lflags; spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags); - list_add_tail(&katom->dep_item[0], &kctx->waiting_soft_jobs); + list_add_tail(&katom->queue, &kctx->waiting_soft_jobs); spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags); } -static struct page *kbasep_translate_gpu_addr_to_kernel_page( - struct kbase_context *kctx, u64 gpu_addr) +void kbasep_remove_waiting_soft_job(struct kbase_jd_atom *katom) { - u64 pfn; - struct kbase_va_region *reg; - phys_addr_t addr = 0; - - KBASE_DEBUG_ASSERT(NULL != kctx); + struct kbase_context *kctx = katom->kctx; + unsigned long lflags; - pfn = gpu_addr >> PAGE_SHIFT; + spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags); + list_del(&katom->queue); + spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags); +} - kbase_gpu_vm_lock(kctx); - reg = kbase_region_tracker_find_region_enclosing_address( - kctx, gpu_addr); - if (!reg || (reg->flags & KBASE_REG_FREE)) - goto err_vm_unlock; - addr = reg->cpu_alloc->pages[pfn - reg->start_pfn]; - kbase_gpu_vm_unlock(kctx); +static void kbasep_add_waiting_with_timeout(struct kbase_jd_atom *katom) +{ + struct kbase_context *kctx = katom->kctx; - if (!addr) - goto err; + /* Record the start time of this atom so we could cancel it at + * the right time. + */ + katom->start_timestamp = ktime_get(); - return pfn_to_page(PFN_DOWN(addr)); + /* Add the atom to the waiting list before the timer is + * (re)started to make sure that it gets processed. 
+ */ + kbasep_add_waiting_soft_job(katom); -err_vm_unlock: - kbase_gpu_vm_unlock(kctx); -err: - return NULL; + /* Schedule timeout of this atom after a period if it is not active */ + if (!timer_pending(&kctx->soft_job_timeout)) { + int timeout_ms = atomic_read( + &kctx->kbdev->js_data.soft_job_timeout_ms); + mod_timer(&kctx->soft_job_timeout, + jiffies + msecs_to_jiffies(timeout_ms)); + } } -int kbasep_read_soft_event_status( +static int kbasep_read_soft_event_status( struct kbase_context *kctx, u64 evt, unsigned char *status) { - struct page *pg = kbasep_translate_gpu_addr_to_kernel_page( - kctx, evt); - unsigned char *mapped_pg; - u32 offset = evt & ~PAGE_MASK; + unsigned char *mapped_evt; + struct kbase_vmap_struct map; - KBASE_DEBUG_ASSERT(NULL != status); + mapped_evt = kbase_vmap(kctx, evt, sizeof(*mapped_evt), &map); + if (!mapped_evt) + return -EFAULT; - if (!pg) - return -1; + *status = *mapped_evt; - mapped_pg = (unsigned char *)kmap_atomic(pg); - KBASE_DEBUG_ASSERT(NULL != mapped_pg); /* kmap_atomic() must not fail */ - *status = *(mapped_pg + offset); - kunmap_atomic(mapped_pg); + kbase_vunmap(kctx, &map); return 0; } -int kbasep_write_soft_event_status( +static int kbasep_write_soft_event_status( struct kbase_context *kctx, u64 evt, unsigned char new_status) { - struct page *pg = kbasep_translate_gpu_addr_to_kernel_page( - kctx, evt); - unsigned char *mapped_pg; - u32 offset = evt & ~PAGE_MASK; + unsigned char *mapped_evt; + struct kbase_vmap_struct map; + + if ((new_status != BASE_JD_SOFT_EVENT_SET) && + (new_status != BASE_JD_SOFT_EVENT_RESET)) + return -EINVAL; - KBASE_DEBUG_ASSERT((new_status == BASE_JD_SOFT_EVENT_SET) || - (new_status == BASE_JD_SOFT_EVENT_RESET)); + mapped_evt = kbase_vmap(kctx, evt, sizeof(*mapped_evt), &map); + if (!mapped_evt) + return -EFAULT; - if (!pg) - return -1; + *mapped_evt = new_status; - mapped_pg = (unsigned char *)kmap_atomic(pg); - KBASE_DEBUG_ASSERT(NULL != mapped_pg); /* kmap_atomic() must not fail */ - *(mapped_pg + offset) = new_status; - kunmap_atomic(mapped_pg); + kbase_vunmap(kctx, &map); return 0; } static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom) { - struct kbase_va_region *reg; - phys_addr_t addr = 0; - u64 pfn; - u32 offset; - char *page; + struct kbase_vmap_struct map; + void *user_result; struct timespec ts; struct base_dump_cpu_gpu_counters data; u64 system_time; @@ -155,7 +150,9 @@ static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom) struct kbasep_js_device_data *js_devdata = &kctx->kbdev->js_data; /* We're suspended - queue this on the list of suspended jobs - * Use dep_item[1], because dep_item[0] is in use for 'waiting_soft_jobs' */ + * Use dep_item[1], because dep_item[0] was previously in use + * for 'waiting_soft_jobs'. 
+ */ mutex_lock(&js_devdata->runpool_mutex); list_add_tail(&katom->dep_item[1], &js_devdata->suspended_soft_jobs_list); mutex_unlock(&js_devdata->runpool_mutex); @@ -176,44 +173,20 @@ static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom) data.system_time = system_time; data.cycle_counter = cycle_counter; - pfn = jc >> PAGE_SHIFT; - offset = jc & ~PAGE_MASK; - /* Assume this atom will be cancelled until we know otherwise */ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; - if (offset > 0x1000 - sizeof(data)) { - /* Wouldn't fit in the page */ - return 0; - } - kbase_gpu_vm_lock(kctx); - reg = kbase_region_tracker_find_region_enclosing_address(kctx, jc); - if (reg && - (reg->flags & KBASE_REG_GPU_WR) && - reg->cpu_alloc && reg->cpu_alloc->pages) - addr = reg->cpu_alloc->pages[pfn - reg->start_pfn]; - - kbase_gpu_vm_unlock(kctx); - if (!addr) - return 0; - - page = kmap(pfn_to_page(PFN_DOWN(addr))); - if (!page) + /* GPU_WR access is checked on the range for returning the result to + * userspace for the following reasons: + * - security, this is currently how imported user bufs are checked. + * - userspace ddk guaranteed to assume region was mapped as GPU_WR */ + user_result = kbase_vmap_prot(kctx, jc, sizeof(data), KBASE_REG_GPU_WR, &map); + if (!user_result) return 0; - kbase_sync_single_for_cpu(katom->kctx->kbdev, - kbase_dma_addr(pfn_to_page(PFN_DOWN(addr))) + - offset, sizeof(data), - DMA_BIDIRECTIONAL); + memcpy(user_result, &data, sizeof(data)); - memcpy(page + offset, &data, sizeof(data)); - - kbase_sync_single_for_device(katom->kctx->kbdev, - kbase_dma_addr(pfn_to_page(PFN_DOWN(addr))) + - offset, sizeof(data), - DMA_BIDIRECTIONAL); - - kunmap(pfn_to_page(PFN_DOWN(addr))); + kbase_vunmap(kctx, &map); /* Atom was fine - mark it as done */ katom->event_code = BASE_JD_EVENT_DONE; @@ -223,22 +196,6 @@ static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom) #ifdef CONFIG_SYNC -/* Complete an atom that has returned '1' from kbase_process_soft_job (i.e. has waited) - * - * @param katom The atom to complete - */ -static void complete_soft_job(struct kbase_jd_atom *katom) -{ - struct kbase_context *kctx = katom->kctx; - - mutex_lock(&kctx->jctx.lock); - list_del(&katom->dep_item[0]); - kbase_finish_soft_job(katom); - if (jd_done_nolock(katom, NULL)) - kbase_js_sched_all(kctx->kbdev); - mutex_unlock(&kctx->jctx.lock); -} - static enum base_jd_event_code kbase_fence_trigger(struct kbase_jd_atom *katom, int result) { struct sync_pt *pt; @@ -280,7 +237,12 @@ static void kbase_fence_wait_worker(struct work_struct *data) katom = container_of(data, struct kbase_jd_atom, work); kctx = katom->kctx; - complete_soft_job(katom); + mutex_lock(&kctx->jctx.lock); + kbasep_remove_waiting_soft_job(katom); + kbase_finish_soft_job(katom); + if (jd_done_nolock(katom, NULL)) + kbase_js_sched_all(kctx->kbdev); + mutex_unlock(&kctx->jctx.lock); } static void kbase_fence_wait_callback(struct sync_fence *fence, struct sync_fence_waiter *waiter) @@ -297,11 +259,7 @@ static void kbase_fence_wait_callback(struct sync_fence *fence, struct sync_fenc /* Propagate the fence status to the atom. * If negative then cancel this atom and its dependencies. 
*/ -#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) - if (fence->status < 0) -#else - if (atomic_read(&fence->status) < 0) -#endif + if (kbase_fence_get_status(fence) < 0) katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; /* To prevent a potential deadlock we schedule the work onto the job_done_wq workqueue @@ -340,7 +298,13 @@ static int kbase_fence_wait(struct kbase_jd_atom *katom) queue_work(katom->kctx->jctx.job_done_wq, &katom->work); } +#ifdef CONFIG_MALI_FENCE_DEBUG + /* The timeout code will add this job to the list of waiting soft jobs. + */ + kbasep_add_waiting_with_timeout(katom); +#else kbasep_add_waiting_soft_job(katom); +#endif return 1; } @@ -372,6 +336,7 @@ static void kbase_fence_cancel_wait(struct kbase_jd_atom *katom) finish_softjob: katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + kbasep_remove_waiting_soft_job(katom); kbase_finish_soft_job(katom); if (jd_done_nolock(katom, NULL)) @@ -403,12 +368,12 @@ void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt) spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags); list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) { struct kbase_jd_atom *katom = list_entry( - entry, struct kbase_jd_atom, dep_item[0]); + entry, struct kbase_jd_atom, queue); - if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) == - BASE_JD_REQ_SOFT_EVENT_WAIT) { + switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { + case BASE_JD_REQ_SOFT_EVENT_WAIT: if (katom->jc == evt) { - list_del(&katom->dep_item[0]); + list_del(&katom->queue); katom->event_code = BASE_JD_EVENT_DONE; INIT_WORK(&katom->work, @@ -417,69 +382,192 @@ void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt) &katom->work); } else { /* There are still other waiting jobs, we cannot - * cancel the timer yet */ + * cancel the timer yet. + */ cancel_timer = 0; } + break; +#ifdef CONFIG_MALI_FENCE_DEBUG + case BASE_JD_REQ_SOFT_FENCE_WAIT: + /* Keep the timer running if fence debug is enabled and + * there are waiting fence jobs. + */ + cancel_timer = 0; + break; +#endif } } if (cancel_timer) - hrtimer_try_to_cancel(&kctx->soft_event_timeout); + del_timer(&kctx->soft_job_timeout); spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags); } -enum hrtimer_restart kbasep_soft_event_timeout_worker(struct hrtimer *timer) +#ifdef CONFIG_MALI_FENCE_DEBUG +static char *kbase_fence_debug_status_string(int status) +{ + if (status == 0) + return "signaled"; + else if (status > 0) + return "active"; + else + return "error"; +} + +static void kbase_fence_debug_check_atom(struct kbase_jd_atom *katom) { - struct kbase_context *kctx = container_of(timer, struct kbase_context, - soft_event_timeout); + struct kbase_context *kctx = katom->kctx; + struct device *dev = kctx->kbdev->dev; + int i; + + for (i = 0; i < 2; i++) { + struct kbase_jd_atom *dep; + + list_for_each_entry(dep, &katom->dep_head[i], dep_item[i]) { + if (dep->status == KBASE_JD_ATOM_STATE_UNUSED || + dep->status == KBASE_JD_ATOM_STATE_COMPLETED) + continue; + + if ((dep->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) + == BASE_JD_REQ_SOFT_FENCE_TRIGGER) { + struct sync_fence *fence = dep->fence; + int status = kbase_fence_get_status(fence); + + /* Found blocked trigger fence. 
*/ + dev_warn(dev, + "\tVictim trigger atom %d fence [%p] %s: %s\n", + kbase_jd_atom_id(kctx, dep), + fence, fence->name, + kbase_fence_debug_status_string(status)); + } + + kbase_fence_debug_check_atom(dep); + } + } +} + +static void kbase_fence_debug_wait_timeout(struct kbase_jd_atom *katom) +{ + struct kbase_context *kctx = katom->kctx; + struct device *dev = katom->kctx->kbdev->dev; + struct sync_fence *fence = katom->fence; + int timeout_ms = atomic_read(&kctx->kbdev->js_data.soft_job_timeout_ms); + int status = kbase_fence_get_status(fence); + unsigned long lflags; + + spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags); + + dev_warn(dev, "ctx %d_%d: Atom %d still waiting for fence [%p] after %dms\n", + kctx->tgid, kctx->id, + kbase_jd_atom_id(kctx, katom), + fence, timeout_ms); + dev_warn(dev, "\tGuilty fence [%p] %s: %s\n", + fence, fence->name, + kbase_fence_debug_status_string(status)); + + /* Search for blocked trigger atoms */ + kbase_fence_debug_check_atom(katom); + + spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags); + + /* Dump out the full state of all the Android sync fences. + * The function sync_dump() isn't exported to modules, so force + * sync_fence_wait() to time out to trigger sync_dump(). + */ + sync_fence_wait(fence, 1); +} + +struct kbase_fence_debug_work { + struct kbase_jd_atom *katom; + struct work_struct work; +}; + +static void kbase_fence_debug_wait_timeout_worker(struct work_struct *work) +{ + struct kbase_fence_debug_work *w = container_of(work, + struct kbase_fence_debug_work, work); + struct kbase_jd_atom *katom = w->katom; + struct kbase_context *kctx = katom->kctx; + + mutex_lock(&kctx->jctx.lock); + kbase_fence_debug_wait_timeout(katom); + mutex_unlock(&kctx->jctx.lock); + + kfree(w); +} + +static void kbase_fence_debug_timeout(struct kbase_jd_atom *katom) +{ + struct kbase_fence_debug_work *work; + struct kbase_context *kctx = katom->kctx; + + /* Enqueue fence debug worker. Use job_done_wq to get + * debug print ordered with job completion. + */ + work = kzalloc(sizeof(struct kbase_fence_debug_work), GFP_ATOMIC); + /* Ignore allocation failure. 
*/ + if (work) { + work->katom = katom; + INIT_WORK(&work->work, kbase_fence_debug_wait_timeout_worker); + queue_work(kctx->jctx.job_done_wq, &work->work); + } +} +#endif /* CONFIG_MALI_FENCE_DEBUG */ + +void kbasep_soft_job_timeout_worker(unsigned long data) +{ + struct kbase_context *kctx = (struct kbase_context *)data; u32 timeout_ms = (u32)atomic_read( - &kctx->kbdev->js_data.soft_event_timeout_ms); + &kctx->kbdev->js_data.soft_job_timeout_ms); + struct timer_list *timer = &kctx->soft_job_timeout; ktime_t cur_time = ktime_get(); - enum hrtimer_restart restarting = HRTIMER_NORESTART; + bool restarting = false; unsigned long lflags; struct list_head *entry, *tmp; spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags); list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) { - struct kbase_jd_atom *katom = list_entry( - entry, struct kbase_jd_atom, dep_item[0]); - - if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) == - BASE_JD_REQ_SOFT_EVENT_WAIT) { - s64 elapsed_time = - ktime_to_ms(ktime_sub(cur_time, - katom->start_timestamp)); - if (elapsed_time > (s64)timeout_ms) { - /* Take it out of the list to ensure that it - * will be cancelled in all cases */ - list_del(&katom->dep_item[0]); - - katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; - INIT_WORK(&katom->work, - kbasep_soft_event_complete_job); - queue_work(kctx->jctx.job_done_wq, - &katom->work); - } else { - restarting = HRTIMER_RESTART; - } + struct kbase_jd_atom *katom = list_entry(entry, + struct kbase_jd_atom, queue); + s64 elapsed_time = ktime_to_ms(ktime_sub(cur_time, + katom->start_timestamp)); + + if (elapsed_time < (s64)timeout_ms) { + restarting = true; + continue; + } + + switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { + case BASE_JD_REQ_SOFT_EVENT_WAIT: + /* Take it out of the list to ensure that it + * will be cancelled in all cases + */ + list_del(&katom->queue); + + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + INIT_WORK(&katom->work, kbasep_soft_event_complete_job); + queue_work(kctx->jctx.job_done_wq, &katom->work); + break; +#ifdef CONFIG_MALI_FENCE_DEBUG + case BASE_JD_REQ_SOFT_FENCE_WAIT: + kbase_fence_debug_timeout(katom); + break; +#endif } } if (restarting) - hrtimer_add_expires(timer, HR_TIMER_DELAY_MSEC(timeout_ms)); + mod_timer(timer, jiffies + msecs_to_jiffies(timeout_ms)); spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags); - - return restarting; } static int kbasep_soft_event_wait(struct kbase_jd_atom *katom) { struct kbase_context *kctx = katom->kctx; - ktime_t remaining; unsigned char status; /* The status of this soft-job is stored in jc */ - if (kbasep_read_soft_event_status(kctx, katom->jc, &status) != 0) { + if (kbasep_read_soft_event_status(kctx, katom->jc, &status)) { katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; return 0; } @@ -487,29 +575,12 @@ static int kbasep_soft_event_wait(struct kbase_jd_atom *katom) if (status == BASE_JD_SOFT_EVENT_SET) return 0; /* Event already set, nothing to do */ - /* Record the start time of this atom so we could cancel it at - * the right time */ - katom->start_timestamp = ktime_get(); - - /* Add the atom to the waiting list before the timer is - * (re)started to make sure that it gets processed */ - kbasep_add_waiting_soft_job(katom); - - /* Schedule cancellation of this atom after a period if it is - * not active */ - remaining = hrtimer_get_remaining(&kctx->soft_event_timeout); - if (remaining.tv64 <= 0) { - int timeout_ms = atomic_read( - &kctx->kbdev->js_data.soft_event_timeout_ms); - hrtimer_start(&kctx->soft_event_timeout, - 
HR_TIMER_DELAY_MSEC((u64)timeout_ms), - HRTIMER_MODE_REL); - } + kbasep_add_waiting_with_timeout(katom); return 1; } -static void kbasep_soft_event_update(struct kbase_jd_atom *katom, +static void kbasep_soft_event_update_locked(struct kbase_jd_atom *katom, unsigned char new_status) { /* Complete jobs waiting on the same event */ @@ -524,6 +595,38 @@ static void kbasep_soft_event_update(struct kbase_jd_atom *katom, kbasep_complete_triggered_soft_events(kctx, katom->jc); } +/** + * kbase_soft_event_update() - Update soft event state + * @kctx: Pointer to context + * @event: Event to update + * @new_status: New status value of event + * + * Update the event, and wake up any atoms waiting for the event. + * + * Return: 0 on success, a negative error code on failure. + */ +int kbase_soft_event_update(struct kbase_context *kctx, + u64 event, + unsigned char new_status) +{ + int err = 0; + + mutex_lock(&kctx->jctx.lock); + + if (kbasep_write_soft_event_status(kctx, event, new_status)) { + err = -ENOENT; + goto out; + } + + if (new_status == BASE_JD_SOFT_EVENT_SET) + kbasep_complete_triggered_soft_events(kctx, event); + +out: + mutex_unlock(&kctx->jctx.lock); + + return err; +} + static void kbasep_soft_event_cancel_job(struct kbase_jd_atom *katom) { katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; @@ -590,7 +693,7 @@ static void kbase_debug_copy_finish(struct kbase_jd_atom *katom) kfree(buffers[i].pages); if (reg && reg->gpu_alloc) { switch (reg->gpu_alloc->type) { - case BASE_MEM_IMPORT_TYPE_USER_BUFFER: + case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { free_user_buffer(&buffers[i]); break; @@ -705,7 +808,7 @@ static int kbase_debug_copy_prepare(struct kbase_jd_atom *katom) dev_warn(katom->kctx->kbdev->dev, "Copy buffer is not of same size as the external resource to copy.\n"); switch (reg->gpu_alloc->type) { - case BASE_MEM_IMPORT_TYPE_USER_BUFFER: + case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc; unsigned long nr_pages = @@ -731,7 +834,7 @@ static int kbase_debug_copy_prepare(struct kbase_jd_atom *katom) ret = 0; break; } - case BASE_MEM_IMPORT_TYPE_UMP: + case KBASE_MEM_TYPE_IMPORTED_UMP: { dev_warn(katom->kctx->kbdev->dev, "UMP is not supported for debug_copy jobs\n"); @@ -825,7 +928,7 @@ static int kbase_mem_copy_from_extres(struct kbase_context *kctx, } switch (reg->gpu_alloc->type) { - case BASE_MEM_IMPORT_TYPE_USER_BUFFER: + case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { for (i = 0; i < buf_data->nr_extres_pages; i++) { struct page *pg = buf_data->extres_pages[i]; @@ -846,13 +949,17 @@ static int kbase_mem_copy_from_extres(struct kbase_context *kctx, } break; #ifdef CONFIG_DMA_SHARED_BUFFER - case BASE_MEM_IMPORT_TYPE_UMM: { + case KBASE_MEM_TYPE_IMPORTED_UMM: { struct dma_buf *dma_buf = reg->gpu_alloc->imported.umm.dma_buf; KBASE_DEBUG_ASSERT(dma_buf != NULL); + KBASE_DEBUG_ASSERT(dma_buf->size == + buf_data->nr_extres_pages * PAGE_SIZE); - ret = dma_buf_begin_cpu_access(dma_buf, 0, - buf_data->nr_extres_pages*PAGE_SIZE, + ret = dma_buf_begin_cpu_access(dma_buf, +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) + 0, buf_data->nr_extres_pages*PAGE_SIZE, +#endif DMA_FROM_DEVICE); if (ret) goto out_unlock; @@ -872,8 +979,10 @@ static int kbase_mem_copy_from_extres(struct kbase_context *kctx, if (target_page_nr >= buf_data->nr_pages) break; } - dma_buf_end_cpu_access(dma_buf, 0, - buf_data->nr_extres_pages*PAGE_SIZE, + dma_buf_end_cpu_access(dma_buf, +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) + 0, buf_data->nr_extres_pages*PAGE_SIZE, +#endif 
DMA_FROM_DEVICE); break; } @@ -1187,7 +1296,7 @@ static void kbase_ext_res_finish(struct kbase_jd_atom *katom) int kbase_process_soft_job(struct kbase_jd_atom *katom) { - switch (katom->core_req & BASEP_JD_REQ_ATOM_TYPE) { + switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME: return kbase_dump_cpu_gpu_time(katom); #ifdef CONFIG_SYNC @@ -1206,10 +1315,10 @@ int kbase_process_soft_job(struct kbase_jd_atom *katom) case BASE_JD_REQ_SOFT_EVENT_WAIT: return kbasep_soft_event_wait(katom); case BASE_JD_REQ_SOFT_EVENT_SET: - kbasep_soft_event_update(katom, BASE_JD_SOFT_EVENT_SET); + kbasep_soft_event_update_locked(katom, BASE_JD_SOFT_EVENT_SET); break; case BASE_JD_REQ_SOFT_EVENT_RESET: - kbasep_soft_event_update(katom, BASE_JD_SOFT_EVENT_RESET); + kbasep_soft_event_update_locked(katom, BASE_JD_SOFT_EVENT_RESET); break; case BASE_JD_REQ_SOFT_DEBUG_COPY: { @@ -1239,7 +1348,7 @@ int kbase_process_soft_job(struct kbase_jd_atom *katom) void kbase_cancel_soft_job(struct kbase_jd_atom *katom) { - switch (katom->core_req & BASEP_JD_REQ_ATOM_TYPE) { + switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { #ifdef CONFIG_SYNC case BASE_JD_REQ_SOFT_FENCE_WAIT: kbase_fence_cancel_wait(katom); @@ -1256,7 +1365,7 @@ void kbase_cancel_soft_job(struct kbase_jd_atom *katom) int kbase_prepare_soft_job(struct kbase_jd_atom *katom) { - switch (katom->core_req & BASEP_JD_REQ_ATOM_TYPE) { + switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME: { if (0 != (katom->jc & KBASE_CACHE_ALIGNMENT_MASK)) @@ -1331,7 +1440,7 @@ int kbase_prepare_soft_job(struct kbase_jd_atom *katom) void kbase_finish_soft_job(struct kbase_jd_atom *katom) { - switch (katom->core_req & BASEP_JD_REQ_ATOM_TYPE) { + switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME: /* Nothing to do */ break; @@ -1400,14 +1509,14 @@ void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev) /* Remove from the global list */ list_del(&katom_iter->dep_item[1]); /* Remove from the context's list of waiting soft jobs */ - list_del(&katom_iter->dep_item[0]); + kbasep_remove_waiting_soft_job(katom_iter); if (kbase_process_soft_job(katom_iter) == 0) { kbase_finish_soft_job(katom_iter); resched |= jd_done_nolock(katom_iter, NULL); } else { KBASE_DEBUG_ASSERT((katom_iter->core_req & - BASEP_JD_REQ_ATOM_TYPE) + BASE_JD_REQ_SOFT_JOB_TYPE) != BASE_JD_REQ_SOFT_REPLAY); } diff --git a/drivers/gpu/arm/midgard/mali_kbase_sync.h b/drivers/gpu/arm/midgard/mali_kbase_sync.h index 6d8e34d3c3ae..820bddc8c8b1 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_sync.h +++ b/drivers/gpu/arm/midgard/mali_kbase_sync.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -36,6 +36,15 @@ static inline struct sync_timeline *sync_pt_parent(struct sync_pt *pt) } #endif +static inline int kbase_fence_get_status(struct sync_fence *fence) +{ +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) + return fence->status; +#else + return atomic_read(&fence->status); +#endif +} + /* * Create a stream object. * Built on top of timeline object. 
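As a usage sketch for the new kbase_soft_event_update() entry point added in mali_kbase_softjobs.c above: the wrapper name and the source of 'evt' are hypothetical, while kbase_soft_event_update() and the BASE_JD_SOFT_EVENT_SET/RESET values come from the patch.

static int example_signal_soft_event(struct kbase_context *kctx, u64 evt,
		bool set)
{
	unsigned char new_status = set ? BASE_JD_SOFT_EVENT_SET :
			BASE_JD_SOFT_EVENT_RESET;

	/*
	 * Writes the one-byte status at GPU address 'evt' and, when
	 * setting, completes any BASE_JD_REQ_SOFT_EVENT_WAIT atoms queued
	 * on that address; returns 0 or a negative error code.
	 */
	return kbase_soft_event_update(kctx, evt, new_status);
}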
diff --git a/drivers/gpu/arm/midgard/mali_kbase_sync_user.c b/drivers/gpu/arm/midgard/mali_kbase_sync_user.c index ddd0847a69c5..b9baa913a693 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_sync_user.c +++ b/drivers/gpu/arm/midgard/mali_kbase_sync_user.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -33,7 +33,6 @@ #include #include #include -#include static int kbase_stream_close(struct inode *inode, struct file *file) { diff --git a/drivers/gpu/arm/midgard/mali_kbase_tlstream.c b/drivers/gpu/arm/midgard/mali_kbase_tlstream.c index 962b19cf380c..ad88b7b69018 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_tlstream.c +++ b/drivers/gpu/arm/midgard/mali_kbase_tlstream.c @@ -29,7 +29,6 @@ #include #include #include -#include /*****************************************************************************/ @@ -1097,7 +1096,7 @@ static ssize_t kbasep_tlstream_read( while (copy_len < size) { enum tl_stream_type stype; - unsigned int rb_idx_raw; + unsigned int rb_idx_raw = 0; unsigned int rb_idx; size_t rb_size; diff --git a/drivers/gpu/arm/midgard/mali_kbase_uku.h b/drivers/gpu/arm/midgard/mali_kbase_uku.h index 38f8e1f2f390..e880d9663d0e 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_uku.h +++ b/drivers/gpu/arm/midgard/mali_kbase_uku.h @@ -49,9 +49,16 @@ * 10.2: * - Add KBASE_FUNC_MEM_JIT_INIT which allows clients to request a custom VA * region for use with JIT (ignored on 32-bit platforms) + * + * 10.3: + * - base_jd_core_req typedef-ed to u32 (instead of to u16) + * - two flags added: BASE_JD_REQ_SKIP_CACHE_STAT / _END + * + * 10.4: + * - Removed KBASE_FUNC_EXT_BUFFER_LOCK used only in internal tests */ #define BASE_UK_VERSION_MAJOR 10 -#define BASE_UK_VERSION_MINOR 2 +#define BASE_UK_VERSION_MINOR 4 struct kbase_uk_mem_alloc { union uk_header header; @@ -296,16 +303,6 @@ struct kbase_uk_model_control_params { }; #endif /* SUPPORT_MALI_NO_MALI */ -#define KBASE_MAXIMUM_EXT_RESOURCES 255 - -struct kbase_uk_ext_buff_kds_data { - union uk_header header; - union kbase_pointer external_resource; - union kbase_pointer file_descriptor; - u32 num_res; /* limited to KBASE_MAXIMUM_EXT_RESOURCES */ - u32 padding; -}; - #ifdef BASE_LEGACY_UK8_SUPPORT struct kbase_uk_keep_gpu_powered { union uk_header header; @@ -474,7 +471,6 @@ enum kbase_uk_function_id { KBASE_FUNC_FIND_CPU_OFFSET = (UK_FUNC_ID + 15), KBASE_FUNC_GET_VERSION = (UK_FUNC_ID + 16), - KBASE_FUNC_EXT_BUFFER_LOCK = (UK_FUNC_ID + 17), KBASE_FUNC_SET_FLAGS = (UK_FUNC_ID + 18), KBASE_FUNC_SET_TEST_DATA = (UK_FUNC_ID + 19), diff --git a/drivers/gpu/arm/midgard/mali_kbase_vinstr.c b/drivers/gpu/arm/midgard/mali_kbase_vinstr.c index 45de4e020dd6..bd6095f77480 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_vinstr.c +++ b/drivers/gpu/arm/midgard/mali_kbase_vinstr.c @@ -28,6 +28,7 @@ #include #include +#include #include #include #include @@ -62,6 +63,14 @@ enum { JM_HWCNT_BM }; +enum vinstr_state { + VINSTR_IDLE, + VINSTR_DUMPING, + VINSTR_SUSPENDING, + VINSTR_SUSPENDED, + VINSTR_RESUMING +}; + /** * struct kbase_vinstr_context - vinstr context per device * @lock: protects the entire vinstr context @@ -75,7 +84,12 @@ enum { * with hardware * @reprogram: when true, reprogram hwcnt block with the new set of * counters - * @suspended: when true, the context has been suspended + * @state: vinstr state + * 
@state_lock: protects information about vinstr state + * @suspend_waitq: notification queue to trigger state re-validation + * @suspend_cnt: reference counter of vinstr's suspend state + * @suspend_work: worker to execute on entering suspended state + * @resume_work: worker to execute on leaving suspended state * @nclients: number of attached clients, pending or otherwise * @waiting_clients: head of list of clients being periodically sampled * @idle_clients: head of list of clients being idle @@ -95,7 +109,13 @@ struct kbase_vinstr_context { size_t dump_size; u32 bitmap[4]; bool reprogram; - bool suspended; + + enum vinstr_state state; + struct spinlock state_lock; + wait_queue_head_t suspend_waitq; + unsigned int suspend_cnt; + struct work_struct suspend_work; + struct work_struct resume_work; u32 nclients; struct list_head waiting_clients; @@ -190,7 +210,10 @@ static const struct file_operations vinstr_client_fops = { static int enable_hwcnt(struct kbase_vinstr_context *vinstr_ctx) { + struct kbase_context *kctx = vinstr_ctx->kctx; + struct kbase_device *kbdev = kctx->kbdev; struct kbase_uk_hwcnt_setup setup; + int err; setup.dump_buffer = vinstr_ctx->gpu_va; setup.jm_bm = vinstr_ctx->bitmap[JM_HWCNT_BM]; @@ -198,12 +221,46 @@ static int enable_hwcnt(struct kbase_vinstr_context *vinstr_ctx) setup.shader_bm = vinstr_ctx->bitmap[SHADER_HWCNT_BM]; setup.mmu_l2_bm = vinstr_ctx->bitmap[MMU_L2_HWCNT_BM]; - return kbase_instr_hwcnt_enable(vinstr_ctx->kctx, &setup); + /* Mark the context as active so the GPU is kept turned on */ + /* A suspend won't happen here, because we're in a syscall from a + * userspace thread. */ + kbase_pm_context_active(kbdev); + + /* Schedule the context in */ + kbasep_js_schedule_privileged_ctx(kbdev, kctx); + err = kbase_instr_hwcnt_enable_internal(kbdev, kctx, &setup); + if (err) { + /* Release the context. This had its own Power Manager Active + * reference */ + kbasep_js_release_privileged_ctx(kbdev, kctx); + + /* Also release our Power Manager Active reference */ + kbase_pm_context_idle(kbdev); + } + + return err; } static void disable_hwcnt(struct kbase_vinstr_context *vinstr_ctx) { - kbase_instr_hwcnt_disable(vinstr_ctx->kctx); + struct kbase_context *kctx = vinstr_ctx->kctx; + struct kbase_device *kbdev = kctx->kbdev; + int err; + + err = kbase_instr_hwcnt_disable_internal(kctx); + if (err) { + dev_warn(kbdev->dev, "Failed to disable HW counters (ctx:%p)", + kctx); + return; + } + + /* Release the context. This had its own Power Manager Active reference. */ + kbasep_js_release_privileged_ctx(kbdev, kctx); + + /* Also release our Power Manager Active reference. */ + kbase_pm_context_idle(kbdev); + + dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p", kctx); } static int reprogram_hwcnt(struct kbase_vinstr_context *vinstr_ctx) @@ -312,6 +369,8 @@ static int kbasep_vinstr_create_kctx(struct kbase_vinstr_context *vinstr_ctx) { struct kbase_device *kbdev = vinstr_ctx->kbdev; struct kbasep_kctx_list_element *element; + unsigned long flags; + bool enable_backend = false; int err; vinstr_ctx->kctx = kbase_create_context(vinstr_ctx->kbdev, true); @@ -349,7 +408,16 @@ static int kbasep_vinstr_create_kctx(struct kbase_vinstr_context *vinstr_ctx) "couldn't add kctx to kctx_list\n"); } - err = enable_hwcnt(vinstr_ctx); + /* Don't enable hardware counters if vinstr is suspended. + * Note that vinstr resume code is run under vinstr context lock, + * lower layer will be enabled as needed on resume. 
*/ + spin_lock_irqsave(&vinstr_ctx->state_lock, flags); + if (VINSTR_IDLE == vinstr_ctx->state) + enable_backend = true; + spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); + if (enable_backend) + err = enable_hwcnt(vinstr_ctx); + if (err) { kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx); kbase_destroy_context(vinstr_ctx->kctx); @@ -865,6 +933,7 @@ static void kbasep_vinstr_add_dump_request( static int kbasep_vinstr_collect_and_accumulate( struct kbase_vinstr_context *vinstr_ctx, u64 *timestamp) { + unsigned long flags; int rcode; #ifdef CONFIG_MALI_NO_MALI @@ -872,6 +941,15 @@ static int kbasep_vinstr_collect_and_accumulate( gpu_model_set_dummy_prfcnt_base_cpu(vinstr_ctx->cpu_va); #endif + spin_lock_irqsave(&vinstr_ctx->state_lock, flags); + if (VINSTR_IDLE != vinstr_ctx->state) { + spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); + return -EAGAIN; + } else { + vinstr_ctx->state = VINSTR_DUMPING; + } + spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); + /* Request HW counters dump. * Disable preemption to make dump timestamp more accurate. */ preempt_disable(); @@ -883,6 +961,21 @@ static int kbasep_vinstr_collect_and_accumulate( rcode = kbase_instr_hwcnt_wait_for_dump(vinstr_ctx->kctx); WARN_ON(rcode); + spin_lock_irqsave(&vinstr_ctx->state_lock, flags); + switch (vinstr_ctx->state) + { + case VINSTR_SUSPENDING: + schedule_work(&vinstr_ctx->suspend_work); + break; + case VINSTR_DUMPING: + vinstr_ctx->state = VINSTR_IDLE; + wake_up_all(&vinstr_ctx->suspend_waitq); + break; + default: + break; + } + spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); + /* Accumulate values of collected counters. */ if (!rcode) accum_clients(vinstr_ctx); @@ -970,6 +1063,20 @@ static int kbasep_vinstr_fill_dump_buffer_kernel( static void kbasep_vinstr_reprogram( struct kbase_vinstr_context *vinstr_ctx) { + unsigned long flags; + bool suspended = false; + + /* Don't enable hardware counters if vinstr is suspended. */ + spin_lock_irqsave(&vinstr_ctx->state_lock, flags); + if (VINSTR_IDLE != vinstr_ctx->state) + suspended = true; + spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); + if (suspended) + return; + + /* Change to suspended state is done while holding vinstr context + * lock. Below code will then no re-enable the instrumentation. */ + if (vinstr_ctx->reprogram) { struct kbase_vinstr_client *iter; @@ -1074,6 +1181,7 @@ static int kbasep_vinstr_service_task(void *data) while (!kthread_should_stop()) { struct kbase_vinstr_client *cli = NULL; struct kbase_vinstr_client *tmp; + int rcode; u64 timestamp = kbasep_vinstr_get_timestamp(); u64 dump_time = 0; @@ -1116,7 +1224,8 @@ static int kbasep_vinstr_service_task(void *data) continue; } - kbasep_vinstr_collect_and_accumulate(vinstr_ctx, ×tamp); + rcode = kbasep_vinstr_collect_and_accumulate(vinstr_ctx, + ×tamp); INIT_LIST_HEAD(&expired_requests); @@ -1145,10 +1254,11 @@ static int kbasep_vinstr_service_task(void *data) /* Expect only periodically sampled clients. */ BUG_ON(0 == cli->dump_interval); - kbasep_vinstr_update_client( - cli, - timestamp, - BASE_HWCNT_READER_EVENT_PERIODIC); + if (!rcode) + kbasep_vinstr_update_client( + cli, + timestamp, + BASE_HWCNT_READER_EVENT_PERIODIC); /* Set new dumping time. Drop missed probing times. 
*/ do { @@ -1277,11 +1387,6 @@ static long kbasep_vinstr_hwcnt_reader_ioctl_set_interval( mutex_lock(&vinstr_ctx->lock); - if (vinstr_ctx->suspended) { - mutex_unlock(&vinstr_ctx->lock); - return -EBUSY; - } - list_del(&cli->list); cli->dump_interval = interval; @@ -1572,6 +1677,84 @@ static int kbasep_vinstr_hwcnt_reader_release(struct inode *inode, /*****************************************************************************/ +/** + * kbasep_vinstr_kick_scheduler - trigger scheduler cycle + * @kbdev: pointer to kbase device structure + */ +static void kbasep_vinstr_kick_scheduler(struct kbase_device *kbdev) +{ + struct kbasep_js_device_data *js_devdata = &kbdev->js_data; + unsigned long flags; + + down(&js_devdata->schedule_sem); + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + kbase_jm_kick_all(kbdev); + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + up(&js_devdata->schedule_sem); +} + +/** + * kbasep_vinstr_suspend_worker - worker suspending vinstr module + * @data: pointer to work structure + */ +static void kbasep_vinstr_suspend_worker(struct work_struct *data) +{ + struct kbase_vinstr_context *vinstr_ctx; + unsigned long flags; + + vinstr_ctx = container_of(data, struct kbase_vinstr_context, + suspend_work); + + mutex_lock(&vinstr_ctx->lock); + + if (vinstr_ctx->kctx) + disable_hwcnt(vinstr_ctx); + + spin_lock_irqsave(&vinstr_ctx->state_lock, flags); + vinstr_ctx->state = VINSTR_SUSPENDED; + wake_up_all(&vinstr_ctx->suspend_waitq); + spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); + + mutex_unlock(&vinstr_ctx->lock); + + /* Kick GPU scheduler to allow entering protected mode. + * This must happen after vinstr was suspended. */ + kbasep_vinstr_kick_scheduler(vinstr_ctx->kbdev); +} + +/** + * kbasep_vinstr_suspend_worker - worker resuming vinstr module + * @data: pointer to work structure + */ +static void kbasep_vinstr_resume_worker(struct work_struct *data) +{ + struct kbase_vinstr_context *vinstr_ctx; + unsigned long flags; + + vinstr_ctx = container_of(data, struct kbase_vinstr_context, + resume_work); + + mutex_lock(&vinstr_ctx->lock); + + if (vinstr_ctx->kctx) + enable_hwcnt(vinstr_ctx); + + spin_lock_irqsave(&vinstr_ctx->state_lock, flags); + vinstr_ctx->state = VINSTR_IDLE; + wake_up_all(&vinstr_ctx->suspend_waitq); + spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); + + mutex_unlock(&vinstr_ctx->lock); + + /* Kick GPU scheduler to allow entering protected mode. + * Note that scheduler state machine might requested re-entry to + * protected mode before vinstr was resumed. + * This must happen after vinstr was release. 
*/ + kbasep_vinstr_kick_scheduler(vinstr_ctx->kbdev); +} + +/*****************************************************************************/ + struct kbase_vinstr_context *kbase_vinstr_init(struct kbase_device *kbdev) { struct kbase_vinstr_context *vinstr_ctx; @@ -1583,8 +1766,14 @@ struct kbase_vinstr_context *kbase_vinstr_init(struct kbase_device *kbdev) INIT_LIST_HEAD(&vinstr_ctx->idle_clients); INIT_LIST_HEAD(&vinstr_ctx->waiting_clients); mutex_init(&vinstr_ctx->lock); + spin_lock_init(&vinstr_ctx->state_lock); vinstr_ctx->kbdev = kbdev; vinstr_ctx->thread = NULL; + vinstr_ctx->state = VINSTR_IDLE; + vinstr_ctx->suspend_cnt = 0; + INIT_WORK(&vinstr_ctx->suspend_work, kbasep_vinstr_suspend_worker); + INIT_WORK(&vinstr_ctx->resume_work, kbasep_vinstr_resume_worker); + init_waitqueue_head(&vinstr_ctx->suspend_waitq); atomic_set(&vinstr_ctx->request_pending, 0); init_waitqueue_head(&vinstr_ctx->waitq); @@ -1600,6 +1789,10 @@ void kbase_vinstr_term(struct kbase_vinstr_context *vinstr_ctx) if (vinstr_ctx->thread) kthread_stop(vinstr_ctx->thread); + /* Wait for workers. */ + flush_work(&vinstr_ctx->suspend_work); + flush_work(&vinstr_ctx->resume_work); + while (1) { struct list_head *list = &vinstr_ctx->idle_clients; @@ -1732,11 +1925,6 @@ int kbase_vinstr_hwc_dump(struct kbase_vinstr_client *cli, mutex_lock(&vinstr_ctx->lock); - if (vinstr_ctx->suspended) { - rcode = -EBUSY; - goto exit; - } - if (event_mask & cli->event_mask) { rcode = kbasep_vinstr_collect_and_accumulate( vinstr_ctx, @@ -1772,11 +1960,6 @@ int kbase_vinstr_hwc_clear(struct kbase_vinstr_client *cli) mutex_lock(&vinstr_ctx->lock); - if (vinstr_ctx->suspended) { - rcode = -EBUSY; - goto exit; - } - rcode = kbasep_vinstr_collect_and_accumulate(vinstr_ctx, &unused); if (rcode) goto exit; @@ -1793,40 +1976,66 @@ exit: return rcode; } -void kbase_vinstr_hwc_suspend(struct kbase_vinstr_context *vinstr_ctx) +int kbase_vinstr_try_suspend(struct kbase_vinstr_context *vinstr_ctx) { - u64 unused; + unsigned long flags; + int ret = -EAGAIN; KBASE_DEBUG_ASSERT(vinstr_ctx); - mutex_lock(&vinstr_ctx->lock); - if (!vinstr_ctx->nclients || vinstr_ctx->suspended) { - mutex_unlock(&vinstr_ctx->lock); - return; + spin_lock_irqsave(&vinstr_ctx->state_lock, flags); + switch (vinstr_ctx->state) { + case VINSTR_SUSPENDED: + vinstr_ctx->suspend_cnt++; + /* overflow shall not happen */ + BUG_ON(0 == vinstr_ctx->suspend_cnt); + ret = 0; + break; + + case VINSTR_IDLE: + vinstr_ctx->state = VINSTR_SUSPENDING; + schedule_work(&vinstr_ctx->suspend_work); + break; + + case VINSTR_DUMPING: + vinstr_ctx->state = VINSTR_SUSPENDING; + break; + + case VINSTR_SUSPENDING: + /* fall through */ + case VINSTR_RESUMING: + break; + + default: + BUG(); + break; } + spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); - kbasep_vinstr_collect_and_accumulate(vinstr_ctx, &unused); - vinstr_ctx->suspended = true; - vinstr_ctx->suspended_clients = vinstr_ctx->waiting_clients; - INIT_LIST_HEAD(&vinstr_ctx->waiting_clients); - mutex_unlock(&vinstr_ctx->lock); + return ret; } -void kbase_vinstr_hwc_resume(struct kbase_vinstr_context *vinstr_ctx) +void kbase_vinstr_suspend(struct kbase_vinstr_context *vinstr_ctx) { + wait_event(vinstr_ctx->suspend_waitq, + (0 == kbase_vinstr_try_suspend(vinstr_ctx))); +} + +void kbase_vinstr_resume(struct kbase_vinstr_context *vinstr_ctx) +{ + unsigned long flags; + KBASE_DEBUG_ASSERT(vinstr_ctx); - mutex_lock(&vinstr_ctx->lock); - if (!vinstr_ctx->nclients || !vinstr_ctx->suspended) { - mutex_unlock(&vinstr_ctx->lock); - return; + 
spin_lock_irqsave(&vinstr_ctx->state_lock, flags); + BUG_ON(VINSTR_SUSPENDING == vinstr_ctx->state); + if (VINSTR_SUSPENDED == vinstr_ctx->state) { + BUG_ON(0 == vinstr_ctx->suspend_cnt); + vinstr_ctx->suspend_cnt--; + if (0 == vinstr_ctx->suspend_cnt) { + vinstr_ctx->state = VINSTR_RESUMING; + schedule_work(&vinstr_ctx->resume_work); + } } - - vinstr_ctx->suspended = false; - vinstr_ctx->waiting_clients = vinstr_ctx->suspended_clients; - vinstr_ctx->reprogram = true; - kbasep_vinstr_reprogram(vinstr_ctx); - atomic_set(&vinstr_ctx->request_pending, 1); - wake_up_all(&vinstr_ctx->waitq); - mutex_unlock(&vinstr_ctx->lock); + spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); } diff --git a/drivers/gpu/arm/midgard/mali_kbase_vinstr.h b/drivers/gpu/arm/midgard/mali_kbase_vinstr.h index d32462aec653..6207d25aef06 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_vinstr.h +++ b/drivers/gpu/arm/midgard/mali_kbase_vinstr.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -103,18 +103,39 @@ int kbase_vinstr_hwc_dump( int kbase_vinstr_hwc_clear(struct kbase_vinstr_client *cli); /** - * kbase_vinstr_hwc_suspend - suspends hardware counter collection for - * a given kbase context + * kbase_vinstr_try_suspend - try suspending operation of a given vinstr context * @vinstr_ctx: vinstr context + * + * Return: 0 on success, or negative if state change is in progress + * + * Warning: This API call is non-generic. It is meant to be used only by + * job scheduler state machine. + * + * Function initiates vinstr switch to suspended state. Once it was called + * vinstr enters suspending state. If function return non-zero value, it + * indicates that state switch is not complete and function must be called + * again. On state switch vinstr will trigger job scheduler state machine + * cycle. + */ +int kbase_vinstr_try_suspend(struct kbase_vinstr_context *vinstr_ctx); + +/** + * kbase_vinstr_suspend - suspends operation of a given vinstr context + * @vinstr_ctx: vinstr context + * + * Function initiates vinstr switch to suspended state. Then it blocks until + * operation is completed. */ -void kbase_vinstr_hwc_suspend(struct kbase_vinstr_context *vinstr_ctx); +void kbase_vinstr_suspend(struct kbase_vinstr_context *vinstr_ctx); /** - * kbase_vinstr_hwc_resume - resumes hardware counter collection for - * a given kbase context + * kbase_vinstr_resume - resumes operation of a given vinstr context * @vinstr_ctx: vinstr context + * + * Function can be called only if it was preceded by a successful call + * to kbase_vinstr_suspend. 
*/ -void kbase_vinstr_hwc_resume(struct kbase_vinstr_context *vinstr_ctx); +void kbase_vinstr_resume(struct kbase_vinstr_context *vinstr_ctx); /** * kbase_vinstr_dump_size - Return required size of dump buffer @@ -126,7 +147,7 @@ size_t kbase_vinstr_dump_size(struct kbase_device *kbdev); /** * kbase_vinstr_detach_client - Detach a client from the vinstr core - * @cli: Pointer to vinstr client + * @cli: pointer to vinstr client */ void kbase_vinstr_detach_client(struct kbase_vinstr_client *cli); diff --git a/drivers/gpu/arm/midgard/mali_linux_trace.h b/drivers/gpu/arm/midgard/mali_linux_trace.h index 93fc5eaf273b..2be06a552768 100644 --- a/drivers/gpu/arm/midgard/mali_linux_trace.h +++ b/drivers/gpu/arm/midgard/mali_linux_trace.h @@ -179,24 +179,6 @@ TRACE_EVENT(mali_total_alloc_pages_change, TP_printk("event=%lld", __entry->event_id) ); -/** - * mali_sw_counter - not currently used - * @event_id: counter id - */ -TRACE_EVENT(mali_sw_counter, - TP_PROTO(unsigned int event_id, signed long long value), - TP_ARGS(event_id, value), - TP_STRUCT__entry( - __field(int, event_id) - __field(long long, value) - ), - TP_fast_assign( - __entry->event_id = event_id; - __entry->value = value; - ), - TP_printk("event %d = %lld", __entry->event_id, __entry->value) -); - #endif /* _TRACE_MALI_H */ #undef TRACE_INCLUDE_PATH diff --git a/drivers/gpu/arm/midgard/mali_midg_regmap.h b/drivers/gpu/arm/midgard/mali_midg_regmap.h index 7498407e6372..de6c206f3c71 100644 --- a/drivers/gpu/arm/midgard/mali_midg_regmap.h +++ b/drivers/gpu/arm/midgard/mali_midg_regmap.h @@ -214,6 +214,8 @@ #define JS_AFFINITY_LO 0x10 /* (RO) Core affinity mask for job slot n, low word */ #define JS_AFFINITY_HI 0x14 /* (RO) Core affinity mask for job slot n, high word */ #define JS_CONFIG 0x18 /* (RO) Configuration settings for job slot n */ +#define JS_XAFFINITY 0x1C /* (RO) Extended affinity mask for job + slot n */ #define JS_COMMAND 0x20 /* (WO) Command register for job slot n */ #define JS_STATUS 0x24 /* (RO) Status register for job slot n */ @@ -224,6 +226,8 @@ #define JS_AFFINITY_NEXT_LO 0x50 /* (RW) Next core affinity mask for job slot n, low word */ #define JS_AFFINITY_NEXT_HI 0x54 /* (RW) Next core affinity mask for job slot n, high word */ #define JS_CONFIG_NEXT 0x58 /* (RW) Next configuration settings for job slot n */ +#define JS_XAFFINITY_NEXT 0x5C /* (RW) Next extended affinity mask for + job slot n */ #define JS_COMMAND_NEXT 0x60 /* (RW) Next command register for job slot n */ @@ -394,6 +398,11 @@ #define JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK (1u << 15) #define JS_CONFIG_THREAD_PRI(n) ((n) << 16) +/* JS_XAFFINITY register values */ +#define JS_XAFFINITY_XAFFINITY_ENABLE (1u << 0) +#define JS_XAFFINITY_TILER_ENABLE (1u << 8) +#define JS_XAFFINITY_CACHE_ENABLE (1u << 16) + /* JS_STATUS register values */ /* NOTE: Please keep this values in sync with enum base_jd_event_code in mali_base_kernel.h. diff --git a/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h b/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h index d4813f7f8a35..34f6d57382e2 100644 --- a/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h +++ b/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -73,8 +73,8 @@ extern struct kbase_pm_callback_conf pm_callbacks; /** - * Secure mode switch + * Protected mode switch * - * Attached value: pointer to @ref kbase_secure_ops + * Attached value: pointer to @ref kbase_protected_ops */ -#define SECURE_CALLBACKS (NULL) +#define PROTECTED_CALLBACKS (NULL) diff --git a/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_juno_soc.c b/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_juno_soc.c index 3baf3d96d41a..c65481810927 100644 --- a/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_juno_soc.c +++ b/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_juno_soc.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -66,48 +66,76 @@ struct kbase_pm_callback_conf pm_callbacks = { }; /* - * Juno Secure Mode integration + * Juno Protected Mode integration */ /* SMC Function Numbers */ -#define JUNO_SMC_SECURE_ENABLE_FUNC 0xff06 -#define JUNO_SMC_SECURE_DISABLE_FUNC 0xff07 +#define JUNO_SMC_PROTECTED_ENTER_FUNC 0xff06 +#define JUNO_SMC_PROTECTED_RESET_FUNC 0xff07 -static int juno_secure_mode_enable(struct kbase_device *kbdev) +static int juno_protected_mode_enter(struct kbase_device *kbdev) { - u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; + /* T62X in SoC detected */ + u64 ret = kbase_invoke_smc(SMC_OEN_SIP, + JUNO_SMC_PROTECTED_ENTER_FUNC, false, + 0, 0, 0); + return ret; +} - if (gpu_id == GPU_ID_MAKE(GPU_ID_PI_T62X, 0, 1, 0) && - kbdev->reg_start == 0x2d000000) { - /* T62X in SoC detected */ - u64 ret = kbase_invoke_smc(SMC_OEN_SIP, - JUNO_SMC_SECURE_ENABLE_FUNC, false, - 0, 0, 0); - return ret; - } - - return -EINVAL; /* Not supported */ +/* TODO: Remove these externs, reset should should be done by the firmware */ +extern void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value, + struct kbase_context *kctx); + +extern u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset, + struct kbase_context *kctx); + +static int juno_protected_mode_reset(struct kbase_device *kbdev) +{ + + /* T62X in SoC detected */ + u64 ret = kbase_invoke_smc(SMC_OEN_SIP, + JUNO_SMC_PROTECTED_RESET_FUNC, false, + 0, 0, 0); + + /* TODO: Remove this reset, it should be done by the firmware */ + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), + GPU_COMMAND_HARD_RESET, NULL); + + while ((kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL) + & RESET_COMPLETED) != RESET_COMPLETED) + ; + + return ret; } -static int juno_secure_mode_disable(struct kbase_device *kbdev) +static bool juno_protected_mode_supported(struct kbase_device *kbdev) { u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; + /* + * Protected mode is only supported for the built in GPU + * _and_ only if the right firmware is running. + * + * Given that at init time the GPU is not powered up the + * juno_protected_mode_reset function can't be used as + * is needs to access GPU registers. 
+ * However, although we don't want the GPU to boot into + * protected mode we know a GPU reset will be done after + * this function is called so although we set the GPU to + * protected mode it will exit protected mode before the + * driver is ready to run work. + */ if (gpu_id == GPU_ID_MAKE(GPU_ID_PI_T62X, 0, 1, 0) && - kbdev->reg_start == 0x2d000000) { - /* T62X in SoC detected */ - u64 ret = kbase_invoke_smc(SMC_OEN_SIP, - JUNO_SMC_SECURE_DISABLE_FUNC, false, - 0, 0, 0); - return ret; - } - - return -EINVAL; /* Not supported */ + (kbdev->reg_start == 0x2d000000)) + return juno_protected_mode_enter(kbdev) == 0; + + return false; } -struct kbase_secure_ops juno_secure_ops = { - .secure_mode_enable = juno_secure_mode_enable, - .secure_mode_disable = juno_secure_mode_disable, +struct kbase_protected_ops juno_protected_ops = { + .protected_mode_enter = juno_protected_mode_enter, + .protected_mode_reset = juno_protected_mode_reset, + .protected_mode_supported = juno_protected_mode_supported, }; static struct kbase_platform_config versatile_platform_config = { diff --git a/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_platform.h b/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_platform.h index 5fc6d9e1c312..ab29e9d9c0a6 100644 --- a/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_platform.h +++ b/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_platform.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -71,14 +71,14 @@ #define PLATFORM_FUNCS (NULL) /** - * Secure mode switch + * Protected mode switch * - * Attached value: pointer to @ref kbase_secure_ops + * Attached value: pointer to @ref kbase_protected_ops */ -#define SECURE_CALLBACKS (&juno_secure_ops) +#define PROTECTED_CALLBACKS (&juno_protected_ops) extern struct kbase_pm_callback_conf pm_callbacks; #ifdef CONFIG_DEVFREQ_THERMAL extern struct devfreq_cooling_ops juno_model_ops; #endif -extern struct kbase_secure_ops juno_secure_ops; +extern struct kbase_protected_ops juno_protected_ops; diff --git a/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h index eb957d3b5731..dc4471beae67 100644 --- a/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h +++ b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h @@ -73,10 +73,10 @@ #define PLATFORM_FUNCS (NULL) /** - * Secure mode switch + * Protected mode switch * - * Attached value: pointer to @ref kbase_secure_ops + * Attached value: pointer to @ref kbase_protected_ops */ -#define SECURE_CALLBACKS (NULL) +#define PROTECTED_CALLBACKS (NULL) extern struct kbase_pm_callback_conf pm_callbacks; diff --git a/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h b/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h index 2b91d72bd93c..b0490ca331dd 100644 --- a/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h +++ b/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -71,10 +71,10 @@ #define PLATFORM_FUNCS (NULL) /** - * Secure mode switch + * Protected mode switch * - * Attached value: pointer to @ref kbase_secure_ops + * Attached value: pointer to @ref kbase_protected_ops */ -#define SECURE_CALLBACKS (NULL) +#define PROTECTED_CALLBACKS (NULL) extern struct kbase_pm_callback_conf pm_callbacks; diff --git a/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h index d269c2591490..22ffccb3aaed 100644 --- a/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h +++ b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -73,10 +73,10 @@ #define PLATFORM_FUNCS (NULL) /** - * Secure mode switch + * Protected mode switch * - * Attached value: pointer to @ref kbase_secure_ops + * Attached value: pointer to @ref kbase_protected_ops */ -#define SECURE_CALLBACKS (NULL) +#define PROTECTED_CALLBACKS (NULL) extern struct kbase_pm_callback_conf pm_callbacks;
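
To illustrate how the renamed hooks fit together: a platform that actually implements protected mode populates a struct kbase_protected_ops (as juno_protected_ops does above) and points PROTECTED_CALLBACKS at it instead of NULL. The sketch below is not part of this patch; the example_* names are hypothetical, the stub bodies stand in for platform-specific enter/reset steps (SMC calls on Juno), and it assumes the usual driver headers provide struct kbase_device and struct kbase_protected_ops.

#include <mali_kbase.h>
#include <mali_kbase_defs.h>

/* Sketch only: switch the GPU into protected mode; return 0 on success. */
static int example_protected_mode_enter(struct kbase_device *kbdev)
{
	return 0;
}

/* Sketch only: reset the GPU back out of protected mode. */
static int example_protected_mode_reset(struct kbase_device *kbdev)
{
	return 0;
}

/* Sketch only: report whether this board/firmware supports protected mode. */
static bool example_protected_mode_supported(struct kbase_device *kbdev)
{
	return false;
}

struct kbase_protected_ops example_protected_ops = {
	.protected_mode_enter     = example_protected_mode_enter,
	.protected_mode_reset     = example_protected_mode_reset,
	.protected_mode_supported = example_protected_mode_supported,
};

/* The platform's mali_kbase_config_platform.h would then define:
 *
 *   #define PROTECTED_CALLBACKS (&example_protected_ops)
 */
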