From a9f9e723bcea23cb9a7ca5bfadf3d3e7932d81c6 Mon Sep 17 00:00:00 2001 From: chenzhen Date: Wed, 9 Sep 2015 09:28:51 +0800 Subject: [PATCH] MALI: rockchip: upgrade to DDK r7p0-02rel0. Conflicts: drivers/gpu/arm/midgard/mali_kbase_config.h Change-Id: I2d93041a0525ce6f1399c3a456c4c8f7be22243e Signed-off-by: chenzhen --- drivers/gpu/arm/midgard/Kbuild | 24 +- drivers/gpu/arm/midgard/Makefile | 13 +- drivers/gpu/arm/midgard/backend/gpu/Kbuild | 8 +- .../gpu/mali_kbase_debug_job_fault_backend.c | 157 ++ .../backend/gpu/mali_kbase_gpuprops_backend.c | 2 +- .../midgard/backend/gpu/mali_kbase_jm_defs.h | 7 +- .../midgard/backend/gpu/mali_kbase_jm_hw.c | 174 +- .../backend/gpu/mali_kbase_jm_internal.h | 30 - .../midgard/backend/gpu/mali_kbase_jm_rb.c | 143 +- .../midgard/backend/gpu/mali_kbase_jm_rb.h | 50 +- .../backend/gpu/mali_kbase_js_affinity.c | 3 +- .../backend/gpu/mali_kbase_js_affinity.h | 94 +- .../backend/gpu/mali_kbase_mmu_hw_direct.c | 37 +- .../backend/gpu/mali_kbase_mmu_hw_direct.h | 33 +- .../backend/gpu/mali_kbase_pm_always_on.c | 2 +- .../backend/gpu/mali_kbase_pm_backend.c | 6 +- .../midgard/backend/gpu/mali_kbase_pm_ca.c | 15 +- .../backend/gpu/mali_kbase_pm_ca_fixed.c | 8 +- .../backend/gpu/mali_kbase_pm_coarse_demand.c | 10 +- .../backend/gpu/mali_kbase_pm_coarse_demand.h | 15 +- .../midgard/backend/gpu/mali_kbase_pm_defs.h | 395 ++-- .../backend/gpu/mali_kbase_pm_demand.c | 10 +- .../backend/gpu/mali_kbase_pm_demand.h | 20 +- .../backend/gpu/mali_kbase_pm_driver.c | 316 +++- .../backend/gpu/mali_kbase_pm_internal.h | 349 ++-- .../backend/gpu/mali_kbase_pm_metrics.c | 294 +-- .../backend/gpu/mali_kbase_pm_policy.c | 129 +- .../backend/gpu/mali_kbase_pm_policy.h | 4 +- .../arm/midgard/backend/gpu/mali_kbase_time.c | 5 - .../arm/midgard/backend/gpu/mali_kbase_time.h | 7 +- .../arm/midgard/mali_base_hwconfig_features.h | 2 + .../arm/midgard/mali_base_hwconfig_issues.h | 30 +- drivers/gpu/arm/midgard/mali_base_kernel.h | 78 +- drivers/gpu/arm/midgard/mali_kbase.h | 42 +- .../gpu/arm/midgard/mali_kbase_cache_policy.c | 7 - .../gpu/arm/midgard/mali_kbase_cache_policy.h | 2 +- drivers/gpu/arm/midgard/mali_kbase_config.h | 17 +- drivers/gpu/arm/midgard/mali_kbase_context.c | 43 +- .../gpu/arm/midgard/mali_kbase_core_linux.c | 687 ++++--- drivers/gpu/arm/midgard/mali_kbase_cpuprops.c | 125 -- drivers/gpu/arm/midgard/mali_kbase_cpuprops.h | 53 - .../arm/midgard/mali_kbase_debug_job_fault.c | 447 +++++ .../arm/midgard/mali_kbase_debug_job_fault.h | 82 + drivers/gpu/arm/midgard/mali_kbase_defs.h | 118 +- drivers/gpu/arm/midgard/mali_kbase_device.c | 22 +- drivers/gpu/arm/midgard/mali_kbase_event.c | 69 +- .../gpu/arm/midgard/mali_kbase_gator_api.c | 10 +- drivers/gpu/arm/midgard/mali_kbase_gpuprops.c | 9 +- .../arm/midgard/mali_kbase_gpuprops_types.h | 2 +- drivers/gpu/arm/midgard/mali_kbase_hw.c | 4 + .../gpu/arm/midgard/mali_kbase_hwaccess_jm.h | 47 +- .../gpu/arm/midgard/mali_kbase_hwcnt_reader.h | 66 + drivers/gpu/arm/midgard/mali_kbase_instr.c | 38 +- drivers/gpu/arm/midgard/mali_kbase_instr.h | 10 - drivers/gpu/arm/midgard/mali_kbase_ipa.c | 264 +++ ...se_pm_metrics_dummy.c => mali_kbase_ipa.h} | 29 +- drivers/gpu/arm/midgard/mali_kbase_jd.c | 169 +- drivers/gpu/arm/midgard/mali_kbase_jm.c | 4 +- drivers/gpu/arm/midgard/mali_kbase_js.c | 155 +- drivers/gpu/arm/midgard/mali_kbase_js.h | 19 +- .../gpu/arm/midgard/mali_kbase_js_ctx_attr.c | 8 +- drivers/gpu/arm/midgard/mali_kbase_js_defs.h | 8 +- drivers/gpu/arm/midgard/mali_kbase_mem.c | 49 +- drivers/gpu/arm/midgard/mali_kbase_mem.h 
| 213 ++- .../gpu/arm/midgard/mali_kbase_mem_alloc.c | 292 --- .../gpu/arm/midgard/mali_kbase_mem_alloc.h | 32 - .../midgard/mali_kbase_mem_alloc_carveout.c | 402 ---- .../gpu/arm/midgard/mali_kbase_mem_linux.c | 226 ++- drivers/gpu/arm/midgard/mali_kbase_mem_pool.c | 574 ++++++ .../arm/midgard/mali_kbase_mem_pool_debugfs.c | 81 + .../arm/midgard/mali_kbase_mem_pool_debugfs.h | 36 + drivers/gpu/arm/midgard/mali_kbase_mmu.c | 177 +- drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h | 6 +- drivers/gpu/arm/midgard/mali_kbase_mmu_mode.h | 3 + .../arm/midgard/mali_kbase_mmu_mode_lpae.c | 25 +- drivers/gpu/arm/midgard/mali_kbase_replay.c | 33 +- drivers/gpu/arm/midgard/mali_kbase_smc.c | 44 +- drivers/gpu/arm/midgard/mali_kbase_smc.h | 37 +- drivers/gpu/arm/midgard/mali_kbase_softjobs.c | 6 +- drivers/gpu/arm/midgard/mali_kbase_tlstream.c | 311 +++- drivers/gpu/arm/midgard/mali_kbase_tlstream.h | 96 + .../arm/midgard/mali_kbase_trace_timeline.h | 2 + .../midgard/mali_kbase_trace_timeline_defs.h | 12 +- drivers/gpu/arm/midgard/mali_kbase_uku.h | 183 +- drivers/gpu/arm/midgard/mali_kbase_vinstr.c | 1609 ++++++++++++++--- drivers/gpu/arm/midgard/mali_kbase_vinstr.h | 104 +- drivers/gpu/arm/midgard/mali_midg_regmap.h | 6 +- drivers/gpu/arm/midgard/mali_timeline.h | 3 +- .../devicetree/mali_kbase_runtime_pm.c | 6 +- .../juno_soc/mali_kbase_config_juno_soc.c | 39 +- drivers/gpu/arm/midgard/sconscript | 45 +- 91 files changed, 6345 insertions(+), 3333 deletions(-) create mode 100644 drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c delete mode 100644 drivers/gpu/arm/midgard/mali_kbase_cpuprops.c delete mode 100644 drivers/gpu/arm/midgard/mali_kbase_cpuprops.h create mode 100644 drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c create mode 100644 drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h create mode 100644 drivers/gpu/arm/midgard/mali_kbase_hwcnt_reader.h create mode 100644 drivers/gpu/arm/midgard/mali_kbase_ipa.c rename drivers/gpu/arm/midgard/{backend/gpu/mali_kbase_pm_metrics_dummy.c => mali_kbase_ipa.h} (54%) delete mode 100644 drivers/gpu/arm/midgard/mali_kbase_mem_alloc.c delete mode 100644 drivers/gpu/arm/midgard/mali_kbase_mem_alloc.h delete mode 100644 drivers/gpu/arm/midgard/mali_kbase_mem_alloc_carveout.c create mode 100644 drivers/gpu/arm/midgard/mali_kbase_mem_pool.c create mode 100644 drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c create mode 100644 drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h diff --git a/drivers/gpu/arm/midgard/Kbuild b/drivers/gpu/arm/midgard/Kbuild index f43849831400..015521cd7d43 100644 --- a/drivers/gpu/arm/midgard/Kbuild +++ b/drivers/gpu/arm/midgard/Kbuild @@ -15,7 +15,7 @@ # Driver version string which is returned to userspace via an ioctl -MALI_RELEASE_NAME ?= "r6p0-02rel0" +MALI_RELEASE_NAME ?= "r7p0-02rel0" # Paths required for build KBASE_PATH = $(src) @@ -77,10 +77,10 @@ SRC := \ mali_kbase_cache_policy.c \ mali_kbase_mem.c \ mali_kbase_mmu.c \ + mali_kbase_ipa.c \ mali_kbase_jd.c \ mali_kbase_jd_debugfs.c \ mali_kbase_jm.c \ - mali_kbase_cpuprops.c \ mali_kbase_gpuprops.c \ mali_kbase_js.c \ mali_kbase_js_ctx_attr.c \ @@ -108,7 +108,10 @@ SRC := \ mali_kbase_disjoint_events.c \ mali_kbase_gator_api.c \ mali_kbase_debug_mem_view.c \ - mali_kbase_smc.c + mali_kbase_debug_job_fault.c \ + mali_kbase_smc.c \ + mali_kbase_mem_pool.c \ + mali_kbase_mem_pool_debugfs.c ifeq ($(CONFIG_MALI_MIPE_ENABLED),y) SRC += mali_kbase_tlstream.c @@ -120,23 +123,8 @@ endif # Job Scheduler Policy: Completely Fair Scheduler SRC += 
mali_kbase_js_policy_cfs.c -ifeq ($(CONFIG_MACH_MANTA),y) - SRC += mali_kbase_mem_alloc_carveout.c -else - SRC += mali_kbase_mem_alloc.c -endif - ccflags-y += -I$(KBASE_PATH) -# in-tree/out-of-tree logic needs to be slightly different to determine if a file is present -ifeq ($(KBUILD_EXTMOD),) -# in-tree -MALI_METRICS_PATH = $(srctree)/drivers/gpu/arm/midgard -else -# out-of-tree -MALI_METRICS_PATH = $(KBUILD_EXTMOD) -endif - ifeq ($(CONFIG_MALI_PLATFORM_FAKE),y) SRC += mali_kbase_platform_fake.c diff --git a/drivers/gpu/arm/midgard/Makefile b/drivers/gpu/arm/midgard/Makefile index ded089243e6d..d4d5de4cd512 100644 --- a/drivers/gpu/arm/midgard/Makefile +++ b/drivers/gpu/arm/midgard/Makefile @@ -1,5 +1,5 @@ # -# (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -16,6 +16,7 @@ KDIR ?= /lib/modules/$(shell uname -r)/build +BUSLOG_PATH_RELATIVE = $(CURDIR)/../../../.. UMP_PATH_RELATIVE = $(CURDIR)/../../../base/ump KBASE_PATH_RELATIVE = $(CURDIR) KDS_PATH_RELATIVE = $(CURDIR)/../../../.. @@ -23,7 +24,15 @@ EXTRA_SYMBOLS = $(UMP_PATH_RELATIVE)/src/Module.symvers ifeq ($(MALI_UNIT_TEST), 1) EXTRA_SYMBOLS += $(KBASE_PATH_RELATIVE)/tests/internal/src/kernel_assert_module/linux/Module.symvers - EXTRA_SYMBOLS += $(KBASE_PATH_RELATIVE)/../../../../tests/kutf/Module.symvers +endif + +ifneq ($(wildcard $(CURDIR)/internal/Makefile.in),) +include $(CURDIR)/internal/Makefile.in +endif + +ifeq ($(MALI_BUS_LOG), 1) +#Add bus logger symbols +EXTRA_SYMBOLS += $(BUSLOG_PATH_RELATIVE)/drivers/base/bus_logger/Module.symvers endif # GPL driver supports KDS diff --git a/drivers/gpu/arm/midgard/backend/gpu/Kbuild b/drivers/gpu/arm/midgard/backend/gpu/Kbuild index 05a7a826c5df..df4e796dc0d8 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/Kbuild +++ b/drivers/gpu/arm/midgard/backend/gpu/Kbuild @@ -18,6 +18,7 @@ BACKEND += \ backend/gpu/mali_kbase_device_hw.c \ backend/gpu/mali_kbase_gpu.c \ backend/gpu/mali_kbase_gpuprops_backend.c \ + backend/gpu/mali_kbase_debug_job_fault_backend.c \ backend/gpu/mali_kbase_irq_linux.c \ backend/gpu/mali_kbase_instr_backend.c \ backend/gpu/mali_kbase_jm_as.c \ @@ -55,10 +56,3 @@ ifeq ($(CONFIG_MALI_NO_MALI),y) # HW error simulation BACKEND += backend/gpu/mali_kbase_model_error_generator.c endif - -ifeq ($(wildcard $(MALI_METRICS_PATH)/backend/gpu/mali_kbase_pm_metrics_linux.c),) - BACKEND += backend/gpu/mali_kbase_pm_metrics_dummy.c -else - BACKEND += backend/gpu/mali_kbase_pm_metrics_linux.c -endif - diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c new file mode 100644 index 000000000000..7851ea6466c7 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c @@ -0,0 +1,157 @@ +/* + * + * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + + + +#include +#include +#include "mali_kbase_debug_job_fault.h" + +#ifdef CONFIG_DEBUG_FS + +/*GPU_CONTROL_REG(r)*/ +static int gpu_control_reg_snapshot[] = { + GPU_ID, + SHADER_READY_LO, + SHADER_READY_HI, + TILER_READY_LO, + TILER_READY_HI, + L2_READY_LO, + L2_READY_HI +}; + +/* JOB_CONTROL_REG(r) */ +static int job_control_reg_snapshot[] = { + JOB_IRQ_MASK, + JOB_IRQ_STATUS +}; + +/* JOB_SLOT_REG(n,r) */ +static int job_slot_reg_snapshot[] = { + JS_HEAD_LO, + JS_HEAD_HI, + JS_TAIL_LO, + JS_TAIL_HI, + JS_AFFINITY_LO, + JS_AFFINITY_HI, + JS_CONFIG, + JS_STATUS, + JS_HEAD_NEXT_LO, + JS_HEAD_NEXT_HI, + JS_AFFINITY_NEXT_LO, + JS_AFFINITY_NEXT_HI, + JS_CONFIG_NEXT +}; + +/*MMU_REG(r)*/ +static int mmu_reg_snapshot[] = { + MMU_IRQ_MASK, + MMU_IRQ_STATUS +}; + +/* MMU_AS_REG(n,r) */ +static int as_reg_snapshot[] = { + AS_TRANSTAB_LO, + AS_TRANSTAB_HI, + AS_MEMATTR_LO, + AS_MEMATTR_HI, + AS_FAULTSTATUS, + AS_FAULTADDRESS_LO, + AS_FAULTADDRESS_HI, + AS_STATUS +}; + +bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx, + int reg_range) +{ + int i, j; + int offset = 0; + int slot_number; + int as_number; + + if (kctx->reg_dump == NULL) + return false; + + slot_number = kctx->kbdev->gpu_props.num_job_slots; + as_number = kctx->kbdev->gpu_props.num_address_spaces; + + /* get the GPU control registers*/ + for (i = 0; i < sizeof(gpu_control_reg_snapshot)/4; i++) { + kctx->reg_dump[offset] = + GPU_CONTROL_REG(gpu_control_reg_snapshot[i]); + offset += 2; + } + + /* get the Job control registers*/ + for (i = 0; i < sizeof(job_control_reg_snapshot)/4; i++) { + kctx->reg_dump[offset] = + JOB_CONTROL_REG(job_control_reg_snapshot[i]); + offset += 2; + } + + /* get the Job Slot registers*/ + for (j = 0; j < slot_number; j++) { + for (i = 0; i < sizeof(job_slot_reg_snapshot)/4; i++) { + kctx->reg_dump[offset] = + JOB_SLOT_REG(j, job_slot_reg_snapshot[i]); + offset += 2; + } + } + + /* get the MMU registers*/ + for (i = 0; i < sizeof(mmu_reg_snapshot)/4; i++) { + kctx->reg_dump[offset] = MMU_REG(mmu_reg_snapshot[i]); + offset += 2; + } + + /* get the Address space registers*/ + for (j = 0; j < as_number; j++) { + for (i = 0; i < sizeof(as_reg_snapshot)/4; i++) { + kctx->reg_dump[offset] = + MMU_AS_REG(j, as_reg_snapshot[i]); + offset += 2; + } + } + + WARN_ON(offset >= (reg_range*2/4)); + + /* set the termination flag*/ + kctx->reg_dump[offset] = REGISTER_DUMP_TERMINATION_FLAG; + kctx->reg_dump[offset + 1] = REGISTER_DUMP_TERMINATION_FLAG; + + dev_dbg(kctx->kbdev->dev, "kbase_job_fault_reg_snapshot_init:%d\n", + offset); + + return true; +} + +bool kbase_job_fault_get_reg_snapshot(struct kbase_context *kctx) +{ + int offset = 0; + + if (kctx->reg_dump == NULL) + return false; + + while (kctx->reg_dump[offset] != REGISTER_DUMP_TERMINATION_FLAG) { + kctx->reg_dump[offset+1] = + kbase_reg_read(kctx->kbdev, + kctx->reg_dump[offset], NULL); + offset += 2; + } + return true; +} + + +#endif diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c index bc7235f5833f..591c013c5349 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the GNU General Public License version 2 as published by the Free Software diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h index 1598fce4c281..57c64f7db93f 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h @@ -63,6 +63,11 @@ struct slot_rb { * @scheduling_timer: The timer tick used for rescheduling jobs * @timer_running: Is the timer running? The runpool_mutex must be * held whilst modifying this. + * @reset_gpu: Set to a KBASE_RESET_xxx value (see comments) + * @reset_workq: Work queue for performing the reset + * @reset_work: Work item for performing the reset + * @reset_wait: Wait event signalled when the reset is complete + * @reset_timer: Timeout for soft-stops before the reset * * The kbasep_js_device_data::runpool_irq::lock (a spinlock) must be held when * accessing this structure @@ -76,7 +81,6 @@ struct kbase_backend_data { bool timer_running; - /* Set when we're about to reset the GPU */ atomic_t reset_gpu; /* The GPU reset isn't pending */ @@ -90,7 +94,6 @@ struct kbase_backend_data { * kbasep_try_reset_gpu_early was called) */ #define KBASE_RESET_GPU_HAPPENING 3 - /* Work queue and work item for performing the reset in */ struct workqueue_struct *reset_workq; struct work_struct reset_work; wait_queue_head_t reset_wait; diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c index 508394ea0256..a4b0c26cddec 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c @@ -116,8 +116,10 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, * start */ cfg = kctx->as_nr; +#ifndef CONFIG_MALI_COH_GPU cfg |= JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE; cfg |= JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE; +#endif cfg |= JS_CONFIG_START_MMU; cfg |= JS_CONFIG_THREAD_PRI(8); @@ -158,6 +160,9 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, kctx, kbase_jd_atom_id(kctx, katom)); #endif #if defined(CONFIG_MALI_MIPE_ENABLED) + kbase_tlstream_tl_attrib_atom_config(katom, jc_head, + katom->affinity, cfg); + kbase_tlstream_tl_ret_atom_as(katom, &kbdev->as[kctx->as_nr]); kbase_tlstream_tl_ret_atom_lpu( katom, &kbdev->gpu_props.props.raw_props.js_features[js]); @@ -695,6 +700,7 @@ void kbase_backend_jm_kill_jobs_from_kctx(struct kbase_context *kctx) js_devdata = &kbdev->js_data; /* Cancel any remaining running jobs for this kctx */ + mutex_lock(&kctx->jctx.lock); spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); /* Invalidate all jobs in context, to prevent re-submitting */ @@ -708,6 +714,7 @@ void kbase_backend_jm_kill_jobs_from_kctx(struct kbase_context *kctx) kbase_job_slot_hardstop(kctx, i, NULL); spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + mutex_unlock(&kctx->jctx.lock); } void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx, @@ -872,6 +879,66 @@ void kbase_job_slot_term(struct kbase_device *kbdev) } KBASE_EXPORT_TEST_API(kbase_job_slot_term); +#if KBASE_GPU_RESET_EN +/** + * kbasep_check_for_afbc_on_slot() - Check whether AFBC is in use on this slot + * @kbdev: kbase device pointer + * @kctx: context to check against + * @js: slot to check + * @target_katom: An atom to check, or NULL if all atoms from @kctx on + * slot @js should be checked + * + * These checks are based upon 
parameters that would normally be passed to + * kbase_job_slot_hardstop(). + * + * In the event of @target_katom being NULL, this will check the last jobs that + * are likely to be running on the slot to see if a) they belong to kctx, and + * so would be stopped, and b) whether they have AFBC + * + * In that case, it's guaranteed that a job currently executing on the HW with + * AFBC will be detected. However, this is a conservative check because it also + * detects jobs that have just completed too. + * + * Return: true when hard-stop _might_ stop an afbc atom, else false. + */ +static bool kbasep_check_for_afbc_on_slot(struct kbase_device *kbdev, + struct kbase_context *kctx, int js, + struct kbase_jd_atom *target_katom) +{ + bool ret = false; + int i; + + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + + /* When we have an atom the decision can be made straight away. */ + if (target_katom) + return !!(target_katom->core_req & BASE_JD_REQ_FS_AFBC); + + /* Otherwise, we must check the hardware to see if it has atoms from + * this context with AFBC. */ + for (i = 0; i < kbase_backend_nr_atoms_on_slot(kbdev, js); i++) { + struct kbase_jd_atom *katom; + + katom = kbase_gpu_inspect(kbdev, js, i); + if (!katom) + continue; + + /* Ignore atoms from other contexts, they won't be stopped when + * we use this for checking if we should hard-stop them */ + if (katom->kctx != kctx) + continue; + + /* An atom on this slot and this context: check for AFBC */ + if (katom->core_req & BASE_JD_REQ_FS_AFBC) { + ret = true; + break; + } + } + + return ret; +} +#endif /* KBASE_GPU_RESET_EN */ + /** * kbase_job_slot_softstop_swflags - Soft-stop a job with flags * @kbdev: The kbase device @@ -926,16 +993,24 @@ void kbase_job_slot_hardstop(struct kbase_context *kctx, int js, struct kbase_jd_atom *target_katom) { struct kbase_device *kbdev = kctx->kbdev; + bool stopped; +#if KBASE_GPU_RESET_EN + /* We make the check for AFBC before evicting/stopping atoms. Note + * that no other thread can modify the slots whilst we have the + * runpool_irq lock. */ + int needs_workaround_for_afbc = + kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3542) + && kbasep_check_for_afbc_on_slot(kbdev, kctx, js, + target_katom); +#endif - bool stopped = kbase_backend_soft_hard_stop_slot(kbdev, kctx, js, + stopped = kbase_backend_soft_hard_stop_slot(kbdev, kctx, js, target_katom, JS_COMMAND_HARD_STOP); #if KBASE_GPU_RESET_EN if (stopped && (kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_8401) || - kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_9510) || - (kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_T76X_3542) && - (target_katom == NULL || target_katom->core_req & - BASE_JD_REQ_FS_AFBC)))) { + kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_9510) || + needs_workaround_for_afbc)) { /* MIDBASE-2916 if a fragment job with AFBC encoding is * hardstopped, ensure to do a soft reset also in order to * clear the GPU status. 
@@ -1161,8 +1236,49 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) /* Reset the GPU */ kbase_pm_init_hw(kbdev, 0); + + /* Re-enable IRQs */ kbase_pm_enable_interrupts_mmu_mask(kbdev, mmu_irq_mask); - /* IRQs were re-enabled by kbase_pm_init_hw, and GPU is still powered */ + + /* Complete any jobs that were still on the GPU */ + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + kbase_backend_reset(kbdev, &end_timestamp); + kbase_pm_metrics_update(kbdev, NULL); + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + + mutex_unlock(&kbdev->pm.lock); + + mutex_lock(&js_devdata->runpool_mutex); + + /* Reprogram the GPU's MMU */ + for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { + struct kbase_as *as = &kbdev->as[i]; + + mutex_lock(&as->transaction_mutex); + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + + if (js_devdata->runpool_irq.per_as_data[i].kctx) + kbase_mmu_update( + js_devdata->runpool_irq.per_as_data[i].kctx); + else + kbase_mmu_disable_as(kbdev, i); + + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + mutex_unlock(&as->transaction_mutex); + } + + atomic_set(&kbdev->hwaccess.backend.reset_gpu, + KBASE_RESET_GPU_NOT_PENDING); + + kbase_disjoint_state_down(kbdev); + + wake_up(&kbdev->hwaccess.backend.reset_wait); + dev_err(kbdev->dev, "Reset complete"); + + if (js_devdata->nr_contexts_pullable > 0 && !kbdev->poweroff_pending) + try_schedule = true; + + mutex_unlock(&js_devdata->runpool_mutex); spin_lock_irqsave(&kbdev->hwcnt.lock, flags); /* Restore the HW counters setup */ @@ -1246,45 +1362,7 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) break; } spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); - - /* Complete any jobs that were still on the GPU */ - spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); - kbase_backend_reset(kbdev, &end_timestamp); - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); - - mutex_unlock(&kbdev->pm.lock); - - mutex_lock(&js_devdata->runpool_mutex); - - /* Reprogram the GPU's MMU */ - for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { - struct kbase_as *as = &kbdev->as[i]; - - mutex_lock(&as->transaction_mutex); - spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); - - if (js_devdata->runpool_irq.per_as_data[i].kctx) - kbase_mmu_update( - js_devdata->runpool_irq.per_as_data[i].kctx); - else - kbase_mmu_disable_as(kbdev, i); - - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); - mutex_unlock(&as->transaction_mutex); - } - - atomic_set(&kbdev->hwaccess.backend.reset_gpu, - KBASE_RESET_GPU_NOT_PENDING); - - kbase_disjoint_state_down(kbdev); - - wake_up(&kbdev->hwaccess.backend.reset_wait); - dev_err(kbdev->dev, "Reset complete"); - - if (js_devdata->nr_contexts_pullable > 0 && !kbdev->poweroff_pending) - try_schedule = true; - - mutex_unlock(&js_devdata->runpool_mutex); + /* Note: counter dumping may now resume */ mutex_lock(&kbdev->pm.lock); @@ -1346,6 +1424,12 @@ static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev) return; } + /* To prevent getting incorrect registers when dumping a failed job, + * skip early reset. + */ + if (kbdev->job_fault_debug != false) + return; + /* Check that the reset has been committed to (i.e. 
kbase_reset_gpu has * been called), and that no other thread beat this thread to starting * the reset */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h index 3b959d5b4472..eb068d40283b 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h @@ -152,34 +152,4 @@ void kbase_job_slot_halt(struct kbase_device *kbdev); */ void kbase_job_slot_term(struct kbase_device *kbdev); -#if KBASE_GPU_RESET_EN -/** - * kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU. - * @kbdev: Device pointer - * - * This function just soft-stops all the slots to ensure that as many jobs as - * possible are saved. - * - * Return: a boolean which should be interpreted as follows: - * - true - Prepared for reset, kbase_reset_gpu should be called. - * - false - Another thread is performing a reset, kbase_reset_gpu should - * not be called. - */ -bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev); - -/** - * kbase_reset_gpu_locked - Reset the GPU - * @kbdev: Device pointer - * - * This function should be called after kbase_prepare_to_reset_gpu if it - * returns true. It should never be called without a corresponding call to - * kbase_prepare_to_reset_gpu. - * - * After this function is called (or not called if kbase_prepare_to_reset_gpu - * returned false), the caller should wait for kbdev->reset_waitq to be - * signalled to know when the reset has completed. - */ -void kbase_reset_gpu_locked(struct kbase_device *kbdev); -#endif - #endif /* _KBASE_JM_HWACCESS_H_ */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c index 1bf9c7097523..860171810667 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c @@ -39,7 +39,8 @@ #define SLOT_RB_ENTRIES(rb) (int)(s8)(rb->write_idx - rb->read_idx) static void kbase_gpu_release_atom(struct kbase_device *kbdev, - struct kbase_jd_atom *katom); + struct kbase_jd_atom *katom, + ktime_t *end_timestamp); /** * kbase_gpu_enqueue_atom - Enqueue an atom in the HW access ringbuffer @@ -66,15 +67,18 @@ static void kbase_gpu_enqueue_atom(struct kbase_device *kbdev, /** * kbase_gpu_dequeue_atom - Remove an atom from the HW access ringbuffer, once * it has been completed - * @kbdev: Device pointer - * @js: Job slot to remove atom from + * @kbdev: Device pointer + * @js: Job slot to remove atom from + * @end_timestamp: Pointer to timestamp of atom completion. May be NULL, in + * which case current time will be used. 
* * Context: Caller must hold the HW access lock * * Return: Atom removed from ringbuffer */ static struct kbase_jd_atom *kbase_gpu_dequeue_atom(struct kbase_device *kbdev, - int js) + int js, + ktime_t *end_timestamp) { struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js]; struct kbase_jd_atom *katom; @@ -88,7 +92,7 @@ static struct kbase_jd_atom *kbase_gpu_dequeue_atom(struct kbase_device *kbdev, katom = rb->entries[rb->read_idx & SLOT_RB_MASK].katom; - kbase_gpu_release_atom(kbdev, katom); + kbase_gpu_release_atom(kbdev, katom, end_timestamp); rb->read_idx++; @@ -519,9 +523,54 @@ static void kbasep_js_job_check_deref_cores(struct kbase_device *kbdev, katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED; } +static void kbasep_js_job_check_deref_cores_nokatom(struct kbase_device *kbdev, + base_jd_core_req core_req, u64 affinity, + enum kbase_atom_coreref_state coreref_state) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + + switch (coreref_state) { + case KBASE_ATOM_COREREF_STATE_READY: + /* State where atom was submitted to the HW - just proceed to + * power-down */ + KBASE_DEBUG_ASSERT(affinity != 0 || + (core_req & BASE_JD_REQ_T)); + + /* *** FALLTHROUGH *** */ + + case KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY: + /* State where cores were registered */ + KBASE_DEBUG_ASSERT(affinity != 0 || + (core_req & BASE_JD_REQ_T)); + kbase_pm_release_cores(kbdev, core_req & BASE_JD_REQ_T, + affinity); + + break; + + case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES: + /* State where cores were requested, but not registered */ + KBASE_DEBUG_ASSERT(affinity != 0 || + (core_req & BASE_JD_REQ_T)); + kbase_pm_unrequest_cores(kbdev, core_req & BASE_JD_REQ_T, + affinity); + break; + + case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED: + /* Initial state - nothing required */ + KBASE_DEBUG_ASSERT(affinity == 0); + break; + + default: + KBASE_DEBUG_ASSERT_MSG(false, + "Unhandled coreref_state: %d", + coreref_state); + break; + } +} static void kbase_gpu_release_atom(struct kbase_device *kbdev, - struct kbase_jd_atom *katom) + struct kbase_jd_atom *katom, + ktime_t *end_timestamp) { switch (katom->gpu_rb_state) { case KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB: @@ -530,9 +579,11 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev, break; case KBASE_ATOM_GPU_RB_SUBMITTED: - /* Inform power management at start/finish of atom - * so it can update its GPU utilisation metrics. */ - kbase_pm_metrics_release_atom(kbdev, katom); + /* Inform power management at start/finish of atom so it can + * update its GPU utilisation metrics. Mark atom as not + * submitted beforehand. 
*/ + katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY; + kbase_pm_metrics_update(kbdev, end_timestamp); if (katom->core_req & BASE_JD_REQ_PERMON) kbase_pm_release_gpu_cycle_counter(kbdev); @@ -550,7 +601,6 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev, /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE: - kbasep_js_job_check_deref_cores(kbdev, katom); break; case KBASE_ATOM_GPU_RB_WAITING_BLOCKED: @@ -566,7 +616,7 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev, static void kbase_gpu_mark_atom_for_return(struct kbase_device *kbdev, struct kbase_jd_atom *katom) { - kbase_gpu_release_atom(kbdev, katom); + kbase_gpu_release_atom(kbdev, katom, NULL); katom->gpu_rb_state = KBASE_ATOM_GPU_RB_RETURN_TO_JS; } @@ -606,7 +656,7 @@ static inline bool kbase_gpu_rmu_workaround(struct kbase_device *kbdev, int js) static bool kbase_gpu_in_secure_mode(struct kbase_device *kbdev) { - return kbdev->js_data.runpool_irq.secure_mode; + return kbdev->secure_mode; } static int kbase_gpu_secure_mode_enable(struct kbase_device *kbdev) @@ -625,7 +675,7 @@ static int kbase_gpu_secure_mode_enable(struct kbase_device *kbdev) if (err) dev_warn(kbdev->dev, "Failed to enable secure mode: %d\n", err); else - kbdev->js_data.runpool_irq.secure_mode = true; + kbdev->secure_mode = true; } return err; @@ -647,7 +697,7 @@ static int kbase_gpu_secure_mode_disable(struct kbase_device *kbdev) if (err) dev_warn(kbdev->dev, "Failed to disable secure mode: %d\n", err); else - kbdev->js_data.runpool_irq.secure_mode = false; + kbdev->secure_mode = false; } return err; @@ -746,6 +796,14 @@ void kbase_gpu_slot_update(struct kbase_device *kbdev) /* Failed to switch secure mode, fail atom */ katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID; kbase_gpu_mark_atom_for_return(kbdev, katom[idx]); + /* Only return if head atom or previous atom + * already removed - as atoms must be returned + * in order */ + if (idx == 0 || katom[0]->gpu_rb_state == + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { + kbase_gpu_dequeue_atom(kbdev, js, NULL); + kbase_jm_return_atom_to_js(kbdev, katom[idx]); + } break; } } @@ -781,15 +839,16 @@ void kbase_gpu_slot_update(struct kbase_device *kbdev) kbase_pm_request_gpu_cycle_counter_l2_is_on( kbdev); - /* Inform power management at start/finish of - * atom so it can update its GPU utilisation - * metrics. */ - kbase_pm_metrics_run_atom(kbdev, katom[idx]); - kbase_job_hw_submit(kbdev, katom[idx], js); katom[idx]->gpu_rb_state = KBASE_ATOM_GPU_RB_SUBMITTED; + /* Inform power management at start/finish of + * atom so it can update its GPU utilisation + * metrics. */ + kbase_pm_metrics_update(kbdev, + &katom[idx]->start_timestamp); + /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ case KBASE_ATOM_GPU_RB_SUBMITTED: @@ -802,7 +861,7 @@ void kbase_gpu_slot_update(struct kbase_device *kbdev) * in order */ if (idx == 0 || katom[0]->gpu_rb_state == KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { - kbase_gpu_dequeue_atom(kbdev, js); + kbase_gpu_dequeue_atom(kbdev, js, NULL); kbase_jm_return_atom_to_js(kbdev, katom[idx]); } @@ -884,7 +943,7 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, } } - katom = kbase_gpu_dequeue_atom(kbdev, js); + katom = kbase_gpu_dequeue_atom(kbdev, js, end_timestamp); kbase_timeline_job_slot_done(kbdev, katom->kctx, katom, js, 0); @@ -899,7 +958,7 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, * the atoms on this slot are returned in the correct order. 
*/ if (next_katom && katom->kctx == next_katom->kctx) { - kbase_gpu_dequeue_atom(kbdev, js); + kbase_gpu_dequeue_atom(kbdev, js, end_timestamp); kbase_jm_return_atom_to_js(kbdev, next_katom); } } else if (completion_code != BASE_JD_EVENT_DONE) { @@ -927,14 +986,15 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, katom_idx0->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) { /* Dequeue katom_idx0 from ringbuffer */ - kbase_gpu_dequeue_atom(kbdev, i); + kbase_gpu_dequeue_atom(kbdev, i, end_timestamp); if (katom_idx1 && katom_idx1->kctx == katom->kctx && katom_idx0->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) { /* Dequeue katom_idx1 from ringbuffer */ - kbase_gpu_dequeue_atom(kbdev, i); + kbase_gpu_dequeue_atom(kbdev, i, + end_timestamp); katom_idx1->event_code = BASE_JD_EVENT_STOPPED; @@ -1043,8 +1103,8 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp) enum kbase_atom_gpu_rb_state gpu_rb_state = katom->gpu_rb_state; - kbase_gpu_release_atom(kbdev, katom); - kbase_gpu_dequeue_atom(kbdev, js); + kbase_gpu_release_atom(kbdev, katom, NULL); + kbase_gpu_dequeue_atom(kbdev, js, NULL); if (gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED) { @@ -1156,9 +1216,9 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, if (katom_idx0_valid) { if (katom_idx0->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) { /* Simple case - just dequeue and return */ - kbase_gpu_dequeue_atom(kbdev, js); + kbase_gpu_dequeue_atom(kbdev, js, NULL); if (katom_idx1_valid) { - kbase_gpu_dequeue_atom(kbdev, js); + kbase_gpu_dequeue_atom(kbdev, js, NULL); katom_idx1->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT; kbase_jm_return_atom_to_js(kbdev, katom_idx1); @@ -1303,7 +1363,7 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, return ret; } -static void kbasep_gpu_cacheclean(struct kbase_device *kbdev, +void kbase_gpu_cacheclean(struct kbase_device *kbdev, struct kbase_jd_atom *katom) { /* Limit the number of loops to avoid a hang if the interrupt is missed @@ -1347,7 +1407,7 @@ void kbase_backend_complete_wq(struct kbase_device *kbdev, * now */ if (katom->need_cache_flush_cores_retained) { - kbasep_gpu_cacheclean(kbdev, katom); + kbase_gpu_cacheclean(kbdev, katom); katom->need_cache_flush_cores_retained = 0; } @@ -1368,6 +1428,29 @@ void kbase_backend_complete_wq(struct kbase_device *kbdev, katom->atom_flags |= KBASE_KATOM_FLAGS_RERUN; } } + + /* Clear the coreref_state now - while check_deref_cores() may not have + * been called yet, the caller will have taken a copy of this field. If + * this is not done, then if the atom is re-scheduled (following a soft + * stop) then the core reference would not be retaken. 
*/ + katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED; + katom->affinity = 0; +} + +void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev, + base_jd_core_req core_req, u64 affinity, + enum kbase_atom_coreref_state coreref_state) +{ + kbasep_js_job_check_deref_cores_nokatom(kbdev, core_req, affinity, + coreref_state); + + if (!kbdev->pm.active_count) { + mutex_lock(&kbdev->js_data.runpool_mutex); + mutex_lock(&kbdev->pm.lock); + kbase_pm_update_active(kbdev); + mutex_unlock(&kbdev->pm.lock); + mutex_unlock(&kbdev->js_data.runpool_mutex); + } } void kbase_gpu_dump_slots(struct kbase_device *kbdev) diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h index 13d600c47b18..102d94be93d1 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h @@ -16,8 +16,7 @@ -/** - * @file mali_kbase_hwaccess_gpu.h +/* * Register-based HW access backend specific APIs */ @@ -27,23 +26,27 @@ #include /** + * kbase_gpu_irq_evict - Evict an atom from a NEXT slot + * + * @kbdev: Device pointer + * @js: Job slot to evict from + * * Evict the atom in the NEXT slot for the specified job slot. This function is * called from the job complete IRQ handler when the previous job has failed. * - * @param[in] kbdev Device pointer - * @param[in] js Job slot to evict from - * @return true if job evicted from NEXT registers - * false otherwise + * Return: true if job evicted from NEXT registers, false otherwise */ bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js); /** - * Complete an atom on job slot js + * kbase_gpu_complete_hw - Complete an atom on job slot js * - * @param[in] kbdev Device pointer - * @param[in] js Job slot that has completed - * @param[in] event_code Event code from job that has completed - * @param[in] end_timestamp Time of completion + * @kbdev: Device pointer + * @js: Job slot that has completed + * @completion_code: Event code from job that has completed + * @job_tail: The tail address from the hardware if the job has partially + * completed + * @end_timestamp: Time of completion */ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, u32 completion_code, @@ -51,32 +54,33 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, ktime_t *end_timestamp); /** - * Inspect the contents of the HW access ringbuffer + * kbase_gpu_inspect - Inspect the contents of the HW access ringbuffer * - * @param[in] kbdev Device pointer - * @param[in] js Job slot to inspect - * @param[in] idx Index into ringbuffer. 0 is the job currently running on - * the slot, 1 is the job waiting, all other values are - * invalid. - * @return The atom at that position in the ringbuffer - * NULL if no atom present + * @kbdev: Device pointer + * @js: Job slot to inspect + * @idx: Index into ringbuffer. 0 is the job currently running on + * the slot, 1 is the job waiting, all other values are invalid. + * Return: The atom at that position in the ringbuffer + * or NULL if no atom present */ struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js, int idx); /** + * kbase_gpu_slot_update - Update state based on slot ringbuffers + * + * @kbdev: Device pointer + * * Inspect the jobs in the slot ringbuffers and update state. 
* * This will cause jobs to be submitted to hardware if they are unblocked - * - * @param[in] kbdev Device pointer */ void kbase_gpu_slot_update(struct kbase_device *kbdev); /** - * Print the contents of the slot ringbuffers + * kbase_gpu_dump_slots - Print the contents of the slot ringbuffers * - * @param[in] kbdev Device pointer + * @kbdev: Device pointer */ void kbase_gpu_dump_slots(struct kbase_device *kbdev); diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c index 1e29d5fb2235..89b8085e28b2 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c @@ -17,8 +17,7 @@ -/** - * @file mali_kbase_js_affinity.c +/* * Base kernel affinity manager APIs */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h index 541e554983ef..3026e6a58303 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h @@ -17,8 +17,7 @@ -/** - * @file mali_kbase_js_affinity.h +/* * Affinity Manager internal APIs. */ @@ -34,24 +33,11 @@ extern u64 mali_js2_affinity_mask; /** - * @addtogroup base_api - * @{ - */ - -/** - * @addtogroup base_kbase_api - * @{ - */ - -/** - * @addtogroup kbase_js_affinity Affinity Manager internal APIs. - * @{ + * kbase_js_can_run_job_on_slot_no_lock - Decide whether it is possible to + * submit a job to a particular job slot in the current status * - */ - -/** - * @brief Decide whether it is possible to submit a job to a particular job slot - * in the current status + * @kbdev: The kbase device structure of the device + * @js: Job slot number to check for allowance * * Will check if submitting to the given job slot is allowed in the current * status. For example using job slot 2 while in soft-stoppable state and only @@ -59,28 +45,25 @@ extern u64 mali_js2_affinity_mask; * called prior to submitting a job to a slot to make sure policy rules are not * violated. * - * The following locking conditions are made on the caller: - * - it must hold kbasep_js_device_data::runpool_irq::lock - * - * @param kbdev The kbase device structure of the device - * @param js Job slot number to check for allowance + * The following locking conditions are made on the caller + * - it must hold kbasep_js_device_data.runpool_irq.lock */ bool kbase_js_can_run_job_on_slot_no_lock(struct kbase_device *kbdev, int js); /** - * @brief Compute affinity for a given job. + * kbase_js_choose_affinity - Compute affinity for a given job. + * + * @affinity: Affinity bitmap computed + * @kbdev: The kbase device structure of the device + * @katom: Job chain of which affinity is going to be found + * @js: Slot the job chain is being submitted * * Currently assumes an all-on/all-off power management policy. * Also assumes there is at least one core with tiler available. * * Returns true if a valid affinity was chosen, false if * no cores were available. 
- * - * @param[out] affinity Affinity bitmap computed - * @param kbdev The kbase device structure of the device - * @param katom Job chain of which affinity is going to be found - * @param js Slot the job chain is being submitted */ bool kbase_js_choose_affinity(u64 * const affinity, struct kbase_device *kbdev, @@ -88,40 +71,60 @@ bool kbase_js_choose_affinity(u64 * const affinity, int js); /** - * @brief Determine whether a proposed \a affinity on job slot \a js would - * cause a violation of affinity restrictions. + * kbase_js_affinity_would_violate - Determine whether a proposed affinity on + * job slot @js would cause a violation of affinity restrictions. * - * The following locks must be held by the caller: - * - kbasep_js_device_data::runpool_irq::lock + * @kbdev: Kbase device structure + * @js: The job slot to test + * @affinity: The affinity mask to test + * + * The following locks must be held by the caller + * - kbasep_js_device_data.runpool_irq.lock + * + * Return: true if the affinity would violate the restrictions */ bool kbase_js_affinity_would_violate(struct kbase_device *kbdev, int js, u64 affinity); /** - * @brief Affinity tracking: retain cores used by a slot + * kbase_js_affinity_retain_slot_cores - Affinity tracking: retain cores used by + * a slot + * + * @kbdev: Kbase device structure + * @js: The job slot retaining the cores + * @affinity: The cores to retain * - * The following locks must be held by the caller: - * - kbasep_js_device_data::runpool_irq::lock + * The following locks must be held by the caller + * - kbasep_js_device_data.runpool_irq.lock */ void kbase_js_affinity_retain_slot_cores(struct kbase_device *kbdev, int js, u64 affinity); /** - * @brief Affinity tracking: release cores used by a slot + * kbase_js_affinity_release_slot_cores - Affinity tracking: release cores used + * by a slot * - * Cores \b must be released as soon as a job is dequeued from a slot's 'submit + * @kbdev: Kbase device structure + * @js: Job slot + * @affinity: Bit mask of core to be released + * + * Cores must be released as soon as a job is dequeued from a slot's 'submit * slots', and before another job is submitted to those slots. Otherwise, the * refcount could exceed the maximum number submittable to a slot, - * BASE_JM_SUBMIT_SLOTS. + * %BASE_JM_SUBMIT_SLOTS. 
* - * The following locks must be held by the caller: - * - kbasep_js_device_data::runpool_irq::lock + * The following locks must be held by the caller + * - kbasep_js_device_data.runpool_irq.lock */ void kbase_js_affinity_release_slot_cores(struct kbase_device *kbdev, int js, u64 affinity); /** - * @brief Output to the Trace log the current tracked affinities on all slots + * kbase_js_debug_log_current_affinities - log the current affinities + * + * @kbdev: Kbase device structure + * + * Output to the Trace log the current tracked affinities on all slots */ #if KBASE_TRACE_ENABLE void kbase_js_debug_log_current_affinities(struct kbase_device *kbdev); @@ -132,9 +135,4 @@ kbase_js_debug_log_current_affinities(struct kbase_device *kbdev) } #endif /* KBASE_TRACE_ENABLE */ - /** @} *//* end group kbase_js_affinity */ - /** @} *//* end group base_kbase_api */ - /** @} *//* end group base_api */ - - #endif /* _KBASE_JS_AFFINITY_H_ */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c index e06dc58621f9..1b613a1967c9 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c @@ -20,6 +20,9 @@ #include #include #include +#if defined(CONFIG_MALI_MIPE_ENABLED) +#include +#endif #include #include @@ -63,19 +66,22 @@ static int wait_ready(struct kbase_device *kbdev, unsigned int as_nr, struct kbase_context *kctx) { unsigned int max_loops = KBASE_AS_INACTIVE_MAX_LOOPS; + u32 val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS), kctx); - /* Wait for the MMU status to indicate there is no active command. */ - while (--max_loops && kbase_reg_read(kbdev, - MMU_AS_REG(as_nr, AS_STATUS), - kctx) & AS_STATUS_AS_ACTIVE) { - ; - } + /* Wait for the MMU status to indicate there is no active command, in + * case one is pending. Do not log remaining register accesses. */ + while (--max_loops && (val & AS_STATUS_AS_ACTIVE)) + val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS), NULL); if (max_loops == 0) { dev_err(kbdev->dev, "AS_ACTIVE bit stuck\n"); return -1; } + /* If waiting in loop was performed, log last read value. 
*/ + if (KBASE_AS_INACTIVE_MAX_LOOPS - 1 > max_loops) + kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS), kctx); + return 0; } @@ -197,6 +203,12 @@ void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as, struct kbase_context *kctx) { struct kbase_mmu_setup *current_setup = &as->current_setup; +#if defined(CONFIG_MALI_MIPE_ENABLED) || \ + (defined(MALI_INCLUDE_TMIX) && \ + defined(CONFIG_MALI_COH_PAGES) && \ + defined(CONFIG_MALI_GPU_MMU_AARCH64)) + u32 transcfg = 0; +#endif kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_LO), @@ -209,6 +221,13 @@ void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as, kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_HI), (current_setup->memattr >> 32) & 0xFFFFFFFFUL, kctx); +#if defined(CONFIG_MALI_MIPE_ENABLED) + kbase_tlstream_tl_attrib_as_config(as, + current_setup->transtab, + current_setup->memattr, + transcfg); +#endif + write_cmd(kbdev, as->number, AS_COMMAND_UPDATE, kctx); } @@ -267,7 +286,8 @@ void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as, /* Clear the page (and bus fault IRQ as well in case one occurred) */ pf_bf_mask = MMU_PAGE_FAULT(as->number); - if (type == KBASE_MMU_FAULT_TYPE_BUS) + if (type == KBASE_MMU_FAULT_TYPE_BUS || + type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED) pf_bf_mask |= MMU_BUS_ERROR(as->number); kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), pf_bf_mask, kctx); @@ -286,7 +306,8 @@ void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as, irq_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), kctx) | MMU_PAGE_FAULT(as->number); - if (type == KBASE_MMU_FAULT_TYPE_BUS) + if (type == KBASE_MMU_FAULT_TYPE_BUS || + type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED) irq_mask |= MMU_BUS_ERROR(as->number); kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), irq_mask, kctx); diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h index a8b3b76f47e1..c02253c6acc3 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -15,18 +15,13 @@ -/** - * @file +/* * Interface file for the direct implementation for MMU hardware access - */ - -/** - * @page mali_kbase_mmu_hw_direct_page Direct MMU hardware interface * - * @section mali_kbase_mmu_hw_direct_intro_sec Introduction + * Direct MMU hardware interface + * * This module provides the interface(s) that are required by the direct * register access implementation of the MMU hardware interface - * @ref mali_kbase_mmu_hw_page . */ #ifndef _MALI_KBASE_MMU_HW_DIRECT_H_ @@ -35,25 +30,13 @@ #include /** - * @addtogroup mali_kbase_mmu_hw - * @{ - */ - -/** - * @addtogroup mali_kbase_mmu_hw_direct Direct register access to MMU - * @{ - */ - -/** @brief Process an MMU interrupt. + * kbase_mmu_interrupt - Process an MMU interrupt. * - * Process the MMU interrupt that was reported by the @ref kbase_device. + * Process the MMU interrupt that was reported by the &kbase_device. * - * @param[in] kbdev kbase context to clear the fault from. 
- * @param[in] irq_stat Value of the MMU_IRQ_STATUS register + * @kbdev: kbase context to clear the fault from. + * @irq_stat: Value of the MMU_IRQ_STATUS register */ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat); -/** @} *//* end group mali_kbase_mmu_hw_direct */ -/** @} *//* end group mali_kbase_mmu_hw */ - #endif /* _MALI_KBASE_MMU_HW_DIRECT_H_ */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c index d18ae86f8dda..0614348e935a 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c @@ -26,7 +26,7 @@ static u64 always_on_get_core_mask(struct kbase_device *kbdev) { - return kbdev->shader_present_bitmap; + return kbdev->gpu_props.props.raw_props.shader_present; } static bool always_on_get_core_active(struct kbase_device *kbdev) diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c index bbf39b0ce03f..9ff7baadec7a 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c @@ -16,8 +16,7 @@ -/** - * @file mali_kbase_pm_hwaccess.c +/* * GPU backend implementation of base kernel power management APIs */ @@ -228,7 +227,8 @@ int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev, kbasep_pm_read_present_cores(kbdev); - kbdev->pm.debug_core_mask = kbdev->shader_present_bitmap; + kbdev->pm.debug_core_mask = + kbdev->gpu_props.props.raw_props.shader_present; /* Pretend the GPU is active to prevent a power policy turning the GPU * cores off */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c index 521958aa1741..0d9ee5879142 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c @@ -15,8 +15,7 @@ -/** - * @file mali_kbase_pm_ca.c +/* * Base kernel core availability APIs */ @@ -31,8 +30,10 @@ static const struct kbase_pm_ca_policy *const policy_list[] = { #endif }; -/** The number of policies available in the system. - * This is derived from the number of functions listed in policy_get_functions. +/** + * POLICY_COUNT - The number of policies available in the system. + * + * This is derived from the number of functions listed in policy_list. 
*/ #define POLICY_COUNT (sizeof(policy_list)/sizeof(*policy_list)) @@ -134,10 +135,12 @@ u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev) /* All cores must be enabled when instrumentation is in use */ if (kbdev->pm.backend.instr_enabled) - return kbdev->shader_present_bitmap & kbdev->pm.debug_core_mask; + return kbdev->gpu_props.props.raw_props.shader_present & + kbdev->pm.debug_core_mask; if (kbdev->pm.backend.ca_current_policy == NULL) - return kbdev->shader_present_bitmap & kbdev->pm.debug_core_mask; + return kbdev->gpu_props.props.raw_props.shader_present & + kbdev->pm.debug_core_mask; return kbdev->pm.backend.ca_current_policy->get_core_mask(kbdev) & kbdev->pm.debug_core_mask; diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.c index f890a64e8bb7..864612d31f9b 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.c @@ -15,8 +15,7 @@ -/** - * @file mali_kbase_pm_ca_fixed.c +/* * A power policy implementing fixed core availability */ @@ -35,7 +34,7 @@ static void fixed_term(struct kbase_device *kbdev) static u64 fixed_get_core_mask(struct kbase_device *kbdev) { - return kbdev->shader_present_bitmap; + return kbdev->gpu_props.props.raw_props.shader_present; } static void fixed_update_core_status(struct kbase_device *kbdev, @@ -47,7 +46,8 @@ static void fixed_update_core_status(struct kbase_device *kbdev, CSTD_UNUSED(cores_transitioning); } -/** The @ref struct kbase_pm_policy structure for the fixed power policy. +/* + * The struct kbase_pm_policy structure for the fixed power policy. * * This is the static structure that defines the fixed power policy's callback * and name. diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c index 3decd0312b6b..487391168e25 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c @@ -17,8 +17,7 @@ -/** - * @file mali_kbase_pm_coarse_demand.c +/* * "Coarse Demand" power management policy */ @@ -30,12 +29,13 @@ static u64 coarse_demand_get_core_mask(struct kbase_device *kbdev) if (kbdev->pm.active_count == 0) return 0; - return kbdev->shader_present_bitmap; + return kbdev->gpu_props.props.raw_props.shader_present; } static bool coarse_demand_get_core_active(struct kbase_device *kbdev) { - if (kbdev->pm.active_count == 0) + if (0 == kbdev->pm.active_count && !(kbdev->shader_needed_bitmap | + kbdev->shader_inuse_bitmap)) return false; return true; @@ -51,7 +51,7 @@ static void coarse_demand_term(struct kbase_device *kbdev) CSTD_UNUSED(kbdev); } -/** The @ref struct kbase_pm_policy structure for the demand power policy. +/* The struct kbase_pm_policy structure for the demand power policy. * * This is the static structure that defines the demand power policy's callback * and name. diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h index dd1e8f710b36..749d305eee9a 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -17,8 +17,7 @@ -/** - * @file mali_kbase_pm_coarse_demand.h +/* * "Coarse Demand" power management policy */ @@ -26,6 +25,7 @@ #define MALI_KBASE_PM_COARSE_DEMAND_H /** + * DOC: * The "Coarse" demand power management policy has the following * characteristics: * - When KBase indicates that the GPU will be powered up, but we don't yet @@ -48,13 +48,14 @@ */ /** - * Private structure for policy instance data. + * struct kbasep_pm_policy_coarse_demand - Private structure for coarse demand + * policy + * + * This contains data that is private to the coarse demand power policy. * - * This contains data that is private to the particular power policy that is - * active. + * @dummy: Dummy member - no state needed */ struct kbasep_pm_policy_coarse_demand { - /** No state needed - just have a dummy variable here */ int dummy; }; diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h index aad6c49b0c30..19e23d8aa69b 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h @@ -15,8 +15,7 @@ -/** - * @file mali_kbase_pm_hwaccess_defs.h +/* * Backend-specific Power Manager definitions */ @@ -40,38 +39,33 @@ struct kbase_device; struct kbase_jd_atom; -/** The types of core in a GPU. +/** + * enum kbase_pm_core_type - The types of core in a GPU. * - * These enumerated values are used in calls to: - * - @ref kbase_pm_get_present_cores - * - @ref kbase_pm_get_active_cores - * - @ref kbase_pm_get_trans_cores - * - @ref kbase_pm_get_ready_cores. + * These enumerated values are used in calls to + * - kbase_pm_get_present_cores() + * - kbase_pm_get_active_cores() + * - kbase_pm_get_trans_cores() + * - kbase_pm_get_ready_cores(). * * They specify which type of core should be acted on. These values are set in - * a manner that allows @ref core_type_to_reg function to be simpler and more + * a manner that allows core_type_to_reg() function to be simpler and more * efficient. + * + * @KBASE_PM_CORE_L2: The L2 cache + * @KBASE_PM_CORE_SHADER: Shader cores + * @KBASE_PM_CORE_TILER: Tiler cores */ enum kbase_pm_core_type { - KBASE_PM_CORE_L2 = L2_PRESENT_LO, /**< The L2 cache */ - KBASE_PM_CORE_SHADER = SHADER_PRESENT_LO, /**< Shader cores */ - KBASE_PM_CORE_TILER = TILER_PRESENT_LO /**< Tiler cores */ + KBASE_PM_CORE_L2 = L2_PRESENT_LO, + KBASE_PM_CORE_SHADER = SHADER_PRESENT_LO, + KBASE_PM_CORE_TILER = TILER_PRESENT_LO }; /** - * struct kbasep_pm_metrics_data - Metrics data collected for use by the power management framework. + * struct kbasep_pm_metrics_data - Metrics data collected for use by the power + * management framework. * - * @vsync_hit: indicates if a framebuffer update occured since the last vsync. - * A framebuffer driver is expected to provide this information by - * checking at each vsync if the framebuffer was updated and calling - * kbase_pm_vsync_callback() if there was a change of status. - * @utilisation: percentage indicating GPU load (0-100). - * The utilisation is the fraction of time the GPU was powered up - * and busy. This is based on the time_busy and time_idle metrics. - * @util_gl_share: percentage of GPU load related to OpenGL jobs (0-100). - * This is based on the busy_gl and time_busy metrics. - * @util_cl_share: percentage of GPU load related to OpenCL jobs (0-100). 
- * This is based on the busy_cl and time_busy metrics. * @time_period_start: time at which busy/idle measurements started * @time_busy: number of ns the GPU was busy executing jobs since the * @time_period_start timestamp. @@ -90,14 +84,9 @@ enum kbase_pm_core_type { * @busy_gl: number of ns the GPU was busy executing GL jobs. Note that * if two GL jobs were active for 400ns, this value would be updated * with 800. - * @active_cl_ctx: number of CL jobs active on the GPU. This is a portion of - * the @nr_in_slots value. - * @active_gl_ctx: number of GL jobs active on the GPU. This is a portion of - * the @nr_in_slots value. - * @nr_in_slots: Total number of jobs currently submitted to the GPU across - * all job slots. Maximum value would be 2*BASE_JM_MAX_NR_SLOTS - * (one in flight and one in the JSn_HEAD_NEXT register for each - * job slot). + * @active_cl_ctx: number of CL jobs active on the GPU. Array is per-device. + * @active_gl_ctx: number of GL jobs active on the GPU. Array is per-slot. As + * GL jobs never run on slot 2 this slot is not recorded. * @lock: spinlock protecting the kbasep_pm_metrics_data structure * @timer: timer to regularly make DVFS decisions based on the power * management metrics. @@ -107,10 +96,6 @@ enum kbase_pm_core_type { * */ struct kbasep_pm_metrics_data { - int vsync_hit; - int utilisation; - int util_gl_share; - int util_cl_share[2]; /* 2 is a max number of core groups we can have */ ktime_t time_period_start; u32 time_busy; u32 time_idle; @@ -120,8 +105,7 @@ struct kbasep_pm_metrics_data { u32 busy_cl[2]; u32 busy_gl; u32 active_cl_ctx[2]; - u32 active_gl_ctx; - u8 nr_in_slots; + u32 active_gl_ctx[2]; /* GL jobs can only run on 2 of the 3 job slots */ spinlock_t lock; #ifdef CONFIG_MALI_MIDGARD_DVFS @@ -133,20 +117,6 @@ struct kbasep_pm_metrics_data { struct kbase_device *kbdev; }; -/** Actions for DVFS. - * - * kbase_pm_get_dvfs_action will return one of these enumerated values to - * describe the action that the DVFS system should take. - */ -enum kbase_pm_dvfs_action { - KBASE_PM_DVFS_NOP, /* < No change in clock frequency is - * requested */ - KBASE_PM_DVFS_CLOCK_UP, /* < The clock frequency should be increased - * if possible */ - KBASE_PM_DVFS_CLOCK_DOWN /* < The clock frequency should be decreased - * if possible */ -}; - union kbase_pm_policy_data { struct kbasep_pm_policy_always_on always_on; struct kbasep_pm_policy_coarse_demand coarse_demand; @@ -165,218 +135,155 @@ union kbase_pm_ca_policy_data { }; /** - * Data stored per device for power management. + * struct kbase_pm_backend_data - Data stored per device for power management. * * This structure contains data for the power management framework. There is one * instance of this structure per device in the system. + * + * @ca_current_policy: The policy that is currently actively controlling core + * availability. + * @pm_current_policy: The policy that is currently actively controlling the + * power state. + * @ca_policy_data: Private data for current CA policy + * @pm_policy_data: Private data for current PM policy + * @ca_in_transition: Flag indicating when core availability policy is + * transitioning cores. The core availability policy must + * set this when a change in core availability is occurring. + * power_change_lock must be held when accessing this. 
+ * @reset_done: Flag when a reset is complete + * @reset_done_wait: Wait queue to wait for changes to @reset_done + * @l2_powered_wait: Wait queue for whether the l2 cache has been powered as + * requested + * @l2_powered: State indicating whether all the l2 caches are powered. + * Non-zero indicates they're *all* powered + * Zero indicates that some (or all) are not powered + * @gpu_cycle_counter_requests: The reference count of active gpu cycle counter + * users + * @gpu_cycle_counter_requests_lock: Lock to protect @gpu_cycle_counter_requests + * @desired_shader_state: A bit mask identifying the shader cores that the + * power policy would like to be on. The current state + * of the cores may be different, but there should be + * transitions in progress that will eventually achieve + * this state (assuming that the policy doesn't change + * its mind in the mean time). + * @powering_on_shader_state: A bit mask indicating which shader cores are + * currently in a power-on transition + * @desired_tiler_state: A bit mask identifying the tiler cores that the power + * policy would like to be on. See @desired_shader_state + * @powering_on_tiler_state: A bit mask indicating which tiler core are + * currently in a power-on transition + * @powering_on_l2_state: A bit mask indicating which l2-caches are currently + * in a power-on transition + * @gpu_in_desired_state: This flag is set if the GPU is powered as requested + * by the desired_xxx_state variables + * @gpu_in_desired_state_wait: Wait queue set when @gpu_in_desired_state != 0 + * @gpu_powered: Set to true when the GPU is powered and register + * accesses are possible, false otherwise + * @instr_enabled: Set to true when instrumentation is enabled, + * false otherwise + * @cg1_disabled: Set if the policy wants to keep the second core group + * powered off + * @driver_ready_for_irqs: Debug state indicating whether sufficient + * initialization of the driver has occurred to handle + * IRQs + * @gpu_powered_lock: Spinlock that must be held when writing @gpu_powered or + * accessing @driver_ready_for_irqs + * @metrics: Structure to hold metrics for the GPU + * @gpu_poweroff_pending: number of poweroff timer ticks until the GPU is + * powered off + * @shader_poweroff_pending_time: number of poweroff timer ticks until shaders + * are powered off + * @gpu_poweroff_timer: Timer for powering off GPU + * @gpu_poweroff_wq: Workqueue to power off GPU on when timer fires + * @gpu_poweroff_work: Workitem used on @gpu_poweroff_wq + * @shader_poweroff_pending: Bit mask of shaders to be powered off on next + * timer callback + * @poweroff_timer_needed: true if the poweroff timer is currently running, + * false otherwise + * @callback_power_on: Callback when the GPU needs to be turned on. See + * &struct kbase_pm_callback_conf + * @callback_power_off: Callback when the GPU may be turned off. See + * &struct kbase_pm_callback_conf + * @callback_power_suspend: Callback when a suspend occurs and the GPU needs to + * be turned off. See &struct kbase_pm_callback_conf + * @callback_power_resume: Callback when a resume occurs and the GPU needs to + * be turned on. See &struct kbase_pm_callback_conf + * @callback_power_runtime_on: Callback when the GPU needs to be turned on. See + * &struct kbase_pm_callback_conf + * @callback_power_runtime_off: Callback when the GPU may be turned off. See + * &struct kbase_pm_callback_conf + * @callback_cci_snoop_ctrl: Callback when the GPU L2 power may transition. 
+ * If enable is set then snoops should be enabled + * otherwise snoops should be disabled + * + * Note: + * During an IRQ, @ca_current_policy or @pm_current_policy can be NULL when the + * policy is being changed with kbase_pm_ca_set_policy() or + * kbase_pm_set_policy(). The change is protected under + * kbase_device.pm.power_change_lock. Direct access to this + * from IRQ context must therefore check for NULL. If NULL, then + * kbase_pm_ca_set_policy() or kbase_pm_set_policy() will re-issue the policy + * functions that would have been done under IRQ. */ struct kbase_pm_backend_data { - /** - * The policy that is currently actively controlling core availability. - * - * @note: During an IRQ, this can be NULL when the policy is being - * changed with kbase_pm_ca_set_policy(). The change is protected under - * kbase_device::pm::power_change_lock. Direct access to this from IRQ - * context must therefore check for NULL. If NULL, then - * kbase_pm_ca_set_policy() will re-issue the policy functions that - * would've been done under IRQ. - */ const struct kbase_pm_ca_policy *ca_current_policy; - - /** - * The policy that is currently actively controlling the power state. - * - * @note: During an IRQ, this can be NULL when the policy is being - * changed with kbase_pm_set_policy(). The change is protected under - * kbase_device::pm::power_change_lock. Direct access to this from IRQ - * context must therefore check for NULL. If NULL, then - * kbase_pm_set_policy() will re-issue the policy functions that - * would've been done under IRQ. - */ const struct kbase_pm_policy *pm_current_policy; - - /** Private data for current CA policy */ union kbase_pm_ca_policy_data ca_policy_data; - - /** Private data for current PM policy */ union kbase_pm_policy_data pm_policy_data; - - /** - * Flag indicating when core availability policy is transitioning cores. - * The core availability policy must set this when a change in core - * availability is occuring. - * - * power_change_lock must be held when accessing this. */ bool ca_in_transition; - - /** Waiting for reset and a queue to wait for changes */ bool reset_done; wait_queue_head_t reset_done_wait; - - /** Wait queue for whether the l2 cache has been powered as requested */ wait_queue_head_t l2_powered_wait; - /** State indicating whether all the l2 caches are powered. - * Non-zero indicates they're *all* powered - * Zero indicates that some (or all) are not powered */ int l2_powered; - - /** The reference count of active gpu cycle counter users */ int gpu_cycle_counter_requests; - /** Lock to protect gpu_cycle_counter_requests */ spinlock_t gpu_cycle_counter_requests_lock; - /** - * A bit mask identifying the shader cores that the power policy would - * like to be on. The current state of the cores may be different, but - * there should be transitions in progress that will eventually achieve - * this state (assuming that the policy doesn't change its mind in the - * mean time). - */ u64 desired_shader_state; - /** - * A bit mask indicating which shader cores are currently in a power-on - * transition - */ u64 powering_on_shader_state; - /** - * A bit mask identifying the tiler cores that the power policy would - * like to be on. 
@see kbase_pm_device_data:desired_shader_state - */ u64 desired_tiler_state; - /** - * A bit mask indicating which tiler core are currently in a power-on - * transition - */ u64 powering_on_tiler_state; - - /** - * A bit mask indicating which l2-caches are currently in a power-on - * transition - */ u64 powering_on_l2_state; - /** - * This flag is set if the GPU is powered as requested by the - * desired_xxx_state variables - */ bool gpu_in_desired_state; - /* Wait queue set when gpu_in_desired_state != 0 */ wait_queue_head_t gpu_in_desired_state_wait; - /** - * Set to true when the GPU is powered and register accesses are - * possible, false otherwise - */ bool gpu_powered; - /** Set to true when instrumentation is enabled, false otherwise */ bool instr_enabled; bool cg1_disabled; #ifdef CONFIG_MALI_DEBUG - /** - * Debug state indicating whether sufficient initialization of the - * driver has occurred to handle IRQs - */ bool driver_ready_for_irqs; #endif /* CONFIG_MALI_DEBUG */ - /** - * Spinlock that must be held when: - * - writing gpu_powered - * - accessing driver_ready_for_irqs (in CONFIG_MALI_DEBUG builds) - */ spinlock_t gpu_powered_lock; - /** Structure to hold metrics for the GPU */ struct kbasep_pm_metrics_data metrics; - /** - * Set to the number of poweroff timer ticks until the GPU is powered - * off - */ int gpu_poweroff_pending; - - /** - * Set to the number of poweroff timer ticks until shaders are powered - * off - */ int shader_poweroff_pending_time; - /** Timer for powering off GPU */ struct hrtimer gpu_poweroff_timer; - struct workqueue_struct *gpu_poweroff_wq; - struct work_struct gpu_poweroff_work; - /** Bit mask of shaders to be powered off on next timer callback */ u64 shader_poweroff_pending; - /** - * Set to true if the poweroff timer is currently running, - * false otherwise - */ bool poweroff_timer_needed; - /** - * Callback when the GPU needs to be turned on. See - * @ref kbase_pm_callback_conf - * - * @param kbdev The kbase device - * - * @return 1 if GPU state was lost, 0 otherwise - */ int (*callback_power_on)(struct kbase_device *kbdev); - - /** - * Callback when the GPU may be turned off. See - * @ref kbase_pm_callback_conf - * - * @param kbdev The kbase device - */ void (*callback_power_off)(struct kbase_device *kbdev); - - /** - * Callback when a suspend occurs and the GPU needs to be turned off. - * See @ref kbase_pm_callback_conf - * - * @param kbdev The kbase device - */ void (*callback_power_suspend)(struct kbase_device *kbdev); - - /** - * Callback when a resume occurs and the GPU needs to be turned on. - * See @ref kbase_pm_callback_conf - * - * @param kbdev The kbase device - */ void (*callback_power_resume)(struct kbase_device *kbdev); - - /** - * Callback when the GPU needs to be turned on. See - * @ref kbase_pm_callback_conf - * - * @param kbdev The kbase device - * - * @return 1 if GPU state was lost, 0 otherwise - */ int (*callback_power_runtime_on)(struct kbase_device *kbdev); - - /** - * Callback when the GPU may be turned off. See - * @ref kbase_pm_callback_conf - * - * @param kbdev The kbase device - */ void (*callback_power_runtime_off)(struct kbase_device *kbdev); }; -/** List of policy IDs */ +/* List of policy IDs */ enum kbase_pm_policy_id { KBASE_PM_POLICY_ID_DEMAND = 1, KBASE_PM_POLICY_ID_ALWAYS_ON, @@ -390,13 +297,24 @@ enum kbase_pm_policy_id { typedef u32 kbase_pm_policy_flags; /** - * Power policy structure. + * struct kbase_pm_policy - Power policy structure. 
* * Each power policy exposes a (static) instance of this structure which * contains function pointers to the policy's methods. + * + * @name: The name of this policy + * @init: Function called when the policy is selected + * @term: Function called when the policy is unselected + * @get_core_mask: Function called to get the current shader core mask + * @get_core_active: Function called to get the current overall GPU power + * state + * @flags: Field indicating flags for this policy + * @id: Field indicating an ID for this policy. This is not + * necessarily the same as its index in the list returned + * by kbase_pm_list_policies(). + * It is used purely for debugging. */ struct kbase_pm_policy { - /** The name of this policy */ char *name; /** @@ -408,16 +326,16 @@ struct kbase_pm_policy { * It is undefined what state the cores are in when the function is * called. * - * @param kbdev The kbase device structure for the device (must be a - * valid pointer) + * @kbdev: The kbase device structure for the device (must be a + * valid pointer) */ void (*init)(struct kbase_device *kbdev); /** * Function called when the policy is unselected. * - * @param kbdev The kbase device structure for the device (must be a - * valid pointer) + * @kbdev: The kbase device structure for the device (must be a + * valid pointer) */ void (*term)(struct kbase_device *kbdev); @@ -427,10 +345,10 @@ struct kbase_pm_policy { * The returned mask should meet or exceed (kbdev->shader_needed_bitmap * | kbdev->shader_inuse_bitmap). * - * @param kbdev The kbase device structure for the device (must be a - * valid pointer) + * @kbdev: The kbase device structure for the device (must be a + * valid pointer) * - * @return The mask of shader cores to be powered + * Return: The mask of shader cores to be powered */ u64 (*get_core_mask)(struct kbase_device *kbdev); @@ -443,21 +361,14 @@ struct kbase_pm_policy { * to 0 then there are no active contexts and the GPU could be powered * off if desired. * - * @param kbdev The kbase device structure for the device (must be a - * valid pointer) + * @kbdev: The kbase device structure for the device (must be a + * valid pointer) * - * @return true if the GPU should be powered, false otherwise + * Return: true if the GPU should be powered, false otherwise */ bool (*get_core_active)(struct kbase_device *kbdev); - /** Field indicating flags for this policy */ kbase_pm_policy_flags flags; - - /** - * Field indicating an ID for this policy. This is not necessarily the - * same as its index in the list returned by kbase_pm_list_policies(). - * It is used purely for debugging. - */ enum kbase_pm_policy_id id; }; @@ -470,13 +381,24 @@ enum kbase_pm_ca_policy_id { typedef u32 kbase_pm_ca_policy_flags; /** - * Core availability policy structure. + * struct kbase_pm_ca_policy - Core availability policy structure. * * Each core availability policy exposes a (static) instance of this structure * which contains function pointers to the policy's methods. + * + * @name: The name of this policy + * @init: Function called when the policy is selected + * @term: Function called when the policy is unselected + * @get_core_mask: Function called to get the current shader core + * availability mask + * @update_core_status: Function called to update the current core status + * @flags: Field indicating flags for this policy + * @id: Field indicating an ID for this policy. This is not + * necessarily the same as its index in the list returned + * by kbase_pm_list_policies(). + * It is used purely for debugging. 
*/ struct kbase_pm_ca_policy { - /** The name of this policy */ char *name; /** @@ -488,33 +410,33 @@ struct kbase_pm_ca_policy { * It is undefined what state the cores are in when the function is * called. * - * @param kbdev The kbase device structure for the device (must be a - * valid pointer) + * @kbdev The kbase device structure for the device (must be a + * valid pointer) */ void (*init)(struct kbase_device *kbdev); /** * Function called when the policy is unselected. * - * @param kbdev The kbase device structure for the device (must be a - * valid pointer) + * @kbdev The kbase device structure for the device (must be a + * valid pointer) */ void (*term)(struct kbase_device *kbdev); /** * Function called to get the current shader core availability mask * - * When a change in core availability is occuring, the policy must set + * When a change in core availability is occurring, the policy must set * kbdev->pm.ca_in_transition to true. This is to indicate that * reporting changes in power state cannot be optimized out, even if * kbdev->pm.desired_shader_state remains unchanged. This must be done * by any functions internal to the Core Availability Policy that change * the return value of kbase_pm_ca_policy::get_core_mask. * - * @param kbdev The kbase device structure for the device (must be a + * @kbdev The kbase device structure for the device (must be a * valid pointer) * - * @return The current core availability mask + * Return: The current core availability mask */ u64 (*get_core_mask)(struct kbase_device *kbdev); @@ -531,17 +453,16 @@ struct kbase_pm_ca_policy { * changes in power state can once again be optimized out when * kbdev->pm.desired_shader_state is unchanged. * - * @param kbdev The kbase device structure for the device - * (must be a valid pointer) - * @param cores_ready The mask of cores currently powered and - * ready to run jobs - * @param cores_transitioning The mask of cores currently transitioning - * power state + * @kbdev: The kbase device structure for the device + * (must be a valid pointer) + * @cores_ready: The mask of cores currently powered and + * ready to run jobs + * @cores_transitioning: The mask of cores currently transitioning + * power state */ void (*update_core_status)(struct kbase_device *kbdev, u64 cores_ready, u64 cores_transitioning); - /** Field indicating flags for this policy */ kbase_pm_ca_policy_flags flags; /** diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c index 4e5b79c9c448..9dac2303bd00 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c @@ -17,8 +17,7 @@ -/** - * @file mali_kbase_pm_demand.c +/* * A simple demand based power management policy */ @@ -37,7 +36,8 @@ static u64 demand_get_core_mask(struct kbase_device *kbdev) static bool demand_get_core_active(struct kbase_device *kbdev) { - if (0 == kbdev->pm.active_count) + if (0 == kbdev->pm.active_count && !(kbdev->shader_needed_bitmap | + kbdev->shader_inuse_bitmap)) return false; return true; @@ -53,8 +53,8 @@ static void demand_term(struct kbase_device *kbdev) CSTD_UNUSED(kbdev); } -/** - * The @ref struct kbase_pm_policy structure for the demand power policy. +/* + * The struct kbase_pm_policy structure for the demand power policy. * * This is the static structure that defines the demand power policy's callback * and name. 
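The demand-policy hunks above reduce each built-in policy to a static struct kbase_pm_policy instance plus four small callbacks. For reference, a policy written against the interface documented in mali_kbase_pm_defs.h would look roughly like the sketch below. This is an illustrative sketch only, not code carried by this patch: the example_* names and the "example" policy string are hypothetical, the includes mirror those used by the in-tree policies, and the id field reuses KBASE_PM_POLICY_ID_DEMAND purely as a placeholder.

#include <mali_kbase.h>
#include <mali_kbase_pm.h>

/* Called when the policy is selected; this sketch keeps no per-policy state */
static void example_init(struct kbase_device *kbdev)
{
	CSTD_UNUSED(kbdev);
}

/* Called when the policy is unselected */
static void example_term(struct kbase_device *kbdev)
{
	CSTD_UNUSED(kbdev);
}

/* The returned mask must meet or exceed
 * (kbdev->shader_needed_bitmap | kbdev->shader_inuse_bitmap),
 * as required by the get_core_mask kerneldoc above */
static u64 example_get_core_mask(struct kbase_device *kbdev)
{
	return kbdev->shader_needed_bitmap | kbdev->shader_inuse_bitmap;
}

/* Keep the GPU powered while any context is active or any shader core is
 * needed or in use, matching the checks the demand policies above perform */
static bool example_get_core_active(struct kbase_device *kbdev)
{
	if (kbdev->pm.active_count == 0 && !(kbdev->shader_needed_bitmap |
			kbdev->shader_inuse_bitmap))
		return false;

	return true;
}

static const struct kbase_pm_policy example_policy_ops = {
	.name = "example",
	.init = example_init,
	.term = example_term,
	.get_core_mask = example_get_core_mask,
	.get_core_active = example_get_core_active,
	.flags = 0u,
	.id = KBASE_PM_POLICY_ID_DEMAND,	/* placeholder id for this sketch */
};

Such a structure only takes effect once it is added to the policy_list table from which POLICY_COUNT (first hunk of this excerpt) is computed; kbase_pm_set_policy() and kbase_pm_list_policies() then operate on that table.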
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h index 8b7a17ab9843..c0c84b6e9189 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -17,8 +17,7 @@ -/** - * @file mali_kbase_pm_demand.h +/* * A simple demand based power management policy */ @@ -26,32 +25,37 @@ #define MALI_KBASE_PM_DEMAND_H /** + * DOC: Demand power management policy + * * The demand power management policy has the following characteristics: * - When KBase indicates that the GPU will be powered up, but we don't yet * know which Job Chains are to be run: * - The Shader Cores are not powered up + * * - When KBase indicates that a set of Shader Cores are needed to submit the * currently queued Job Chains: * - Only those Shader Cores are powered up + * * - When KBase indicates that the GPU need not be powered: * - The Shader Cores are powered off, and the GPU itself is powered off too. * - * @note: + * Note: * - KBase indicates the GPU will be powered up when it has a User Process that * has just started to submit Job Chains. + * * - KBase indicates the GPU need not be powered when all the Job Chains from * User Processes have finished, and it is waiting for a User Process to * submit some more Job Chains. */ /** - * Private structure for policy instance data. + * struct kbasep_pm_policy_demand - Private structure for policy instance data + * + * @dummy: No state is needed, a dummy variable * - * This contains data that is private to the particular power policy that is - * active. + * This contains data that is private to the demand power policy. */ struct kbasep_pm_policy_demand { - /** No state needed - just have a dummy variable here */ int dummy; }; diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c index 1d72fd60143b..d325c0a7e646 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c @@ -17,8 +17,7 @@ -/** - * @file mali_kbase_pm_driver.c +/* * Base kernel Power Management hardware control */ @@ -35,23 +34,36 @@ #include #include #include +#include #include #include #include #include +#include + #if MALI_MOCK_TEST #define MOCKABLE(function) function##_original #else #define MOCKABLE(function) function #endif /* MALI_MOCK_TEST */ +/* Special value to indicate that the JM_CONFIG reg isn't currently used. */ +#define KBASE_JM_CONFIG_UNUSED (1<<31) + /** - * Actions that can be performed on a core. + * enum kbasep_pm_action - Actions that can be performed on a core. * * This enumeration is private to the file. Its values are set to allow - * @ref core_type_to_reg function, which decodes this enumeration, to be simpler + * core_type_to_reg() function, which decodes this enumeration, to be simpler * and more efficient. 
+ * + * @ACTION_PRESENT: The cores that are present + * @ACTION_READY: The cores that are ready + * @ACTION_PWRON: Power on the cores specified + * @ACTION_PWROFF: Power off the cores specified + * @ACTION_PWRTRANS: The cores that are transitioning + * @ACTION_PWRACTIVE: The cores that are active */ enum kbasep_pm_action { ACTION_PRESENT = 0, @@ -68,18 +80,18 @@ static u64 kbase_pm_get_state( enum kbasep_pm_action action); /** - * Decode a core type and action to a register. + * core_type_to_reg - Decode a core type and action to a register. * - * Given a core type (defined by @ref kbase_pm_core_type) and an action (defined - * by @ref kbasep_pm_action) this function will return the register offset that - * will perform the action on the core type. The register returned is the \c _LO - * register and an offset must be applied to use the \c _HI register. + * Given a core type (defined by kbase_pm_core_type) and an action (defined + * by kbasep_pm_action) this function will return the register offset that + * will perform the action on the core type. The register returned is the _LO + * register and an offset must be applied to use the _HI register. * - * @param core_type The type of core - * @param action The type of action + * @core_type: The type of core + * @action: The type of action * - * @return The register offset of the \c _LO register that performs an action of - * type \c action on a core of type \c core_type. + * Return: The register offset of the _LO register that performs an action of + * type @action on a core of type @core_type. */ static u32 core_type_to_reg(enum kbase_pm_core_type core_type, enum kbasep_pm_action action) @@ -88,16 +100,17 @@ static u32 core_type_to_reg(enum kbase_pm_core_type core_type, } -/** Invokes an action on a core set +/** + * kbase_pm_invoke - Invokes an action on a core set * - * This function performs the action given by \c action on a set of cores of a - * type given by \c core_type. It is a static function used by - * @ref kbase_pm_transition_core_type + * This function performs the action given by @action on a set of cores of a + * type given by @core_type. It is a static function used by + * kbase_pm_transition_core_type() * - * @param kbdev The kbase device structure of the device - * @param core_type The type of core that the action should be performed on - * @param cores A bit mask of cores to perform the action on (low 32 bits) - * @param action The action to perform on the cores + * @kbdev: The kbase device structure of the device + * @core_type: The type of core that the action should be performed on + * @cores: A bit mask of cores to perform the action on (low 32 bits) + * @action: The action to perform on the cores */ static void kbase_pm_invoke(struct kbase_device *kbdev, enum kbase_pm_core_type core_type, @@ -178,18 +191,18 @@ static void kbase_pm_invoke(struct kbase_device *kbdev, } /** - * Get information about a core set + * kbase_pm_get_state - Get information about a core set * - * This function gets information (chosen by \c action) about a set of cores of - * a type given by \c core_type. It is a static function used by @ref - * kbase_pm_get_present_cores, @ref kbase_pm_get_active_cores, @ref - * kbase_pm_get_trans_cores and @ref kbase_pm_get_ready_cores. + * This function gets information (chosen by @action) about a set of cores of + * a type given by @core_type. It is a static function used by + * kbase_pm_get_present_cores(), kbase_pm_get_active_cores(), + * kbase_pm_get_trans_cores() and kbase_pm_get_ready_cores(). 
* - * @param kbdev The kbase device structure of the device - * @param core_type The type of core that the should be queried - * @param action The property of the cores to query + * @kbdev: The kbase device structure of the device + * @core_type: The type of core that the should be queried + * @action: The property of the cores to query * - * @return A bit mask specifying the state of the cores + * Return: A bit mask specifying the state of the cores */ static u64 kbase_pm_get_state(struct kbase_device *kbdev, enum kbase_pm_core_type core_type, @@ -210,12 +223,6 @@ static u64 kbase_pm_get_state(struct kbase_device *kbdev, void kbasep_pm_read_present_cores(struct kbase_device *kbdev) { - kbdev->shader_present_bitmap = - kbase_pm_get_state(kbdev, KBASE_PM_CORE_SHADER, ACTION_PRESENT); - kbdev->tiler_present_bitmap = - kbase_pm_get_state(kbdev, KBASE_PM_CORE_TILER, ACTION_PRESENT); - kbdev->l2_present_bitmap = - kbase_pm_get_state(kbdev, KBASE_PM_CORE_L2, ACTION_PRESENT); kbdev->shader_inuse_bitmap = 0; kbdev->shader_needed_bitmap = 0; kbdev->shader_available_bitmap = 0; @@ -231,7 +238,12 @@ void kbasep_pm_read_present_cores(struct kbase_device *kbdev) KBASE_EXPORT_TEST_API(kbasep_pm_read_present_cores); /** - * Get the cores that are present + * kbase_pm_get_present_cores - Get the cores that are present + * + * @kbdev: Kbase device + * @type: The type of cores to query + * + * Return: Bitmask of the cores that are present */ u64 kbase_pm_get_present_cores(struct kbase_device *kbdev, enum kbase_pm_core_type type) @@ -240,11 +252,11 @@ u64 kbase_pm_get_present_cores(struct kbase_device *kbdev, switch (type) { case KBASE_PM_CORE_L2: - return kbdev->l2_present_bitmap; + return kbdev->gpu_props.props.raw_props.l2_present; case KBASE_PM_CORE_SHADER: - return kbdev->shader_present_bitmap; + return kbdev->gpu_props.props.raw_props.shader_present; case KBASE_PM_CORE_TILER: - return kbdev->tiler_present_bitmap; + return kbdev->gpu_props.props.raw_props.tiler_present; } KBASE_DEBUG_ASSERT(0); return 0; @@ -253,7 +265,13 @@ u64 kbase_pm_get_present_cores(struct kbase_device *kbdev, KBASE_EXPORT_TEST_API(kbase_pm_get_present_cores); /** - * Get the cores that are "active" (busy processing work) + * kbase_pm_get_active_cores - Get the cores that are "active" + * (busy processing work) + * + * @kbdev: Kbase device + * @type: The type of cores to query + * + * Return: Bitmask of cores that are active */ u64 kbase_pm_get_active_cores(struct kbase_device *kbdev, enum kbase_pm_core_type type) @@ -264,7 +282,13 @@ u64 kbase_pm_get_active_cores(struct kbase_device *kbdev, KBASE_EXPORT_TEST_API(kbase_pm_get_active_cores); /** - * Get the cores that are transitioning between power states + * kbase_pm_get_trans_cores - Get the cores that are transitioning between + * power states + * + * @kbdev: Kbase device + * @type: The type of cores to query + * + * Return: Bitmask of cores that are transitioning */ u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev, enum kbase_pm_core_type type) @@ -275,7 +299,12 @@ u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev, KBASE_EXPORT_TEST_API(kbase_pm_get_trans_cores); /** - * Get the cores that are powered on + * kbase_pm_get_ready_cores - Get the cores that are powered on + * + * @kbdev: Kbase device + * @type: The type of cores to query + * + * Return: Bitmask of cores that are ready (powered on) */ u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev, enum kbase_pm_core_type type) @@ -307,7 +336,8 @@ u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev, 
KBASE_EXPORT_TEST_API(kbase_pm_get_ready_cores); /** - * Perform power transitions for a particular core type. + * kbase_pm_transition_core_type - Perform power transitions for a particular + * core type. * * This function will perform any available power transitions to make the actual * hardware state closer to the desired state. If a core is currently @@ -316,19 +346,19 @@ KBASE_EXPORT_TEST_API(kbase_pm_get_ready_cores); * hardware are ignored if they are specified in the desired_state bitmask, * however the return value will always be 0 in this case. * - * @param kbdev The kbase device - * @param type The core type to perform transitions for - * @param desired_state A bit mask of the desired state of the cores - * @param in_use A bit mask of the cores that are currently running - * jobs. These cores have to be kept powered up because - * there are jobs running (or about to run) on them. - * @param[out] available Receives a bit mask of the cores that the job - * scheduler can use to submit jobs to. May be NULL if - * this is not needed. - * @param[in,out] powering_on Bit mask to update with cores that are - * transitioning to a power-on state. + * @kbdev: The kbase device + * @type: The core type to perform transitions for + * @desired_state: A bit mask of the desired state of the cores + * @in_use: A bit mask of the cores that are currently running + * jobs. These cores have to be kept powered up because + * there are jobs running (or about to run) on them. + * @available: Receives a bit mask of the cores that the job + * scheduler can use to submit jobs to. May be NULL if + * this is not needed. + * @powering_on: Bit mask to update with cores that are + * transitioning to a power-on state. * - * @return true if the desired state has been reached, false otherwise + * Return: true if the desired state has been reached, false otherwise */ static bool kbase_pm_transition_core_type(struct kbase_device *kbdev, enum kbase_pm_core_type type, @@ -430,18 +460,19 @@ static bool kbase_pm_transition_core_type(struct kbase_device *kbdev, KBASE_EXPORT_TEST_API(kbase_pm_transition_core_type); /** - * Determine which caches should be on for a particular core state. + * get_desired_cache_status - Determine which caches should be on for a + * particular core state * * This function takes a bit mask of the present caches and the cores (or * caches) that are attached to the caches that will be powered. It then * computes which caches should be turned on to allow the cores requested to be * powered up. * - * @param present The bit mask of present caches - * @param cores_powered A bit mask of cores (or L2 caches) that are desired to - * be powered + * @present: The bit mask of present caches + * @cores_powered: A bit mask of cores (or L2 caches) that are desired to + * be powered * - * @return A bit mask of the caches that should be turned on + * Return: A bit mask of the caches that should be turned on */ static u64 get_desired_cache_status(u64 present, u64 cores_powered) { @@ -514,17 +545,18 @@ MOCKABLE(kbase_pm_check_transitions_nolock) (struct kbase_device *kbdev) /* If there are l2 cache users registered, keep all l2s powered even if * all other cores are off. 
*/ if (kbdev->l2_users_count > 0) - cores_powered |= kbdev->l2_present_bitmap; + cores_powered |= kbdev->gpu_props.props.raw_props.l2_present; - desired_l2_state = get_desired_cache_status(kbdev->l2_present_bitmap, - cores_powered); + desired_l2_state = get_desired_cache_status( + kbdev->gpu_props.props.raw_props.l2_present, + cores_powered); /* If any l2 cache is on, then enable l2 #0, for use by job manager */ if (0 != desired_l2_state) { desired_l2_state |= 1; /* Also enable tiler if l2 cache is powered */ kbdev->pm.backend.desired_tiler_state = - kbdev->tiler_present_bitmap; + kbdev->gpu_props.props.raw_props.tiler_present; } else { kbdev->pm.backend.desired_tiler_state = 0; } @@ -573,8 +605,9 @@ MOCKABLE(kbase_pm_check_transitions_nolock) (struct kbase_device *kbdev) kbdev->tiler_available_bitmap = tiler_available_bitmap; - } else if ((l2_available_bitmap & kbdev->tiler_present_bitmap) != - kbdev->tiler_present_bitmap) { + } else if ((l2_available_bitmap & + kbdev->gpu_props.props.raw_props.tiler_present) != + kbdev->gpu_props.props.raw_props.tiler_present) { tiler_available_bitmap = 0; if (kbdev->tiler_available_bitmap != tiler_available_bitmap) @@ -701,10 +734,18 @@ MOCKABLE(kbase_pm_check_transitions_nolock) (struct kbase_device *kbdev) } KBASE_EXPORT_TEST_API(kbase_pm_check_transitions_nolock); +/* Timeout for kbase_pm_check_transitions_sync when wait_event_killable has + * aborted due to a fatal signal. If the time spent waiting has exceeded this + * threshold then there is most likely a hardware issue. */ +#define PM_TIMEOUT (5*HZ) /* 5s */ + void kbase_pm_check_transitions_sync(struct kbase_device *kbdev) { unsigned long flags; + unsigned long timeout; bool cores_are_available; + int ret; + /* Force the transition to be checked and reported - the cores may be * 'available' (for job submission) but not fully powered up. 
*/ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); @@ -713,13 +754,63 @@ void kbase_pm_check_transitions_sync(struct kbase_device *kbdev) CSTD_UNUSED(cores_are_available); spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); - /* Wait for cores */ - wait_event(kbdev->pm.backend.gpu_in_desired_state_wait, - kbdev->pm.backend.gpu_in_desired_state); + timeout = jiffies + PM_TIMEOUT; - /* Log timelining information that a change in state has completed */ - kbase_timeline_pm_handle_event(kbdev, + /* Wait for cores */ + ret = wait_event_killable(kbdev->pm.backend.gpu_in_desired_state_wait, + kbdev->pm.backend.gpu_in_desired_state); + + if (ret < 0 && time_after(jiffies, timeout)) { + dev_err(kbdev->dev, "Power transition timed out unexpectedly\n"); + dev_err(kbdev->dev, "Desired state :\n"); + dev_err(kbdev->dev, "\tShader=%016llx\n", + kbdev->pm.backend.desired_shader_state); + dev_err(kbdev->dev, "\tTiler =%016llx\n", + kbdev->pm.backend.desired_tiler_state); + dev_err(kbdev->dev, "Current state :\n"); + dev_err(kbdev->dev, "\tShader=%08x%08x\n", + kbase_reg_read(kbdev, + GPU_CONTROL_REG(SHADER_READY_HI), NULL), + kbase_reg_read(kbdev, + GPU_CONTROL_REG(SHADER_READY_LO), + NULL)); + dev_err(kbdev->dev, "\tTiler =%08x%08x\n", + kbase_reg_read(kbdev, + GPU_CONTROL_REG(TILER_READY_HI), NULL), + kbase_reg_read(kbdev, + GPU_CONTROL_REG(TILER_READY_LO), NULL)); + dev_err(kbdev->dev, "\tL2 =%08x%08x\n", + kbase_reg_read(kbdev, + GPU_CONTROL_REG(L2_READY_HI), NULL), + kbase_reg_read(kbdev, + GPU_CONTROL_REG(L2_READY_LO), NULL)); + dev_err(kbdev->dev, "Cores transitioning :\n"); + dev_err(kbdev->dev, "\tShader=%08x%08x\n", + kbase_reg_read(kbdev, GPU_CONTROL_REG( + SHADER_PWRTRANS_HI), NULL), + kbase_reg_read(kbdev, GPU_CONTROL_REG( + SHADER_PWRTRANS_LO), NULL)); + dev_err(kbdev->dev, "\tTiler =%08x%08x\n", + kbase_reg_read(kbdev, GPU_CONTROL_REG( + TILER_PWRTRANS_HI), NULL), + kbase_reg_read(kbdev, GPU_CONTROL_REG( + TILER_PWRTRANS_LO), NULL)); + dev_err(kbdev->dev, "\tL2 =%08x%08x\n", + kbase_reg_read(kbdev, GPU_CONTROL_REG( + L2_PWRTRANS_HI), NULL), + kbase_reg_read(kbdev, GPU_CONTROL_REG( + L2_PWRTRANS_LO), NULL)); +#if KBASE_GPU_RESET_EN + dev_err(kbdev->dev, "Sending reset to GPU - all running jobs will be lost\n"); + if (kbase_prepare_to_reset_gpu(kbdev)) + kbase_reset_gpu(kbdev); +#endif /* KBASE_GPU_RESET_EN */ + } else { + /* Log timelining information that a change in state has + * completed */ + kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED); + } } KBASE_EXPORT_TEST_API(kbase_pm_check_transitions_sync); @@ -908,6 +999,7 @@ bool kbase_pm_clock_off(struct kbase_device *kbdev, bool is_suspend) return false; } + /* The GPU power may be turned off from this point */ kbdev->pm.backend.gpu_powered = false; spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags); @@ -935,7 +1027,11 @@ void kbase_pm_reset_done(struct kbase_device *kbdev) } /** - * Wait for the RESET_COMPLETED IRQ to occur, then reset the waiting state. + * kbase_pm_wait_for_reset - Wait for a reset to happen + * + * Wait for the %RESET_COMPLETED IRQ to occur, then reset the waiting state. 
+ * + * @kbdev: Kbase device */ static void kbase_pm_wait_for_reset(struct kbase_device *kbdev) { @@ -964,6 +1060,14 @@ static enum hrtimer_restart kbasep_reset_timeout(struct hrtimer *timer) static void kbase_pm_hw_issues_detect(struct kbase_device *kbdev) { + struct device_node *np = kbdev->dev->of_node; + u32 jm_values[4]; + const u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; + const u32 prod_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >> + GPU_ID_VERSION_PRODUCT_ID_SHIFT; + const u32 major = (gpu_id & GPU_ID_VERSION_MAJOR) >> + GPU_ID_VERSION_MAJOR_SHIFT; + kbdev->hw_quirks_sc = 0; /* Needed due to MIDBASE-1494: LS_PAUSEBUFFER_DISABLE. See PRLAM-8443. @@ -1005,6 +1109,50 @@ static void kbase_pm_hw_issues_detect(struct kbase_device *kbdev) kbdev->hw_quirks_mmu &= ~(L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES); kbdev->hw_quirks_mmu |= (DEFAULT_AWID_LIMIT & 0x3) << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT; + + + /* Only for T86x/T88x-based products after r2p0 */ + if (prod_id >= 0x860 && prod_id <= 0x880 && major >= 2) { + /* The JM_CONFIG register is specified as follows in the + T86x/T88x Engineering Specification Supplement: + The values are read from device tree in order. + */ +#define TIMESTAMP_OVERRIDE 1 +#define CLOCK_GATE_OVERRIDE (1<<1) +#define JOB_THROTTLE_ENABLE (1<<2) +#define JOB_THROTTLE_LIMIT_SHIFT 3 + + /* 6 bits in the register */ + const u32 jm_max_limit = 0x3F; + + if (of_property_read_u32_array(np, + "jm_config", + &jm_values[0], + ARRAY_SIZE(jm_values))) { + /* Entry not in device tree, use defaults */ + jm_values[0] = 0; + jm_values[1] = 0; + jm_values[2] = 0; + jm_values[3] = jm_max_limit; /* Max value */ + } + + /* Limit throttle limit to 6 bits*/ + if (jm_values[3] > jm_max_limit) { + dev_dbg(kbdev->dev, "JOB_THROTTLE_LIMIT supplied in device tree is too large. Limiting to MAX (63)."); + jm_values[3] = jm_max_limit; + } + + /* Aggregate to one integer. */ + kbdev->hw_quirks_jm = (jm_values[0] ? TIMESTAMP_OVERRIDE : 0); + kbdev->hw_quirks_jm |= (jm_values[1] ? CLOCK_GATE_OVERRIDE : 0); + kbdev->hw_quirks_jm |= (jm_values[2] ? 
JOB_THROTTLE_ENABLE : 0); + kbdev->hw_quirks_jm |= (jm_values[3] << + JOB_THROTTLE_LIMIT_SHIFT); + } else { + kbdev->hw_quirks_jm = KBASE_JM_CONFIG_UNUSED; + } + + } static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev) @@ -1018,6 +1166,12 @@ static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev) kbase_reg_write(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG), kbdev->hw_quirks_mmu, NULL); + + + if (kbdev->hw_quirks_jm != KBASE_JM_CONFIG_UNUSED) + kbase_reg_write(kbdev, GPU_CONTROL_REG(JM_CONFIG), + kbdev->hw_quirks_jm, NULL); + } @@ -1062,6 +1216,9 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) /* Soft reset the GPU */ KBASE_TRACE_ADD(kbdev, CORE_GPU_SOFT_RESET, NULL, NULL, 0u, 0); +#if defined(CONFIG_MALI_MIPE_ENABLED) + kbase_tlstream_jd_gpu_soft_reset(kbdev); +#endif kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_SOFT_RESET, NULL); @@ -1172,19 +1329,20 @@ out: } /** + * kbase_pm_request_gpu_cycle_counter_do_request - Request cycle counters + * * Increase the count of cycle counter users and turn the cycle counters on if * they were previously off * * This function is designed to be called by - * @ref kbase_pm_request_gpu_cycle_counter or - * @ref kbase_pm_request_gpu_cycle_counter_l2_is_on only + * kbase_pm_request_gpu_cycle_counter() or + * kbase_pm_request_gpu_cycle_counter_l2_is_on() only * * When this function is called the l2 cache must be on and the l2 cache users - * count must have been incremented by a call to (@ref - * kbase_pm_request_l2_caches or @ref kbase_pm_request_l2_caches_l2_on) - * - * @param kbdev The kbase device structure of the device + * count must have been incremented by a call to ( + * kbase_pm_request_l2_caches() or kbase_pm_request_l2_caches_l2_on() ) * + * @kbdev: The kbase device structure of the device */ static void kbase_pm_request_gpu_cycle_counter_do_request(struct kbase_device *kbdev) diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h index a4bd11a1c0f6..bcca37d05402 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h @@ -17,8 +17,7 @@ -/** - * @file mali_kbase_pm_hwaccess_internal.h +/* * Power management API definitions used internally by GPU backend */ @@ -32,157 +31,150 @@ /** - * The GPU is idle. + * kbase_pm_dev_idle - The GPU is idle. * * The OS may choose to turn off idle devices * - * @param kbdev The kbase device structure for the device (must be a valid - * pointer) + * @kbdev: The kbase device structure for the device (must be a valid pointer) */ void kbase_pm_dev_idle(struct kbase_device *kbdev); /** - * The GPU is active. + * kbase_pm_dev_activate - The GPU is active. * * The OS should avoid opportunistically turning off the GPU while it is active * - * @param kbdev The kbase device structure for the device (must be a valid - * pointer) + * @kbdev: The kbase device structure for the device (must be a valid pointer) */ void kbase_pm_dev_activate(struct kbase_device *kbdev); /** - * Get details of the cores that are present in the device. + * kbase_pm_get_present_cores - Get details of the cores that are present in + * the device. * * This function can be called by the active power policy to return a bitmask of * the cores (of a specified type) present in the GPU device and also a count of * the number of cores. 
* - * @param kbdev The kbase device structure for the device (must be a valid - * pointer) - * @param type The type of core (see the @ref enum kbase_pm_core_type - * enumeration) + * @kbdev: The kbase device structure for the device (must be a valid + * pointer) + * @type: The type of core (see the enum kbase_pm_core_type enumeration) * - * @return The bit mask of cores present + * Return: The bit mask of cores present */ u64 kbase_pm_get_present_cores(struct kbase_device *kbdev, enum kbase_pm_core_type type); /** - * Get details of the cores that are currently active in the device. + * kbase_pm_get_active_cores - Get details of the cores that are currently + * active in the device. * * This function can be called by the active power policy to return a bitmask of * the cores (of a specified type) that are actively processing work (i.e. * turned on *and* busy). * - * @param kbdev The kbase device structure for the device (must be a valid - * pointer) - * @param type The type of core (see the @ref enum kbase_pm_core_type - * enumeration) + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @type: The type of core (see the enum kbase_pm_core_type enumeration) * - * @return The bit mask of active cores + * Return: The bit mask of active cores */ u64 kbase_pm_get_active_cores(struct kbase_device *kbdev, enum kbase_pm_core_type type); /** - * Get details of the cores that are currently transitioning between power - * states. + * kbase_pm_get_trans_cores - Get details of the cores that are currently + * transitioning between power states. * * This function can be called by the active power policy to return a bitmask of * the cores (of a specified type) that are currently transitioning between * power states. * - * @param kbdev The kbase device structure for the device (must be a valid - * pointer) - * @param type The type of core (see the @ref enum kbase_pm_core_type - * enumeration) + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @type: The type of core (see the enum kbase_pm_core_type enumeration) * - * @return The bit mask of transitioning cores + * Return: The bit mask of transitioning cores */ u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev, enum kbase_pm_core_type type); /** - * Get details of the cores that are currently powered and ready for jobs. + * kbase_pm_get_ready_cores - Get details of the cores that are currently + * powered and ready for jobs. * * This function can be called by the active power policy to return a bitmask of * the cores (of a specified type) that are powered and ready for jobs (they may * or may not be currently executing jobs). * - * @param kbdev The kbase device structure for the device (must be a valid - * pointer) - * @param type The type of core (see the @ref enum kbase_pm_core_type - * enumeration) + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @type: The type of core (see the enum kbase_pm_core_type enumeration) * - * @return The bit mask of ready cores + * Return: The bit mask of ready cores */ u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev, enum kbase_pm_core_type type); /** - * Turn the clock for the device on, and enable device interrupts. + * kbase_pm_clock_on - Turn the clock for the device on, and enable device + * interrupts. * * This function can be used by a power policy to turn the clock for the GPU on. 
* It should be modified during integration to perform the necessary actions to * ensure that the GPU is fully powered and clocked. * - * @param kbdev The kbase device structure for the device (must be a valid - * pointer) - * @param is_resume true if clock on due to resume after suspend, - * false otherwise + * @kbdev: The kbase device structure for the device (must be a valid + * pointer) + * @is_resume: true if clock on due to resume after suspend, false otherwise */ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume); /** - * Disable device interrupts, and turn the clock for the device off. + * kbase_pm_clock_off - Disable device interrupts, and turn the clock for the + * device off. * * This function can be used by a power policy to turn the clock for the GPU * off. It should be modified during integration to perform the necessary * actions to turn the clock off (if this is possible in the integration). * - * @param kbdev The kbase device structure for the device (must be a valid - * pointer) - * @param is_suspend true if clock off due to suspend, false otherwise + * @kbdev: The kbase device structure for the device (must be a valid + * pointer) + * @is_suspend: true if clock off due to suspend, false otherwise * - * @return true if clock was turned off + * Return: true if clock was turned off, or * false if clock can not be turned off due to pending page/bus fault * workers. Caller must flush MMU workqueues and retry */ bool kbase_pm_clock_off(struct kbase_device *kbdev, bool is_suspend); /** - * Enable interrupts on the device. + * kbase_pm_enable_interrupts - Enable interrupts on the device. * * Interrupts are also enabled after a call to kbase_pm_clock_on(). * - * @param kbdev The kbase device structure for the device (must be a valid - * pointer) + * @kbdev: The kbase device structure for the device (must be a valid pointer) */ void kbase_pm_enable_interrupts(struct kbase_device *kbdev); /** - * Enable interrupts on the device, using the provided mask to set MMU_IRQ_MASK. + * kbase_pm_enable_interrupts_mmu_mask - Enable interrupts on the device, using + * the provided mask to set MMU_IRQ_MASK. * * Interrupts are also enabled after a call to kbase_pm_clock_on(). * - * @param kbdev The kbase device structure for the device (must be a valid - * pointer) - * @param mask The mask to use for MMU_IRQ_MASK + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @mask: The mask to use for MMU_IRQ_MASK */ void kbase_pm_enable_interrupts_mmu_mask(struct kbase_device *kbdev, u32 mask); /** - * Disable interrupts on the device. + * kbase_pm_disable_interrupts - Disable interrupts on the device. * * This prevents delivery of Power Management interrupts to the CPU so that * kbase_pm_check_transitions_nolock() will not be called from the IRQ handler - * until @ref kbase_pm_enable_interrupts or kbase_pm_clock_on() is called. + * until kbase_pm_enable_interrupts() or kbase_pm_clock_on() is called. * * Interrupts are also disabled after a call to kbase_pm_clock_off(). * - * @param kbdev The kbase device structure for the device (must be a valid - * pointer) + * @kbdev: The kbase device structure for the device (must be a valid pointer) */ void kbase_pm_disable_interrupts(struct kbase_device *kbdev); @@ -200,20 +192,20 @@ void kbase_pm_disable_interrupts(struct kbase_device *kbdev); int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags); /** - * The GPU has been reset successfully. + * kbase_pm_reset_done - The GPU has been reset successfully. 
* * This function must be called by the GPU interrupt handler when the * RESET_COMPLETED bit is set. It signals to the power management initialization * code that the GPU has been successfully reset. * - * @param kbdev The kbase device structure for the device (must be a valid - * pointer) + * @kbdev: The kbase device structure for the device (must be a valid pointer) */ void kbase_pm_reset_done(struct kbase_device *kbdev); /** - * Check if there are any power transitions to make, and if so start them. + * kbase_pm_check_transitions_nolock - Check if there are any power transitions + * to make, and if so start them. * * This function will check the desired_xx_state members of * struct kbase_pm_device_data and the actual status of the hardware to see if @@ -225,28 +217,29 @@ void kbase_pm_reset_done(struct kbase_device *kbdev); * Management IRQ). * * Note that this still returns true when desired_xx_state has no - * cores. That is: of the no cores desired, none were unavailable. In + * cores. That is: of the no cores desired, none were *un*available. In * this case, the caller may still need to try submitting jobs. This is because * the Core Availability Policy might have taken us to an intermediate state * where no cores are powered, before powering on more cores (e.g. for core * rotation) * - * The caller must hold kbase_device::pm::power_change_lock + * The caller must hold kbase_device.pm.power_change_lock * - * @param kbdev The kbase device structure for the device (must be a valid - * pointer) - * @return non-zero when all desired cores are available. That is, + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Return: non-zero when all desired cores are available. That is, * it's worthwhile for the caller to submit a job. - * @return false otherwise + * false otherwise */ bool kbase_pm_check_transitions_nolock(struct kbase_device *kbdev); /** - * Synchronous and locking variant of kbase_pm_check_transitions_nolock() + * kbase_pm_check_transitions_sync - Synchronous and locking variant of + * kbase_pm_check_transitions_nolock() * * On returning, the desired state at the time of the call will have been met. * - * @note There is nothing to stop the core being switched off by calls to + * There is nothing to stop the core being switched off by calls to * kbase_pm_release_cores() or kbase_pm_unrequest_cores(). Therefore, the * caller must have already made a call to * kbase_pm_request_cores()/kbase_pm_request_cores_sync() previously. @@ -255,212 +248,184 @@ bool kbase_pm_check_transitions_nolock(struct kbase_device *kbdev); * a GPU Reset. * * Unlike kbase_pm_check_transitions_nolock(), the caller must not hold - * kbase_device::pm::power_change_lock, because this function will take that + * kbase_device.pm.power_change_lock, because this function will take that * lock itself. 
* - * @param kbdev The kbase device structure for the device (must be a valid - * pointer) + * @kbdev: The kbase device structure for the device (must be a valid pointer) */ void kbase_pm_check_transitions_sync(struct kbase_device *kbdev); /** - * Variant of kbase_pm_update_cores_state() where the caller must hold - * kbase_device::pm::power_change_lock + * kbase_pm_update_cores_state_nolock - Variant of kbase_pm_update_cores_state() + * where the caller must hold + * kbase_device.pm.power_change_lock * - * @param kbdev The kbase device structure for the device (must be a valid - * pointer) + * @kbdev: The kbase device structure for the device (must be a valid pointer) */ void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev); /** - * Update the desired state of shader cores from the Power Policy, and begin - * any power transitions. + * kbase_pm_update_cores_state - Update the desired state of shader cores from + * the Power Policy, and begin any power + * transitions. * * This function will update the desired_xx_state members of * struct kbase_pm_device_data by calling into the current Power Policy. It will * then begin power transitions to make the hardware acheive the desired shader * core state. * - * @param kbdev The kbase device structure for the device (must be a valid - * pointer) + * @kbdev: The kbase device structure for the device (must be a valid pointer) */ void kbase_pm_update_cores_state(struct kbase_device *kbdev); /** - * Cancel any pending requests to power off the GPU and/or shader cores. + * kbase_pm_cancel_deferred_poweroff - Cancel any pending requests to power off + * the GPU and/or shader cores. * * This should be called by any functions which directly power off the GPU. * - * @param kbdev The kbase device structure for the device (must be a valid - * pointer) + * @kbdev: The kbase device structure for the device (must be a valid pointer) */ void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev); /** - * Read the bitmasks of present cores. + * kbasep_pm_read_present_cores - Read the bitmasks of present cores. * * This information is cached to avoid having to perform register reads whenever * the information is required. * - * @param kbdev The kbase device structure for the device (must be a valid - * pointer) + * @kbdev: The kbase device structure for the device (must be a valid pointer) */ void kbasep_pm_read_present_cores(struct kbase_device *kbdev); /** - * Initialize the metrics gathering framework. + * kbasep_pm_metrics_init - Initialize the metrics gathering framework. * * This must be called before other metric gathering APIs are called. * - * @param kbdev The kbase device structure for the device (must be a valid - * pointer) + * @kbdev: The kbase device structure for the device (must be a valid pointer) * - * @return 0 on success, error code on error + * Return: 0 on success, error code on error */ int kbasep_pm_metrics_init(struct kbase_device *kbdev); /** - * Terminate the metrics gathering framework. + * kbasep_pm_metrics_term - Terminate the metrics gathering framework. * * This must be called when metric gathering is no longer required. It is an * error to call any metrics gathering function (other than - * kbasep_pm_metrics_init) after calling this function. + * kbasep_pm_metrics_init()) after calling this function. 
* - * @param kbdev The kbase device structure for the device (must be a valid - * pointer) + * @kbdev: The kbase device structure for the device (must be a valid pointer) */ void kbasep_pm_metrics_term(struct kbase_device *kbdev); /** - * Function to be called by the frame buffer driver to update the vsync metric. + * kbase_pm_report_vsync - Function to be called by the frame buffer driver to + * update the vsync metric. * * This function should be called by the frame buffer driver to update whether * the system is hitting the vsync target or not. buffer_updated should be true * if the vsync corresponded with a new frame being displayed, otherwise it * should be false. This function does not need to be called every vsync, but - * only when the value of buffer_updated differs from a previous call. + * only when the value of @buffer_updated differs from a previous call. * - * @param kbdev The kbase device structure for the device (must be a - * valid pointer) - * @param buffer_updated True if the buffer has been updated on this VSync, - * false otherwise + * @kbdev: The kbase device structure for the device (must be a + * valid pointer) + * @buffer_updated: True if the buffer has been updated on this VSync, + * false otherwise */ void kbase_pm_report_vsync(struct kbase_device *kbdev, int buffer_updated); /** - * Configure the frame buffer device to set the vsync callback. - * - * This function should do whatever is necessary for this integration to ensure - * that kbase_pm_report_vsync is called appropriately. + * kbase_pm_get_dvfs_action - Determine whether the DVFS system should change + * the clock speed of the GPU. * - * This function will need porting as part of the integration for a device. - * - * @param kbdev The kbase device structure for the device (must be a valid - * pointer) - */ -void kbase_pm_register_vsync_callback(struct kbase_device *kbdev); - -/** - * Free any resources that kbase_pm_register_vsync_callback allocated. - * - * This function should perform any cleanup required from the call to - * kbase_pm_register_vsync_callback. No call backs should occur after this - * function has returned. - * - * This function will need porting as part of the integration for a device. - * - * @param kbdev The kbase device structure for the device (must be a valid - * pointer) - */ -void kbase_pm_unregister_vsync_callback(struct kbase_device *kbdev); - -/** - * Determine whether the DVFS system should change the clock speed of the GPU. + * @kbdev: The kbase device structure for the device (must be a valid pointer) * * This function should be called regularly by the DVFS system to check whether - * the clock speed of the GPU needs updating. It will return one of three - * enumerated values of kbase_pm_dvfs_action: - * - * @param kbdev The kbase device structure for the device - * (must be a valid pointer) - * @retval KBASE_PM_DVFS_NOP The clock does not need changing - * @retval KBASE_PM_DVFS_CLOCK_UP The clock frequency should be increased if - * possible. - * @retval KBASE_PM_DVFS_CLOCK_DOWN The clock frequency should be decreased if - * possible. + * the clock speed of the GPU needs updating. 
*/ -enum kbase_pm_dvfs_action kbase_pm_get_dvfs_action(struct kbase_device *kbdev); +void kbase_pm_get_dvfs_action(struct kbase_device *kbdev); /** - * Mark that the GPU cycle counter is needed, if the caller is the first caller - * then the GPU cycle counters will be enabled along with the l2 cache + * kbase_pm_request_gpu_cycle_counter - Mark that the GPU cycle counter is + * needed + * + * If the caller is the first caller then the GPU cycle counters will be enabled + * along with the l2 cache * * The GPU must be powered when calling this function (i.e. - * @ref kbase_pm_context_active must have been called). + * kbase_pm_context_active() must have been called). * - * @param kbdev The kbase device structure for the device (must be a valid - * pointer) + * @kbdev: The kbase device structure for the device (must be a valid pointer) */ void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev); /** + * kbase_pm_request_gpu_cycle_counter_l2_is_on - Mark GPU cycle counter is + * needed (l2 cache already on) + * * This is a version of the above function - * (@ref kbase_pm_request_gpu_cycle_counter) suitable for being called when the + * (kbase_pm_request_gpu_cycle_counter()) suitable for being called when the * l2 cache is known to be on and assured to be on until the subsequent call of - * kbase_pm_release_gpu_cycle_counter such as when a job is submitted. It does + * kbase_pm_release_gpu_cycle_counter() such as when a job is submitted. It does * not sleep and can be called from atomic functions. * * The GPU must be powered when calling this function (i.e. - * @ref kbase_pm_context_active must have been called) and the l2 cache must be + * kbase_pm_context_active() must have been called) and the l2 cache must be * powered on. * - * @param kbdev The kbase device structure for the device (must be a valid - * pointer) + * @kbdev: The kbase device structure for the device (must be a valid pointer) */ void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev); /** - * Mark that the GPU cycle counter is no longer in use, if the caller is the + * kbase_pm_release_gpu_cycle_counter - Mark that the GPU cycle counter is no + * longer in use + * + * If the caller is the * last caller then the GPU cycle counters will be disabled. A request must have * been made before a call to this. * - * @param kbdev The kbase device structure for the device (must be a valid - * pointer) + * @kbdev: The kbase device structure for the device (must be a valid pointer) */ void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev); /** + * kbase_pm_register_access_enable - Enable access to GPU registers + * * Enables access to the GPU registers before power management has powered up * the GPU with kbase_pm_powerup(). * - * Access to registers should be done using kbase_os_reg_read/write() at this - * stage, not kbase_reg_read/write(). + * Access to registers should be done using kbase_os_reg_read()/write() at this + * stage, not kbase_reg_read()/write(). * * This results in the power management callbacks provided in the driver * configuration to get called to turn on power and/or clocks to the GPU. See - * @ref kbase_pm_callback_conf. + * kbase_pm_callback_conf. 
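/*
 * Illustrative sketch (not part of this patch): balancing the cycle counter
 * request/release calls documented above. The wrapper name is hypothetical;
 * the GPU is assumed to already be powered, i.e. kbase_pm_context_active()
 * has been called, as the kernel-doc requires.
 */
static void example_with_cycle_counter(struct kbase_device *kbdev)
{
	kbase_pm_request_gpu_cycle_counter(kbdev);

	/* ... GPU cycle counts may be sampled here ... */

	/* Every request must be balanced by a release */
	kbase_pm_release_gpu_cycle_counter(kbdev);
}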
* * This should only be used before power management is powered up with * kbase_pm_powerup() * - * @param kbdev The kbase device structure for the device (must be a valid - * pointer) + * @kbdev: The kbase device structure for the device (must be a valid pointer) */ void kbase_pm_register_access_enable(struct kbase_device *kbdev); /** + * kbase_pm_register_access_disable - Disable early register access + * * Disables access to the GPU registers enabled earlier by a call to * kbase_pm_register_access_enable(). * * This results in the power management callbacks provided in the driver * configuration to get called to turn off power and/or clocks to the GPU. See - * @ref kbase_pm_callback_conf + * kbase_pm_callback_conf * * This should only be used before power management is powered up with * kbase_pm_powerup() * - * @param kbdev The kbase device structure for the device (must be a valid - * pointer) + * @kbdev: The kbase device structure for the device (must be a valid pointer) */ void kbase_pm_register_access_disable(struct kbase_device *kbdev); @@ -468,7 +433,8 @@ void kbase_pm_register_access_disable(struct kbase_device *kbdev); * function */ /** - * Check if the power management metrics collection is active. + * kbase_pm_metrics_is_active - Check if the power management metrics + * collection is active. * * Note that this returns if the power management metrics collection was * active at the time of calling, it is possible that after the call the metrics @@ -476,30 +442,31 @@ void kbase_pm_register_access_disable(struct kbase_device *kbdev); * * The caller must handle the consequence that the state may have changed. * - * @param kbdev The kbase device structure for the device (must be a valid - * pointer) - * @return true if metrics collection was active else false. + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * Return: true if metrics collection was active else false. */ bool kbase_pm_metrics_is_active(struct kbase_device *kbdev); /** - * Power on the GPU, and any cores that are requested. + * kbase_pm_do_poweron - Power on the GPU, and any cores that are requested. * - * @param kbdev The kbase device structure for the device (must be a valid - * pointer) - * @param is_resume true if power on due to resume after suspend, - * false otherwise + * @kbdev: The kbase device structure for the device (must be a valid + * pointer) + * @is_resume: true if power on due to resume after suspend, + * false otherwise */ void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume); /** - * Power off the GPU, and any cores that have been requested. + * kbase_pm_do_poweroff - Power off the GPU, and any cores that have been + * requested. * - * @param kbdev The kbase device structure for the device (must be a valid - * pointer) - * @param is_suspend true if power off due to suspend, - * false otherwise - * @return true if power was turned off + * @kbdev: The kbase device structure for the device (must be a valid + * pointer) + * @is_suspend: true if power off due to suspend, + * false otherwise + * Return: + * true if power was turned off, else * false if power can not be turned off due to pending page/bus * fault workers. 
Caller must flush MMU workqueues and retry */ @@ -514,17 +481,18 @@ void kbase_pm_reset_dvfs_utilisation(struct kbase_device *kbdev); #ifdef CONFIG_MALI_MIDGARD_DVFS /** + * kbase_platform_dvfs_event - Report utilisation to DVFS code + * * Function provided by platform specific code when DVFS is enabled to allow * the power management metrics system to report utilisation. * - * @param kbdev The kbase device structure for the device (must be a - * valid pointer) - * @param utilisation The current calculated utilisation by the metrics - * system. - * @param util_gl_share The current calculated gl share of utilisation. - * @param util_cl_share The current calculated cl share of utilisation per core - * group. - * @return Returns 0 on failure and non zero on success. + * @kbdev: The kbase device structure for the device (must be a + * valid pointer) + * @utilisation: The current calculated utilisation by the metrics system. + * @util_gl_share: The current calculated gl share of utilisation. + * @util_cl_share: The current calculated cl share of utilisation per core + * group. + * Return: Returns 0 on failure and non zero on success. */ int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation, @@ -534,24 +502,15 @@ int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation, void kbase_pm_power_changed(struct kbase_device *kbdev); /** - * Inform the metrics system that an atom is about to be run. - * - * @param kbdev The kbase device structure for the device (must be a valid - * pointer) - * @param katom The atom that is about to be run - */ -void kbase_pm_metrics_run_atom(struct kbase_device *kbdev, - struct kbase_jd_atom *katom); - -/** - * Inform the metrics system that an atom has been run and is being released. + * kbase_pm_metrics_update - Inform the metrics system that an atom is either + * about to be run or has just completed. 
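/*
 * Illustrative sketch (not part of this patch): the kind of decision a
 * platform's kbase_platform_dvfs_event() hook (documented above) might take
 * from the reported utilisation. Only logging is shown; a real hook would
 * program the platform clock framework. The 70/90 figures are just the
 * vsync-hit window quoted in mali_kbase_pm_metrics.c below.
 */
static void example_dvfs_decision(struct kbase_device *kbdev, u32 utilisation)
{
	if (utilisation > 90)
		dev_dbg(kbdev->dev, "GPU util %u%%: raise clock\n",
			utilisation);
	else if (utilisation < 70)
		dev_dbg(kbdev->dev, "GPU util %u%%: lower clock\n",
			utilisation);
}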
+ * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @now: Pointer to the timestamp of the change, or NULL to use current time * - * @param kbdev The kbase device structure for the device (must be a valid - * pointer) - * @param katom The atom that is about to be released + * Caller must hold runpool_irq.lock */ -void kbase_pm_metrics_release_atom(struct kbase_device *kbdev, - struct kbase_jd_atom *katom); +void kbase_pm_metrics_update(struct kbase_device *kbdev, + ktime_t *now); #endif /* _KBASE_BACKEND_PM_INTERNAL_H_ */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c index b209c505bbd0..ae632564b96a 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c @@ -17,14 +17,14 @@ -/** - * @file mali_kbase_pm_metrics.c +/* * Metrics for power management */ #include #include #include +#include /* When VSync is being hit aim for utilisation between 70-90% */ #define KBASE_PM_VSYNC_MIN_UTILISATION 70 @@ -46,13 +46,12 @@ static enum hrtimer_restart dvfs_callback(struct hrtimer *timer) { unsigned long flags; - enum kbase_pm_dvfs_action action; struct kbasep_pm_metrics_data *metrics; KBASE_DEBUG_ASSERT(timer != NULL); metrics = container_of(timer, struct kbasep_pm_metrics_data, timer); - action = kbase_pm_get_dvfs_action(metrics->kbdev); + kbase_pm_get_dvfs_action(metrics->kbdev); spin_lock_irqsave(&metrics->lock, flags); @@ -72,11 +71,6 @@ int kbasep_pm_metrics_init(struct kbase_device *kbdev) KBASE_DEBUG_ASSERT(kbdev != NULL); kbdev->pm.backend.metrics.kbdev = kbdev; - kbdev->pm.backend.metrics.vsync_hit = 0; - kbdev->pm.backend.metrics.utilisation = 0; - kbdev->pm.backend.metrics.util_cl_share[0] = 0; - kbdev->pm.backend.metrics.util_cl_share[1] = 0; - kbdev->pm.backend.metrics.util_gl_share = 0; kbdev->pm.backend.metrics.time_period_start = ktime_get(); kbdev->pm.backend.metrics.time_busy = 0; @@ -86,11 +80,11 @@ int kbasep_pm_metrics_init(struct kbase_device *kbdev) kbdev->pm.backend.metrics.gpu_active = false; kbdev->pm.backend.metrics.active_cl_ctx[0] = 0; kbdev->pm.backend.metrics.active_cl_ctx[1] = 0; - kbdev->pm.backend.metrics.active_gl_ctx = 0; + kbdev->pm.backend.metrics.active_gl_ctx[0] = 0; + kbdev->pm.backend.metrics.active_gl_ctx[1] = 0; kbdev->pm.backend.metrics.busy_cl[0] = 0; kbdev->pm.backend.metrics.busy_cl[1] = 0; kbdev->pm.backend.metrics.busy_gl = 0; - kbdev->pm.backend.metrics.nr_in_slots = 0; spin_lock_init(&kbdev->pm.backend.metrics.lock); @@ -105,8 +99,6 @@ int kbasep_pm_metrics_init(struct kbase_device *kbdev) HRTIMER_MODE_REL); #endif /* CONFIG_MALI_MIDGARD_DVFS */ - kbase_pm_register_vsync_callback(kbdev); - return 0; } @@ -125,103 +117,10 @@ void kbasep_pm_metrics_term(struct kbase_device *kbdev) hrtimer_cancel(&kbdev->pm.backend.metrics.timer); #endif /* CONFIG_MALI_MIDGARD_DVFS */ - - kbase_pm_unregister_vsync_callback(kbdev); } KBASE_EXPORT_TEST_API(kbasep_pm_metrics_term); -/** - * kbasep_pm_record_gpu_active - update metrics tracking GPU active time - * - * This function updates the time the GPU was busy executing jobs in - * general and specifically for CL and GL jobs. Call this function when - * a job is submitted or removed from the GPU (job issue) slots. - * - * The busy time recorded is the time passed since the last time the - * busy/idle metrics were updated (e.g. by this function, - * kbasep_pm_record_gpu_idle or others). 
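/*
 * Illustrative sketch (not part of this patch): how a backend caller is
 * expected to use the new kbase_pm_metrics_update() declared above. The
 * wrapper name is hypothetical; the locking requirement and the NULL
 * timestamp behaviour follow the kernel-doc.
 */
static void example_atom_state_changed(struct kbase_device *kbdev,
				       ktime_t *end_timestamp)
{
	/* The kernel-doc requires runpool_irq.lock to be held */
	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);

	/* Pass a captured timestamp, or NULL to let the metrics code use
	 * ktime_get() itself */
	kbase_pm_metrics_update(kbdev, end_timestamp);
}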
- * - * Note that the time we record towards CL and GL jobs accounts for - * the total number of CL and GL jobs active at that time. If 20ms - * has passed and 3 GL jobs were active, we account 3*20 ms towards - * the GL busy time. The number of CL/GL jobs active is tracked by - * kbase_pm_metrics_run_atom() / kbase_pm_metrics_release_atom(). - * - * The kbdev->pm.backend.metrics.lock needs to be held when calling - * this function. - * - * @param kbdev The kbase device structure for the device (must be a valid - * pointer) - */ -static void kbasep_pm_record_gpu_active(struct kbase_device *kbdev) -{ - ktime_t now = ktime_get(); - ktime_t diff; - u32 ns_time; - - lockdep_assert_held(&kbdev->pm.backend.metrics.lock); - - /* Record active time */ - diff = ktime_sub(now, kbdev->pm.backend.metrics.time_period_start); - ns_time = (u32) (ktime_to_ns(diff) >> KBASE_PM_TIME_SHIFT); - kbdev->pm.backend.metrics.time_busy += ns_time; - kbdev->pm.backend.metrics.busy_gl += ns_time * - kbdev->pm.backend.metrics.active_gl_ctx; - kbdev->pm.backend.metrics.busy_cl[0] += ns_time * - kbdev->pm.backend.metrics.active_cl_ctx[0]; - kbdev->pm.backend.metrics.busy_cl[1] += ns_time * - kbdev->pm.backend.metrics.active_cl_ctx[1]; - /* Reset time period */ - kbdev->pm.backend.metrics.time_period_start = now; -} - -/** - * kbasep_pm_record_gpu_idle - update metrics tracking GPU idle time - * - * This function updates the time the GPU was idle (not executing any - * jobs) based on the time passed when kbasep_pm_record_gpu_active() - * was called last to record the last job on the GPU finishing. - * - * Call this function when no jobs are in the job slots of the GPU and - * a job is about to be submitted to a job slot. - * - * The kbdev->pm.backend.metrics.lock needs to be held when calling - * this function. 
- * - * @param kbdev The kbase device structure for the device (must be a valid - * pointer) - */ -static void kbasep_pm_record_gpu_idle(struct kbase_device *kbdev) -{ - ktime_t now = ktime_get(); - ktime_t diff; - u32 ns_time; - - lockdep_assert_held(&kbdev->pm.backend.metrics.lock); - - /* Record idle time */ - diff = ktime_sub(now, kbdev->pm.backend.metrics.time_period_start); - ns_time = (u32) (ktime_to_ns(diff) >> KBASE_PM_TIME_SHIFT); - kbdev->pm.backend.metrics.time_idle += ns_time; - /* Reset time period */ - kbdev->pm.backend.metrics.time_period_start = now; -} - -void kbase_pm_report_vsync(struct kbase_device *kbdev, int buffer_updated) -{ - unsigned long flags; - - KBASE_DEBUG_ASSERT(kbdev != NULL); - - spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); - kbdev->pm.backend.metrics.vsync_hit = buffer_updated; - spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); -} - -KBASE_EXPORT_TEST_API(kbase_pm_report_vsync); - -#if defined(CONFIG_PM_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS) /* caller needs to hold kbdev->pm.backend.metrics.lock before calling this * function */ @@ -230,26 +129,33 @@ static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev, { ktime_t diff; - KBASE_DEBUG_ASSERT(kbdev != NULL); + lockdep_assert_held(&kbdev->pm.backend.metrics.lock); diff = ktime_sub(now, kbdev->pm.backend.metrics.time_period_start); + if (ktime_to_ns(diff) < 0) + return; if (kbdev->pm.backend.metrics.gpu_active) { u32 ns_time = (u32) (ktime_to_ns(diff) >> KBASE_PM_TIME_SHIFT); kbdev->pm.backend.metrics.time_busy += ns_time; - kbdev->pm.backend.metrics.busy_cl[0] += ns_time * - kbdev->pm.backend.metrics.active_cl_ctx[0]; - kbdev->pm.backend.metrics.busy_cl[1] += ns_time * - kbdev->pm.backend.metrics.active_cl_ctx[1]; - kbdev->pm.backend.metrics.busy_gl += ns_time * - kbdev->pm.backend.metrics.active_gl_ctx; + if (kbdev->pm.backend.metrics.active_cl_ctx[0]) + kbdev->pm.backend.metrics.busy_cl[0] += ns_time; + if (kbdev->pm.backend.metrics.active_cl_ctx[1]) + kbdev->pm.backend.metrics.busy_cl[1] += ns_time; + if (kbdev->pm.backend.metrics.active_gl_ctx[0]) + kbdev->pm.backend.metrics.busy_gl += ns_time; + if (kbdev->pm.backend.metrics.active_gl_ctx[1]) + kbdev->pm.backend.metrics.busy_gl += ns_time; } else { kbdev->pm.backend.metrics.time_idle += (u32) (ktime_to_ns(diff) >> KBASE_PM_TIME_SHIFT); } + + kbdev->pm.backend.metrics.time_period_start = now; } +#if defined(CONFIG_PM_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS) /* Caller needs to hold kbdev->pm.backend.metrics.lock before calling this * function. 
*/ @@ -314,12 +220,12 @@ void kbase_pm_get_dvfs_utilisation(struct kbase_device *kbdev, * function */ int kbase_pm_get_dvfs_utilisation_old(struct kbase_device *kbdev, - int *util_gl_share, - int util_cl_share[2]) + int *util_gl_share, + int util_cl_share[2], + ktime_t now) { int utilisation; int busy; - ktime_t now = ktime_get(); kbase_pm_get_dvfs_utilisation_calc(kbdev, now); @@ -367,28 +273,27 @@ int kbase_pm_get_dvfs_utilisation_old(struct kbase_device *kbdev, } out: - kbase_pm_reset_dvfs_utilisation_unlocked(kbdev, now); - return utilisation; } -enum kbase_pm_dvfs_action kbase_pm_get_dvfs_action(struct kbase_device *kbdev) +void kbase_pm_get_dvfs_action(struct kbase_device *kbdev) { unsigned long flags; int utilisation, util_gl_share; int util_cl_share[2]; - enum kbase_pm_dvfs_action action; + ktime_t now; KBASE_DEBUG_ASSERT(kbdev != NULL); spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); + now = ktime_get(); + utilisation = kbase_pm_get_dvfs_utilisation_old(kbdev, &util_gl_share, - util_cl_share); + util_cl_share, now); if (utilisation < 0 || util_gl_share < 0 || util_cl_share[0] < 0 || util_cl_share[1] < 0) { - action = KBASE_PM_DVFS_NOP; utilisation = 0; util_gl_share = 0; util_cl_share[0] = 0; @@ -396,39 +301,16 @@ enum kbase_pm_dvfs_action kbase_pm_get_dvfs_action(struct kbase_device *kbdev) goto out; } - if (kbdev->pm.backend.metrics.vsync_hit) { - /* VSync is being met */ - if (utilisation < KBASE_PM_VSYNC_MIN_UTILISATION) - action = KBASE_PM_DVFS_CLOCK_DOWN; - else if (utilisation > KBASE_PM_VSYNC_MAX_UTILISATION) - action = KBASE_PM_DVFS_CLOCK_UP; - else - action = KBASE_PM_DVFS_NOP; - } else { - /* VSync is being missed */ - if (utilisation < KBASE_PM_NO_VSYNC_MIN_UTILISATION) - action = KBASE_PM_DVFS_CLOCK_DOWN; - else if (utilisation > KBASE_PM_NO_VSYNC_MAX_UTILISATION) - action = KBASE_PM_DVFS_CLOCK_UP; - else - action = KBASE_PM_DVFS_NOP; - } - - kbdev->pm.backend.metrics.utilisation = utilisation; - kbdev->pm.backend.metrics.util_cl_share[0] = util_cl_share[0]; - kbdev->pm.backend.metrics.util_cl_share[1] = util_cl_share[1]; - kbdev->pm.backend.metrics.util_gl_share = util_gl_share; out: #ifdef CONFIG_MALI_MIDGARD_DVFS kbase_platform_dvfs_event(kbdev, utilisation, util_gl_share, util_cl_share); #endif /*CONFIG_MALI_MIDGARD_DVFS */ - spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); + kbase_pm_reset_dvfs_utilisation_unlocked(kbdev, now); - return action; + spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); } -KBASE_EXPORT_TEST_API(kbase_pm_get_dvfs_action); bool kbase_pm_metrics_is_active(struct kbase_device *kbdev) { @@ -447,86 +329,72 @@ KBASE_EXPORT_TEST_API(kbase_pm_metrics_is_active); #endif /* CONFIG_MALI_MIDGARD_DVFS */ -/* called when job is submitted to a GPU slot */ -void kbase_pm_metrics_run_atom(struct kbase_device *kbdev, - struct kbase_jd_atom *katom) +/** + * kbase_pm_metrics_active_calc - Update PM active counts based on currently + * running atoms + * @kbdev: Device pointer + * + * The caller must hold kbdev->pm.backend.metrics.lock + */ +static void kbase_pm_metrics_active_calc(struct kbase_device *kbdev) { - unsigned long flags; + int js; - spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); - - /* We may have been idle before */ - if (kbdev->pm.backend.metrics.nr_in_slots == 0) { - WARN_ON(kbdev->pm.backend.metrics.active_cl_ctx[0] != 0); - WARN_ON(kbdev->pm.backend.metrics.active_cl_ctx[1] != 0); - WARN_ON(kbdev->pm.backend.metrics.active_gl_ctx != 0); - - /* Record idle time */ - 
kbasep_pm_record_gpu_idle(kbdev); - - /* We are now active */ - WARN_ON(kbdev->pm.backend.metrics.gpu_active); - kbdev->pm.backend.metrics.gpu_active = true; + lockdep_assert_held(&kbdev->pm.backend.metrics.lock); - } else { - /* Record active time */ - kbasep_pm_record_gpu_active(kbdev); - } + kbdev->pm.backend.metrics.active_gl_ctx[0] = 0; + kbdev->pm.backend.metrics.active_gl_ctx[1] = 0; + kbdev->pm.backend.metrics.active_cl_ctx[0] = 0; + kbdev->pm.backend.metrics.active_cl_ctx[1] = 0; + kbdev->pm.backend.metrics.gpu_active = false; - /* Track number of jobs in GPU slots */ - WARN_ON(kbdev->pm.backend.metrics.nr_in_slots == U8_MAX); - kbdev->pm.backend.metrics.nr_in_slots++; - - /* Track if it was a CL or GL one that was submitted to a GPU slot */ - if (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) { - int device_nr = (katom->core_req & - BASE_JD_REQ_SPECIFIC_COHERENT_GROUP) - ? katom->device_nr : 0; - KBASE_DEBUG_ASSERT(device_nr < 2); - kbdev->pm.backend.metrics.active_cl_ctx[device_nr]++; - } else { - kbdev->pm.backend.metrics.active_gl_ctx++; + for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) { + struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0); + + /* Head atom may have just completed, so if it isn't running + * then try the next atom */ + if (katom && katom->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) + katom = kbase_gpu_inspect(kbdev, js, 1); + + if (katom && katom->gpu_rb_state == + KBASE_ATOM_GPU_RB_SUBMITTED) { + if (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) { + int device_nr = (katom->core_req & + BASE_JD_REQ_SPECIFIC_COHERENT_GROUP) + ? katom->device_nr : 0; + WARN_ON(device_nr >= 2); + kbdev->pm.backend.metrics.active_cl_ctx[ + device_nr] = 1; + } else { + /* Slot 2 should not be running non-compute + * atoms */ + WARN_ON(js >= 2); + kbdev->pm.backend.metrics.active_gl_ctx[js] = 1; + } + kbdev->pm.backend.metrics.gpu_active = true; + } } - - spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); } -/* called when job is removed from a GPU slot */ -void kbase_pm_metrics_release_atom(struct kbase_device *kbdev, - struct kbase_jd_atom *katom) +/* called when job is submitted to or removed from a GPU slot */ +void kbase_pm_metrics_update(struct kbase_device *kbdev, ktime_t *timestamp) { unsigned long flags; + ktime_t now; - spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); - - /* Track how long CL and/or GL jobs have been busy for */ - kbasep_pm_record_gpu_active(kbdev); + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); - /* Track number of jobs in GPU slots */ - WARN_ON(kbdev->pm.backend.metrics.nr_in_slots == 0); - kbdev->pm.backend.metrics.nr_in_slots--; + spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); - /* We may become idle */ - if (kbdev->pm.backend.metrics.nr_in_slots == 0) { - KBASE_DEBUG_ASSERT(kbdev->pm.backend.metrics.gpu_active); - kbdev->pm.backend.metrics.gpu_active = false; + if (!timestamp) { + now = ktime_get(); + timestamp = &now; } - /* Track of the GPU jobs that are active which ones are CL and - * which GL */ - if (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) { - int device_nr = (katom->core_req & - BASE_JD_REQ_SPECIFIC_COHERENT_GROUP) - ? 
katom->device_nr : 0; - KBASE_DEBUG_ASSERT(device_nr < 2); - - WARN_ON(kbdev->pm.backend.metrics.active_cl_ctx[device_nr] - == 0); - kbdev->pm.backend.metrics.active_cl_ctx[device_nr]--; - } else { - WARN_ON(kbdev->pm.backend.metrics.active_gl_ctx == 0); - kbdev->pm.backend.metrics.active_gl_ctx--; - } + /* Track how long CL and/or GL jobs have been busy for */ + kbase_pm_get_dvfs_utilisation_calc(kbdev, *timestamp); + + kbase_pm_metrics_active_calc(kbdev); spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); } diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c index ba5c23928b85..e3c4829019ae 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c @@ -15,8 +15,7 @@ -/** - * @file mali_kbase_pm_policy.c +/* * Power policy API implementations */ @@ -47,7 +46,7 @@ static const struct kbase_pm_policy *const policy_list[] = { #endif /* CONFIG_MALI_NO_MALI */ }; -/** The number of policies available in the system. +/* The number of policies available in the system. * This is derived from the number of functions listed in policy_get_functions. */ #define POLICY_COUNT (sizeof(policy_list)/sizeof(*policy_list)) @@ -148,6 +147,38 @@ static inline void kbase_timeline_pm_cores_func(struct kbase_device *kbdev, #endif /* CONFIG_MALI_TRACE_TIMELINE */ +/** + * kbasep_pm_do_poweroff_cores - Process a poweroff request and power down any + * requested shader cores + * @kbdev: Device pointer + */ +static void kbasep_pm_do_poweroff_cores(struct kbase_device *kbdev) +{ + u64 prev_shader_state = kbdev->pm.backend.desired_shader_state; + + lockdep_assert_held(&kbdev->pm.power_change_lock); + + kbdev->pm.backend.desired_shader_state &= + ~kbdev->pm.backend.shader_poweroff_pending; + + kbdev->pm.backend.shader_poweroff_pending = 0; + + if (prev_shader_state != kbdev->pm.backend.desired_shader_state + || kbdev->pm.backend.ca_in_transition) { + bool cores_are_available; + + KBASE_TIMELINE_PM_CHECKTRANS(kbdev, + SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_START); + cores_are_available = kbase_pm_check_transitions_nolock(kbdev); + KBASE_TIMELINE_PM_CHECKTRANS(kbdev, + SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_END); + + /* Don't need 'cores_are_available', + * because we don't return anything */ + CSTD_UNUSED(cores_are_available); + } +} + static enum hrtimer_restart kbasep_pm_do_gpu_poweroff_callback(struct hrtimer *timer) { @@ -180,31 +211,8 @@ kbasep_pm_do_gpu_poweroff_callback(struct hrtimer *timer) kbdev->pm.backend.shader_poweroff_pending_time >= 0); - if (!kbdev->pm.backend.shader_poweroff_pending_time) { - u64 prev_shader_state = - kbdev->pm.backend.desired_shader_state; - - kbdev->pm.backend.desired_shader_state &= - ~kbdev->pm.backend.shader_poweroff_pending; - - kbdev->pm.backend.shader_poweroff_pending = 0; - - if (prev_shader_state != - kbdev->pm.backend.desired_shader_state - || kbdev->pm.backend.ca_in_transition) { - bool cores_are_available; - - KBASE_TIMELINE_PM_CHECKTRANS(kbdev, SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_START); - cores_are_available = - kbase_pm_check_transitions_nolock( - kbdev); - KBASE_TIMELINE_PM_CHECKTRANS(kbdev, SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_END); - - /* Don't need 'cores_are_available', - * because we don't return anything */ - CSTD_UNUSED(cores_are_available); - } - } + if (!kbdev->pm.backend.shader_poweroff_pending_time) + kbasep_pm_do_poweroff_cores(kbdev); } 
spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); @@ -349,7 +357,9 @@ void kbase_pm_update_active(struct kbase_device *kbdev) } if (!kbdev->pm.backend.poweroff_timer_needed && - !kbdev->pm.backend.gpu_powered) { + !kbdev->pm.backend.gpu_powered && + (kbdev->pm.poweroff_gpu_ticks || + kbdev->pm.poweroff_shader_ticks)) { kbdev->pm.backend.poweroff_timer_needed = true; hrtimer_start(&kbdev->pm.backend.gpu_poweroff_timer, kbdev->pm.gpu_poweroff_time, @@ -373,18 +383,34 @@ void kbase_pm_update_active(struct kbase_device *kbdev) /* Request power off */ if (kbdev->pm.backend.gpu_powered) { - kbdev->pm.backend.gpu_poweroff_pending = - kbdev->pm.poweroff_gpu_ticks; - if (!kbdev->pm.backend.poweroff_timer_needed) { - /* Start timer if not running (eg if power - * policy has been changed from always_on to - * something else). This will ensure the GPU is - * actually powered off */ - kbdev->pm.backend.poweroff_timer_needed = true; - hrtimer_start( + if (kbdev->pm.poweroff_gpu_ticks) { + kbdev->pm.backend.gpu_poweroff_pending = + kbdev->pm.poweroff_gpu_ticks; + if (!kbdev->pm.backend.poweroff_timer_needed) { + /* Start timer if not running (eg if + * power policy has been changed from + * always_on to something else). This + * will ensure the GPU is actually + * powered off */ + kbdev->pm.backend.poweroff_timer_needed + = true; + hrtimer_start( &kbdev->pm.backend.gpu_poweroff_timer, - kbdev->pm.gpu_poweroff_time, - HRTIMER_MODE_REL); + kbdev->pm.gpu_poweroff_time, + HRTIMER_MODE_REL); + } + } else { + /* Power off the GPU immediately */ + if (!kbase_pm_do_poweroff(kbdev, false)) { + /* GPU can not be powered off at present + */ + kbdev->pm.backend.poweroff_timer_needed + = true; + hrtimer_start( + &kbdev->pm.backend.gpu_poweroff_timer, + kbdev->pm.gpu_poweroff_time, + HRTIMER_MODE_REL); + } } } } @@ -394,6 +420,7 @@ void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev) { u64 desired_bitmap; bool cores_are_available; + bool do_poweroff = false; lockdep_assert_held(&kbdev->pm.power_change_lock); @@ -411,7 +438,6 @@ void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev) if (kbdev->pm.backend.desired_shader_state != desired_bitmap) KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_DESIRED, NULL, NULL, 0u, (u32)desired_bitmap); - /* Are any cores being powered on? 
*/ if (~kbdev->pm.backend.desired_shader_state & desired_bitmap || kbdev->pm.backend.ca_in_transition) { @@ -422,8 +448,12 @@ void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev) kbdev->pm.backend.shader_poweroff_pending |= (kbdev->pm.backend.desired_shader_state & ~desired_bitmap); - kbdev->pm.backend.shader_poweroff_pending_time = - kbdev->pm.poweroff_shader_ticks; + + if (kbdev->pm.poweroff_shader_ticks) + kbdev->pm.backend.shader_poweroff_pending_time = + kbdev->pm.poweroff_shader_ticks; + else + do_poweroff = true; } kbdev->pm.backend.desired_shader_state = desired_bitmap; @@ -435,8 +465,11 @@ void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev) kbdev->pm.backend.shader_poweroff_pending |= (kbdev->pm.backend.desired_shader_state & ~desired_bitmap); - kbdev->pm.backend.shader_poweroff_pending_time = + if (kbdev->pm.poweroff_shader_ticks) + kbdev->pm.backend.shader_poweroff_pending_time = kbdev->pm.poweroff_shader_ticks; + else + kbasep_pm_do_poweroff_cores(kbdev); } else if (kbdev->pm.active_count == 0 && desired_bitmap != 0 && kbdev->pm.backend.poweroff_timer_needed) { /* If power policy is keeping cores on despite there being no @@ -458,6 +491,12 @@ void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev) kbdev->pm.backend.shader_poweroff_pending_time = 0; } + /* Shader poweroff is deferred to the end of the function, to eliminate + * issues caused by the core availability policy recursing into this + * function */ + if (do_poweroff) + kbasep_pm_do_poweroff_cores(kbdev); + /* Don't need 'cores_are_available', because we don't return anything */ CSTD_UNUSED(cores_are_available); } @@ -550,7 +589,7 @@ void kbase_pm_set_policy(struct kbase_device *kbdev, KBASE_EXPORT_TEST_API(kbase_pm_set_policy); -/** Check whether a state change has finished, and trace it as completed */ +/* Check whether a state change has finished, and trace it as completed */ static void kbase_pm_trace_check_and_finish_state_change(struct kbase_device *kbdev) { diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h index fabb3cc970e2..611a90e66e65 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h @@ -147,10 +147,10 @@ void kbase_pm_unrequest_cores(struct kbase_device *kbdev, * If the necessary cores are not currently available then the function will * return %KBASE_CORES_NOT_READY and have no effect. * - * Return: true if the job can be submitted to the hardware or false - * if the job is not ready to run. 
* Return: %KBASE_CORES_NOT_READY if the cores are not immediately ready, + * * %KBASE_NEW_AFFINITY if the affinity requested is not allowed, + * * %KBASE_CORES_READY if the cores requested are already available */ enum kbase_pm_cores_ready kbase_pm_register_inuse_cores( diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c index be7c3663e2bb..4bcde85f3ee1 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c @@ -15,11 +15,6 @@ - -/** - * - */ - #include #include #include diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h index 4ae1b0c304cb..35088abc8fe5 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -15,11 +15,6 @@ - -/** - * - */ - #ifndef _KBASE_BACKEND_TIME_H_ #define _KBASE_BACKEND_TIME_H_ diff --git a/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h b/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h index bec9a5acf5c3..5a1523034c15 100644 --- a/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h +++ b/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h @@ -47,6 +47,7 @@ enum base_hw_feature { BASE_HW_FEATURE_WARPING, BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_V4, + BASE_HW_FEATURE_PROTECTED_MODE, BASE_HW_FEATURE_END }; @@ -160,4 +161,5 @@ static const enum base_hw_feature base_hw_features_t82x[] = { BASE_HW_FEATURE_END }; + #endif /* _BASE_HWCONFIG_FEATURES_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h b/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h index 7bd30dd72780..9fae0f6c806a 100644 --- a/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h +++ b/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h @@ -50,7 +50,6 @@ enum base_hw_issue { BASE_HW_ISSUE_8634, BASE_HW_ISSUE_8778, BASE_HW_ISSUE_8791, - BASE_HW_ISSUE_8803, BASE_HW_ISSUE_8833, BASE_HW_ISSUE_8879, BASE_HW_ISSUE_8896, @@ -135,7 +134,6 @@ static const enum base_hw_issue base_hw_issues_t60x_r0p0_15dev0[] = { BASE_HW_ISSUE_8634, BASE_HW_ISSUE_8778, BASE_HW_ISSUE_8791, - BASE_HW_ISSUE_8803, BASE_HW_ISSUE_8833, BASE_HW_ISSUE_8896, BASE_HW_ISSUE_8975, @@ -180,7 +178,6 @@ static const enum base_hw_issue base_hw_issues_t60x_r0p0_eac[] = { BASE_HW_ISSUE_8408, BASE_HW_ISSUE_8564, BASE_HW_ISSUE_8778, - BASE_HW_ISSUE_8803, BASE_HW_ISSUE_8975, BASE_HW_ISSUE_9010, BASE_HW_ISSUE_9275, @@ -217,7 +214,6 @@ static const enum base_hw_issue base_hw_issues_t60x_r0p1[] = { BASE_HW_ISSUE_8408, BASE_HW_ISSUE_8564, BASE_HW_ISSUE_8778, - BASE_HW_ISSUE_8803, BASE_HW_ISSUE_8975, BASE_HW_ISSUE_9010, BASE_HW_ISSUE_9275, @@ -245,7 +241,6 @@ static const enum base_hw_issue base_hw_issues_t60x_r0p1[] = { static const enum base_hw_issue base_hw_issues_t62x_r0p1[] = { BASE_HW_ISSUE_6402, - BASE_HW_ISSUE_8803, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10127, BASE_HW_ISSUE_10327, @@ -276,7 +271,6 @@ static const enum base_hw_issue base_hw_issues_t62x_r0p1[] = { static const enum base_hw_issue base_hw_issues_t62x_r1p0[] = { BASE_HW_ISSUE_6402, - BASE_HW_ISSUE_8803, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10471, BASE_HW_ISSUE_10472, @@ -297,7 
+291,6 @@ static const enum base_hw_issue base_hw_issues_t62x_r1p0[] = { static const enum base_hw_issue base_hw_issues_t62x_r1p1[] = { BASE_HW_ISSUE_6402, - BASE_HW_ISSUE_8803, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10471, BASE_HW_ISSUE_10472, @@ -315,7 +308,6 @@ static const enum base_hw_issue base_hw_issues_t62x_r1p1[] = { }; static const enum base_hw_issue base_hw_issues_t76x_r0p0[] = { - BASE_HW_ISSUE_8803, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10821, BASE_HW_ISSUE_10883, @@ -338,7 +330,6 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p0[] = { }; static const enum base_hw_issue base_hw_issues_t76x_r0p1[] = { - BASE_HW_ISSUE_8803, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10821, BASE_HW_ISSUE_10883, @@ -361,7 +352,6 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p1[] = { }; static const enum base_hw_issue base_hw_issues_t76x_r0p1_50rel0[] = { - BASE_HW_ISSUE_8803, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10821, BASE_HW_ISSUE_10883, @@ -382,7 +372,6 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p1_50rel0[] = { }; static const enum base_hw_issue base_hw_issues_t76x_r0p2[] = { - BASE_HW_ISSUE_8803, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10821, BASE_HW_ISSUE_10883, @@ -405,7 +394,6 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p2[] = { }; static const enum base_hw_issue base_hw_issues_t76x_r0p3[] = { - BASE_HW_ISSUE_8803, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10821, BASE_HW_ISSUE_10883, @@ -426,7 +414,6 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p3[] = { }; static const enum base_hw_issue base_hw_issues_t76x_r1p0[] = { - BASE_HW_ISSUE_8803, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10821, BASE_HW_ISSUE_10883, @@ -445,7 +432,6 @@ static const enum base_hw_issue base_hw_issues_t76x_r1p0[] = { static const enum base_hw_issue base_hw_issues_t72x_r0p0[] = { BASE_HW_ISSUE_6402, - BASE_HW_ISSUE_8803, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10471, BASE_HW_ISSUE_10684, @@ -461,7 +447,6 @@ static const enum base_hw_issue base_hw_issues_t72x_r0p0[] = { static const enum base_hw_issue base_hw_issues_t72x_r1p0[] = { BASE_HW_ISSUE_6402, - BASE_HW_ISSUE_8803, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10471, BASE_HW_ISSUE_10684, @@ -477,7 +462,6 @@ static const enum base_hw_issue base_hw_issues_t72x_r1p0[] = { static const enum base_hw_issue base_hw_issues_t72x_r1p1[] = { BASE_HW_ISSUE_6402, - BASE_HW_ISSUE_8803, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10471, BASE_HW_ISSUE_10684, @@ -551,7 +535,6 @@ static const enum base_hw_issue base_hw_issues_model_t62x[] = { }; static const enum base_hw_issue base_hw_issues_tFRx_r0p1[] = { - BASE_HW_ISSUE_8803, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10821, BASE_HW_ISSUE_10883, @@ -568,7 +551,6 @@ static const enum base_hw_issue base_hw_issues_tFRx_r0p1[] = { }; static const enum base_hw_issue base_hw_issues_tFRx_r0p2[] = { - BASE_HW_ISSUE_8803, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10821, BASE_HW_ISSUE_10883, @@ -584,7 +566,6 @@ static const enum base_hw_issue base_hw_issues_tFRx_r0p2[] = { }; static const enum base_hw_issue base_hw_issues_tFRx_r1p0[] = { - BASE_HW_ISSUE_8803, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10821, BASE_HW_ISSUE_10883, @@ -599,7 +580,6 @@ static const enum base_hw_issue base_hw_issues_tFRx_r1p0[] = { }; static const enum base_hw_issue base_hw_issues_tFRx_r2p0[] = { - BASE_HW_ISSUE_8803, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10821, BASE_HW_ISSUE_10883, @@ -625,7 +605,6 @@ static const enum base_hw_issue base_hw_issues_model_tFRx[] = { }; static const enum base_hw_issue base_hw_issues_t86x_r0p2[] = { - BASE_HW_ISSUE_8803, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10821, 
BASE_HW_ISSUE_10883, @@ -641,7 +620,6 @@ static const enum base_hw_issue base_hw_issues_t86x_r0p2[] = { }; static const enum base_hw_issue base_hw_issues_t86x_r1p0[] = { - BASE_HW_ISSUE_8803, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10821, BASE_HW_ISSUE_10883, @@ -656,7 +634,6 @@ static const enum base_hw_issue base_hw_issues_t86x_r1p0[] = { }; static const enum base_hw_issue base_hw_issues_t86x_r2p0[] = { - BASE_HW_ISSUE_8803, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10821, BASE_HW_ISSUE_10883, @@ -682,7 +659,6 @@ static const enum base_hw_issue base_hw_issues_model_t86x[] = { }; static const enum base_hw_issue base_hw_issues_t83x_r0p1[] = { - BASE_HW_ISSUE_8803, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10821, BASE_HW_ISSUE_10883, @@ -698,7 +674,6 @@ static const enum base_hw_issue base_hw_issues_t83x_r0p1[] = { }; static const enum base_hw_issue base_hw_issues_t83x_r1p0[] = { - BASE_HW_ISSUE_8803, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10821, BASE_HW_ISSUE_10883, @@ -725,7 +700,6 @@ static const enum base_hw_issue base_hw_issues_model_t83x[] = { }; static const enum base_hw_issue base_hw_issues_t82x_r0p0[] = { - BASE_HW_ISSUE_8803, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10821, BASE_HW_ISSUE_10883, @@ -741,7 +715,6 @@ static const enum base_hw_issue base_hw_issues_t82x_r0p0[] = { }; static const enum base_hw_issue base_hw_issues_t82x_r0p1[] = { - BASE_HW_ISSUE_8803, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10821, BASE_HW_ISSUE_10883, @@ -757,7 +730,6 @@ static const enum base_hw_issue base_hw_issues_t82x_r0p1[] = { }; static const enum base_hw_issue base_hw_issues_t82x_r1p0[] = { - BASE_HW_ISSUE_8803, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10821, BASE_HW_ISSUE_10883, @@ -783,4 +755,6 @@ static const enum base_hw_issue base_hw_issues_model_t82x[] = { BASE_HW_ISSUE_END }; + + #endif /* _BASE_HWCONFIG_ISSUES_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_base_kernel.h b/drivers/gpu/arm/midgard/mali_base_kernel.h index 21fe319b2027..56621309c5ad 100644 --- a/drivers/gpu/arm/midgard/mali_base_kernel.h +++ b/drivers/gpu/arm/midgard/mali_base_kernel.h @@ -36,6 +36,12 @@ /* NB: To support UK6 we also need to support UK7 */ #define BASE_LEGACY_UK7_SUPPORT 1 +/* Support UK8 IOCTLS */ +#define BASE_LEGACY_UK8_SUPPORT 1 + +/* Support UK9 IOCTLS */ +#define BASE_LEGACY_UK9_SUPPORT 1 + typedef u64 base_mem_handle; #include "mali_base_mem_priv.h" @@ -151,10 +157,9 @@ enum { /* OUT */ BASE_MEM_NEED_MMAP = (1U << 14), /**< Must call mmap to aquire a GPU address for the alloc */ - /* IN */ BASE_MEM_COHERENT_SYSTEM_REQUIRED = (1U << 15), /**< Page coherence - Outer shareable, required. */ + Outer shareable, required. */ BASE_MEM_SECURE = (1U << 16) /**< Secure memory */ }; @@ -197,12 +202,6 @@ typedef enum base_mem_import_type { BASE_MEM_IMPORT_TYPE_UMM = 2 } base_mem_import_type; -/* legacy API wrappers */ -#define base_tmem_import_type base_mem_import_type -#define BASE_TMEM_IMPORT_TYPE_INVALID BASE_MEM_IMPORT_TYPE_INVALID -#define BASE_TMEM_IMPORT_TYPE_UMP BASE_MEM_IMPORT_TYPE_UMP -#define BASE_TMEM_IMPORT_TYPE_UMM BASE_MEM_IMPORT_TYPE_UMM - /** * @brief Invalid memory handle type. * Return value from functions returning @a base_mem_handle on error. 
@@ -217,6 +216,9 @@ typedef enum base_mem_import_type { #define BASE_MEM_FIRST_FREE_ADDRESS ((BITS_PER_LONG << 12) + \ BASE_MEM_COOKIE_BASE) +/* Mask to detect 4GB boundary alignment */ +#define BASE_MEM_MASK_4GB 0xfffff000UL + /* Bit mask of cookies used for for memory allocation setup */ #define KBASE_COOKIE_MASK ~1UL /* bit 0 is reserved */ @@ -622,7 +624,7 @@ typedef struct base_jd_atom_v2 { kbase_pointer extres_list; /**< list of external resources */ u16 nr_extres; /**< nr of external resources */ base_jd_core_req core_req; /**< core requirements */ - const struct base_dependency pre_dep[2]; /**< pre-dependencies, one need to use SETTER function to assign this field, + struct base_dependency pre_dep[2]; /**< pre-dependencies, one need to use SETTER function to assign this field, this is done in order to reduce possibility of improper assigment of a dependency field */ base_atom_id atom_number; /**< unique number to identify the atom */ base_jd_prio prio; /**< Atom priority. Refer to @ref base_jd_prio for more details */ @@ -662,17 +664,18 @@ typedef struct base_external_resource { * @param dep_type The dep_type to be assigned. * */ -static inline void base_jd_atom_dep_set(const struct base_dependency *const_dep, base_atom_id id, base_jd_dep_type dep_type) +static inline void base_jd_atom_dep_set(struct base_dependency *dep, + base_atom_id id, base_jd_dep_type dep_type) { - struct base_dependency *dep; + LOCAL_ASSERT(dep != NULL); - LOCAL_ASSERT(const_dep != NULL); - /* make sure we don't set not allowed combinations of atom_id/dependency_type */ + /* + * make sure we don't set not allowed combinations + * of atom_id/dependency_type. + */ LOCAL_ASSERT((id == 0 && dep_type == BASE_JD_DEP_TYPE_INVALID) || (id > 0 && dep_type != BASE_JD_DEP_TYPE_INVALID)); - dep = (struct base_dependency *)const_dep; - dep->atom_id = id; dep->dependency_type = dep_type; } @@ -684,11 +687,12 @@ static inline void base_jd_atom_dep_set(const struct base_dependency *const_dep, * @param[in] from The dependency to make a copy from. * */ -static inline void base_jd_atom_dep_copy(const struct base_dependency *const_dep, const struct base_dependency *from) +static inline void base_jd_atom_dep_copy(struct base_dependency *dep, + const struct base_dependency *from) { - LOCAL_ASSERT(const_dep != NULL); + LOCAL_ASSERT(dep != NULL); - base_jd_atom_dep_set(const_dep, from->atom_id, from->dependency_type); + base_jd_atom_dep_set(dep, from->atom_id, from->dependency_type); } /** @@ -964,27 +968,8 @@ typedef struct base_dump_cpu_gpu_counters { /** @} end group base_user_api_job_dispatch */ -#ifdef __KERNEL__ -/* - * The following typedefs should be removed when a midg types header is added. - * See MIDCOM-1657 for details. - */ -typedef u32 gpu_product_id; -typedef u32 gpu_cache_features; -typedef u32 gpu_tiler_features; -typedef u32 gpu_mem_features; -typedef u32 gpu_mmu_features; -typedef u32 gpu_js_features; -typedef u32 gpu_as_present; -typedef u32 gpu_js_present; - #define GPU_MAX_JOB_SLOTS 16 -#else -#include -#include -#endif - /** * @page page_base_user_api_gpuprops User-side Base GPU Property Query API * @@ -1208,7 +1193,7 @@ struct mali_base_gpu_core_props { /** * Product specific value. */ - gpu_product_id product_id; + u32 product_id; /** * Status of the GPU release. 
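/*
 * Illustrative sketch (not part of this patch): filling in the pre_dep[]
 * slots of a base_jd_atom_v2 with base_jd_atom_dep_set() now that the const
 * qualifier has been dropped from the setter. BASE_JD_DEP_TYPE_DATA is
 * assumed to be one of the valid dependency types alongside
 * BASE_JD_DEP_TYPE_INVALID used in the assertions above; the helper name is
 * hypothetical.
 */
static void example_fill_deps(struct base_jd_atom_v2 *atom,
			      base_atom_id producer)
{
	/* First slot: depend on the atom producing our input data */
	base_jd_atom_dep_set(&atom->pre_dep[0], producer,
			     BASE_JD_DEP_TYPE_DATA);
	/* Second slot left empty: id 0 must pair with the INVALID type, as
	 * enforced by the LOCAL_ASSERT() above */
	base_jd_atom_dep_set(&atom->pre_dep[1], 0, BASE_JD_DEP_TYPE_INVALID);
}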
@@ -1350,7 +1335,7 @@ struct mali_base_gpu_coherent_group_info { * Coherency features of the memory, accessed by @ref gpu_mem_features * methods */ - gpu_mem_features coherency; + u32 coherency; u32 padding; @@ -1380,18 +1365,19 @@ struct gpu_raw_gpu_props { u64 shader_present; u64 tiler_present; u64 l2_present; - u64 unused_1; /* keep for backward compatibility */ + u32 coherency_enabled; + u32 unused_1; /* keep for backward compatibility */ - gpu_cache_features l2_features; + u32 l2_features; u32 suspend_size; /* API 8.2+ */ - gpu_mem_features mem_features; - gpu_mmu_features mmu_features; + u32 mem_features; + u32 mmu_features; - gpu_as_present as_present; + u32 as_present; u32 js_present; - gpu_js_features js_features[GPU_MAX_JOB_SLOTS]; - gpu_tiler_features tiler_features; + u32 js_features[GPU_MAX_JOB_SLOTS]; + u32 tiler_features; u32 texture_features[3]; u32 gpu_id; diff --git a/drivers/gpu/arm/midgard/mali_kbase.h b/drivers/gpu/arm/midgard/mali_kbase.h index be3527820fa0..3e294da28ea0 100644 --- a/drivers/gpu/arm/midgard/mali_kbase.h +++ b/drivers/gpu/arm/midgard/mali_kbase.h @@ -55,11 +55,12 @@ #include "mali_kbase_utility.h" #include "mali_kbase_gpu_memory_debugfs.h" #include "mali_kbase_mem_profile_debugfs.h" +#include "mali_kbase_debug_job_fault.h" #include "mali_kbase_jd_debugfs.h" -#include "mali_kbase_cpuprops.h" #include "mali_kbase_gpuprops.h" #include "mali_kbase_jm.h" #include "mali_kbase_vinstr.h" +#include "mali_kbase_ipa.h" #ifdef CONFIG_GPU_TRACEPOINTS #include #endif @@ -116,12 +117,33 @@ int kbase_jd_submit(struct kbase_context *kctx, int kbase_jd_submit(struct kbase_context *kctx, const struct kbase_uk_job_submit *submit_data); #endif + +/** + * kbase_jd_done_worker - Handle a job completion + * @data: a &struct work_struct + * + * This function requeues the job from the runpool (if it was soft-stopped or + * removed from NEXT registers). + * + * Removes it from the system if it finished/failed/was cancelled. + * + * Resolves dependencies to add dependent jobs to the context, potentially + * starting them if necessary (which may add more references to the context) + * + * Releases the reference to the context from the no-longer-running job. + * + * Handles retrying submission outside of IRQ context if it failed from within + * IRQ context. + */ +void kbase_jd_done_worker(struct work_struct *data); + void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr, ktime_t *end_timestamp, kbasep_js_atom_done_code done_code); void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom); void kbase_jd_evict(struct kbase_device *kbdev, struct kbase_jd_atom *katom); void kbase_jd_zap_context(struct kbase_context *kctx); -bool jd_done_nolock(struct kbase_jd_atom *katom); +bool jd_done_nolock(struct kbase_jd_atom *katom, + struct list_head *completed_jobs_ctx); void kbase_jd_free_external_resources(struct kbase_jd_atom *katom); bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *user_atom, @@ -129,6 +151,8 @@ bool jd_submit_atom(struct kbase_context *kctx, void kbase_job_done(struct kbase_device *kbdev, u32 done); +void kbase_gpu_cacheclean(struct kbase_device *kbdev, + struct kbase_jd_atom *katom); /** * kbase_job_slot_ctx_priority_check_locked(): - Check for lower priority atoms * and soft stop them @@ -496,4 +520,18 @@ void kbasep_trace_clear(struct kbase_device *kbdev); #endif /* KBASE_TRACE_ENABLE */ /** PRIVATE - do not use directly. 
Use KBASE_TRACE_DUMP() instead */ void kbasep_trace_dump(struct kbase_device *kbdev); + +#ifdef CONFIG_MALI_DEBUG +/** + * kbase_set_driver_inactive - Force driver to go inactive + * @kbdev: Device pointer + * @inactive: true if driver should go inactive, false otherwise + * + * Forcing the driver inactive will cause all future IOCTLs to wait until the + * driver is made active again. This is intended solely for the use of tests + * which require that no jobs are running while the test executes. + */ +void kbase_set_driver_inactive(struct kbase_device *kbdev, bool inactive); +#endif /* CONFIG_MALI_DEBUG */ + #endif diff --git a/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c b/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c index 51f934404e53..2fb5e3edf49f 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c +++ b/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c @@ -33,15 +33,8 @@ u32 kbase_cache_enabled(u32 flags, u32 nr_pages) CSTD_UNUSED(nr_pages); -#ifdef CONFIG_MALI_CACHE_COHERENT - /* Cache is completely coherent at hardware level. So always allocate - * cached memory. - */ - cache_flags |= KBASE_REG_CPU_CACHED; -#else if (flags & BASE_MEM_CACHED_CPU) cache_flags |= KBASE_REG_CPU_CACHED; -#endif /* (CONFIG_MALI_CACHE_COHERENT) */ return cache_flags; } diff --git a/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h b/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h index d2487fdf21dc..0c18bdb357b0 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h +++ b/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h @@ -26,7 +26,7 @@ #include "mali_kbase.h" #include "mali_base_kernel.h" -#include "mali_kbase_device_internal.h" + /** * kbase_cache_enabled - Choose the cache policy for a specific region * @flags: flags describing attributes of the region diff --git a/drivers/gpu/arm/midgard/mali_kbase_config.h b/drivers/gpu/arm/midgard/mali_kbase_config.h index add2f909e8d0..f282b9aeb9de 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_config.h +++ b/drivers/gpu/arm/midgard/mali_kbase_config.h @@ -73,14 +73,10 @@ struct kbase_platform_funcs_conf { * Returns 0 on success, * negative error code otherwise. * - * Function pointer - * for platform specific initialization - * or NULL if no initialization function is required. - * This function - * will be called - * before any other callbacks - * listed in the struct kbase_attribute struct - * (such as Power Management callbacks). + * Function pointer for platform specific initialization or NULL if no + * initialization function is required. At the point this the GPU is + * not active and its power and clocks are in unknown (platform specific + * state) as kbase doesn't yet have control of power and clocks. * * The platform specific private pointer kbase_device::platform_context * can be accessed (and possibly initialized) in here. @@ -91,9 +87,8 @@ struct kbase_platform_funcs_conf { * @kbdev - kbase_device pointer * * Function pointer for platform specific termination or NULL if no - * termination function is required. This function will be called - * after any other callbacks listed in the struct kbase_attribute struct - * (such as Power Management callbacks). + * termination function is required. At the point this the GPU will be + * idle but still powered and clocked. * * The platform specific private pointer kbase_device::platform_context * can be accessed (and possibly terminated) in here. 
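/*
 * Illustrative sketch (not part of this patch): how a CONFIG_MALI_DEBUG test
 * might use kbase_set_driver_inactive() documented above to guarantee that
 * no jobs run while it executes. The test body is hypothetical.
 */
#ifdef CONFIG_MALI_DEBUG
static void example_quiesced_test(struct kbase_device *kbdev)
{
	/* Future IOCTLs now block until the driver is made active again */
	kbase_set_driver_inactive(kbdev, true);

	/* ... checks requiring an idle driver go here ... */

	kbase_set_driver_inactive(kbdev, false);
}
#endif /* CONFIG_MALI_DEBUG */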
diff --git a/drivers/gpu/arm/midgard/mali_kbase_context.c b/drivers/gpu/arm/midgard/mali_kbase_context.c index e066b286c51f..3a6e9f867206 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_context.c +++ b/drivers/gpu/arm/midgard/mali_kbase_context.c @@ -25,8 +25,6 @@ #include #include -#define MEMPOOL_PAGES 16384 - /** * kbase_create_context() - Create a kernel base context. @@ -41,7 +39,7 @@ struct kbase_context * kbase_create_context(struct kbase_device *kbdev, bool is_compat) { struct kbase_context *kctx; - int mali_err; + int err; KBASE_DEBUG_ASSERT(kbdev != NULL); @@ -68,21 +66,24 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat) atomic_set(&kctx->nonmapped_pages, 0); kctx->slots_pullable = 0; - if (kbase_mem_allocator_init(&kctx->osalloc, MEMPOOL_PAGES, kctx->kbdev) != 0) + err = kbase_mem_pool_init(&kctx->mem_pool, + kbdev->mem_pool_max_size_default, + kctx->kbdev, &kbdev->mem_pool); + if (err) goto free_kctx; - kctx->pgd_allocator = &kctx->osalloc; atomic_set(&kctx->used_pages, 0); - if (kbase_jd_init(kctx)) - goto free_allocator; + err = kbase_jd_init(kctx); + if (err) + goto free_pool; - mali_err = kbasep_js_kctx_init(kctx); - if (mali_err) + err = kbasep_js_kctx_init(kctx); + if (err) goto free_jd; /* safe to call kbasep_js_kctx_term in this case */ - mali_err = kbase_event_init(kctx); - if (mali_err) + err = kbase_event_init(kctx); + if (err) goto free_jd; mutex_init(&kctx->reg_lock); @@ -92,15 +93,16 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat) INIT_LIST_HEAD(&kctx->waiting_kds_resource); #endif - mali_err = kbase_mmu_init(kctx); - if (mali_err) + err = kbase_mmu_init(kctx); + if (err) goto free_event; kctx->pgd = kbase_mmu_alloc_pgd(kctx); if (!kctx->pgd) goto free_mmu; - if (kbase_mem_allocator_alloc(&kctx->osalloc, 1, &kctx->aliasing_sink_page) != 0) + kctx->aliasing_sink_page = kbase_mem_pool_alloc(&kctx->mem_pool); + if (!kctx->aliasing_sink_page) goto no_sink_page; kctx->tgid = current->tgid; @@ -110,7 +112,8 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat) kctx->cookies = KBASE_COOKIE_MASK; /* Make sure page 0 is not used... 
*/ - if (kbase_region_tracker_init(kctx)) + err = kbase_region_tracker_init(kctx); + if (err) goto no_region_tracker; #ifdef CONFIG_GPU_TRACEPOINTS atomic_set(&kctx->jctx.work_id, 0); @@ -126,8 +129,8 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat) return kctx; no_region_tracker: + kbase_mem_pool_free(&kctx->mem_pool, kctx->aliasing_sink_page, false); no_sink_page: - kbase_mem_allocator_free(&kctx->osalloc, 1, &kctx->aliasing_sink_page, 0); /* VM lock needed for the call to kbase_mmu_free_pgd */ kbase_gpu_vm_lock(kctx); kbase_mmu_free_pgd(kctx); @@ -140,8 +143,8 @@ free_jd: /* Safe to call this one even when didn't initialize (assuming kctx was sufficiently zeroed) */ kbasep_js_kctx_term(kctx); kbase_jd_exit(kctx); -free_allocator: - kbase_mem_allocator_term(&kctx->osalloc); +free_pool: + kbase_mem_pool_term(&kctx->mem_pool); free_kctx: vfree(kctx); out: @@ -191,7 +194,7 @@ void kbase_destroy_context(struct kbase_context *kctx) kbase_mmu_free_pgd(kctx); /* drop the aliasing sink page now that it can't be mapped anymore */ - kbase_mem_allocator_free(&kctx->osalloc, 1, &kctx->aliasing_sink_page, 0); + kbase_mem_pool_free(&kctx->mem_pool, kctx->aliasing_sink_page, false); /* free pending region setups */ pending_regions_to_clean = (~kctx->cookies) & KBASE_COOKIE_MASK; @@ -222,7 +225,7 @@ void kbase_destroy_context(struct kbase_context *kctx) if (pages != 0) dev_warn(kbdev->dev, "%s: %d pages in use!\n", __func__, pages); - kbase_mem_allocator_term(&kctx->osalloc); + kbase_mem_pool_term(&kctx->mem_pool); WARN_ON(atomic_read(&kctx->nonmapped_pages) != 0); vfree(kctx); diff --git a/drivers/gpu/arm/midgard/mali_kbase_core_linux.c b/drivers/gpu/arm/midgard/mali_kbase_core_linux.c index 1285ec7970a0..85389eb1c27c 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_core_linux.c +++ b/drivers/gpu/arm/midgard/mali_kbase_core_linux.c @@ -28,13 +28,16 @@ #ifdef CONFIG_MALI_DEVFREQ #include #endif /* CONFIG_MALI_DEVFREQ */ -#include #ifdef CONFIG_MALI_NO_MALI #include "mali_kbase_model_linux.h" #endif /* CONFIG_MALI_NO_MALI */ #include "mali_kbase_mem_profile_debugfs_buf_size.h" #include "mali_kbase_debug_mem_view.h" +#include "mali_kbase_mem.h" +#include "mali_kbase_mem_pool_debugfs.h" #include +#include +#include #ifdef CONFIG_KDS #include @@ -74,6 +77,7 @@ #include #endif /* CONFIG_PM_DEVFREQ */ #include +#include #include @@ -81,7 +85,6 @@ #include #endif - #if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)) #include #else @@ -350,16 +353,21 @@ static void kbase_create_timeline_objects(struct kbase_context *kctx) { struct kbase_device *kbdev = kctx->kbdev; unsigned int lpu_id; + unsigned int as_nr; struct kbasep_kctx_list_element *element; /* Create LPU objects. */ for (lpu_id = 0; lpu_id < kbdev->gpu_props.num_job_slots; lpu_id++) { - gpu_js_features *lpu = + u32 *lpu = &kbdev->gpu_props.props.raw_props.js_features[lpu_id]; - kbase_tlstream_tl_summary_new_lpu(lpu, lpu_id, (u32)*lpu); + kbase_tlstream_tl_summary_new_lpu(lpu, lpu_id, *lpu); } - /* Create GPU object and make it retain all LPUs. */ + /* Create Address Space objects. */ + for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++) + kbase_tlstream_tl_summary_new_as(&kbdev->as[as_nr], as_nr); + + /* Create GPU object and make it retain all LPUs and address spaces. 
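Condensing the kbase_create_context()/kbase_destroy_context() hunks above, the per-context kbase_mem_pool life cycle that replaces the old kbase_mem_allocator calls looks roughly like this (error handling elided):

	/* Init: the context pool chains to the device-wide pool, so pages
	 * freed here spill into kbdev->mem_pool rather than going straight
	 * back to the kernel. */
	err = kbase_mem_pool_init(&kctx->mem_pool,
			kbdev->mem_pool_max_size_default,
			kctx->kbdev, &kbdev->mem_pool);

	kctx->aliasing_sink_page = kbase_mem_pool_alloc(&kctx->mem_pool);

	/* Teardown (and the error path above): return the page to the pool,
	 * then drain the pool itself. */
	kbase_mem_pool_free(&kctx->mem_pool, kctx->aliasing_sink_page, false);
	kbase_mem_pool_term(&kctx->mem_pool);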
*/ kbase_tlstream_tl_summary_new_gpu( kbdev, kbdev->gpu_props.props.raw_props.gpu_id, @@ -370,6 +378,10 @@ static void kbase_create_timeline_objects(struct kbase_context *kctx) &kbdev->gpu_props.props.raw_props.js_features[lpu_id]; kbase_tlstream_tl_summary_lifelink_lpu_gpu(lpu, kbdev); } + for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++) + kbase_tlstream_tl_summary_lifelink_as_gpu( + &kbdev->as[as_nr], + kbdev); /* Create object for each known context. */ mutex_lock(&kbdev->kctx_list_lock); @@ -409,6 +421,22 @@ static void kbase_api_handshake(struct uku_version_check_args *version) version->minor = 1; break; #endif /* BASE_LEGACY_UK7_SUPPORT */ +#ifdef BASE_LEGACY_UK8_SUPPORT + case 8: + /* We are backwards compatible with version 8, + * so pretend to be the old version */ + version->major = 8; + version->minor = 4; + break; +#endif /* BASE_LEGACY_UK8_SUPPORT */ +#ifdef BASE_LEGACY_UK9_SUPPORT + case 9: + /* We are backwards compatible with version 9, + * so pretend to be the old version */ + version->major = 9; + version->minor = 0; + break; +#endif /* BASE_LEGACY_UK8_SUPPORT */ case BASE_UK_VERSION_MAJOR: /* set minor to be the lowest common */ version->minor = min_t(int, BASE_UK_VERSION_MINOR, @@ -439,6 +467,21 @@ enum mali_error { MALI_ERROR_FUNCTION_FAILED, }; +#ifdef CONFIG_MALI_DEBUG +#define INACTIVE_WAIT_MS (5000) + +void kbase_set_driver_inactive(struct kbase_device *kbdev, bool inactive) +{ + kbdev->driver_inactive = inactive; + wake_up(&kbdev->driver_inactive_wait); + + /* Wait for any running IOCTLs to complete */ + if (inactive) + msleep(INACTIVE_WAIT_MS); +} +KBASE_EXPORT_TEST_API(kbase_set_driver_inactive); +#endif /* CONFIG_MALI_DEBUG */ + static int kbase_dispatch(struct kbase_context *kctx, void * const args, u32 args_size) { struct kbase_device *kbdev; @@ -451,6 +494,11 @@ static int kbase_dispatch(struct kbase_context *kctx, void * const args, u32 arg id = ukh->id; ukh->ret = MALI_ERROR_NONE; /* Be optimistic */ +#ifdef CONFIG_MALI_DEBUG + wait_event(kbdev->driver_inactive_wait, + kbdev->driver_inactive == false); +#endif /* CONFIG_MALI_DEBUG */ + if (UKP_FUNC_ID_CHECK_VERSION == id) { struct uku_version_check_args *version_check; @@ -477,7 +525,7 @@ static int kbase_dispatch(struct kbase_context *kctx, void * const args, u32 arg /* setup pending, try to signal that we'll do the setup, * if setup was already in progress, err this call */ - if (atomic_cmpxchg(&kctx->setup_in_progress, 0, 1)) + if (atomic_cmpxchg(&kctx->setup_in_progress, 0, 1) != 0) return -EINVAL; /* if unexpected call, will stay stuck in setup mode @@ -619,7 +667,8 @@ copy_failed: if (kbase_mem_commit(kctx, commit->gpu_addr, commit->pages, - (base_backing_threshold_status *)&commit->result_subcode)) + (base_backing_threshold_status *) + &commit->result_subcode) != 0) ukh->ret = MALI_ERROR_FUNCTION_FAILED; break; @@ -667,7 +716,8 @@ copy_failed: break; } - if (kbase_mem_flags_change(kctx, fc->gpu_va, fc->flags, fc->mask)) + if (kbase_mem_flags_change(kctx, fc->gpu_va, + fc->flags, fc->mask) != 0) ukh->ret = MALI_ERROR_FUNCTION_FAILED; break; @@ -685,7 +735,7 @@ copy_failed: break; } - if (kbase_mem_free(kctx, mem->gpu_addr)) + if (kbase_mem_free(kctx, mem->gpu_addr) != 0) ukh->ret = MALI_ERROR_FUNCTION_FAILED; break; } @@ -733,7 +783,7 @@ copy_failed: break; } -#ifndef CONFIG_MALI_CACHE_COHERENT +#ifndef CONFIG_MALI_COH_USER if (kbase_sync_now(kctx, &sn->sset) != 0) ukh->ret = MALI_ERROR_FUNCTION_FAILED; #endif @@ -761,12 +811,21 @@ copy_failed: case KBASE_FUNC_HWCNT_SETUP: { struct 
kbase_uk_hwcnt_setup *setup = args; + bool access_allowed; if (sizeof(*setup) != args_size) goto bad_size; + access_allowed = kbase_security_has_capability( + kctx, + KBASE_SEC_INSTR_HW_COUNTERS_COLLECT, + KBASE_SEC_FLAG_NOAUDIT); + if (!access_allowed) + goto out_bad; + mutex_lock(&kctx->vinstr_cli_lock); - if (kbase_instr_hwcnt_setup(kctx, setup) != 0) + if (kbase_vinstr_legacy_hwc_setup(kbdev->vinstr_ctx, + &kctx->vinstr_cli, setup) != 0) ukh->ret = MALI_ERROR_FUNCTION_FAILED; mutex_unlock(&kctx->vinstr_cli_lock); break; @@ -776,7 +835,8 @@ copy_failed: { /* args ignored */ mutex_lock(&kctx->vinstr_cli_lock); - if (kbase_instr_hwcnt_dump(kctx) != 0) + if (kbase_vinstr_hwc_dump(kctx->vinstr_cli, + BASE_HWCNT_READER_EVENT_MANUAL) != 0) ukh->ret = MALI_ERROR_FUNCTION_FAILED; mutex_unlock(&kctx->vinstr_cli_lock); break; @@ -786,26 +846,34 @@ copy_failed: { /* args ignored */ mutex_lock(&kctx->vinstr_cli_lock); - if (kbase_vinstr_clear(kbdev->vinstr_ctx, - kctx->vinstr_cli) != 0) + if (kbase_vinstr_hwc_clear(kctx->vinstr_cli) != 0) ukh->ret = MALI_ERROR_FUNCTION_FAILED; mutex_unlock(&kctx->vinstr_cli_lock); break; } -#ifdef BASE_LEGACY_UK7_SUPPORT - case KBASE_FUNC_CPU_PROPS_REG_DUMP_OBSOLETE: + case KBASE_FUNC_HWCNT_READER_SETUP: { - struct kbase_uk_cpuprops *setup = args; + struct kbase_uk_hwcnt_reader_setup *setup = args; + bool access_allowed; if (sizeof(*setup) != args_size) goto bad_size; - if (kbase_cpuprops_uk_get_props(kctx, setup) != 0) + access_allowed = kbase_security_has_capability( + kctx, + KBASE_SEC_INSTR_HW_COUNTERS_COLLECT, + KBASE_SEC_FLAG_NOAUDIT); + if (!access_allowed) + goto out_bad; + + mutex_lock(&kctx->vinstr_cli_lock); + if (kbase_vinstr_hwcnt_reader_setup(kbdev->vinstr_ctx, + setup) != 0) ukh->ret = MALI_ERROR_FUNCTION_FAILED; + mutex_unlock(&kctx->vinstr_cli_lock); break; } -#endif /* BASE_LEGACY_UK7_SUPPORT */ case KBASE_FUNC_GPU_PROPS_REG_DUMP: { @@ -969,6 +1037,15 @@ copy_failed: break; } +#ifdef BASE_LEGACY_UK8_SUPPORT + case KBASE_FUNC_KEEP_GPU_POWERED: + { + dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_KEEP_GPU_POWERED: function is deprecated and disabled\n"); + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + break; + } +#endif /* BASE_LEGACY_UK8_SUPPORT */ + case KBASE_FUNC_GET_PROFILING_CONTROLS: { struct kbase_uk_profiling_controls *controls = @@ -1097,25 +1174,6 @@ copy_failed: } #endif /* MALI_UNIT_TEST */ #endif /* CONFIG_MALI_MIPE_ENABLED */ - /* used to signal the job core dump on fault has terminated and release the - * refcount of the context to let it be removed. It requires at least - * BASE_UK_VERSION_MAJOR to be 8 and BASE_UK_VERSION_MINOR to be 1 in the - * UK interface. - */ - case KBASE_FUNC_DUMP_FAULT_TERM: - { -#if 2 == MALI_INSTRUMENTATION_LEVEL - if (atomic_read(&kctx->jctx.sched_info.ctx.fault_count) > 0 && - kctx->jctx.sched_info.ctx.is_scheduled) - - kbasep_js_dump_fault_term(kbdev, kctx); - - break; -#endif /* 2 == MALI_INSTRUMENTATION_LEVEL */ - - /* This IOCTL should only be called when instr=2 at compile time. */ - goto out_bad; - } case KBASE_FUNC_GET_CONTEXT_ID: { @@ -1226,19 +1284,24 @@ static int kbase_open(struct inode *inode, struct file *filp) goto out; } -#ifdef CONFIG_MALI_CACHE_COHERENT +#ifdef CONFIG_MALI_COH_USER /* if cache is completely coherent at hardware level, then remove the * infinite cache control support from debugfs. 
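The HWCNT IOCTLs above are rerouted from the old kbase_instr_* entry points to vinstr, with KBASE_FUNC_HWCNT_SETUP and the new KBASE_FUNC_HWCNT_READER_SETUP additionally gated on the instrumentation capability; condensed from the setup case:

	access_allowed = kbase_security_has_capability(kctx,
			KBASE_SEC_INSTR_HW_COUNTERS_COLLECT,
			KBASE_SEC_FLAG_NOAUDIT);
	if (!access_allowed)
		goto out_bad;

	mutex_lock(&kctx->vinstr_cli_lock);
	if (kbase_vinstr_legacy_hwc_setup(kbdev->vinstr_ctx,
			&kctx->vinstr_cli, setup) != 0)
		ukh->ret = MALI_ERROR_FUNCTION_FAILED;
	mutex_unlock(&kctx->vinstr_cli_lock);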
*/ #else debugfs_create_bool("infinite_cache", 0644, kctx->kctx_dentry, &kctx->infinite_cache_active); -#endif /* CONFIG_MALI_CACHE_COHERENT */ +#endif /* CONFIG_MALI_COH_USER */ kbasep_mem_profile_debugfs_add(kctx); kbasep_jd_debugfs_ctx_add(kctx); kbase_debug_mem_view_init(filp); -#endif + + kbase_debug_job_fault_context_init(kctx); + + kbase_mem_pool_debugfs_add(kctx->kctx_dentry, &kctx->mem_pool); + +#endif /* CONFIG_DEBUGFS */ dev_dbg(kbdev->dev, "created base context\n"); @@ -1282,6 +1345,7 @@ static int kbase_release(struct inode *inode, struct file *filp) #ifdef CONFIG_DEBUG_FS debugfs_remove_recursive(kctx->kctx_dentry); kbasep_mem_profile_debugfs_remove(kctx); + kbase_debug_job_fault_context_exit(kctx); #endif mutex_lock(&kbdev->kctx_list_lock); @@ -1301,7 +1365,13 @@ static int kbase_release(struct inode *inode, struct file *filp) mutex_lock(&kctx->vinstr_cli_lock); /* If this client was performing hwcnt dumping and did not explicitly * detach itself, remove it from the vinstr core now */ - kbase_vinstr_detach_client(kctx->kbdev->vinstr_ctx, kctx->vinstr_cli); + if (kctx->vinstr_cli) { + struct kbase_uk_hwcnt_setup setup; + + setup.dump_buffer = 0llu; + kbase_vinstr_legacy_hwc_setup( + kbdev->vinstr_ctx, &kctx->vinstr_cli, &setup); + } mutex_unlock(&kctx->vinstr_cli_lock); kbase_destroy_context(kctx); @@ -1354,7 +1424,8 @@ static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, lof if (filp->f_flags & O_NONBLOCK) return -EAGAIN; - if (wait_event_interruptible(kctx->event_queue, kbase_event_pending(kctx))) + if (wait_event_interruptible(kctx->event_queue, + kbase_event_pending(kctx)) != 0) return -ERESTARTSYS; } if (uevent.event_code == BASE_JD_EVENT_DRV_TERMINATED) { @@ -1363,7 +1434,7 @@ static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, lof goto out; } - if (copy_to_user(buf, &uevent, sizeof(uevent))) + if (copy_to_user(buf, &uevent, sizeof(uevent)) != 0) return -EFAULT; buf += sizeof(uevent); @@ -1752,7 +1823,9 @@ static ssize_t show_core_mask(struct device *dev, struct device_attribute *attr, return -ENODEV; ret += scnprintf(buf + ret, PAGE_SIZE - ret, "Current core mask : 0x%llX\n", kbdev->pm.debug_core_mask); - ret += scnprintf(buf + ret, PAGE_SIZE - ret, "Available core mask : 0x%llX\n", kbdev->shader_present_bitmap); + ret += scnprintf(buf + ret, PAGE_SIZE - ret, + "Available core mask : 0x%llX\n", + kbdev->gpu_props.props.raw_props.shader_present); return ret; } @@ -1783,7 +1856,8 @@ static ssize_t set_core_mask(struct device *dev, struct device_attribute *attr, if (rc) return rc; - if ((new_core_mask & kbdev->shader_present_bitmap) != new_core_mask || + if ((new_core_mask & kbdev->gpu_props.props.raw_props.shader_present) + != new_core_mask || !(new_core_mask & kbdev->gpu_props.props.coherency_info.group[0].core_mask)) { dev_err(dev, "power_policy: invalid core specification\n"); return -EINVAL; @@ -2657,9 +2731,6 @@ static ssize_t kbase_show_gpuinfo(struct device *dev, { .id = GPU_ID_PI_T83X, .name = "Mali-T83x" }, { .id = GPU_ID_PI_T86X, .name = "Mali-T86x" }, { .id = GPU_ID_PI_TFRX, .name = "Mali-T88x" }, -#ifdef MALI_INCLUDE_TMIX - { .id = GPU_ID_PI_TMIX, .name = "Mali-TMIx" }, -#endif /* MALI_INCLUDE_TMIX */ }; const char *product_name = "(Unknown Mali GPU)"; struct kbase_device *kbdev; @@ -2895,6 +2966,109 @@ static ssize_t show_reset_timeout(struct device *dev, static DEVICE_ATTR(reset_timeout, S_IRUGO | S_IWUSR, show_reset_timeout, set_reset_timeout); + + +static ssize_t show_mem_pool_size(struct device *dev, + 
struct device_attribute *attr, char * const buf) +{ + struct kbase_device *kbdev; + ssize_t ret; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + ret = scnprintf(buf, PAGE_SIZE, "%zu\n", + kbase_mem_pool_size(&kbdev->mem_pool)); + + return ret; +} + +static ssize_t set_mem_pool_size(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct kbase_device *kbdev; + size_t new_size; + int err; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + err = kstrtoul(buf, 0, (unsigned long *)&new_size); + if (err) + return err; + + kbase_mem_pool_trim(&kbdev->mem_pool, new_size); + + return count; +} + +static DEVICE_ATTR(mem_pool_size, S_IRUGO | S_IWUSR, show_mem_pool_size, + set_mem_pool_size); + +static ssize_t show_mem_pool_max_size(struct device *dev, + struct device_attribute *attr, char * const buf) +{ + struct kbase_device *kbdev; + ssize_t ret; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + ret = scnprintf(buf, PAGE_SIZE, "%zu\n", + kbase_mem_pool_max_size(&kbdev->mem_pool)); + + return ret; +} + +static ssize_t set_mem_pool_max_size(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct kbase_device *kbdev; + size_t new_max_size; + int err; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + err = kstrtoul(buf, 0, (unsigned long *)&new_max_size); + if (err) + return -EINVAL; + + kbase_mem_pool_set_max_size(&kbdev->mem_pool, new_max_size); + + return count; +} + +static DEVICE_ATTR(mem_pool_max_size, S_IRUGO | S_IWUSR, show_mem_pool_max_size, + set_mem_pool_max_size); + + + +static int kbasep_secure_mode_init(struct kbase_device *kbdev) +{ + +#ifdef SECURE_CALLBACKS + kbdev->secure_ops = SECURE_CALLBACKS; + kbdev->secure_mode_support = false; + + if (kbdev->secure_ops) { + int err; + + /* Make sure secure mode is disabled on startup */ + err = kbdev->secure_ops->secure_mode_disable(kbdev); + + /* secure_mode_disable() returns -EINVAL if not supported */ + kbdev->secure_mode_support = (err != -EINVAL); + } +#endif + + return 0; +} + #ifdef CONFIG_MALI_NO_MALI static int kbase_common_reg_map(struct kbase_device *kbdev) { @@ -2976,6 +3150,30 @@ MAKE_QUIRK_ACCESSORS(mmu); #endif /* KBASE_GPU_RESET_EN */ +static int kbasep_secure_mode_seq_show(struct seq_file *m, void *p) +{ + struct kbase_device *kbdev = m->private; + + if (!kbdev->secure_mode_support) + seq_puts(m, "unsupported\n"); + else + seq_printf(m, "%s\n", kbdev->secure_mode ? 
"Y" : "N"); + + return 0; +} + +static int kbasep_secure_mode_debugfs_open(struct inode *in, struct file *file) +{ + return single_open(file, kbasep_secure_mode_seq_show, in->i_private); +} + +static const struct file_operations kbasep_secure_mode_debugfs_fops = { + .open = kbasep_secure_mode_debugfs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + static int kbase_device_debugfs_init(struct kbase_device *kbdev) { struct dentry *debugfs_ctx_defaults_directory; @@ -3004,7 +3202,7 @@ static int kbase_device_debugfs_init(struct kbase_device *kbdev) err = -ENOMEM; goto out; } - + kbase_debug_job_fault_dev_init(kbdev); kbasep_gpu_memory_debugfs_init(kbdev); #if KBASE_GPU_RESET_EN debugfs_create_file("quirks_sc", 0644, @@ -3024,6 +3222,10 @@ static int kbase_device_debugfs_init(struct kbase_device *kbdev) &kbdev->infinite_cache_active_default); #endif /* CONFIG_MALI_COH_USER */ + debugfs_create_size_t("mem_pool_max_size", 0644, + debugfs_ctx_defaults_directory, + &kbdev->mem_pool_max_size_default); + #if KBASE_TRACE_ENABLE kbasep_trace_debugfs_init(kbdev); #endif /* KBASE_TRACE_ENABLE */ @@ -3032,6 +3234,10 @@ static int kbase_device_debugfs_init(struct kbase_device *kbdev) kbasep_trace_timeline_debugfs_init(kbdev); #endif /* CONFIG_MALI_TRACE_TIMELINE */ + debugfs_create_file("secure_mode", S_IRUGO, + kbdev->mali_debugfs_directory, kbdev, + &kbasep_secure_mode_debugfs_fops); + return 0; out: @@ -3053,6 +3259,65 @@ static inline int kbase_device_debugfs_init(struct kbase_device *kbdev) static inline void kbase_device_debugfs_term(struct kbase_device *kbdev) { } #endif /* CONFIG_DEBUG_FS */ +static void kbase_device_coherency_init(struct kbase_device *kbdev, u32 gpu_id) +{ + u32 selected_coherency = COHERENCY_NONE; + /* COHERENCY_NONE is always supported */ + u32 supported_coherency_bitmap = COHERENCY_FEATURE_BIT(COHERENCY_NONE); + +#ifdef CONFIG_OF + const void *coherency_override_dts; + u32 override_coherency; +#endif /* CONFIG_OF */ + + kbdev->system_coherency = selected_coherency; + + /* device tree may override the coherency */ +#ifdef CONFIG_OF + coherency_override_dts = of_get_property(kbdev->dev->of_node, + "override-coherency", + NULL); + if (coherency_override_dts) { + + override_coherency = be32_to_cpup(coherency_override_dts); + + if ((override_coherency <= COHERENCY_NONE) && + (supported_coherency_bitmap & + COHERENCY_FEATURE_BIT(override_coherency))) { + + kbdev->system_coherency = override_coherency; + + dev_info(kbdev->dev, + "Using coherency override, mode %u set from dtb", + override_coherency); + } else + dev_warn(kbdev->dev, + "Ignoring invalid coherency override, mode %u set from dtb", + override_coherency); + } + +#endif /* CONFIG_OF */ + + kbdev->gpu_props.props.raw_props.coherency_features = + kbdev->system_coherency; +} + +#ifdef CONFIG_MALI_FPGA_BUS_LOGGER + +/* Callback used by the kbase bus logger client, to initiate a GPU reset + * when the bus log is restarted. GPU reset is used as reference point + * in HW bus log analyses. 
+ */ +static void kbase_logging_started_cb(void *data) +{ + struct kbase_device *kbdev = (struct kbase_device *)data; + + if (kbase_prepare_to_reset_gpu(kbdev)) + kbase_reset_gpu(kbdev); + dev_info(kbdev->dev, "KBASE - Bus logger restarted\n"); +} +#endif + static int kbase_common_device_init(struct kbase_device *kbdev) { @@ -3061,19 +3326,7 @@ static int kbase_common_device_init(struct kbase_device *kbdev) enum { inited_mem = (1u << 0), inited_js = (1u << 1), - inited_debug = (1u << 2), - inited_js_softstop = (1u << 3), - inited_js_timeouts = (1u << 4), -#if !MALI_CUSTOMER_RELEASE - inited_force_replay = (1u << 5), -#endif /* !MALI_CUSTOMER_RELEASE */ inited_pm_runtime_init = (1u << 6), -#ifdef CONFIG_MALI_DEBUG_SHADER_SPLIT_FS - inited_sc_split = (1u << 7), -#endif /* CONFIG_MALI_DEBUG_SHADER_SPLIT_FS */ -#ifdef CONFIG_MALI_TRACE_TIMELINE - inited_timeline = (1u << 8), -#endif /* CONFIG_MALI_TRACE_TIMELINE */ #ifdef CONFIG_MALI_DEVFREQ inited_devfreq = (1u << 9), #endif /* CONFIG_MALI_DEVFREQ */ @@ -3083,15 +3336,12 @@ static int kbase_common_device_init(struct kbase_device *kbdev) inited_backend_early = (1u << 11), inited_backend_late = (1u << 12), inited_device = (1u << 13), - inited_gpuinfo = (1u << 14), - inited_dvfs_period = (1u << 15), - inited_pm_poweroff = (1u << 16), - inited_reset_timeout = (1u << 17), - inited_js_scheduling_period = (1u << 18), - inited_vinstr = (1u << 19) + inited_vinstr = (1u << 19), + inited_ipa = (1u << 20) }; int inited = 0; + u32 gpu_id; #if defined(CONFIG_MALI_PLATFORM_VEXPRESS) u32 ve_logic_tile = 0; #endif /* CONFIG_MALI_PLATFORM_VEXPRESS */ @@ -3163,6 +3413,14 @@ static int kbase_common_device_init(struct kbase_device *kbdev) inited |= inited_vinstr; + kbdev->ipa_ctx = kbase_ipa_init(kbdev); + if (!kbdev->ipa_ctx) { + dev_err(kbdev->dev, "Can't initialize IPA\n"); + goto out_partial; + } + + inited |= inited_ipa; + if (kbdev->pm.callback_power_runtime_init) { err = kbdev->pm.callback_power_runtime_init(kbdev); if (err) @@ -3177,92 +3435,21 @@ static int kbase_common_device_init(struct kbase_device *kbdev) inited |= inited_mem; - kbdev->system_coherency = COHERENCY_NONE; + gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; + gpu_id &= GPU_ID_VERSION_PRODUCT_ID; + gpu_id = gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT; + kbase_device_coherency_init(kbdev, gpu_id); - err = kbasep_js_devdata_init(kbdev); + err = kbasep_secure_mode_init(kbdev); if (err) goto out_partial; - inited |= inited_js; - -#ifdef CONFIG_MALI_DEBUG_SHADER_SPLIT_FS - err = device_create_file(kbdev->dev, &dev_attr_sc_split); - if (err) { - dev_err(kbdev->dev, "Couldn't create sc_split sysfs file\n"); - goto out_partial; - } - - inited |= inited_sc_split; -#endif /* CONFIG_MALI_DEBUG_SHADER_SPLIT_FS */ - -#ifdef CONFIG_MALI_DEBUG - - err = device_create_file(kbdev->dev, &dev_attr_debug_command); - if (err) { - dev_err(kbdev->dev, "Couldn't create debug_command sysfs file\n"); - goto out_partial; - } - inited |= inited_debug; - - err = device_create_file(kbdev->dev, &dev_attr_js_softstop_always); - if (err) { - dev_err(kbdev->dev, "Couldn't create js_softstop_always sysfs file\n"); - goto out_partial; - } - inited |= inited_js_softstop; -#endif /* CONFIG_MALI_DEBUG */ - - err = device_create_file(kbdev->dev, &dev_attr_js_timeouts); - if (err) { - dev_err(kbdev->dev, "Couldn't create js_timeouts sysfs file\n"); - goto out_partial; - } - inited |= inited_js_timeouts; - -#if !MALI_CUSTOMER_RELEASE - err = device_create_file(kbdev->dev, &dev_attr_force_replay); - if (err) { - dev_err(kbdev->dev, 
"Couldn't create force_replay sysfs file\n"); - goto out_partial; - } - inited |= inited_force_replay; -#endif /* !MALI_CUSTOMER_RELEASE */ - - err = device_create_file(kbdev->dev, &dev_attr_gpuinfo); - if (err) { - dev_err(kbdev->dev, "Couldn't create gpuinfo sysfs file\n"); - goto out_partial; - } - inited |= inited_gpuinfo; - - err = device_create_file(kbdev->dev, &dev_attr_dvfs_period); - if (err) { - dev_err(kbdev->dev, "Couldn't create dvfs_period sysfs file\n"); - goto out_partial; - } - inited |= inited_dvfs_period; - - err = device_create_file(kbdev->dev, &dev_attr_pm_poweroff); - if (err) { - dev_err(kbdev->dev, "Couldn't create pm_poweroff sysfs file\n"); - goto out_partial; - } - inited |= inited_pm_poweroff; - - err = device_create_file(kbdev->dev, &dev_attr_reset_timeout); - if (err) { - dev_err(kbdev->dev, "Couldn't create reset_timeout sysfs file\n"); + err = kbasep_js_devdata_init(kbdev); + if (err) goto out_partial; - } - inited |= inited_reset_timeout; - err = device_create_file(kbdev->dev, &dev_attr_js_scheduling_period); - if (err) { - dev_err(kbdev->dev, "Couldn't create js_scheduling_period sysfs file\n"); - goto out_partial; - } - inited |= inited_js_scheduling_period; + inited |= inited_js; #ifdef CONFIG_MALI_MIPE_ENABLED err = kbase_tlstream_init(); @@ -3287,10 +3474,6 @@ static int kbase_common_device_init(struct kbase_device *kbdev) inited |= inited_devfreq; #endif /* CONFIG_MALI_DEVFREQ */ -#ifdef SECURE_CALLBACKS - kbdev->secure_ops = SECURE_CALLBACKS; -#endif - err = kbase_device_debugfs_init(kbdev); if (err) goto out_partial; @@ -3306,30 +3489,10 @@ static int kbase_common_device_init(struct kbase_device *kbdev) err = misc_register(&kbdev->mdev); if (err) { - dev_err(kbdev->dev, "Couldn't register misc dev %s\n", - kbdev->devname); + dev_err(kbdev->dev, "Couldn't register misc dev %s\n", kbdev->devname); goto out_misc; } - err = device_create_file(kbdev->dev, &dev_attr_power_policy); - if (err) { - dev_err(kbdev->dev, "Couldn't create power_policy sysfs file\n"); - goto out_file; - } - - err = device_create_file(kbdev->dev, - &dev_attr_core_availability_policy); - if (err) { - dev_err(kbdev->dev, "Couldn't create core_availability_policy sysfs file\n"); - goto out_file_core_availability_policy; - } - - err = device_create_file(kbdev->dev, &dev_attr_core_mask); - if (err) { - dev_err(kbdev->dev, "Couldn't create core_mask sysfs file\n"); - goto out_file_core_mask; - } - { const struct list_head *dev_list = kbase_dev_list_get(); @@ -3337,21 +3500,16 @@ static int kbase_common_device_init(struct kbase_device *kbdev) kbase_dev_list_put(dev_list); } - dev_info(kbdev->dev, "Probed as %s\n", - dev_name(kbdev->mdev.this_device)); + dev_info(kbdev->dev, "Probed as %s\n", dev_name(kbdev->mdev.this_device)); return 0; -out_file_core_mask: - device_remove_file(kbdev->dev, &dev_attr_core_availability_policy); -out_file_core_availability_policy: - device_remove_file(kbdev->dev, &dev_attr_power_policy); -out_file: - misc_deregister(&kbdev->mdev); out_misc: put_device(kbdev->dev); kbase_device_debugfs_term(kbdev); out_partial: + if (inited & inited_ipa) + kbase_ipa_term(kbdev->ipa_ctx); if (inited & inited_vinstr) kbase_vinstr_term(kbdev->vinstr_ctx); #ifdef CONFIG_MALI_DEVFREQ @@ -3365,37 +3523,6 @@ out_partial: kbase_tlstream_term(); #endif /* CONFIG_MALI_MIPE_ENABLED */ - if (inited & inited_js_scheduling_period) - device_remove_file(kbdev->dev, &dev_attr_js_scheduling_period); - if (inited & inited_reset_timeout) - device_remove_file(kbdev->dev, 
&dev_attr_reset_timeout); - if (inited & inited_pm_poweroff) - device_remove_file(kbdev->dev, &dev_attr_pm_poweroff); - if (inited & inited_dvfs_period) - device_remove_file(kbdev->dev, &dev_attr_dvfs_period); -#if !MALI_CUSTOMER_RELEASE - if (inited & inited_force_replay) - device_remove_file(kbdev->dev, &dev_attr_force_replay); -#endif /* !MALI_CUSTOMER_RELEASE */ - if (inited & inited_js_timeouts) - device_remove_file(kbdev->dev, &dev_attr_js_timeouts); -#ifdef CONFIG_MALI_DEBUG - if (inited & inited_js_softstop) - device_remove_file(kbdev->dev, &dev_attr_js_softstop_always); - - if (inited & inited_debug) - device_remove_file(kbdev->dev, &dev_attr_debug_command); - -#endif /* CONFIG_MALI_DEBUG */ - -#ifdef CONFIG_MALI_DEBUG_SHADER_SPLIT_FS - if (inited & inited_sc_split) - device_remove_file(kbdev->dev, &dev_attr_sc_split); -#endif /* CONFIG_MALI_DEBUG_SHADER_SPLIT_FS */ - - if (inited & inited_gpuinfo) - device_remove_file(kbdev->dev, &dev_attr_gpuinfo); - if (inited & inited_js) kbasep_js_devdata_halt(kbdev); @@ -3423,11 +3550,42 @@ out_partial: } +static struct attribute *kbase_attrs[] = { +#ifdef CONFIG_MALI_DEBUG_SHADER_SPLIT_FS + &dev_attr_sc_split.attr, +#endif +#ifdef CONFIG_MALI_DEBUG + &dev_attr_debug_command.attr, + &dev_attr_js_softstop_always.attr, +#endif +#if !MALI_CUSTOMER_RELEASE + &dev_attr_force_replay.attr, +#endif + &dev_attr_js_timeouts.attr, + &dev_attr_gpuinfo.attr, + &dev_attr_dvfs_period.attr, + &dev_attr_pm_poweroff.attr, + &dev_attr_reset_timeout.attr, + &dev_attr_js_scheduling_period.attr, + &dev_attr_power_policy.attr, + &dev_attr_core_availability_policy.attr, + &dev_attr_core_mask.attr, + &dev_attr_mem_pool_size.attr, + &dev_attr_mem_pool_max_size.attr, + NULL +}; + +static const struct attribute_group kbase_attr_group = { + .attrs = kbase_attrs, +}; + +static int kbase_common_device_remove(struct kbase_device *kbdev); + static int kbase_platform_device_probe(struct platform_device *pdev) { struct kbase_device *kbdev; struct resource *reg_res; - int err; + int err = 0; int i; printk(KERN_INFO "arm_release_ver of this mali_ko is '%s', rk_ko_ver is '%d', built at '%s', on '%s'.", @@ -3459,7 +3617,6 @@ static int kbase_platform_device_probe(struct platform_device *pdev) #endif /* CONFIG_MALI_NO_MALI */ kbdev->dev = &pdev->dev; - /* 3 IRQ resources */ for (i = 0; i < 3; i++) { struct resource *irq_res; @@ -3491,21 +3648,21 @@ static int kbase_platform_device_probe(struct platform_device *pdev) kbdev->irqs[irqtag].irq = irq_res->start; kbdev->irqs[irqtag].flags = (irq_res->flags & IRQF_TRIGGER_MASK); } + /* the first memory resource is the physical address of the GPU + * registers */ + reg_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!reg_res) { + dev_err(kbdev->dev, "Invalid register resource\n"); + err = -ENOENT; + goto out_platform_mem; + } - /* the first memory resource is the physical address of the GPU registers */ - reg_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!reg_res) { - dev_err(kbdev->dev, "Invalid register resource\n"); - err = -ENOENT; - goto out_platform_mem; - } - - kbdev->reg_start = reg_res->start; - kbdev->reg_size = resource_size(reg_res); + kbdev->reg_start = reg_res->start; + kbdev->reg_size = resource_size(reg_res); - err = kbase_common_reg_map(kbdev); - if (err) - goto out_reg_map; + err = kbase_common_reg_map(kbdev); + if (err) + goto out_reg_map; #if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \ && defined(CONFIG_REGULATOR) @@ -3547,9 +3704,38 @@ static int 
kbase_platform_device_probe(struct platform_device *pdev) dev_err(kbdev->dev, "Failed kbase_common_device_init\n"); goto out_common_init; } + + err = sysfs_create_group(&kbdev->dev->kobj, &kbase_attr_group); + if (err) { + dev_err(&pdev->dev, "Failed to create sysfs entries\n"); + goto out_sysfs; + } + +#ifdef CONFIG_MALI_FPGA_BUS_LOGGER + err = bl_core_client_register(kbdev->devname, + kbase_logging_started_cb, + kbdev, &kbdev->buslogger, + THIS_MODULE, NULL); + if (err) { + dev_err(kbdev->dev, "Couldn't register bus log client\n"); + goto out_bl_core_register; + } + + bl_core_set_threshold(kbdev->buslogger, 1024*1024*1024); +#endif return 0; +#ifdef CONFIG_MALI_FPGA_BUS_LOGGER +out_bl_core_register: + sysfs_remove_group(&kbdev->dev->kobj, &kbase_attr_group); +#endif + +out_sysfs: + kbase_common_device_remove(kbdev); out_common_init: +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)) + of_free_opp_table(kbdev->dev); +#endif clk_disable_unprepare(kbdev->clock); out_clock_prepare: clk_put(kbdev->clock); @@ -3560,7 +3746,7 @@ out_clock_prepare: && defined(CONFIG_REGULATOR) regulator_put(kbdev->regulator); #endif /* LINUX_VERSION_CODE >= 3, 12, 0 */ - kbase_common_reg_unmap(kbdev); + kbase_common_reg_unmap(kbdev); out_reg_map: out_platform_mem: #ifdef CONFIG_OF @@ -3578,7 +3764,15 @@ out: static int kbase_common_device_remove(struct kbase_device *kbdev) { + kbase_ipa_term(kbdev->ipa_ctx); kbase_vinstr_term(kbdev->vinstr_ctx); + sysfs_remove_group(&kbdev->dev->kobj, &kbase_attr_group); + +#ifdef CONFIG_MALI_FPGA_BUS_LOGGER + if (kbdev->buslogger) + bl_core_client_unregister(kbdev->buslogger); +#endif + #ifdef CONFIG_DEBUG_FS debugfs_remove_recursive(kbdev->mali_debugfs_directory); #endif @@ -3593,32 +3787,11 @@ static int kbase_common_device_remove(struct kbase_device *kbdev) #ifdef CONFIG_MALI_PLATFORM_DEVICETREE pm_runtime_disable(kbdev->dev); #endif - device_remove_file(kbdev->dev, &dev_attr_js_scheduling_period); - device_remove_file(kbdev->dev, &dev_attr_reset_timeout); - device_remove_file(kbdev->dev, &dev_attr_pm_poweroff); - device_remove_file(kbdev->dev, &dev_attr_dvfs_period); - device_remove_file(kbdev->dev, &dev_attr_power_policy); - device_remove_file(kbdev->dev, &dev_attr_core_availability_policy); - device_remove_file(kbdev->dev, &dev_attr_core_mask); #ifdef CONFIG_MALI_MIPE_ENABLED kbase_tlstream_term(); #endif /* CONFIG_MALI_MIPE_ENABLED */ -#ifdef CONFIG_MALI_DEBUG - device_remove_file(kbdev->dev, &dev_attr_js_softstop_always); - device_remove_file(kbdev->dev, &dev_attr_debug_command); -#endif /* CONFIG_MALI_DEBUG */ - device_remove_file(kbdev->dev, &dev_attr_js_timeouts); -#if !MALI_CUSTOMER_RELEASE - device_remove_file(kbdev->dev, &dev_attr_force_replay); -#endif /* !MALI_CUSTOMER_RELEASE */ - -#ifdef CONFIG_MALI_DEBUG_SHADER_SPLIT_FS - device_remove_file(kbdev->dev, &dev_attr_sc_split); -#endif /* CONFIG_MALI_DEBUG_SHADER_SPLIT_FS */ - device_remove_file(kbdev->dev, &dev_attr_gpuinfo); - kbasep_js_devdata_halt(kbdev); kbase_mem_halt(kbdev); @@ -3634,7 +3807,7 @@ static int kbase_common_device_remove(struct kbase_device *kbdev) } misc_deregister(&kbdev->mdev); put_device(kbdev->dev); - kbase_common_reg_unmap(kbdev); + kbase_common_reg_unmap(kbdev); kbase_device_term(kbdev); if (kbdev->clock) { clk_disable_unprepare(kbdev->clock); @@ -3720,7 +3893,7 @@ static int kbase_device_resume(struct device *dev) * * @return A standard Linux error code */ -#ifdef CONFIG_PM_RUNTIME +#ifdef KBASE_PM_RUNTIME static int kbase_device_runtime_suspend(struct device *dev) { struct 
kbase_device *kbdev = to_kbase_device(dev); @@ -3739,7 +3912,7 @@ static int kbase_device_runtime_suspend(struct device *dev) } return 0; } -#endif /* CONFIG_PM_RUNTIME */ +#endif /* KBASE_PM_RUNTIME */ /** Runtime resume callback from the OS. * @@ -3750,7 +3923,7 @@ static int kbase_device_runtime_suspend(struct device *dev) * @return A standard Linux error code */ -#ifdef CONFIG_PM_RUNTIME +#ifdef KBASE_PM_RUNTIME int kbase_device_runtime_resume(struct device *dev) { int ret = 0; @@ -3771,7 +3944,7 @@ int kbase_device_runtime_resume(struct device *dev) return ret; } -#endif /* CONFIG_PM_RUNTIME */ +#endif /* KBASE_PM_RUNTIME */ /** Runtime idle callback from the OS. * @@ -3783,24 +3956,24 @@ int kbase_device_runtime_resume(struct device *dev) * @return A standard Linux error code */ -#ifdef CONFIG_PM_RUNTIME +#ifdef KBASE_PM_RUNTIME static int kbase_device_runtime_idle(struct device *dev) { /* Avoid pm_runtime_suspend being called */ return 1; } -#endif /* CONFIG_PM_RUNTIME */ +#endif /* KBASE_PM_RUNTIME */ /** The power management operations for the platform driver. */ static const struct dev_pm_ops kbase_pm_ops = { .suspend = kbase_device_suspend, .resume = kbase_device_resume, -#ifdef CONFIG_PM_RUNTIME +#ifdef KBASE_PM_RUNTIME .runtime_suspend = kbase_device_runtime_suspend, .runtime_resume = kbase_device_runtime_resume, .runtime_idle = kbase_device_runtime_idle, -#endif /* CONFIG_PM_RUNTIME */ +#endif /* KBASE_PM_RUNTIME */ }; #ifdef CONFIG_OF diff --git a/drivers/gpu/arm/midgard/mali_kbase_cpuprops.c b/drivers/gpu/arm/midgard/mali_kbase_cpuprops.c deleted file mode 100644 index 5b62539dfbab..000000000000 --- a/drivers/gpu/arm/midgard/mali_kbase_cpuprops.c +++ /dev/null @@ -1,125 +0,0 @@ -/* - * - * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * A copy of the licence is included with the program, and can also be obtained - * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. 
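The CONFIG_PM_RUNTIME guards above are switched to KBASE_PM_RUNTIME because CONFIG_PM_RUNTIME was folded into CONFIG_PM in Linux 3.19; the replacement macro is defined in the mali_kbase_defs.h hunk later in this patch:

	#if defined(CONFIG_PM_RUNTIME) || \
		(defined(CONFIG_PM) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0))
	#define KBASE_PM_RUNTIME 1
	#endif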
- * - */ - - - -/* - * Base kernel property query APIs - */ - -#include "mali_kbase.h" -#ifdef BASE_LEGACY_UK7_SUPPORT - -#include "mali_kbase_cpuprops.h" -#include "mali_kbase_uku.h" -#include -#include -#include -#include -#if defined(CONFIG_ARM) || defined(CONFIG_ARM64) -#include -#endif - -#define KBASE_DEFAULT_CPU_NUM 0 - -#define L1_DCACHE_LINE_SIZE_LOG2 L1_CACHE_SHIFT - -/* - * Macros used to extract cpu id info - * see documentation for Main ID register - */ -#define KBASE_CPUPROPS_ID_GET_REV(cpuid) ((cpuid) & 0x0F) /* [3:0] Revision */ -#define KBASE_CPUPROPS_ID_GET_PART_NR(cpuid)(((cpuid) >> 4) & 0xFFF) /* [15:4] Part number */ -#define KBASE_CPUPROPS_ID_GET_ARCH(cpuid) (((cpuid) >> 16) & 0x0F) /* [19:16] Architecture */ -#define KBASE_CPUPROPS_ID_GET_VARIANT(cpuid)(((cpuid) >> 20) & 0x0F) /* [23:20] Variant */ -#define KBASE_CPUPROPS_ID_GET_CODE(cpuid) (((cpuid) >> 24) & 0xFF) /* [31:23] ASCII code of implementer trademark */ - -/*Below value sourced from OSK*/ -#define L1_DCACHE_SIZE ((u32)0x00008000) - -/** - * kbasep_cpuprops_uk_get_cpu_id_info - Retrieves detailed CPU info from given - * cpu_val ( ID reg ) - * @kbase_props: CPU props to be filled-in with cpu id info - * - */ -#if defined(CONFIG_ARM) || defined(CONFIG_ARM64) -static void kbasep_cpuprops_uk_get_cpu_id_info(struct kbase_uk_cpuprops * const kbase_props) -{ - kbase_props->props.cpu_id.id = read_cpuid_id(); - - kbase_props->props.cpu_id.valid = 1; - kbase_props->props.cpu_id.rev = KBASE_CPUPROPS_ID_GET_REV(kbase_props->props.cpu_id.id); - kbase_props->props.cpu_id.part = KBASE_CPUPROPS_ID_GET_PART_NR(kbase_props->props.cpu_id.id); - kbase_props->props.cpu_id.arch = KBASE_CPUPROPS_ID_GET_ARCH(kbase_props->props.cpu_id.id); - kbase_props->props.cpu_id.variant = KBASE_CPUPROPS_ID_GET_VARIANT(kbase_props->props.cpu_id.id); - kbase_props->props.cpu_id.implementer = KBASE_CPUPROPS_ID_GET_CODE(kbase_props->props.cpu_id.id); -} -#else -static void kbasep_cpuprops_uk_get_cpu_id_info(struct kbase_uk_cpuprops * const kbase_props) -{ - kbase_props->props.cpu_id.id = 0; - kbase_props->props.cpu_id.valid = 0; - kbase_props->props.cpu_id.rev = 0; - kbase_props->props.cpu_id.part = 0; - kbase_props->props.cpu_id.arch = 0; - kbase_props->props.cpu_id.variant = 0; - kbase_props->props.cpu_id.implementer = 'N'; -} -#endif - -/* - * This function (and file!) is kept for the backward compatibility reasons. - * It shall be removed as soon as KBASE_FUNC_CPU_PROPS_REG_DUMP_OBSOLETE - * (previously KBASE_FUNC_CPU_PROPS_REG_DUMP) ioctl call - * is removed. Removal of KBASE_FUNC_CPU_PROPS_REG_DUMP is part of having - * the function for reading cpu properties moved from base to osu. 
- */ - -int kbase_cpuprops_uk_get_props(struct kbase_context *kctx, - struct kbase_uk_cpuprops * const props) -{ - unsigned int max_cpu_freq; - - props->props.cpu_l1_dcache_line_size_log2 = L1_DCACHE_LINE_SIZE_LOG2; - props->props.cpu_l1_dcache_size = L1_DCACHE_SIZE; - props->props.cpu_flags = BASE_CPU_PROPERTY_FLAG_LITTLE_ENDIAN; - - props->props.nr_cores = num_possible_cpus(); - props->props.cpu_page_size_log2 = PAGE_SHIFT; - props->props.available_memory_size = totalram_pages << PAGE_SHIFT; - - kbasep_cpuprops_uk_get_cpu_id_info(props); - - /* check if kernel supports dynamic frequency scaling */ - max_cpu_freq = cpufreq_quick_get_max(KBASE_DEFAULT_CPU_NUM); - if (max_cpu_freq != 0) { - /* convert from kHz to mHz */ - props->props.max_cpu_clock_speed_mhz = max_cpu_freq / 1000; - } else { - /* fallback if CONFIG_CPU_FREQ turned off */ - int err; - kbase_cpu_clk_speed_func get_clock_speed; - - get_clock_speed = (kbase_cpu_clk_speed_func) CPU_SPEED_FUNC; - err = get_clock_speed(&props->props.max_cpu_clock_speed_mhz); - if (err) - return err; - } - - return 0; -} - -#endif /* BASE_LEGACY_UK7_SUPPORT */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_cpuprops.h b/drivers/gpu/arm/midgard/mali_kbase_cpuprops.h deleted file mode 100644 index daa46e502e20..000000000000 --- a/drivers/gpu/arm/midgard/mali_kbase_cpuprops.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * - * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * A copy of the licence is included with the program, and can also be obtained - * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. - * - */ - - - -#include "mali_kbase.h" -#ifdef BASE_LEGACY_UK7_SUPPORT - -/** - * @file mali_kbase_cpuprops.h - * Base kernel property query APIs - */ - -#ifndef _KBASE_CPUPROPS_H_ -#define _KBASE_CPUPROPS_H_ - -/* Forward declarations */ -struct kbase_uk_cpuprops; - -/** - * This file is kept for the backward compatibility reasons. - * It shall be removed as soon as KBASE_FUNC_CPU_PROPS_REG_DUMP_OBSOLETE - * (previously KBASE_FUNC_CPU_PROPS_REG_DUMP) ioctl call - * is removed. Removal of KBASE_FUNC_CPU_PROPS_REG_DUMP is part of having - * the function for reading cpu properties moved from base to osu. - */ - -/** - * @brief Provides CPU properties data. - * - * Fill the struct kbase_uk_cpuprops with values from CPU configuration. - * - * @param kctx The kbase context - * @param kbase_props A copy of the struct kbase_uk_cpuprops structure from userspace - * - * @return 0 on success. Any other value indicates failure. - */ -int kbase_cpuprops_uk_get_props(struct kbase_context *kctx, struct kbase_uk_cpuprops * const kbase_props); - -#endif /*_KBASE_CPUPROPS_H_*/ -#endif /* BASE_LEGACY_UK7_SUPPORT */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c b/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c new file mode 100644 index 000000000000..41ce05130d8f --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c @@ -0,0 +1,447 @@ +/* + * + * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +#include "mali_kbase_debug_job_fault.h" + +#ifdef CONFIG_DEBUG_FS + +static bool kbase_is_job_fault_event_pending(struct list_head *event_list) +{ + bool ret; + + ret = (!list_empty(event_list)); + + return ret; +} + +static bool kbase_ctx_has_no_event_pending( + struct kbase_context *kctx, struct list_head *event_list) +{ + struct base_job_fault_event *event; + + if (list_empty(event_list)) + return true; + list_for_each_entry(event, event_list, head) { + if (event->katom->kctx == kctx) + return false; + } + return false; +} + +/* wait until the fault happen and copy the event */ +static int kbase_job_fault_event_wait(struct kbase_device *kbdev, + struct list_head *event_list, + struct base_job_fault_event *event) +{ + struct base_job_fault_event *event_in; + + if (list_empty(event_list)) { + if (wait_event_interruptible(kbdev->job_fault_wq, + kbase_is_job_fault_event_pending(event_list))) + return -ERESTARTSYS; + } + + event_in = list_entry(event_list->next, + struct base_job_fault_event, head); + + event->event_code = event_in->event_code; + event->katom = event_in->katom; + return 0; + +} + +/* remove the event from the queue */ +static struct base_job_fault_event *kbase_job_fault_event_dequeue( + struct kbase_device *kbdev, struct list_head *event_list) +{ + struct base_job_fault_event *event; + + event = list_entry(event_list->next, + struct base_job_fault_event, head); + list_del(event_list->next); + + return event; + +} + +/* Remove all the following atoms after the failed atom in the same context + * Call the postponed bottom half of job done. + * Then, this context could be rescheduled. + */ +static void kbase_job_fault_resume_event_cleanup(struct kbase_context *kctx) +{ + struct list_head *event_list = &kctx->job_fault_resume_event_list; + + while (!list_empty(event_list)) { + struct base_job_fault_event *event; + + event = kbase_job_fault_event_dequeue(kctx->kbdev, + &kctx->job_fault_resume_event_list); + kbase_jd_done_worker(&event->katom->work); + } + +} + +/* Remove all the failed atoms that belong to different contexts + * Resume all the contexts that were suspend due to failed job + */ +static void kbase_job_fault_event_cleanup(struct kbase_device *kbdev) +{ + struct list_head *event_list = &kbdev->job_fault_event_list; + + while (!list_empty(event_list)) { + + kbase_job_fault_event_dequeue(kbdev, event_list); + wake_up(&kbdev->job_fault_resume_wq); + } +} + +static void kbase_job_fault_resume_worker(struct work_struct *data) +{ + struct base_job_fault_event *event = container_of(data, + struct base_job_fault_event, job_fault_work); + struct kbase_context *kctx; + struct kbase_jd_atom *katom; + + katom = event->katom; + kctx = katom->kctx; + + dev_info(kctx->kbdev->dev, "Job dumping wait\n"); + + /* When it was waked up, it need to check if queue is empty or the + * failed atom belongs to different context. If yes, wake up. Both + * of them mean the failed job has been dumped. Please note, it + * should never happen that the job_fault_event_list has the two + * atoms belong to the same context. 
+ */ + wait_event(kctx->kbdev->job_fault_resume_wq, + kbase_ctx_has_no_event_pending(kctx, + &kctx->kbdev->job_fault_event_list)); + + atomic_set(&kctx->job_fault_count, 0); + kbase_jd_done_worker(&katom->work); + + /* In case the following atoms were scheduled during failed job dump + * the job_done_worker was held. We need to rerun it after the dump + * was finished + */ + kbase_job_fault_resume_event_cleanup(kctx); + + dev_info(kctx->kbdev->dev, "Job dumping finish, resume scheduler\n"); +} + +static struct base_job_fault_event *kbase_job_fault_event_queue( + struct list_head *event_list, + struct kbase_jd_atom *atom, + u32 completion_code) +{ + struct base_job_fault_event *event; + + event = &atom->fault_event; + + event->katom = atom; + event->event_code = completion_code; + + list_add_tail(&event->head, event_list); + + return event; + +} + +static void kbase_job_fault_event_post(struct kbase_device *kbdev, + struct kbase_jd_atom *katom, u32 completion_code) +{ + struct base_job_fault_event *event; + + event = kbase_job_fault_event_queue(&kbdev->job_fault_event_list, + katom, completion_code); + + wake_up_interruptible(&kbdev->job_fault_wq); + + INIT_WORK(&event->job_fault_work, kbase_job_fault_resume_worker); + queue_work(kbdev->job_fault_resume_workq, &event->job_fault_work); + + dev_info(katom->kctx->kbdev->dev, "Job fault happen, start dump: %d_%d", + katom->kctx->tgid, katom->kctx->id); + +} + +/* + * This function will process the job fault + * Get the register copy + * Send the failed job dump event + * Create a Wait queue to wait until the job dump finish + */ + +bool kbase_debug_job_fault_process(struct kbase_jd_atom *katom, + u32 completion_code) +{ + struct kbase_context *kctx = katom->kctx; + + /* Check if dumping is in the process + * only one atom of each context can be dumped at the same time + * If the atom belongs to different context, it can be dumped + */ + if (atomic_read(&kctx->job_fault_count) > 0) { + kbase_job_fault_event_queue( + &kctx->job_fault_resume_event_list, + katom, completion_code); + dev_info(kctx->kbdev->dev, "queue:%d\n", + kbase_jd_atom_id(kctx, katom)); + return true; + } + + if (kctx->kbdev->job_fault_debug == true) { + + if (completion_code != BASE_JD_EVENT_DONE) { + + if (kbase_job_fault_get_reg_snapshot(kctx) == false) { + dev_warn(kctx->kbdev->dev, "get reg dump failed\n"); + return false; + } + + kbase_job_fault_event_post(kctx->kbdev, katom, + completion_code); + atomic_inc(&kctx->job_fault_count); + dev_info(kctx->kbdev->dev, "post:%d\n", + kbase_jd_atom_id(kctx, katom)); + return true; + + } + } + return false; + +} + +static int debug_job_fault_show(struct seq_file *m, void *v) +{ + struct kbase_device *kbdev = m->private; + struct base_job_fault_event *event = (struct base_job_fault_event *)v; + struct kbase_context *kctx = event->katom->kctx; + int i; + + dev_info(kbdev->dev, "debug job fault seq show:%d_%d, %d", + kctx->tgid, kctx->id, event->reg_offset); + + if (kctx->reg_dump == NULL) { + dev_warn(kbdev->dev, "reg dump is NULL"); + return -1; + } + + if (kctx->reg_dump[event->reg_offset] == + REGISTER_DUMP_TERMINATION_FLAG) { + /* Return the error here to stop the read. And the + * following next() will not be called. 
The stop can + * get the real event resource and release it + */ + return -1; + } + + if (event->reg_offset == 0) + seq_printf(m, "%d_%d\n", kctx->tgid, kctx->id); + + for (i = 0; i < 50; i++) { + if (kctx->reg_dump[event->reg_offset] == + REGISTER_DUMP_TERMINATION_FLAG) { + break; + } + seq_printf(m, "%08x: %08x\n", + kctx->reg_dump[event->reg_offset], + kctx->reg_dump[1+event->reg_offset]); + event->reg_offset += 2; + + } + + + return 0; +} +static void *debug_job_fault_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct kbase_device *kbdev = m->private; + struct base_job_fault_event *event = (struct base_job_fault_event *)v; + + dev_info(kbdev->dev, "debug job fault seq next:%d, %d", + event->reg_offset, (int)*pos); + + return event; +} + +static void *debug_job_fault_start(struct seq_file *m, loff_t *pos) +{ + struct kbase_device *kbdev = m->private; + struct base_job_fault_event *event; + + dev_info(kbdev->dev, "fault job seq start:%d", (int)*pos); + + /* The condition is trick here. It needs make sure the + * fault hasn't happened and the dumping hasn't been started, + * or the dumping has finished + */ + if (*pos == 0) { + event = kmalloc(sizeof(*event), GFP_KERNEL); + event->reg_offset = 0; + if (kbase_job_fault_event_wait(kbdev, + &kbdev->job_fault_event_list, event)) { + kfree(event); + return NULL; + } + + /* The cache flush workaround is called in bottom half of + * job done but we delayed it. Now we should clean cache + * earlier. Then the GPU memory dump should be correct. + */ + if (event->katom->need_cache_flush_cores_retained) { + kbase_gpu_cacheclean(kbdev, event->katom); + event->katom->need_cache_flush_cores_retained = 0; + } + + } else + return NULL; + + return event; +} + +static void debug_job_fault_stop(struct seq_file *m, void *v) +{ + struct kbase_device *kbdev = m->private; + + /* here we wake up the kbase_jd_done_worker after stop, it needs + * get the memory dump before the register dump in debug daemon, + * otherwise, the memory dump may be incorrect. + */ + + if (v != NULL) { + kfree(v); + dev_info(kbdev->dev, "debug job fault seq stop stage 1"); + + } else { + if (!list_empty(&kbdev->job_fault_event_list)) { + kbase_job_fault_event_dequeue(kbdev, + &kbdev->job_fault_event_list); + wake_up(&kbdev->job_fault_resume_wq); + } + dev_info(kbdev->dev, "debug job fault seq stop stage 2"); + } + +} + +static const struct seq_operations ops = { + .start = debug_job_fault_start, + .next = debug_job_fault_next, + .stop = debug_job_fault_stop, + .show = debug_job_fault_show, +}; + +static int debug_job_fault_open(struct inode *in, struct file *file) +{ + struct kbase_device *kbdev = in->i_private; + + seq_open(file, &ops); + + ((struct seq_file *)file->private_data)->private = kbdev; + dev_info(kbdev->dev, "debug job fault seq open"); + + kbdev->job_fault_debug = true; + + return 0; + +} + +static int debug_job_fault_release(struct inode *in, struct file *file) +{ + struct kbase_device *kbdev = in->i_private; + + seq_release(in, file); + + kbdev->job_fault_debug = false; + + /* Clean the unprocessed job fault. After that, all the suspended + * contexts could be rescheduled. 
+ */ + kbase_job_fault_event_cleanup(kbdev); + + dev_info(kbdev->dev, "debug job fault seq close"); + + return 0; +} + +static const struct file_operations kbasep_debug_job_fault_fops = { + .open = debug_job_fault_open, + .read = seq_read, + .llseek = seq_lseek, + .release = debug_job_fault_release, +}; + +static int kbase_job_fault_event_init(struct kbase_device *kbdev) +{ + + INIT_LIST_HEAD(&kbdev->job_fault_event_list); + + init_waitqueue_head(&(kbdev->job_fault_wq)); + init_waitqueue_head(&(kbdev->job_fault_resume_wq)); + + kbdev->job_fault_resume_workq = alloc_workqueue( + "kbase_job_fault_resume_work_queue", WQ_MEM_RECLAIM, 1); + + return 0; +} + +/* + * Initialize debugfs entry for job fault dump + */ +void kbase_debug_job_fault_dev_init(struct kbase_device *kbdev) +{ + debugfs_create_file("job_fault", S_IRUGO, + kbdev->mali_debugfs_directory, kbdev, + &kbasep_debug_job_fault_fops); + + kbase_job_fault_event_init(kbdev); + kbdev->job_fault_debug = false; + +} + +/* + * Initialize the relevant data structure per context + */ +void kbase_debug_job_fault_context_init(struct kbase_context *kctx) +{ + + /* We need allocate double size register range + * Because this memory will keep the register address and value + */ + kctx->reg_dump = kmalloc(0x4000 * 2, GFP_KERNEL); + if (kctx->reg_dump == NULL) + return; + + if (kbase_debug_job_fault_reg_snapshot_init(kctx, 0x4000) == false) { + kfree(kctx->reg_dump); + kctx->reg_dump = NULL; + } + INIT_LIST_HEAD(&kctx->job_fault_resume_event_list); + atomic_set(&kctx->job_fault_count, 0); + +} + +/* + * release the relevant resource per context + */ +void kbase_debug_job_fault_context_exit(struct kbase_context *kctx) +{ + kfree(kctx->reg_dump); +} + +#endif diff --git a/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h b/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h new file mode 100644 index 000000000000..3734046f3fd9 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h @@ -0,0 +1,82 @@ +/* + * + * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +#ifndef _KBASE_DEBUG_JOB_FAULT_H +#define _KBASE_DEBUG_JOB_FAULT_H + +#include +#include +#include + +#define REGISTER_DUMP_TERMINATION_FLAG 0xFFFFFFFF + +/** + * kbase_debug_job_fault_dev_init - Initialize job fault debug sysfs + * and create the fault event wait queue per device + * @kbdev: Device pointer + */ +void kbase_debug_job_fault_dev_init(struct kbase_device *kbdev); + +/** + * kbase_debug_job_fault_context_init - Initialize the relevant + * data structure per context + * @kctx: KBase context pointer + */ +void kbase_debug_job_fault_context_init(struct kbase_context *kctx); + +/** + * kbase_debug_job_fault_context_exit - Release the relevant + * resource per context + * @kctx: KBase context pointer + */ +void kbase_debug_job_fault_context_exit(struct kbase_context *kctx); + +/** + * kbase_debug_job_fault_process - Process the failed job. 
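Pulling the job fault pieces above together: a failed atom is queued on kbdev->job_fault_event_list and wakes job_fault_wq; the debugfs reader blocks in debug_job_fault_start() until that happens; and dequeueing in the stop path wakes job_fault_resume_wq so the postponed kbase_jd_done_worker() can finally run. A minimal user-space consumer might look like the sketch below; the debugfs mount point and directory name are assumptions, only the "job_fault" file name comes from this patch:

	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		char buf[4096];
		ssize_t n;
		/* Path is illustrative; the file lives in the driver's mali
		 * debugfs directory. Opening it sets job_fault_debug. */
		int fd = open("/sys/kernel/debug/mali0/job_fault", O_RDONLY);

		if (fd < 0)
			return 1;
		/* read() blocks until a job fails, then yields a "<tgid>_<id>"
		 * header followed by register address/value pairs; closing the
		 * file releases any contexts suspended for dumping. */
		while ((n = read(fd, buf, sizeof(buf))) > 0)
			fwrite(buf, 1, n, stdout);
		close(fd);
		return 0;
	}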
+ * It will send a event and wake up the job fault waiting queue + * Then create a work queue to wait for job dump finish + * This function should be called in the interrupt handler and before + * jd_done that make sure the jd_done_worker will be delayed until the + * job dump finish + * @katom: The failed atom pointer + * @completion_code: the job status + * @return true if dump is going on + */ +bool kbase_debug_job_fault_process(struct kbase_jd_atom *katom, + u32 completion_code); + + +/** + * kbase_debug_job_fault_reg_snapshot_init - Set the interested registers + * address during the job fault process, the relevant registers will + * be saved when a job fault happen + * @kctx: KBase context pointer + * @reg_range: Maximum register address space + * @return true if initializing successfully + */ +bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx, + int reg_range); + +/** + * kbase_job_fault_get_reg_snapshot - Read the interested registers for + * failed job dump + * @kctx: KBase context pointer + * @return true if getting registers successfully + */ +bool kbase_job_fault_get_reg_snapshot(struct kbase_context *kctx); + +#endif /*_KBASE_DEBUG_JOB_FAULT_H*/ diff --git a/drivers/gpu/arm/midgard/mali_kbase_defs.h b/drivers/gpu/arm/midgard/mali_kbase_defs.h index e7c9f34119b1..fe5f1046fd73 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_defs.h +++ b/drivers/gpu/arm/midgard/mali_kbase_defs.h @@ -31,7 +31,6 @@ #include #include #include -#include #include #include #include @@ -40,6 +39,11 @@ #include #include +#ifdef CONFIG_MALI_FPGA_BUS_LOGGER +#include +#endif + + #ifdef CONFIG_KDS #include #endif /* CONFIG_KDS */ @@ -59,6 +63,11 @@ #include #include +#if defined(CONFIG_PM_RUNTIME) || \ + (defined(CONFIG_PM) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)) +#define KBASE_PM_RUNTIME 1 +#endif + /** Enable SW tracing when set */ #ifdef CONFIG_MALI_MIDGARD_ENABLE_TRACE #define KBASE_TRACE_ENABLE 1 @@ -98,6 +107,7 @@ * @note if not in use, define this value to 0 instead of \#undef'ing it */ #define KBASE_DISABLE_SCHEDULING_SOFT_STOPS 0 + /** * Prevent hard-stops from occuring in scheduling situations * @@ -196,6 +206,18 @@ #define KBASEP_ATOM_ID_INVALID BASE_JD_ATOM_COUNT +#ifdef CONFIG_DEBUG_FS +struct base_job_fault_event { + + u32 event_code; + struct kbase_jd_atom *katom; + struct work_struct job_fault_work; + struct list_head head; + int reg_offset; +}; + +#endif + struct kbase_jd_atom_dependency { struct kbase_jd_atom *atom; u8 dep_type; @@ -366,6 +388,9 @@ struct kbase_jd_atom { struct kbase_jd_atom_backend backend; +#ifdef CONFIG_DEBUG_FS + struct base_job_fault_event fault_event; +#endif }; static inline bool kbase_jd_katom_is_secure(const struct kbase_jd_atom *katom) @@ -713,6 +738,32 @@ struct kbase_secure_ops { }; +/** + * struct kbase_mem_pool - Page based memory pool for kctx/kbdev + * @kbdev: Kbase device where memory is used + * @cur_size: Number of free pages currently in the pool (may exceed @max_size + * in some corner cases) + * @max_size: Maximum number of free pages in the pool + * @pool_lock: Lock protecting the pool - must be held when modifying @cur_size + * and @page_list + * @page_list: List of free pages in the pool + * @reclaim: Shrinker for kernel reclaim of free pages + * @next_pool: Pointer to next pool where pages can be allocated when this pool + * is empty. Pages will spill over to the next pool when this pool + * is full. Can be NULL if there is no next pool. 
+ */ +struct kbase_mem_pool { + struct kbase_device *kbdev; + size_t cur_size; + size_t max_size; + spinlock_t pool_lock; + struct list_head page_list; + struct shrinker reclaim; + + struct kbase_mem_pool *next_pool; +}; + + #define DEVNAME_SIZE 16 struct kbase_device { @@ -721,6 +772,7 @@ struct kbase_device { u32 hw_quirks_sc; u32 hw_quirks_tiler; u32 hw_quirks_mmu; + u32 hw_quirks_jm; struct list_head entry; struct device *dev; @@ -753,6 +805,7 @@ struct kbase_device { struct kbase_pm_device_data pm; struct kbasep_js_device_data js_data; + struct kbase_mem_pool mem_pool; struct kbasep_mem_device memdev; struct kbase_mmu_mode const *mmu_mode; @@ -767,11 +820,6 @@ struct kbase_device { /** List of features available */ unsigned long hw_features_mask[(BASE_HW_FEATURE_END + BITS_PER_LONG - 1) / BITS_PER_LONG]; - /* Cached present bitmaps - these are the same as the corresponding hardware registers */ - u64 shader_present_bitmap; - u64 tiler_present_bitmap; - u64 l2_present_bitmap; - /* Bitmaps of cores that are currently in use (running jobs). * These should be kept up to date by the job scheduler. * @@ -899,6 +947,8 @@ struct kbase_device { #endif #endif + struct kbase_ipa_context *ipa_ctx; + #ifdef CONFIG_MALI_TRACE_TIMELINE struct kbase_trace_kbdev_timeline timeline; #endif @@ -908,6 +958,15 @@ struct kbase_device { struct dentry *mali_debugfs_directory; /* Root directory for per context entry */ struct dentry *debugfs_ctx_directory; + + /* failed job dump, used for separate debug process */ + bool job_fault_debug; + wait_queue_head_t job_fault_wq; + wait_queue_head_t job_fault_resume_wq; + struct workqueue_struct *job_fault_resume_workq; + struct list_head job_fault_event_list; + struct kbase_context *kctx_fault; + #endif /* CONFIG_DEBUG_FS */ /* fbdump profiling controls set by gator */ @@ -931,7 +990,6 @@ struct kbase_device { bool force_replay_random; #endif - /* Total number of created contexts */ atomic_t ctx_num; @@ -946,12 +1004,36 @@ struct kbase_device { /* defaults for new context created for this device */ u32 infinite_cache_active_default; + size_t mem_pool_max_size_default; /* system coherency mode */ u32 system_coherency; /* Secure operations */ struct kbase_secure_ops *secure_ops; + + /* + * true when GPU is put into secure mode + */ + bool secure_mode; + + /* + * true if secure mode is supported + */ + bool secure_mode_support; + + +#ifdef CONFIG_MALI_DEBUG + wait_queue_head_t driver_inactive_wait; + bool driver_inactive; +#endif /* CONFIG_MALI_DEBUG */ + +#ifdef CONFIG_MALI_FPGA_BUS_LOGGER + /* + * Bus logger integration. + */ + struct bus_logger_client *buslogger; +#endif }; /* JSCTX ringbuffer size must always be a power of 2 */ @@ -1014,7 +1096,7 @@ struct kbase_context { u64 *mmu_teardown_pages; - phys_addr_t aliasing_sink_page; + struct page *aliasing_sink_page; struct mutex reg_lock; /* To be converted to a rwlock? 
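The allocation and free paths for this pool live outside this hunk; the fragment below is only a sketch of the spill-over behaviour documented for @next_pool. The helper name and the use of page->lru for list linkage are assumptions, not the driver's actual implementation:

static void example_mem_pool_free_page(struct kbase_mem_pool *pool,
		struct page *p)
{
	spin_lock(&pool->pool_lock);
	if (pool->cur_size < pool->max_size) {
		/* Room in this pool: keep the page for later reuse. */
		list_add(&p->lru, &pool->page_list);
		pool->cur_size++;
		spin_unlock(&pool->pool_lock);
		return;
	}
	spin_unlock(&pool->pool_lock);

	if (pool->next_pool)
		/* This pool is full: spill the page over to the next pool. */
		example_mem_pool_free_page(pool->next_pool, p);
	else
		__free_page(p);
}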
*/ struct rb_root reg_rbtree; /* Red-Black tree of GPU regions (live regions) */ @@ -1030,8 +1112,7 @@ struct kbase_context { atomic_t used_pages; atomic_t nonmapped_pages; - struct kbase_mem_allocator osalloc; - struct kbase_mem_allocator *pgd_allocator; + struct kbase_mem_pool mem_pool; struct list_head waiting_soft_jobs; #ifdef CONFIG_KDS @@ -1070,6 +1151,15 @@ struct kbase_context { /* Spinlock guarding data */ spinlock_t mem_profile_lock; struct dentry *kctx_dentry; + + /* for job fault debug */ + unsigned int *reg_dump; + atomic_t job_fault_count; + /* This list will keep the following atoms during the dump + * in the same context + */ + struct list_head job_fault_resume_event_list; + #endif /* CONFIG_DEBUG_FS */ struct jsctx_rb jsctx_rb @@ -1099,6 +1189,14 @@ struct kbase_context { /* Only one userspace vinstr client per kbase context */ struct kbase_vinstr_client *vinstr_cli; struct mutex vinstr_cli_lock; + + /* Must hold queue_mutex when accessing */ + bool ctx_active; + + /* List of completed jobs waiting for events to be posted */ + struct list_head completed_jobs; + /* Number of work items currently pending on job_done_wq */ + atomic_t work_count; }; enum kbase_reg_access_type { diff --git a/drivers/gpu/arm/midgard/mali_kbase_device.c b/drivers/gpu/arm/midgard/mali_kbase_device.c index 01e41462f17c..6b8a2854a7a7 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_device.c +++ b/drivers/gpu/arm/midgard/mali_kbase_device.c @@ -153,14 +153,16 @@ int kbase_device_init(struct kbase_device * const kbdev) err = kbase_hw_set_issues_mask(kbdev); if (err) goto fail; + /* Set the list of features available on the current HW * (identified by the GPU_ID register) */ kbase_hw_set_features_mask(kbdev); -#if defined(CONFIG_ARM64) + /* On Linux 4.0+, dma coherency is determined from device tree */ +#if defined(CONFIG_ARM64) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0) set_dma_ops(kbdev->dev, &noncoherent_swiotlb_dma_ops); -#endif /* CONFIG_ARM64 */ +#endif /* Workaround a pre-3.13 Linux issue, where dma_mask is NULL when our * device structure was created by device-tree @@ -178,15 +180,11 @@ int kbase_device_init(struct kbase_device * const kbdev) if (err) goto dma_set_mask_failed; - err = kbase_mem_lowlevel_init(kbdev); - if (err) - goto mem_lowlevel_init_failed; - kbdev->nr_hw_address_spaces = kbdev->gpu_props.num_address_spaces; err = kbase_device_all_as_init(kbdev); if (err) - goto term_lowlevel_mem; + goto as_init_failed; spin_lock_init(&kbdev->hwcnt.lock); @@ -222,14 +220,16 @@ int kbase_device_init(struct kbase_device * const kbdev) kbdev->mmu_mode = kbase_mmu_mode_get_lpae(); +#ifdef CONFIG_MALI_DEBUG + init_waitqueue_head(&kbdev->driver_inactive_wait); +#endif /* CONFIG_MALI_DEBUG */ + return 0; term_trace: kbasep_trace_term(kbdev); term_as: kbase_device_all_as_term(kbdev); -term_lowlevel_mem: - kbase_mem_lowlevel_term(kbdev); -mem_lowlevel_init_failed: +as_init_failed: dma_set_mask_failed: fail: return err; @@ -248,8 +248,6 @@ void kbase_device_term(struct kbase_device *kbdev) kbasep_trace_term(kbdev); kbase_device_all_as_term(kbdev); - - kbase_mem_lowlevel_term(kbdev); } void kbase_device_free(struct kbase_device *kbdev) diff --git a/drivers/gpu/arm/midgard/mali_kbase_event.c b/drivers/gpu/arm/midgard/mali_kbase_event.c index 784acecae173..25b30f0b976e 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_event.c +++ b/drivers/gpu/arm/midgard/mali_kbase_event.c @@ -28,6 +28,8 @@ static struct base_jd_udata kbase_event_process(struct kbase_context *kctx, stru { struct base_jd_udata data; + 
lockdep_assert_held(&kctx->jctx.lock); + KBASE_DEBUG_ASSERT(kctx != NULL); KBASE_DEBUG_ASSERT(katom != NULL); KBASE_DEBUG_ASSERT(katom->status == KBASE_JD_ATOM_STATE_COMPLETED); @@ -41,9 +43,7 @@ static struct base_jd_udata kbase_event_process(struct kbase_context *kctx, stru kbase_tlstream_tl_del_atom(katom); #endif - mutex_lock(&kctx->jctx.lock); katom->status = KBASE_JD_ATOM_STATE_UNUSED; - mutex_unlock(&kctx->jctx.lock); wake_up(&katom->completed); @@ -98,32 +98,72 @@ int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *ueve dev_dbg(ctx->kbdev->dev, "event dequeuing %p\n", (void *)atom); uevent->event_code = atom->event_code; uevent->atom_number = (atom - ctx->jctx.atoms); + + if (atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) + kbase_jd_free_external_resources(atom); + + mutex_lock(&ctx->jctx.lock); uevent->udata = kbase_event_process(ctx, atom); + mutex_unlock(&ctx->jctx.lock); return 0; } KBASE_EXPORT_TEST_API(kbase_event_dequeue); -static void kbase_event_post_worker(struct work_struct *data) +/** + * kbase_event_process_noreport_worker - Worker for processing atoms that do not + * return an event but do have external + * resources + * @data: Work structure + */ +static void kbase_event_process_noreport_worker(struct work_struct *data) { - struct kbase_jd_atom *atom = container_of(data, struct kbase_jd_atom, work); - struct kbase_context *ctx = atom->kctx; + struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, + work); + struct kbase_context *kctx = katom->kctx; - if (atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) - kbase_jd_free_external_resources(atom); + if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) + kbase_jd_free_external_resources(katom); + mutex_lock(&kctx->jctx.lock); + kbase_event_process(kctx, katom); + mutex_unlock(&kctx->jctx.lock); +} + +/** + * kbase_event_process_noreport - Process atoms that do not return an event + * @kctx: Context pointer + * @katom: Atom to be processed + * + * Atoms that do not have external resources will be processed immediately. + * Atoms that do have external resources will be processed on a workqueue, in + * order to avoid locking issues. 
+ */ +static void kbase_event_process_noreport(struct kbase_context *kctx, + struct kbase_jd_atom *katom) +{ + if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) { + INIT_WORK(&katom->work, kbase_event_process_noreport_worker); + queue_work(kctx->event_workq, &katom->work); + } else { + kbase_event_process(kctx, katom); + } +} + +void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *atom) +{ if (atom->core_req & BASE_JD_REQ_EVENT_ONLY_ON_FAILURE) { if (atom->event_code == BASE_JD_EVENT_DONE) { /* Don't report the event */ - kbase_event_process(ctx, atom); + kbase_event_process_noreport(ctx, atom); return; } } if (atom->core_req & BASEP_JD_REQ_EVENT_NEVER) { /* Don't report the event */ - kbase_event_process(ctx, atom); + kbase_event_process_noreport(ctx, atom); return; } @@ -133,17 +173,6 @@ static void kbase_event_post_worker(struct work_struct *data) kbase_event_wakeup(ctx); } - -void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *atom) -{ - KBASE_DEBUG_ASSERT(ctx); - KBASE_DEBUG_ASSERT(ctx->event_workq); - KBASE_DEBUG_ASSERT(atom); - - INIT_WORK(&atom->work, kbase_event_post_worker); - queue_work(ctx->event_workq, &atom->work); -} - KBASE_EXPORT_TEST_API(kbase_event_post); void kbase_event_close(struct kbase_context *kctx) diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_api.c b/drivers/gpu/arm/midgard/mali_kbase_gator_api.c index 6ef4e39c5cc8..a2174b24ac3c 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_gator_api.c +++ b/drivers/gpu/arm/midgard/mali_kbase_gator_api.c @@ -16,6 +16,7 @@ #include "mali_kbase.h" +#include "mali_kbase_hw.h" #include "mali_kbase_mem_linux.h" #include "mali_kbase_gator_api.h" #include "mali_kbase_gator_hwcnt_names.h" @@ -148,10 +149,8 @@ struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcn in_out_info->nr_core_groups = hand->kbdev->gpu_props.num_core_groups; in_out_info->gpu_id = hand->kbdev->gpu_props.props.core_props.product_id; - /* If we are using a Mali-T6xx or Mali-T72x device */ - if (in_out_info->gpu_id == GPU_ID_PI_T60X || - in_out_info->gpu_id == GPU_ID_PI_T62X || - in_out_info->gpu_id == GPU_ID_PI_T72X) { + /* If we are using a v4 device (Mali-T6xx or Mali-T72x) */ + if (kbase_hw_has_feature(hand->kbdev, BASE_HW_FEATURE_V4)) { uint32_t cg, j; uint64_t core_mask; @@ -227,7 +226,7 @@ struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcn in_out_info->size = dump_size; - flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_WR; + flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_CPU_WR | BASE_MEM_PROT_GPU_WR; nr_pages = PFN_UP(dump_size); reg = kbase_mem_alloc(hand->kctx, nr_pages, nr_pages, 0, &flags, &hand->hwcnt_gpu_va, &va_alignment); @@ -241,6 +240,7 @@ struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcn goto free_buffer; in_out_info->kernel_dump_buffer = hand->hwcnt_cpu_va; + memset(in_out_info->kernel_dump_buffer, 0, nr_pages * PAGE_SIZE); /*setup.dump_buffer = (uintptr_t)in_out_info->kernel_dump_buffer;*/ setup.dump_buffer = hand->hwcnt_gpu_va; diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c b/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c index 4276665fbc98..d632a0bbb1bc 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c +++ b/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c @@ -228,9 +228,12 @@ static void kbase_gpuprops_calculate_props(base_gpu_props * const gpu_props, str gpu_props->l2_props.log2_line_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 0U, 8); gpu_props->l2_props.log2_cache_size = 
KBASE_UBFX32(gpu_props->raw_props.l2_features, 16U, 8); - gpu_props->l2_props.num_l2_slices = 1; - if (gpu_props->core_props.product_id == GPU_ID_PI_T76X) - gpu_props->l2_props.num_l2_slices = KBASE_UBFX32(gpu_props->raw_props.mem_features, 8U, 4) + 1; + + /* Field with number of l2 slices is added to MEM_FEATURES register + * since t76x. Below code assumes that for older GPU reserved bits will + * be read as zero. */ + gpu_props->l2_props.num_l2_slices = + KBASE_UBFX32(gpu_props->raw_props.mem_features, 8U, 4) + 1; gpu_props->tiler_props.bin_size_bytes = 1 << KBASE_UBFX32(gpu_props->raw_props.tiler_features, 0U, 6); gpu_props->tiler_props.max_active_levels = KBASE_UBFX32(gpu_props->raw_props.tiler_features, 8U, 4); diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h b/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h index e37e43c2fb03..463fead4b05d 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h +++ b/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software diff --git a/drivers/gpu/arm/midgard/mali_kbase_hw.c b/drivers/gpu/arm/midgard/mali_kbase_hw.c index 1a5a21b3812d..fac65d4f2286 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_hw.c +++ b/drivers/gpu/arm/midgard/mali_kbase_hw.c @@ -135,6 +135,7 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev) issues = base_hw_issues_tFRx_r0p2; break; case GPU_ID_MAKE(GPU_ID_PI_TFRX, 1, 0, 0): + case GPU_ID_MAKE(GPU_ID_PI_TFRX, 1, 0, 8): issues = base_hw_issues_tFRx_r1p0; break; case GPU_ID_MAKE(GPU_ID_PI_TFRX, 2, 0, 0): @@ -144,6 +145,7 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev) issues = base_hw_issues_t86x_r0p2; break; case GPU_ID_MAKE(GPU_ID_PI_T86X, 1, 0, 0): + case GPU_ID_MAKE(GPU_ID_PI_T86X, 1, 0, 8): issues = base_hw_issues_t86x_r1p0; break; case GPU_ID_MAKE(GPU_ID_PI_T86X, 2, 0, 0): @@ -153,6 +155,7 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev) issues = base_hw_issues_t83x_r0p1; break; case GPU_ID_MAKE(GPU_ID_PI_T83X, 1, 0, 0): + case GPU_ID_MAKE(GPU_ID_PI_T83X, 1, 0, 8): issues = base_hw_issues_t83x_r1p0; break; case GPU_ID_MAKE(GPU_ID_PI_T82X, 0, 0, 0): @@ -162,6 +165,7 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev) issues = base_hw_issues_t82x_r0p1; break; case GPU_ID_MAKE(GPU_ID_PI_T82X, 1, 0, 0): + case GPU_ID_MAKE(GPU_ID_PI_T82X, 1, 0, 8): issues = base_hw_issues_t82x_r1p0; break; default: diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h index a2fdf247881f..6bddaa81073b 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h +++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h @@ -129,13 +129,30 @@ void kbase_backend_release_ctx_noirq(struct kbase_device *kbdev, * @kbdev: Device pointer * @katom: Pointer to the atom to complete * - * This function should only be called from jd_done_worker(). + * This function should only be called from kbase_jd_done_worker() or + * js_return_worker(). 
* * Return: true if atom has completed, false if atom should be re-submitted */ void kbase_backend_complete_wq(struct kbase_device *kbdev, struct kbase_jd_atom *katom); +/** + * kbase_backend_complete_wq_post_sched - Perform backend-specific actions + * required on completing an atom, after + * any scheduling has taken place. + * @kbdev: Device pointer + * @core_req: Core requirements of atom + * @affinity: Affinity of atom + * @coreref_state: Coreref state of atom + * + * This function should only be called from kbase_jd_done_worker() or + * js_return_worker(). + */ +void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev, + base_jd_core_req core_req, u64 affinity, + enum kbase_atom_coreref_state coreref_state); + /** * kbase_backend_reset() - The GPU is being reset. Cancel all jobs on the GPU * and remove any others from the ringbuffers. @@ -265,6 +282,34 @@ bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev); * signalled to know when the reset has completed. */ void kbase_reset_gpu(struct kbase_device *kbdev); + +/** + * kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU. + * @kbdev: Device pointer + * + * This function just soft-stops all the slots to ensure that as many jobs as + * possible are saved. + * + * Return: a boolean which should be interpreted as follows: + * - true - Prepared for reset, kbase_reset_gpu should be called. + * - false - Another thread is performing a reset, kbase_reset_gpu should + * not be called. + */ +bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev); + +/** + * kbase_reset_gpu_locked - Reset the GPU + * @kbdev: Device pointer + * + * This function should be called after kbase_prepare_to_reset_gpu if it + * returns true. It should never be called without a corresponding call to + * kbase_prepare_to_reset_gpu. + * + * After this function is called (or not called if kbase_prepare_to_reset_gpu + * returned false), the caller should wait for kbdev->reset_waitq to be + * signalled to know when the reset has completed. + */ +void kbase_reset_gpu_locked(struct kbase_device *kbdev); #endif /** diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwcnt_reader.h b/drivers/gpu/arm/midgard/mali_kbase_hwcnt_reader.h new file mode 100644 index 000000000000..cf7bf1b35dc5 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_hwcnt_reader.h @@ -0,0 +1,66 @@ +/* + * + * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +#ifndef _KBASE_HWCNT_READER_H_ +#define _KBASE_HWCNT_READER_H_ + +/* The ids of ioctl commands. 
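The new _locked reset entry points declared above mirror the existing kbase_prepare_to_reset_gpu()/kbase_reset_gpu() pair, presumably for callers that already hold the relevant lock. A sketch of the call pattern the kerneldoc describes (function name illustrative):

static void example_trigger_gpu_reset(struct kbase_device *kbdev)
{
	/* Only one thread wins the right to perform the reset. */
	if (kbase_prepare_to_reset_gpu(kbdev))
		kbase_reset_gpu(kbdev);

	/* Whether or not this thread performed it, wait on
	 * kbdev->reset_waitq to learn when the reset has completed. */
}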
*/ +#define KBASE_HWCNT_READER 0xBE +#define KBASE_HWCNT_READER_GET_HWVER _IOR(KBASE_HWCNT_READER, 0x00, u32) +#define KBASE_HWCNT_READER_GET_BUFFER_SIZE _IOR(KBASE_HWCNT_READER, 0x01, u32) +#define KBASE_HWCNT_READER_DUMP _IOW(KBASE_HWCNT_READER, 0x10, u32) +#define KBASE_HWCNT_READER_CLEAR _IOW(KBASE_HWCNT_READER, 0x11, u32) +#define KBASE_HWCNT_READER_GET_BUFFER _IOR(KBASE_HWCNT_READER, 0x20,\ + struct kbase_hwcnt_reader_metadata) +#define KBASE_HWCNT_READER_PUT_BUFFER _IOW(KBASE_HWCNT_READER, 0x21,\ + struct kbase_hwcnt_reader_metadata) +#define KBASE_HWCNT_READER_SET_INTERVAL _IOW(KBASE_HWCNT_READER, 0x30, u32) +#define KBASE_HWCNT_READER_ENABLE_EVENT _IOW(KBASE_HWCNT_READER, 0x40, u32) +#define KBASE_HWCNT_READER_DISABLE_EVENT _IOW(KBASE_HWCNT_READER, 0x41, u32) +#define KBASE_HWCNT_READER_GET_API_VERSION _IOW(KBASE_HWCNT_READER, 0xFF, u32) + +/** + * struct kbase_hwcnt_reader_metadata - hwcnt reader sample buffer metadata + * @timestamp: time when sample was collected + * @event_id: id of an event that triggered sample collection + * @buffer_idx: position in sampling area where sample buffer was stored + */ +struct kbase_hwcnt_reader_metadata { + u64 timestamp; + u32 event_id; + u32 buffer_idx; +}; + +/** + * enum base_hwcnt_reader_event - hwcnt dumping events + * @BASE_HWCNT_READER_EVENT_MANUAL: manual request for dump + * @BASE_HWCNT_READER_EVENT_PERIODIC: periodic dump + * @BASE_HWCNT_READER_EVENT_PREJOB: prejob dump request + * @BASE_HWCNT_READER_EVENT_POSTJOB: postjob dump request + * @BASE_HWCNT_READER_EVENT_COUNT: number of supported events + */ +enum base_hwcnt_reader_event { + BASE_HWCNT_READER_EVENT_MANUAL, + BASE_HWCNT_READER_EVENT_PERIODIC, + BASE_HWCNT_READER_EVENT_PREJOB, + BASE_HWCNT_READER_EVENT_POSTJOB, + + BASE_HWCNT_READER_EVENT_COUNT +}; + +#endif /* _KBASE_HWCNT_READER_H_ */ + diff --git a/drivers/gpu/arm/midgard/mali_kbase_instr.c b/drivers/gpu/arm/midgard/mali_kbase_instr.c index 9c2043ac9480..314ae0819d50 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_instr.c +++ b/drivers/gpu/arm/midgard/mali_kbase_instr.c @@ -63,7 +63,7 @@ void kbase_instr_hwcnt_resume(struct kbase_device *kbdev) } int kbase_instr_hwcnt_enable(struct kbase_context *kctx, - struct kbase_uk_hwcnt_setup *setup) + struct kbase_uk_hwcnt_setup *setup) { struct kbase_device *kbdev; bool access_allowed; @@ -122,36 +122,16 @@ out: } KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_disable); -int kbase_instr_hwcnt_setup(struct kbase_context *kctx, - struct kbase_uk_hwcnt_setup *setup) +int kbase_instr_hwcnt_dump(struct kbase_context *kctx) { - struct kbase_vinstr_context *vinstr_ctx = kctx->kbdev->vinstr_ctx; - u32 bitmap[4]; - - if (setup == NULL) - return -EINVAL; - - bitmap[SHADER_HWCNT_BM] = setup->shader_bm; - bitmap[TILER_HWCNT_BM] = setup->tiler_bm; - bitmap[MMU_L2_HWCNT_BM] = setup->mmu_l2_bm; - bitmap[JM_HWCNT_BM] = setup->jm_bm; - if (setup->dump_buffer) { - if (kctx->vinstr_cli) - return -EBUSY; - kctx->vinstr_cli = kbase_vinstr_attach_client(vinstr_ctx, false, - setup->dump_buffer, bitmap); - if (!kctx->vinstr_cli) - return -ENOMEM; - } else { - kbase_vinstr_detach_client(vinstr_ctx, kctx->vinstr_cli); - kctx->vinstr_cli = NULL; - } + int err; - return 0; -} + err = kbase_instr_hwcnt_request_dump(kctx); + if (err) + return err; -int kbase_instr_hwcnt_dump(struct kbase_context *kctx) -{ - return kbase_vinstr_dump(kctx->kbdev->vinstr_ctx, kctx->vinstr_cli); + err = kbase_instr_hwcnt_wait_for_dump(kctx); + return err; } KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_dump); + diff --git 
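A rough user-space sketch of the new hwcnt reader ioctl interface. It assumes a reader file descriptor has already been obtained from the driver (the setup path is not in this section), that the u32/u64 types used by the header are defined for user space, and that the sample buffers live in a separately mapped sampling area; names and error handling are illustrative only:

#include <stdint.h>
#include <sys/ioctl.h>
#include "mali_kbase_hwcnt_reader.h"

static void example_hwcnt_manual_dump(int fd)
{
	uint32_t buf_size = 0;
	struct kbase_hwcnt_reader_metadata meta;

	/* Size of a single sample buffer in the sampling area. */
	ioctl(fd, KBASE_HWCNT_READER_GET_BUFFER_SIZE, &buf_size);

	/* Trigger a manual dump, then claim and release the sample. */
	ioctl(fd, KBASE_HWCNT_READER_DUMP, 0);
	ioctl(fd, KBASE_HWCNT_READER_GET_BUFFER, &meta);
	/* meta.buffer_idx locates the sample; meta.event_id should be
	 * BASE_HWCNT_READER_EVENT_MANUAL for a dump requested this way. */
	ioctl(fd, KBASE_HWCNT_READER_PUT_BUFFER, &meta);
}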
a/drivers/gpu/arm/midgard/mali_kbase_instr.h b/drivers/gpu/arm/midgard/mali_kbase_instr.h index 5613453a3c08..ac3355e53634 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_instr.h +++ b/drivers/gpu/arm/midgard/mali_kbase_instr.h @@ -26,16 +26,6 @@ #include -/** - * kbase_instr_hwcnt_setup() - Configure HW counters collection - * @kctx: Kbase context - * @setup: &struct kbase_uk_hwcnt_setup containing configuration - * - * Return: 0 on success - */ -int kbase_instr_hwcnt_setup(struct kbase_context *kctx, - struct kbase_uk_hwcnt_setup *setup); - /** * kbase_instr_hwcnt_enable() - Enable HW counters collection * @kctx: Kbase context diff --git a/drivers/gpu/arm/midgard/mali_kbase_ipa.c b/drivers/gpu/arm/midgard/mali_kbase_ipa.c new file mode 100644 index 000000000000..433103c0d331 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_ipa.c @@ -0,0 +1,264 @@ +/* + * + * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + +#include +#include + +#include + +#define NR_IPA_GROUPS 8 + +/** + * struct ipa_group - represents a single IPA group + * @name: name of the IPA group + * @capacitance: capacitance constant for IPA group + */ +struct ipa_group { + const char *name; + u32 capacitance; +}; + +/** + * struct kbase_ipa_context - IPA context per device + * @kbdev: pointer to kbase device + * @groups: array of IPA groups for this context + * @ipa_lock: protects the entire IPA context + */ +struct kbase_ipa_context { + struct kbase_device *kbdev; + struct ipa_group groups[NR_IPA_GROUPS]; + struct mutex ipa_lock; +}; + +static struct ipa_group ipa_groups_def_v4[] = { + { .name = "group0", .capacitance = 0 }, + { .name = "group1", .capacitance = 0 }, + { .name = "group2", .capacitance = 0 }, + { .name = "group3", .capacitance = 0 }, + { .name = "group4", .capacitance = 0 }, + { .name = "group5", .capacitance = 0 }, + { .name = "group6", .capacitance = 0 }, + { .name = "group7", .capacitance = 0 }, +}; + +static struct ipa_group ipa_groups_def_v5[] = { + { .name = "group0", .capacitance = 0 }, + { .name = "group1", .capacitance = 0 }, + { .name = "group2", .capacitance = 0 }, + { .name = "group3", .capacitance = 0 }, + { .name = "group4", .capacitance = 0 }, + { .name = "group5", .capacitance = 0 }, + { .name = "group6", .capacitance = 0 }, + { .name = "group7", .capacitance = 0 }, +}; + +static ssize_t show_ipa_group(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct kbase_device *kbdev = dev_get_drvdata(dev); + struct kbase_ipa_context *ctx = kbdev->ipa_ctx; + ssize_t count = -EINVAL; + size_t i; + + mutex_lock(&ctx->ipa_lock); + for (i = 0; i < ARRAY_SIZE(ctx->groups); i++) { + if (!strcmp(ctx->groups[i].name, attr->attr.name)) { + count = snprintf(buf, PAGE_SIZE, "%lu\n", + (unsigned long)ctx->groups[i].capacitance); + break; + } + } + mutex_unlock(&ctx->ipa_lock); + return count; +} + +static ssize_t set_ipa_group(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct kbase_device *kbdev = dev_get_drvdata(dev); + struct kbase_ipa_context *ctx = kbdev->ipa_ctx; + 
unsigned long capacitance; + size_t i; + int err; + + err = kstrtoul(buf, 0, &capacitance); + if (err < 0) + return err; + if (capacitance > U32_MAX) + return -ERANGE; + + mutex_lock(&ctx->ipa_lock); + for (i = 0; i < ARRAY_SIZE(ctx->groups); i++) { + if (!strcmp(ctx->groups[i].name, attr->attr.name)) { + ctx->groups[i].capacitance = capacitance; + mutex_unlock(&ctx->ipa_lock); + return count; + } + } + mutex_unlock(&ctx->ipa_lock); + return -EINVAL; +} + +static DEVICE_ATTR(group0, S_IRUGO | S_IWUSR, show_ipa_group, set_ipa_group); +static DEVICE_ATTR(group1, S_IRUGO | S_IWUSR, show_ipa_group, set_ipa_group); +static DEVICE_ATTR(group2, S_IRUGO | S_IWUSR, show_ipa_group, set_ipa_group); +static DEVICE_ATTR(group3, S_IRUGO | S_IWUSR, show_ipa_group, set_ipa_group); +static DEVICE_ATTR(group4, S_IRUGO | S_IWUSR, show_ipa_group, set_ipa_group); +static DEVICE_ATTR(group5, S_IRUGO | S_IWUSR, show_ipa_group, set_ipa_group); +static DEVICE_ATTR(group6, S_IRUGO | S_IWUSR, show_ipa_group, set_ipa_group); +static DEVICE_ATTR(group7, S_IRUGO | S_IWUSR, show_ipa_group, set_ipa_group); + +static struct attribute *kbase_ipa_attrs[] = { + &dev_attr_group0.attr, + &dev_attr_group1.attr, + &dev_attr_group2.attr, + &dev_attr_group3.attr, + &dev_attr_group4.attr, + &dev_attr_group5.attr, + &dev_attr_group6.attr, + &dev_attr_group7.attr, + NULL, +}; + +static struct attribute_group kbase_ipa_attr_group = { + .name = "ipa", + .attrs = kbase_ipa_attrs, +}; + +static void init_ipa_groups(struct kbase_ipa_context *ctx) +{ + struct kbase_device *kbdev = ctx->kbdev; + struct ipa_group *defs; + size_t i, len; + + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_V4)) { + defs = ipa_groups_def_v4; + len = ARRAY_SIZE(ipa_groups_def_v4); + } else { + defs = ipa_groups_def_v5; + len = ARRAY_SIZE(ipa_groups_def_v5); + } + + for (i = 0; i < len; i++) { + ctx->groups[i].name = defs[i].name; + ctx->groups[i].capacitance = defs[i].capacitance; + } +} + +#if defined(CONFIG_OF) && (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 6, 0)) +static int update_ipa_groups_from_dt(struct kbase_ipa_context *ctx) +{ + struct kbase_device *kbdev = ctx->kbdev; + struct device_node *np, *child; + struct ipa_group *group; + size_t nr_groups; + size_t i; + int err; + + np = of_find_node_by_name(kbdev->dev->of_node, "ipa-groups"); + if (!np) + return 0; + + nr_groups = 0; + for_each_available_child_of_node(np, child) + nr_groups++; + if (!nr_groups || nr_groups > ARRAY_SIZE(ctx->groups)) { + dev_err(kbdev->dev, "invalid number of IPA groups: %zu", nr_groups); + err = -EINVAL; + goto err0; + } + + for_each_available_child_of_node(np, child) { + const char *name; + u32 capacitance; + + name = of_get_property(child, "label", NULL); + if (!name) { + dev_err(kbdev->dev, "label missing for IPA group"); + err = -EINVAL; + goto err0; + } + err = of_property_read_u32(child, "capacitance", + &capacitance); + if (err < 0) { + dev_err(kbdev->dev, "capacitance missing for IPA group"); + goto err0; + } + + for (i = 0; i < ARRAY_SIZE(ctx->groups); i++) { + group = &ctx->groups[i]; + if (!strcmp(group->name, name)) { + group->capacitance = capacitance; + break; + } + } + } + + of_node_put(np); + return 0; +err0: + of_node_put(np); + return err; +} +#else +static int update_ipa_groups_from_dt(struct kbase_ipa_context *ctx) +{ + return 0; +} +#endif + +static int reset_ipa_groups(struct kbase_ipa_context *ctx) +{ + init_ipa_groups(ctx); + return update_ipa_groups_from_dt(ctx); +} + +struct kbase_ipa_context *kbase_ipa_init(struct kbase_device *kbdev) +{ + struct 
kbase_ipa_context *ctx; + int err; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return NULL; + + mutex_init(&ctx->ipa_lock); + ctx->kbdev = kbdev; + + err = reset_ipa_groups(ctx); + if (err < 0) + goto err0; + + err = sysfs_create_group(&kbdev->dev->kobj, &kbase_ipa_attr_group); + if (err < 0) + goto err0; + + return ctx; +err0: + kfree(ctx); + return NULL; +} + +void kbase_ipa_term(struct kbase_ipa_context *ctx) +{ + struct kbase_device *kbdev = ctx->kbdev; + + sysfs_remove_group(&kbdev->dev->kobj, &kbase_ipa_attr_group); + kfree(ctx); +} diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics_dummy.c b/drivers/gpu/arm/midgard/mali_kbase_ipa.h similarity index 54% rename from drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics_dummy.c rename to drivers/gpu/arm/midgard/mali_kbase_ipa.h index 4ee35bbc4274..ed123759ff3c 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics_dummy.c +++ b/drivers/gpu/arm/midgard/mali_kbase_ipa.h @@ -15,25 +15,18 @@ - +struct kbase_ipa_context; /** - * @file mali_kbase_pm_metrics_dummy.c - * Dummy Metrics for power management. + * kbase_ipa_init - initialize the kbase ipa core + * @kbdev: kbase device + * + * Return: pointer to the IPA context or NULL on failure */ +struct kbase_ipa_context *kbase_ipa_init(struct kbase_device *kbdev); -#include -#include - -void kbase_pm_register_vsync_callback(struct kbase_device *kbdev) -{ - KBASE_DEBUG_ASSERT(kbdev != NULL); - - /* no VSync metrics will be available */ - kbdev->pm.backend.metrics.platform_data = NULL; -} - -void kbase_pm_unregister_vsync_callback(struct kbase_device *kbdev) -{ - KBASE_DEBUG_ASSERT(kbdev != NULL); -} +/** + * kbase_ipa_term - terminate the kbase ipa core + * @ctx: pointer to the IPA context + */ +void kbase_ipa_term(struct kbase_ipa_context *ctx); diff --git a/drivers/gpu/arm/midgard/mali_kbase_jd.c b/drivers/gpu/arm/midgard/mali_kbase_jd.c index c5a862340490..dd2d187d5cd9 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_jd.c +++ b/drivers/gpu/arm/midgard/mali_kbase_jd.c @@ -172,7 +172,7 @@ static void kds_dep_clear(void *callback_parameter, void *callback_extra_paramet if (katom->status == KBASE_JD_ATOM_STATE_COMPLETED) { /* The atom has already finished */ - resched |= jd_done_nolock(katom); + resched |= jd_done_nolock(katom, NULL); } if (resched) @@ -192,7 +192,7 @@ static void kbase_cancel_kds_wait_job(struct kbase_jd_atom *katom) if (katom->status == KBASE_JD_ATOM_STATE_QUEUED) { /* Wait was cancelled - zap the atom */ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; - if (jd_done_nolock(katom)) + if (jd_done_nolock(katom, NULL)) kbase_js_sched_all(katom->kctx->kbdev); } } @@ -316,7 +316,9 @@ static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom) res_no = katom->nr_extres; while (res_no-- > 0) { - struct kbase_mem_phy_alloc *alloc = katom->extres[res_no].alloc; + struct kbase_mem_phy_alloc *alloc; + + alloc = katom->extres[res_no].alloc; #ifdef CONFIG_DMA_SHARED_BUFFER if (alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) { alloc->imported.umm.current_mapping_usage_count--; @@ -338,7 +340,7 @@ static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom) } } #endif /* CONFIG_DMA_SHARED_BUFFER */ - kbase_mem_phy_alloc_put(katom->extres[res_no].alloc); + kbase_mem_phy_alloc_put(alloc); } kfree(katom->extres); katom->extres = NULL; @@ -454,7 +456,7 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st /* decide what needs to happen for this resource */ switch (reg->gpu_alloc->type) { - 
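Each group's capacitance is then exposed as a read/write file under the "ipa" sysfs attribute group on the GPU device. The wiring of kbase_ipa_init()/kbase_ipa_term() into device bring-up and tear-down is not shown in this section; the sketch below only illustrates the intended pairing of the two entry points with the new kbdev->ipa_ctx field (function names are illustrative):

static int example_ipa_start(struct kbase_device *kbdev)
{
	kbdev->ipa_ctx = kbase_ipa_init(kbdev);
	if (!kbdev->ipa_ctx)
		return -ENOMEM;

	return 0;
}

static void example_ipa_stop(struct kbase_device *kbdev)
{
	if (kbdev->ipa_ctx) {
		kbase_ipa_term(kbdev->ipa_ctx);
		kbdev->ipa_ctx = NULL;
	}
}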
case BASE_TMEM_IMPORT_TYPE_UMP: + case BASE_MEM_IMPORT_TYPE_UMP: { #if defined(CONFIG_KDS) && defined(CONFIG_UMP) struct kds_resource *kds_res; @@ -468,7 +470,7 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st break; } #ifdef CONFIG_DMA_SHARED_BUFFER - case BASE_TMEM_IMPORT_TYPE_UMM: + case BASE_MEM_IMPORT_TYPE_UMM: { #ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS struct kds_resource *kds_res; @@ -695,7 +697,8 @@ static void jd_check_force_failure(struct kbase_jd_atom *katom) * * The caller must hold the kbase_jd_context.lock. */ -bool jd_done_nolock(struct kbase_jd_atom *katom) +bool jd_done_nolock(struct kbase_jd_atom *katom, + struct list_head *completed_jobs_ctx) { struct kbase_context *kctx = katom->kctx; struct kbase_device *kbdev = kctx->kbdev; @@ -795,7 +798,10 @@ bool jd_done_nolock(struct kbase_jd_atom *katom) * is in a disjoint state (ie. being reset or replaying jobs). */ kbase_disjoint_event_potential(kctx->kbdev); - kbase_event_post(kctx, katom); + if (completed_jobs_ctx) + list_add_tail(&katom->dep_item[0], completed_jobs_ctx); + else + kbase_event_post(kctx, katom); /* Decrement and check the TOTAL number of jobs. This includes * those not tracked by the scheduler: 'not ready to run' and @@ -926,7 +932,7 @@ bool jd_submit_atom(struct kbase_context *kctx, kbase_tlstream_tl_ret_atom_ctx( katom, kctx); #endif - ret = jd_done_nolock(katom); + ret = jd_done_nolock(katom, NULL); goto out; } } @@ -980,7 +986,7 @@ bool jd_submit_atom(struct kbase_context *kctx, goto out; } } - ret = jd_done_nolock(katom); + ret = jd_done_nolock(katom, NULL); goto out; } else { @@ -1007,7 +1013,7 @@ bool jd_submit_atom(struct kbase_context *kctx, if (!katom->jc && (katom->core_req & BASEP_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) { dev_warn(kctx->kbdev->dev, "Rejecting atom with jc = NULL"); katom->event_code = BASE_JD_EVENT_JOB_INVALID; - ret = jd_done_nolock(katom); + ret = jd_done_nolock(katom, NULL); goto out; } @@ -1018,7 +1024,7 @@ bool jd_submit_atom(struct kbase_context *kctx, "Rejecting atom with invalid device_nr %d", katom->device_nr); katom->event_code = BASE_JD_EVENT_JOB_INVALID; - ret = jd_done_nolock(katom); + ret = jd_done_nolock(katom, NULL); goto out; } @@ -1033,7 +1039,7 @@ bool jd_submit_atom(struct kbase_context *kctx, if (kbase_jd_pre_external_resources(katom, user_atom) != 0) { /* setup failed (no access, bad resource, unknown resource types, etc.) 
*/ katom->event_code = BASE_JD_EVENT_JOB_INVALID; - ret = jd_done_nolock(katom); + ret = jd_done_nolock(katom, NULL); goto out; } } @@ -1048,14 +1054,14 @@ bool jd_submit_atom(struct kbase_context *kctx, if ((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0) { if (!kbase_js_is_atom_valid(kctx->kbdev, katom)) { katom->event_code = BASE_JD_EVENT_JOB_INVALID; - ret = jd_done_nolock(katom); + ret = jd_done_nolock(katom, NULL); goto out; } } else { /* Soft-job */ if (kbase_prepare_soft_job(katom) != 0) { katom->event_code = BASE_JD_EVENT_JOB_INVALID; - ret = jd_done_nolock(katom); + ret = jd_done_nolock(katom, NULL); goto out; } } @@ -1082,13 +1088,13 @@ bool jd_submit_atom(struct kbase_context *kctx, if (kbase_replay_process(katom)) ret = false; else - ret = jd_done_nolock(katom); + ret = jd_done_nolock(katom, NULL); goto out; } else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) { if (kbase_process_soft_job(katom) == 0) { kbase_finish_soft_job(katom); - ret = jd_done_nolock(katom); + ret = jd_done_nolock(katom, NULL); goto out; } /* The job has not yet completed */ @@ -1099,10 +1105,10 @@ bool jd_submit_atom(struct kbase_context *kctx, ret = kbasep_js_add_job(kctx, katom); /* If job was cancelled then resolve immediately */ if (katom->event_code == BASE_JD_EVENT_JOB_CANCELLED) - ret = jd_done_nolock(katom); + ret = jd_done_nolock(katom, NULL); } else { /* This is a pure dependency. Resolve it immediately */ - ret = jd_done_nolock(katom); + ret = jd_done_nolock(katom, NULL); } out: @@ -1244,7 +1250,8 @@ while (false) kbase_js_sched_all(kbdev); if (wait_event_killable(katom->completed, - katom->status == KBASE_JD_ATOM_STATE_UNUSED)) { + katom->status == + KBASE_JD_ATOM_STATE_UNUSED) != 0) { /* We're being killed so the result code * doesn't really matter */ @@ -1272,24 +1279,7 @@ while (false) KBASE_EXPORT_TEST_API(kbase_jd_submit); -/** - * jd_done_worker - Handle a job completion - * @data: a &struct work_struct - * - * This function requeues the job from the runpool (if it was soft-stopped or - * removed from NEXT registers). - * - * Removes it from the system if it finished/failed/was cancelled. - * - * Resolves dependencies to add dependent jobs to the context, potentially - * starting them if necessary (which may add more references to the context) - * - * Releases the reference to the context from the no-longer-running job. - * - * Handles retrying submission outside of IRQ context if it failed from within - * IRQ context. 
- */ -static void jd_done_worker(struct work_struct *data) +void kbase_jd_done_worker(struct work_struct *data) { struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, work); struct kbase_jd_context *jctx; @@ -1301,6 +1291,10 @@ static void jd_done_worker(struct work_struct *data) u64 cache_jc = katom->jc; struct kbasep_js_atom_retained_state katom_retained_state; bool schedule = false; + bool context_idle; + base_jd_core_req core_req = katom->core_req; + u64 affinity = katom->affinity; + enum kbase_atom_coreref_state coreref_state = katom->coreref_state; /* Soft jobs should never reach this function */ KBASE_DEBUG_ASSERT((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0); @@ -1372,7 +1366,7 @@ static void jd_done_worker(struct work_struct *data) if (kbasep_js_has_atom_finished(&katom_retained_state)) schedule = true; - kbase_js_complete_atom_wq(kctx, katom); + context_idle = kbase_js_complete_atom_wq(kctx, katom); KBASE_DEBUG_ASSERT(kbasep_js_has_atom_finished(&katom_retained_state)); @@ -1381,10 +1375,57 @@ static void jd_done_worker(struct work_struct *data) mutex_unlock(&js_devdata->queue_mutex); katom->atom_flags &= ~KBASE_KATOM_FLAG_HOLDING_CTX_REF; /* jd_done_nolock() requires the jsctx_mutex lock to be dropped */ - schedule |= jd_done_nolock(katom); + schedule |= jd_done_nolock(katom, &kctx->completed_jobs); /* katom may have been freed now, do not use! */ + if (context_idle) { + unsigned long flags; + + mutex_lock(&js_devdata->queue_mutex); + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + + /* If kbase_sched() has scheduled this context back in then + * ctx_active will have been set after we marked it as inactive, + * and another pm reference will have been taken, so drop our + * reference. But do not call kbase_jm_idle_ctx(), as the + * context is active and fast-starting is allowed. + * + * If an atom has been fast-started then kctx->atoms_pulled will + * be non-zero but ctx_active will still be false (as the + * previous pm reference has been inherited). Do NOT drop our + * reference, as it has been re-used, and leave the context as + * active. + * + * If no new atoms have been started then ctx_active will still + * be false and atoms_pulled will be zero, so drop the reference + * and call kbase_jm_idle_ctx(). + * + * As the checks are done under both the queue_mutex and + * runpool_irq.lock is should be impossible for this to race + * with the scheduler code. + */ + if (kctx->ctx_active || !atomic_read(&kctx->atoms_pulled)) { + /* Calling kbase_jm_idle_ctx() here will ensure that + * atoms are not fast-started when we drop the + * runpool_irq.lock. This is not performed if ctx_active + * is set as in that case another pm reference has been + * taken and a fast-start would be valid. 
+ */ + if (!kctx->ctx_active) + kbase_jm_idle_ctx(kbdev, kctx); + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, + flags); + + kbase_pm_context_idle(kbdev); + } else { + kctx->ctx_active = true; + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, + flags); + } + mutex_unlock(&js_devdata->queue_mutex); + } + /* * Transaction complete */ @@ -1398,6 +1439,24 @@ static void jd_done_worker(struct work_struct *data) if (schedule) kbase_js_sched_all(kbdev); + if (!atomic_dec_return(&kctx->work_count)) { + /* If worker now idle then post all events that jd_done_nolock() + * has queued */ + mutex_lock(&jctx->lock); + while (!list_empty(&kctx->completed_jobs)) { + struct kbase_jd_atom *atom = list_entry( + kctx->completed_jobs.next, + struct kbase_jd_atom, dep_item[0]); + list_del(kctx->completed_jobs.next); + + kbase_event_post(kctx, atom); + } + mutex_unlock(&jctx->lock); + } + + kbase_backend_complete_wq_post_sched(kbdev, core_req, affinity, + coreref_state); + KBASE_TRACE_ADD(kbdev, JD_DONE_WORKER_END, kctx, NULL, cache_jc, 0); } @@ -1406,12 +1465,12 @@ static void jd_done_worker(struct work_struct *data) * @data: a &struct work_struct * * Only called as part of 'Zapping' a context (which occurs on termination). - * Operates serially with the jd_done_worker() on the work queue. + * Operates serially with the kbase_jd_done_worker() on the work queue. * * This can only be called on contexts that aren't scheduled. * * We don't need to release most of the resources that would occur on - * kbase_jd_done() or jd_done_worker(), because the atoms here must not be + * kbase_jd_done() or kbase_jd_done_worker(), because the atoms here must not be * running (by virtue of only being called on contexts that aren't * scheduled). */ @@ -1449,7 +1508,7 @@ static void jd_cancel_worker(struct work_struct *data) mutex_lock(&jctx->lock); - need_to_try_schedule_context = jd_done_nolock(katom); + need_to_try_schedule_context = jd_done_nolock(katom, NULL); /* Because we're zapping, we're not adding any more jobs to this ctx, so no need to * schedule the context. There's also no need for the jsctx_mutex to have been taken * around this too. */ @@ -1468,11 +1527,11 @@ static void jd_cancel_worker(struct work_struct *data) * * Only called as part of evicting failed jobs. This is only called on jobs that * were never submitted to HW Access. Jobs that were submitted are handled - * through jd_done_worker(). - * Operates serially with the jd_done_worker() on the work queue. + * through kbase_jd_done_worker(). + * Operates serially with the kbase_jd_done_worker() on the work queue. * * We don't need to release most of the resources that would occur on - * kbase_jd_done() or jd_done_worker(), because the atoms here must not be + * kbase_jd_done() or kbase_jd_done_worker(), because the atoms here must not be * running (by virtue of having not been submitted to HW Access). */ static void jd_evict_worker(struct work_struct *data) @@ -1500,7 +1559,7 @@ static void jd_evict_worker(struct work_struct *data) mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); mutex_lock(&jctx->lock); - jd_done_nolock(katom); + jd_done_nolock(katom, NULL); /* katom may have been freed now, do not use! 
*/ mutex_unlock(&jctx->lock); @@ -1546,10 +1605,17 @@ void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr, katom->slot_nr = slot_nr; - WARN_ON(work_pending(&katom->work)); + atomic_inc(&kctx->work_count); + +#ifdef CONFIG_DEBUG_FS + /* a failed job happened and is waiting for dumping*/ + if (kbase_debug_job_fault_process(katom, katom->event_code)) + return; +#endif + WARN_ON(work_pending(&katom->work)); KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work)); - INIT_WORK(&katom->work, jd_done_worker); + INIT_WORK(&katom->work, kbase_jd_done_worker); queue_work(kctx->jctx.job_done_wq, &katom->work); } @@ -1619,7 +1685,6 @@ void kbase_jd_zap_context(struct kbase_context *kctx) KBASE_TRACE_ADD(kbdev, JD_ZAP_CONTEXT, kctx, NULL, 0u, 0u); kbase_js_zap_context(kctx); - kbase_jm_wait_for_zero_jobs(kctx); mutex_lock(&kctx->jctx.lock); @@ -1653,6 +1718,8 @@ void kbase_jd_zap_context(struct kbase_context *kctx) #endif mutex_unlock(&kctx->jctx.lock); + + kbase_jm_wait_for_zero_jobs(kctx); } KBASE_EXPORT_TEST_API(kbase_jd_zap_context); @@ -1699,6 +1766,8 @@ int kbase_jd_init(struct kbase_context *kctx) #endif /* CONFIG_KDS */ kctx->jctx.job_nr = 0; + INIT_LIST_HEAD(&kctx->completed_jobs); + atomic_set(&kctx->work_count, 0); return 0; diff --git a/drivers/gpu/arm/midgard/mali_kbase_jm.c b/drivers/gpu/arm/midgard/mali_kbase_jm.c index bc5ad805818c..63425322452b 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_jm.c +++ b/drivers/gpu/arm/midgard/mali_kbase_jm.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -103,8 +103,6 @@ void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx) { lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); - WARN_ON(atomic_read(&kctx->atoms_pulled)); - if (kbdev->hwaccess.active_kctx == kctx) kbdev->hwaccess.active_kctx = NULL; } diff --git a/drivers/gpu/arm/midgard/mali_kbase_js.c b/drivers/gpu/arm/midgard/mali_kbase_js.c index c49d622b7611..54b8d9bcd1e1 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_js.c +++ b/drivers/gpu/arm/midgard/mali_kbase_js.c @@ -245,6 +245,26 @@ bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev, * @js: Job slot id to check. * @prio: Priority to check. * + * Caller must hold runpool_irq.lock + * + * Return: true if the ring buffer is empty, false otherwise. + */ +static inline bool +jsctx_rb_is_empty_prio(struct kbase_context *kctx, int js, int prio) +{ + struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js]; + + lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); + + return rb->running_idx == rb->write_idx; +} + +/** + * jsctx_rb_none_to_pull_prio(): - Check if there are no pullable atoms + * @kctx: Pointer to kbase context with ring buffer. + * @js: Job slot id to check. + * @prio: Priority to check. + * * Return true if there are no atoms to pull. There may be running atoms in the * ring buffer even if there are no atoms to pull. It is also possible for the * ring buffer to be full (with running atoms) when this functions returns @@ -255,7 +275,7 @@ bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev, * Return: true if there are no atoms to pull, false otherwise. 
*/ static inline bool -jsctx_rb_is_empty_prio(struct kbase_context *kctx, int js, int prio) +jsctx_rb_none_to_pull_prio(struct kbase_context *kctx, int js, int prio) { struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js]; @@ -265,24 +285,25 @@ jsctx_rb_is_empty_prio(struct kbase_context *kctx, int js, int prio) } /** - * jsctx_rb_is_empty(): - Check if all priority ring buffers are empty + * jsctx_rb_none_to_pull(): - Check if all priority ring buffers have no + * pullable atoms * @kctx: Pointer to kbase context with ring buffer. * @js: Job slot id to check. * * Caller must hold runpool_irq.lock * - * Return: true if the ring buffers for all priorities are empty, false - * otherwise. + * Return: true if the ring buffers for all priorities have no pullable atoms, + * false otherwise. */ static inline bool -jsctx_rb_is_empty(struct kbase_context *kctx, int js) +jsctx_rb_none_to_pull(struct kbase_context *kctx, int js) { int prio; lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { - if (!jsctx_rb_is_empty_prio(kctx, js, prio)) + if (!jsctx_rb_none_to_pull_prio(kctx, js, prio)) return false; } @@ -420,7 +441,7 @@ jsctx_rb_peek_prio(struct kbase_context *kctx, int js, int prio) lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); - if (jsctx_rb_is_empty_prio(kctx, js, prio)) + if (jsctx_rb_none_to_pull_prio(kctx, js, prio)) return NULL; id = rb->entries[rb->read_idx & JSCTX_RB_MASK].atom_id; @@ -808,12 +829,6 @@ int kbasep_js_devdata_init(struct kbase_device * const kbdev) INIT_LIST_HEAD(&jsdd->ctx_list_unpullable[i]); } - jsdd->runpool_irq.secure_mode = false; - if (kbdev->secure_ops) { - /* Make sure secure mode is disabled */ - kbdev->secure_ops->secure_mode_disable(kbdev); - } - /* On error, do no cleanup; this will be handled by the caller(s), since * we've designed this resource to be safe to terminate on init-fail */ if (jsdd->init_status != JS_DEVDATA_INIT_ALL) @@ -871,7 +886,6 @@ int kbasep_js_kctx_init(struct kbase_context * const kctx) KBASE_DEBUG_ASSERT(js_kctx_info->init_status == JS_KCTX_INIT_NONE); js_kctx_info->ctx.nr_jobs = 0; - atomic_set(&js_kctx_info->ctx.fault_count, 0); js_kctx_info->ctx.is_scheduled = false; js_kctx_info->ctx.is_dying = false; memset(js_kctx_info->ctx.ctx_attr_ref_count, 0, @@ -1158,7 +1172,7 @@ static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js, } katom = jsctx_rb_peek(kctx, js); if (!katom) - return false; /* ringbuffer empty */ + return false; /* No pullable atoms */ if (atomic_read(&katom->blocked)) return false; /* next atom blocked */ if (katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) { @@ -1380,10 +1394,14 @@ bool kbasep_js_add_job(struct kbase_context *kctx, /* Context Attribute Refcounting */ kbasep_js_ctx_attr_ctx_retain_atom(kbdev, kctx, atom); - if (enqueue_required) - timer_sync = kbase_js_ctx_list_add_pullable(kbdev, kctx, + if (enqueue_required) { + if (kbase_js_ctx_pullable(kctx, atom->slot_nr, false)) + timer_sync = kbase_js_ctx_list_add_pullable(kbdev, kctx, atom->slot_nr); - + else + timer_sync = kbase_js_ctx_list_add_unpullable(kbdev, + kctx, atom->slot_nr); + } /* If this context is active and the atom is the first on its slot, * kick the job manager to attempt to fast-start the atom */ if (enqueue_required && kctx == kbdev->hwaccess.active_kctx) @@ -1586,7 +1604,7 @@ static kbasep_js_release_result kbasep_js_run_jobs_after_ctx_and_atom_release( if (js_devdata->nr_user_contexts_running != 0) { bool retry_submit = false; - int 
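Worked example of the distinction between the two predicates; the index semantics are inferred from the kerneldoc above and the renamed helper:

/*
 * The indices only ever advance; JSCTX_RB_MASK wraps them onto the
 * entries[] array.  Conceptually:
 *
 *   running_idx .. read_idx  - 1 : atoms already pulled, still running
 *   read_idx    .. write_idx - 1 : atoms queued and still pullable
 *
 * Example: running_idx = 4, read_idx = 7, write_idx = 7
 *   jsctx_rb_none_to_pull_prio() -> true  (read_idx == write_idx)
 *   jsctx_rb_is_empty_prio()     -> false (three pulled atoms remain)
 */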
retry_jobslot; + int retry_jobslot = 0; if (katom_retained_state) retry_submit = kbasep_js_get_atom_retry_submit_slot( @@ -1711,6 +1729,7 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( kbase_trace_mali_mmu_as_released(kctx->as_nr); #endif #if defined(CONFIG_MALI_MIPE_ENABLED) + kbase_tlstream_tl_nret_as_ctx(&kbdev->as[kctx->as_nr], kctx); kbase_tlstream_tl_nret_gpu_ctx(kbdev, kctx); #endif @@ -1836,40 +1855,6 @@ void kbasep_js_runpool_release_ctx_and_katom_retained_state( mutex_lock(&js_kctx_info->ctx.jsctx_mutex); mutex_lock(&js_devdata->runpool_mutex); -#if 2 == MALI_INSTRUMENTATION_LEVEL - /* When any fault is detected, stop the context from submitting more - * and add a refcount to prevent the context from being removed while - * the job core dump is undergoing in the user space. Once the dump - * finish, it will release the refcount of the context and allow it - * to be removed. The test conditions are to ensure this mechanism - * will be triggered only in the cases that cmarp_event_handler - * handles. - * - * Currently, cmar event handler only handles job exceptions and - * assert the cases where the event code of the atom does not - * belong to the MMU exceptions or GPU exceptions class. In order to - * perform dump on error in those cases, changes in cmar event handler - * need to be made. - */ - if ((BASE_JD_EVENT_NOT_STARTED != event_code) && - (BASE_JD_EVENT_STOPPED != event_code) && - (BASE_JD_EVENT_ACTIVE != event_code) && - (!((event_code >= BASE_JD_EVENT_RANGE_KERNEL_ONLY_START) && - (event_code <= BASE_JD_EVENT_RANGE_KERNEL_ONLY_END))) && - ((event_code & BASE_JD_SW_EVENT) || - event_code <= BASE_JD_EVENT_UNKNOWN) && - (BASE_JD_EVENT_DONE != event_code)) { - unsigned long flags; - - atomic_inc(&js_kctx_info->ctx.fault_count); - spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); - kbasep_js_clear_submit_allowed(js_devdata, kctx); - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); - - kbasep_js_runpool_retain_ctx(kbdev, kctx); - } -#endif /* 2 == MALI_INSTRUMENTATION_LEVEL */ - release_result = kbasep_js_runpool_release_ctx_internal(kbdev, kctx, katom_retained_state); @@ -1888,23 +1873,6 @@ void kbasep_js_runpool_release_ctx_and_katom_retained_state( kbase_js_sched_all(kbdev); } -void kbasep_js_dump_fault_term(struct kbase_device *kbdev, - struct kbase_context *kctx) -{ - unsigned long flags; - struct kbasep_js_device_data *js_devdata; - - js_devdata = &kbdev->js_data; - - spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); - kbasep_js_set_submit_allowed(js_devdata, kctx); - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); - - kbasep_js_runpool_release_ctx(kbdev, kctx); - atomic_dec(&kctx->jctx.sched_info.ctx.fault_count); -} - - void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx) { @@ -2110,6 +2078,7 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, #endif #if defined(CONFIG_MALI_MIPE_ENABLED) kbase_tlstream_tl_ret_gpu_ctx(kbdev, kctx); + kbase_tlstream_tl_ret_as_ctx(&kbdev->as[kctx->as_nr], kctx); #endif /* Cause any future waiter-on-termination to wait until the context is @@ -2233,6 +2202,7 @@ void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev, mutex_unlock(&js_devdata->queue_mutex); } } +KBASE_EXPORT_TEST_API(kbasep_js_schedule_privileged_ctx); void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev, struct kbase_context *kctx) @@ -2256,6 +2226,7 @@ void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev, 
kbase_js_sched_all(kbdev); } +KBASE_EXPORT_TEST_API(kbasep_js_release_privileged_ctx); void kbasep_js_suspend(struct kbase_device *kbdev) { @@ -2404,13 +2375,13 @@ int kbase_js_dep_resolved_submit(struct kbase_context *kctx, /* If slot will transition from unpullable to pullable then add to * pullable list */ - if (jsctx_rb_is_empty(kctx, katom->slot_nr)) { + if (jsctx_rb_none_to_pull(kctx, katom->slot_nr)) { *enqueue_required = true; } else { *enqueue_required = false; - /* Check if there are lower priority jobs to soft stop */ - kbase_job_slot_ctx_priority_check_locked(kctx, katom); } + /* Check if there are lower priority jobs to soft stop */ + kbase_job_slot_ctx_priority_check_locked(kctx, katom); /* Add atom to ring buffer. */ if (unlikely(jsctx_rb_add_atom(kctx, katom))) { @@ -2500,6 +2471,9 @@ static void js_return_worker(struct work_struct *data) bool timer_sync = false; bool context_idle = false; unsigned long flags; + base_jd_core_req core_req = katom->core_req; + u64 affinity = katom->affinity; + enum kbase_atom_coreref_state coreref_state = katom->coreref_state; kbase_backend_complete_wq(kbdev, katom); @@ -2519,7 +2493,7 @@ static void js_return_worker(struct work_struct *data) spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); if (!atomic_read(&kctx->atoms_pulled_slot[js]) && - jsctx_rb_is_empty(kctx, js)) + jsctx_rb_none_to_pull(kctx, js)) timer_sync |= kbase_js_ctx_list_remove(kbdev, kctx, js); if (!atomic_read(&kctx->atoms_pulled)) { @@ -2549,8 +2523,11 @@ static void js_return_worker(struct work_struct *data) spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); - if (context_idle) + if (context_idle) { + WARN_ON(!kctx->ctx_active); + kctx->ctx_active = false; kbase_pm_context_idle(kbdev); + } if (timer_sync) kbase_js_sync_timers(kbdev); @@ -2563,6 +2540,9 @@ static void js_return_worker(struct work_struct *data) &retained_state); kbase_js_sched_all(kbdev); + + kbase_backend_complete_wq_post_sched(kbdev, core_req, affinity, + coreref_state); } void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom) @@ -2597,8 +2577,6 @@ static bool kbase_js_evict_atom(struct kbase_context *kctx, return false; if (katom_evict->gpu_rb_state != KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { - WARN_ON(katom_evict->gpu_rb_state != - KBASE_ATOM_GPU_RB_RETURN_TO_JS); WARN_ON(katom_evict->event_code != head_katom->event_code); return false; @@ -2691,7 +2669,7 @@ static void kbase_js_compact(struct kbase_context *kctx) jsctx_rb_compact(kctx, js); } -void kbase_js_complete_atom_wq(struct kbase_context *kctx, +bool kbase_js_complete_atom_wq(struct kbase_context *kctx, struct kbase_jd_atom *katom) { struct kbasep_js_kctx_info *js_kctx_info; @@ -2730,7 +2708,7 @@ void kbase_js_complete_atom_wq(struct kbase_context *kctx, } if (!atomic_read(&kctx->atoms_pulled_slot[atom_slot]) && - jsctx_rb_is_empty(kctx, atom_slot)) + jsctx_rb_none_to_pull(kctx, atom_slot)) timer_sync |= kbase_js_ctx_list_remove(kctx->kbdev, kctx, atom_slot); @@ -2762,16 +2740,18 @@ void kbase_js_complete_atom_wq(struct kbase_context *kctx, } } + /* Mark context as inactive. The pm reference will be dropped later in + * jd_done_worker(). 
+ */ if (context_idle) - kbase_jm_idle_ctx(kbdev, kctx); + kctx->ctx_active = false; spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); if (timer_sync) kbase_backend_ctx_count_changed(kbdev); mutex_unlock(&js_devdata->runpool_mutex); - if (context_idle) - kbase_pm_context_idle(kbdev); + return context_idle; } void kbase_js_complete_atom(struct kbase_jd_atom *katom, ktime_t *end_timestamp) @@ -2800,6 +2780,7 @@ void kbase_js_complete_atom(struct kbase_jd_atom *katom, ktime_t *end_timestamp) katom, &kbdev->gpu_props.props.raw_props.js_features[ katom->slot_nr]); + kbase_tlstream_tl_nret_atom_as(katom, &kbdev->as[kctx->as_nr]); #endif /* Calculate the job's time used */ if (end_timestamp != NULL) { @@ -2860,7 +2841,7 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) break; /* No contexts on pullable list */ } - if (!atomic_read(&kctx->atoms_pulled)) { + if (!kctx->ctx_active) { context_idle = true; if (kbase_pm_context_active_handle_suspend( @@ -2879,6 +2860,7 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) up(&js_devdata->schedule_sem); return; } + kctx->ctx_active = true; } if (!kbase_js_use_ctx(kbdev, kctx)) { @@ -2902,8 +2884,11 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) &js_devdata->runpool_irq.lock, flags); mutex_unlock( &kctx->jctx.sched_info.ctx.jsctx_mutex); - if (context_idle) + if (context_idle) { + WARN_ON(!kctx->ctx_active); + kctx->ctx_active = false; kbase_pm_context_idle(kbdev); + } /* No more jobs can be submitted on this slot */ js_mask &= ~(1 << js); @@ -2936,6 +2921,8 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) spin_unlock_irqrestore( &js_devdata->runpool_irq.lock, flags); + WARN_ON(!kctx->ctx_active); + kctx->ctx_active = false; kbase_pm_context_idle(kbdev); } else { spin_unlock_irqrestore( diff --git a/drivers/gpu/arm/midgard/mali_kbase_js.h b/drivers/gpu/arm/midgard/mali_kbase_js.h index 79e7705932f2..868c6808d628 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_js.h +++ b/drivers/gpu/arm/midgard/mali_kbase_js.h @@ -392,21 +392,6 @@ void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_cont */ void kbasep_js_runpool_release_ctx_and_katom_retained_state(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state); - -/** - * @brief Release the refcount of the context and allow further submission - * of the context after the dump on error in user space terminates. - * - * Before this function is called, when a fault happens the kernel should - * have disallowed the context from further submission of jobs and - * retained the context to avoid it from being removed. This function - * releases the refcount of the context and allow further submission of - * jobs. - * - * This function should only be called when "instr=2" during compile time. 
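/*
 * Generic illustration (not kbase code) of the locking-assert change made
 * in the mali_kbase_js_ctx_attr.c hunks above.
 * BUG_ON(!mutex_is_locked(&m)) only proves that *some* task holds the
 * mutex and panics the kernel on failure, whereas lockdep_assert_held()
 * checks that the *current* task holds it, warns instead of panicking,
 * and compiles to nothing when lockdep is disabled.
 */
#include <linux/mutex.h>
#include <linux/lockdep.h>

struct example_state {
	struct mutex lock;
	int value;
};

/* Caller must hold s->lock. */
static void example_update_locked(struct example_state *s, int v)
{
	lockdep_assert_held(&s->lock);	/* free on non-lockdep builds */
	s->value = v;
}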
- */ -void kbasep_js_dump_fault_term(struct kbase_device *kbdev, struct kbase_context *kctx); - /** * @brief Variant of kbase_js_runpool_release_ctx() that assumes that * kbasep_js_device_data::runpool_mutex and @@ -556,8 +541,10 @@ void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom); * * @param[in] kctx Context pointer * @param[in] katom Pointer to the atom to complete + * @return true if the context is now idle (no jobs pulled) + * false otherwise */ -void kbase_js_complete_atom_wq(struct kbase_context *kctx, +bool kbase_js_complete_atom_wq(struct kbase_context *kctx, struct kbase_jd_atom *katom); /** diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c b/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c index 492660079358..8891bff70c60 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c +++ b/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c @@ -50,7 +50,7 @@ static bool kbasep_js_ctx_attr_runpool_retain_attr(struct kbase_device *kbdev, s js_devdata = &kbdev->js_data; js_kctx_info = &kctx->jctx.sched_info; - BUG_ON(!mutex_is_locked(&js_kctx_info->ctx.jsctx_mutex)); + lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled != false); @@ -96,7 +96,7 @@ static bool kbasep_js_ctx_attr_runpool_release_attr(struct kbase_device *kbdev, js_devdata = &kbdev->js_data; js_kctx_info = &kctx->jctx.sched_info; - BUG_ON(!mutex_is_locked(&js_kctx_info->ctx.jsctx_mutex)); + lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled != false); @@ -137,7 +137,7 @@ static bool kbasep_js_ctx_attr_ctx_retain_attr(struct kbase_device *kbdev, struc js_kctx_info = &kctx->jctx.sched_info; lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); - BUG_ON(!mutex_is_locked(&js_kctx_info->ctx.jsctx_mutex)); + lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); KBASE_DEBUG_ASSERT(js_kctx_info->ctx.ctx_attr_ref_count[attribute] < U32_MAX); ++(js_kctx_info->ctx.ctx_attr_ref_count[attribute]); @@ -173,7 +173,7 @@ static bool kbasep_js_ctx_attr_ctx_release_attr(struct kbase_device *kbdev, stru KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT); js_kctx_info = &kctx->jctx.sched_info; - BUG_ON(!mutex_is_locked(&js_kctx_info->ctx.jsctx_mutex)); + lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); KBASE_DEBUG_ASSERT(js_kctx_info->ctx.ctx_attr_ref_count[attribute] > 0); if (js_kctx_info->ctx.is_scheduled != false && js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) { diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_defs.h b/drivers/gpu/arm/midgard/mali_kbase_js_defs.h index 7532f9c9e712..d65b494a70c9 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_js_defs.h +++ b/drivers/gpu/arm/midgard/mali_kbase_js_defs.h @@ -288,11 +288,6 @@ struct kbasep_js_device_data { * submitted to a slot, and is de-refcounted immediately after a job * finishes */ s8 slot_affinity_refcount[BASE_JM_MAX_NR_SLOTS][64]; - - /* - * true when GPU is put into secure mode - */ - bool secure_mode; } runpool_irq; /** @@ -368,6 +363,7 @@ struct kbasep_js_device_data { /* Support soft-stop on a single context */ bool softstop_always; #endif /* CONFIG_MALI_DEBUG */ + /** The initalized-flag is placed at the end, to avoid cache-pollution (we should * only be using this during init/term paths). 
* @note This is a write-once member, and so no locking is required to read */ @@ -446,8 +442,6 @@ struct kbasep_js_kctx_info { * list per job slot */ struct list_head ctx_list_entry[BASE_JM_MAX_NR_SLOTS]; - - atomic_t fault_count; /**< The no. of times the context is retained due to the fault job. */ } ctx; /* The initalized-flag is placed at the end, to avoid cache-pollution (we should diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem.c b/drivers/gpu/arm/midgard/mali_kbase_mem.c index 2c7fb5a40325..2909f20c08b2 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_mem.c +++ b/drivers/gpu/arm/midgard/mali_kbase_mem.c @@ -469,7 +469,7 @@ int kbase_region_tracker_init(struct kbase_context *kctx) /* all have SAME_VA */ same_va_reg = kbase_alloc_free_region(kctx, 1, - (1ULL << (same_va_bits - PAGE_SHIFT)) - 2, + (1ULL << (same_va_bits - PAGE_SHIFT)) - 1, KBASE_REG_ZONE_SAME_VA); if (!same_va_reg) @@ -525,12 +525,13 @@ int kbase_mem_init(struct kbase_device *kbdev) KBASE_DEBUG_ASSERT(kbdev); memdev = &kbdev->memdev; + kbdev->mem_pool_max_size_default = KBASE_MEM_POOL_MAX_SIZE_KCTX; /* Initialize memory usage */ atomic_set(&memdev->used_pages, 0); - /* nothing to do, zero-inited when struct kbase_device was created */ - return 0; + return kbase_mem_pool_init(&kbdev->mem_pool, + KBASE_MEM_POOL_MAX_SIZE_KBDEV, kbdev, NULL); } void kbase_mem_halt(struct kbase_device *kbdev) @@ -550,6 +551,8 @@ void kbase_mem_term(struct kbase_device *kbdev) pages = atomic_read(&memdev->used_pages); if (pages != 0) dev_warn(kbdev->dev, "%s: %d pages in use!\n", __func__, pages); + + kbase_mem_pool_term(&kbdev->mem_pool); } KBASE_EXPORT_TEST_API(kbase_mem_term); @@ -590,9 +593,6 @@ struct kbase_va_region *kbase_alloc_free_region(struct kbase_context *kctx, u64 new_reg->flags |= KBASE_REG_GROWABLE; - /* Set up default MEMATTR usage */ - new_reg->flags |= KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT); - new_reg->start_pfn = start_pfn; new_reg->nr_pages = nr_pages; @@ -634,6 +634,7 @@ void kbase_mmu_update(struct kbase_context *kctx) * * as_nr won't change because the caller has the runpool_irq lock */ KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); + lockdep_assert_held(&kctx->kbdev->as[kctx->as_nr].transaction_mutex); kctx->kbdev->mmu_mode->update(kctx); } @@ -663,14 +664,15 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 { int err; size_t i = 0; + unsigned long attr; unsigned long mask = ~KBASE_REG_MEMATTR_MASK; -#if defined(CONFIG_MALI_CACHE_COHERENT) - unsigned long attr = - KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_OUTER_WA); -#else - unsigned long attr = - KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_WRITE_ALLOC); -#endif + + if ((kctx->kbdev->system_coherency == COHERENCY_ACE) && + (reg->flags & KBASE_REG_SHARE_BOTH)) + attr = KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_OUTER_WA); + else + attr = KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_WRITE_ALLOC); + KBASE_DEBUG_ASSERT(NULL != kctx); KBASE_DEBUG_ASSERT(NULL != reg); @@ -699,7 +701,7 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 } else { err = kbase_mmu_insert_single_page(kctx, reg->start_pfn + i * stride, - kctx->aliasing_sink_page, + page_to_phys(kctx->aliasing_sink_page), alloc->imported.alias.aliased[i].length, (reg->flags & mask) | attr); @@ -986,7 +988,7 @@ int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *re KBASE_DEBUG_ASSERT(NULL != kctx); KBASE_DEBUG_ASSERT(NULL != reg); - BUG_ON(!mutex_is_locked(&kctx->reg_lock)); + 
lockdep_assert_held(&kctx->reg_lock); err = kbase_gpu_munmap(kctx, reg); if (err) { dev_warn(reg->kctx->kbdev->dev, "Could not unmap from the GPU...\n"); @@ -1072,7 +1074,8 @@ int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr) KBASE_EXPORT_TEST_API(kbase_mem_free); -void kbase_update_region_flags(struct kbase_va_region *reg, unsigned long flags) +void kbase_update_region_flags(struct kbase_context *kctx, + struct kbase_va_region *reg, unsigned long flags) { KBASE_DEBUG_ASSERT(NULL != reg); KBASE_DEBUG_ASSERT((flags & ~((1ul << BASE_MEM_FLAGS_NR_BITS) - 1)) == 0); @@ -1104,6 +1107,16 @@ void kbase_update_region_flags(struct kbase_va_region *reg, unsigned long flags) reg->flags |= KBASE_REG_SHARE_BOTH; else if (flags & BASE_MEM_COHERENT_LOCAL) reg->flags |= KBASE_REG_SHARE_IN; + + /* Set up default MEMATTR usage */ + if (kctx->kbdev->system_coherency == COHERENCY_ACE && + (reg->flags & KBASE_REG_SHARE_BOTH)) { + reg->flags |= + KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT_ACE); + } else { + reg->flags |= + KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT); + } } KBASE_EXPORT_TEST_API(kbase_update_region_flags); @@ -1125,7 +1138,7 @@ int kbase_alloc_phy_pages_helper( * allocation is visible to the OOM killer */ kbase_process_page_usage_inc(alloc->imported.kctx, nr_pages_requested); - if (kbase_mem_allocator_alloc(&alloc->imported.kctx->osalloc, + if (kbase_mem_pool_alloc_pages(&alloc->imported.kctx->mem_pool, nr_pages_requested, alloc->pages + alloc->nents) != 0) goto no_alloc; @@ -1165,7 +1178,7 @@ int kbase_free_phy_pages_helper( syncback = alloc->properties & KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED; - kbase_mem_allocator_free(&alloc->imported.kctx->osalloc, + kbase_mem_pool_free_pages(&alloc->imported.kctx->mem_pool, nr_pages_to_free, start_free, syncback); diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem.h b/drivers/gpu/arm/midgard/mali_kbase_mem.h index d8a03526a64c..1839cced237e 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_mem.h +++ b/drivers/gpu/arm/midgard/mali_kbase_mem.h @@ -242,8 +242,13 @@ struct kbase_va_region { * So we put the default limit to the maximum possible on Linux and shrink * it down, if required by the GPU, during initialization. */ -#define KBASE_REG_ZONE_EXEC KBASE_REG_ZONE(1) /* Dedicated 16MB region for shader code */ -#define KBASE_REG_ZONE_EXEC_BASE ((1ULL << 32) >> PAGE_SHIFT) + +/* + * Dedicated 16MB region for shader code: + * VA range 0x101000000-0x102000000 + */ +#define KBASE_REG_ZONE_EXEC KBASE_REG_ZONE(1) +#define KBASE_REG_ZONE_EXEC_BASE (0x101000000ULL >> PAGE_SHIFT) #define KBASE_REG_ZONE_EXEC_SIZE ((16ULL * 1024 * 1024) >> PAGE_SHIFT) #define KBASE_REG_ZONE_CUSTOM_VA KBASE_REG_ZONE(2) @@ -379,80 +384,156 @@ static inline int kbase_atomic_sub_pages(int num_pages, atomic_t *used_pages) return new_val; } +/* + * Max size for kbdev memory pool (in pages) + */ +#define KBASE_MEM_POOL_MAX_SIZE_KBDEV (SZ_64M >> PAGE_SHIFT) + +/* + * Max size for kctx memory pool (in pages) + */ +#define KBASE_MEM_POOL_MAX_SIZE_KCTX (SZ_64M >> PAGE_SHIFT) + +/** + * kbase_mem_pool_init - Create a memory pool for a kbase device + * @pool: Memory pool to initialize + * @max_size: Maximum number of free pages the pool can hold + * @kbdev: Kbase device where memory is used + * @next_pool: Pointer to the next pool or NULL. + * + * Allocations from @pool are in whole pages. Each @pool has a free list where + * pages can be quickly allocated from. The free list is initially empty and + * filled whenever pages are freed back to the pool. 
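/*
 * Stand-alone, user-space check of the constants introduced above,
 * assuming 4 KiB pages (PAGE_SHIFT == 12). SZ_64M >> PAGE_SHIFT is the
 * default cap for both the kbdev and kctx memory pools, and the dedicated
 * executable zone spans 16 MiB of GPU VA starting at 0x101000000.
 */
#include <stdio.h>

int main(void)
{
	const unsigned long long page_shift = 12;	/* assumed page size */
	const unsigned long long sz_64m = 64ULL << 20;
	const unsigned long long sz_16m = 16ULL << 20;
	const unsigned long long exec_zone_base = 0x101000000ULL;

	printf("pool max size:      %llu pages\n", sz_64m >> page_shift);	/* 16384 */
	printf("EXEC zone base pfn: 0x%llx\n", exec_zone_base >> page_shift);	/* 0x101000 */
	printf("EXEC zone size:     %llu pages\n", sz_16m >> page_shift);	/* 4096 */
	return 0;
}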
The number of free pages + * in the pool will in general not exceed @max_size, but the pool may in + * certain corner cases grow above @max_size. + * + * If @next_pool is not NULL, we will allocate from @next_pool before going to + * the kernel allocator. Similarily pages can spill over to @next_pool when + * @pool is full. Pages are zeroed before they spill over to another pool, to + * prevent leaking information between applications. + * + * A shrinker is registered so that Linux mm can reclaim pages from the pool as + * needed. + * + * Return: 0 on success, negative -errno on error + */ +int kbase_mem_pool_init(struct kbase_mem_pool *pool, + size_t max_size, + struct kbase_device *kbdev, + struct kbase_mem_pool *next_pool); + +/** + * kbase_mem_pool_term - Destroy a memory pool + * @pool: Memory pool to destroy + * + * Pages in the pool will spill over to @next_pool (if available) or freed to + * the kernel. + */ +void kbase_mem_pool_term(struct kbase_mem_pool *pool); + /** - * @brief Initialize low-level memory access for a kbase device + * kbase_mem_pool_alloc - Allocate a page from memory pool + * @pool: Memory pool to allocate from * - * Performs any low-level setup needed for a kbase device to access memory on - * the device. + * Allocations from the pool are made as follows: + * 1. If there are free pages in the pool, allocate a page from @pool. + * 2. Otherwise, if @next_pool is not NULL and has free pages, allocate a page + * from @next_pool. + * 3. Finally, allocate a page from the kernel. * - * @param kbdev kbase device to initialize memory access for - * @return 0 on success, Linux error code on failure + * Return: Pointer to allocated page, or NULL if allocation failed. */ -int kbase_mem_lowlevel_init(struct kbase_device *kbdev); +struct page *kbase_mem_pool_alloc(struct kbase_mem_pool *pool); +/** + * kbase_mem_pool_free - Free a page to memory pool + * @pool: Memory pool where page should be freed + * @page: Page to free to the pool + * @dirty: Whether some of the page may be dirty in the cache. + * + * Pages are freed to the pool as follows: + * 1. If @pool is not full, add @page to @pool. + * 2. Otherwise, if @next_pool is not NULL and not full, add @page to + * @next_pool. + * 3. Finally, free @page to the kernel. + */ +void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *page, + bool dirty); /** - * @brief Terminate low-level memory access for a kbase device + * kbase_mem_pool_alloc_pages - Allocate pages from memory pool + * @pool: Memory pool to allocate from + * @nr_pages: Number of pages to allocate + * @pages: Pointer to array where the physical address of the allocated + * pages will be stored. * - * Perform any low-level cleanup needed to clean - * after @ref kbase_mem_lowlevel_init + * Like kbase_mem_pool_alloc() but optimized for allocating many pages. * - * @param kbdev kbase device to clean up for + * Return: 0 on success, negative -errno on error */ -void kbase_mem_lowlevel_term(struct kbase_device *kbdev); +int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_pages, + phys_addr_t *pages); /** - * @brief Initialize an OS based memory allocator. + * kbase_mem_pool_free_pages - Free pages to memory pool + * @pool: Memory pool where pages should be freed + * @nr_pages: Number of pages to free + * @pages: Pointer to array holding the physical addresses of the pages to + * free. + * @dirty: Whether any pages may be dirty in the cache. + * + * Like kbase_mem_pool_free() but optimized for freeing many pages. 
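/*
 * Minimal usage sketch for the single-page pool API documented above (the
 * helper name is illustrative, not driver code). Allocation falls through
 * pool -> next_pool -> kernel; on free, the page is kept in the pool when
 * there is room, spilled (zeroed) to next_pool, or returned to the kernel.
 */
static int example_pool_page_roundtrip(struct kbase_mem_pool *pool)
{
	struct page *p;

	p = kbase_mem_pool_alloc(pool);
	if (!p)
		return -ENOMEM;

	/* ... use the page; assume the CPU wrote to it ... */

	/* dirty = true so the pool can sync the page before reuse */
	kbase_mem_pool_free(pool, p, true);

	return 0;
}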
+ */ +void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages, + phys_addr_t *pages, bool dirty); + +/** + * kbase_mem_pool_size - Get number of free pages in memory pool + * @pool: Memory pool to inspect * - * Initializes a allocator. - * Must be called before any allocation is attempted. - * \a kbase_mem_allocator_alloc and \a kbase_mem_allocator_free is used - * to allocate and free memory. - * \a kbase_mem_allocator_term must be called to clean up the allocator. - * All memory obtained via \a kbase_mem_allocator_alloc must have been - * \a kbase_mem_allocator_free before \a kbase_mem_allocator_term is called. + * Note: the size of the pool may in certain corner cases exceed @max_size! * - * @param allocator Allocator object to initialize - * @param max_size Maximum number of pages to keep on the freelist. - * @param kbdev The kbase device this allocator is used with - * @return 0 on success, an error code indicating what failed on - * error. + * Return: Number of free pages in the pool */ -int kbase_mem_allocator_init(struct kbase_mem_allocator *allocator, - unsigned int max_size, - struct kbase_device *kbdev); +static inline size_t kbase_mem_pool_size(struct kbase_mem_pool *pool) +{ + return ACCESS_ONCE(pool->cur_size); +} /** - * @brief Allocate memory via an OS based memory allocator. + * kbase_mem_pool_max_size - Get maximum number of free pages in memory pool + * @pool: Memory pool to inspect * - * @param[in] allocator Allocator to obtain the memory from - * @param nr_pages Number of pages to allocate - * @param[out] pages Pointer to an array where the physical address of the allocated pages will be stored - * @return 0 if the pages were allocated, an error code indicating what failed on error + * Return: Maximum number of free pages in the pool */ -int kbase_mem_allocator_alloc(struct kbase_mem_allocator *allocator, size_t nr_pages, phys_addr_t *pages); +static inline size_t kbase_mem_pool_max_size(struct kbase_mem_pool *pool) +{ + return pool->max_size; +} + /** - * @brief Free memory obtained for an OS based memory allocator. + * kbase_mem_pool_set_max_size - Set maximum number of free pages in memory pool + * @pool: Memory pool to inspect + * @max_size: Maximum number of free pages the pool can hold * - * @param[in] allocator Allocator to free the memory back to - * @param nr_pages Number of pages to free - * @param[in] pages Pointer to an array holding the physical address of the paghes to free. - * @param[in] sync_back true case the memory should be synced back + * If @max_size is reduced, the pool will be shrunk to adhere to the new limit. + * For details see kbase_mem_pool_shrink(). */ -void kbase_mem_allocator_free(struct kbase_mem_allocator *allocator, size_t nr_pages, phys_addr_t *pages, bool sync_back); +void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size); /** - * @brief Terminate an OS based memory allocator. + * kbase_mem_pool_trim - Grow or shrink the pool to a new size + * @pool: Memory pool to trim + * @new_size: New number of pages in the pool * - * Frees all cached allocations and clean up internal state. - * All allocate pages must have been \a kbase_mem_allocator_free before - * this function is called. + * If @new_size > @cur_size, fill the pool with new pages from the kernel, but + * not above @max_size. + * If @new_size < @cur_size, shrink the pool by freeing pages to the kernel. 
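/*
 * Sketch of a pool lifecycle chained to the device-wide pool, based on the
 * declarations above and on kbase_mem_init() earlier in this patch. The
 * device pool is created with next_pool == NULL; a per-context pool like
 * the one below spills to, and refills from, kbdev->mem_pool. Where and
 * when the context pool is actually created is outside this hunk, so the
 * function names here are assumptions.
 */
static int example_kctx_pool_setup(struct kbase_context *kctx,
				   struct kbase_device *kbdev)
{
	int err;

	err = kbase_mem_pool_init(&kctx->mem_pool,
				  kbdev->mem_pool_max_size_default,
				  kbdev,
				  &kbdev->mem_pool);
	if (err)
		return err;

	/* The cap can be changed later; lowering it shrinks the pool by
	 * freeing the excess pages. */
	kbase_mem_pool_set_max_size(&kctx->mem_pool,
				    KBASE_MEM_POOL_MAX_SIZE_KCTX);

	return 0;
}

static void example_kctx_pool_teardown(struct kbase_context *kctx)
{
	/* Remaining free pages spill to the device pool or go back to the
	 * kernel; see kbase_mem_pool_term(). */
	kbase_mem_pool_term(&kctx->mem_pool);
}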
* - * @param[in] allocator Allocator to terminate + * Return: The new size of the pool */ -void kbase_mem_allocator_term(struct kbase_mem_allocator *allocator); - +size_t kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size); int kbase_region_tracker_init(struct kbase_context *kctx); @@ -471,10 +552,10 @@ struct kbase_va_region *kbase_alloc_free_region(struct kbase_context *kctx, u64 void kbase_free_alloced_region(struct kbase_va_region *reg); int kbase_add_va_region(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align); -int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align); bool kbase_check_alloc_flags(unsigned long flags); bool kbase_check_import_flags(unsigned long flags); -void kbase_update_region_flags(struct kbase_va_region *reg, unsigned long flags); +void kbase_update_region_flags(struct kbase_context *kctx, + struct kbase_va_region *reg, unsigned long flags); void kbase_gpu_vm_lock(struct kbase_context *kctx); void kbase_gpu_vm_unlock(struct kbase_context *kctx); @@ -561,33 +642,6 @@ void kbase_sync_single(struct kbase_context *kctx, phys_addr_t cpu_pa, void kbase_pre_job_sync(struct kbase_context *kctx, struct base_syncset *syncsets, size_t nr); void kbase_post_job_sync(struct kbase_context *kctx, struct base_syncset *syncsets, size_t nr); -/** - * Set attributes for imported tmem region - * - * This function sets (extends with) requested attributes for given region - * of imported external memory - * - * @param[in] kctx The kbase context which the tmem belongs to - * @param[in] gpu_addr The base address of the tmem region - * @param[in] attributes The attributes of tmem region to be set - * - * @return 0 on success. Any other value indicates failure. - */ -int kbase_tmem_set_attributes(struct kbase_context *kctx, u64 gpu_addr, u32 attributes); - -/** - * Get attributes of imported tmem region - * - * This function retrieves the attributes of imported external memory - * - * @param[in] kctx The kbase context which the tmem belongs to - * @param[in] gpu_addr The base address of the tmem region - * @param[out] attributes The actual attributes of tmem region - * - * @return 0 on success. Any other value indicates failure. - */ -int kbase_tmem_get_attributes(struct kbase_context *kctx, u64 gpu_addr, u32 *const attributes); - /* OS specific functions */ int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr); int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *reg); @@ -712,6 +766,11 @@ static inline dma_addr_t kbase_dma_addr(struct page *p) return (dma_addr_t)page_private(p); } +static inline void kbase_clear_dma_addr(struct page *p) +{ + ClearPagePrivate(p); +} + /** * @brief Process a bus or page fault. * diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_alloc.c b/drivers/gpu/arm/midgard/mali_kbase_mem_alloc.c deleted file mode 100644 index 642fa44bf358..000000000000 --- a/drivers/gpu/arm/midgard/mali_kbase_mem_alloc.c +++ /dev/null @@ -1,292 +0,0 @@ -/* - * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. 
- * - * A copy of the licence is included with the program, and can also be obtained - * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. - * - */ - - - - - -/** - * @file mali_kbase_mem.c - * Base kernel memory APIs - */ -#include -#include -#include -#include -#include -#include -#include - -int kbase_mem_lowlevel_init(struct kbase_device *kbdev) -{ - return 0; -} - -void kbase_mem_lowlevel_term(struct kbase_device *kbdev) -{ -} - -static unsigned long kbase_mem_allocator_count(struct shrinker *s, - struct shrink_control *sc) -{ - struct kbase_mem_allocator *allocator; - - allocator = container_of(s, struct kbase_mem_allocator, free_list_reclaimer); - return atomic_read(&allocator->free_list_size); -} - -static unsigned long kbase_mem_allocator_scan(struct shrinker *s, - struct shrink_control *sc) -{ - struct kbase_mem_allocator *allocator; - int i; - int freed; - - allocator = container_of(s, struct kbase_mem_allocator, free_list_reclaimer); - - might_sleep(); - - mutex_lock(&allocator->free_list_lock); - i = MIN(atomic_read(&allocator->free_list_size), sc->nr_to_scan); - freed = i; - - atomic_sub(i, &allocator->free_list_size); - - while (i--) { - struct page *p; - - BUG_ON(list_empty(&allocator->free_list_head)); - p = list_first_entry(&allocator->free_list_head, - struct page, lru); - list_del(&p->lru); - ClearPagePrivate(p); - __free_page(p); - } - mutex_unlock(&allocator->free_list_lock); - return atomic_read(&allocator->free_list_size); -} - -#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) -static int kbase_mem_allocator_shrink(struct shrinker *s, - struct shrink_control *sc) -{ - if (sc->nr_to_scan == 0) - return kbase_mem_allocator_count(s, sc); - - return kbase_mem_allocator_scan(s, sc); -} -#endif - -int kbase_mem_allocator_init(struct kbase_mem_allocator *const allocator, - unsigned int max_size, struct kbase_device *kbdev) -{ - KBASE_DEBUG_ASSERT(NULL != allocator); - KBASE_DEBUG_ASSERT(kbdev); - - INIT_LIST_HEAD(&allocator->free_list_head); - - allocator->kbdev = kbdev; - - mutex_init(&allocator->free_list_lock); - - atomic_set(&allocator->free_list_size, 0); - - allocator->free_list_max_size = max_size; -#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) - allocator->free_list_reclaimer.shrink = kbase_mem_allocator_shrink; -#else - allocator->free_list_reclaimer.count_objects = - kbase_mem_allocator_count; - allocator->free_list_reclaimer.scan_objects = kbase_mem_allocator_scan; -#endif - allocator->free_list_reclaimer.seeks = DEFAULT_SEEKS; - /* Kernel versions prior to 3.1 : - * struct shrinker does not define batch */ -#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0) - allocator->free_list_reclaimer.batch = 0; -#endif - - register_shrinker(&allocator->free_list_reclaimer); - - return 0; -} -KBASE_EXPORT_TEST_API(kbase_mem_allocator_init); - -void kbase_mem_allocator_term(struct kbase_mem_allocator *allocator) -{ - KBASE_DEBUG_ASSERT(NULL != allocator); - - unregister_shrinker(&allocator->free_list_reclaimer); - mutex_lock(&allocator->free_list_lock); - while (!list_empty(&allocator->free_list_head)) { - struct page *p; - - p = list_first_entry(&allocator->free_list_head, struct page, - lru); - list_del(&p->lru); - dma_unmap_page(allocator->kbdev->dev, kbase_dma_addr(p), - PAGE_SIZE, - DMA_BIDIRECTIONAL); - ClearPagePrivate(p); - __free_page(p); - } - atomic_set(&allocator->free_list_size, 0); - mutex_unlock(&allocator->free_list_lock); - mutex_destroy(&allocator->free_list_lock); -} 
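/*
 * Generic illustration (not kbase code) of the shrinker pattern used by
 * the allocator removed above and reused by the kbase_mem_pool reclaim
 * callbacks added later in this patch: kernels from 3.12 onwards take
 * separate count_objects/scan_objects callbacks, while older kernels use
 * a single .shrink callback that must treat nr_to_scan == 0 as a count
 * request.
 */
#include <linux/atomic.h>
#include <linux/kernel.h>
#include <linux/shrinker.h>
#include <linux/version.h>

static atomic_t example_cached_objects = ATOMIC_INIT(0);

static unsigned long example_count(struct shrinker *s,
				   struct shrink_control *sc)
{
	return atomic_read(&example_cached_objects);
}

static unsigned long example_scan(struct shrinker *s,
				  struct shrink_control *sc)
{
	unsigned long freed = min_t(unsigned long, sc->nr_to_scan,
				    atomic_read(&example_cached_objects));

	/* ... actually release 'freed' cached objects here ... */
	atomic_sub(freed, &example_cached_objects);
	return freed;
}

#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
static int example_shrink(struct shrinker *s, struct shrink_control *sc)
{
	if (sc->nr_to_scan == 0)
		return example_count(s, sc);
	return example_scan(s, sc);
}
#endif

static struct shrinker example_shrinker = {
#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
	.shrink = example_shrink,
#else
	.count_objects = example_count,
	.scan_objects = example_scan,
#endif
	.seeks = DEFAULT_SEEKS,
};

static void example_register(void)
{
	register_shrinker(&example_shrinker);
}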
-KBASE_EXPORT_TEST_API(kbase_mem_allocator_term); - -int kbase_mem_allocator_alloc(struct kbase_mem_allocator *allocator, size_t nr_pages, phys_addr_t *pages) -{ - struct page *p; - int i; - int num_from_free_list; - struct list_head from_free_list = LIST_HEAD_INIT(from_free_list); - gfp_t gfp; - - might_sleep(); - - KBASE_DEBUG_ASSERT(NULL != allocator); - - /* take from the free list first */ - mutex_lock(&allocator->free_list_lock); - num_from_free_list = MIN(nr_pages, atomic_read(&allocator->free_list_size)); - atomic_sub(num_from_free_list, &allocator->free_list_size); - for (i = 0; i < num_from_free_list; i++) { - BUG_ON(list_empty(&allocator->free_list_head)); - p = list_first_entry(&allocator->free_list_head, struct page, lru); - list_move(&p->lru, &from_free_list); - } - mutex_unlock(&allocator->free_list_lock); - i = 0; - - /* Allocate as many pages from the pool of already allocated pages. */ - list_for_each_entry(p, &from_free_list, lru) { - pages[i] = PFN_PHYS(page_to_pfn(p)); - i++; - } - - if (i == nr_pages) - return 0; - -#if defined(CONFIG_ARM) && !defined(CONFIG_HAVE_DMA_ATTRS) && LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0) - /* DMA cache sync fails for HIGHMEM before 3.5 on ARM */ - gfp = GFP_USER | __GFP_ZERO; -#else - gfp = GFP_HIGHUSER | __GFP_ZERO; -#endif - - if (current->flags & PF_KTHREAD) { - /* Don't trigger OOM killer from kernel threads, e.g. when - * growing memory on GPU page fault */ - gfp |= __GFP_NORETRY; - } - - /* If not all pages were sourced from the pool, request new ones. */ - for (; i < nr_pages; i++) { - dma_addr_t dma_addr; - - p = alloc_page(gfp); - if (NULL == p) - goto err_out_roll_back; - - dma_addr = dma_map_page(allocator->kbdev->dev, p, 0, PAGE_SIZE, - DMA_BIDIRECTIONAL); - if (dma_mapping_error(allocator->kbdev->dev, dma_addr)) { - __free_page(p); - goto err_out_roll_back; - } - - kbase_set_dma_addr(p, dma_addr); - pages[i] = PFN_PHYS(page_to_pfn(p)); - BUG_ON(dma_addr != pages[i]); - } - - return 0; - -err_out_roll_back: - while (i--) { - p = pfn_to_page(PFN_DOWN(pages[i])); - pages[i] = (phys_addr_t)0; - dma_unmap_page(allocator->kbdev->dev, kbase_dma_addr(p), - PAGE_SIZE, - DMA_BIDIRECTIONAL); - ClearPagePrivate(p); - __free_page(p); - } - - return -ENOMEM; -} -KBASE_EXPORT_TEST_API(kbase_mem_allocator_alloc); - -void kbase_mem_allocator_free(struct kbase_mem_allocator *allocator, size_t nr_pages, phys_addr_t *pages, bool sync_back) -{ - int i = 0; - int page_count = 0; - int tofree; - - LIST_HEAD(new_free_list_items); - - KBASE_DEBUG_ASSERT(NULL != allocator); - - might_sleep(); - - /* Starting by just freeing the overspill. - * As we do this outside of the lock we might spill too many pages - * or get too many on the free list, but the max_size is just a ballpark so it is ok - * providing that tofree doesn't exceed nr_pages - */ - tofree = MAX((int)allocator->free_list_max_size - atomic_read(&allocator->free_list_size), 0); - tofree = nr_pages - MIN(tofree, nr_pages); - for (; i < tofree; i++) { - if (likely(0 != pages[i])) { - struct page *p; - - p = pfn_to_page(PFN_DOWN(pages[i])); - dma_unmap_page(allocator->kbdev->dev, kbase_dma_addr(p), - PAGE_SIZE, - DMA_BIDIRECTIONAL); - ClearPagePrivate(p); - pages[i] = (phys_addr_t)0; - __free_page(p); - } - } - - for (; i < nr_pages; i++) { - if (likely(0 != pages[i])) { - struct page *p; - - p = pfn_to_page(PFN_DOWN(pages[i])); - pages[i] = (phys_addr_t)0; - /* Sync back the memory to ensure that future cache - * invalidations don't trample on memory. 
- */ - if (sync_back) - dma_sync_single_for_cpu(allocator->kbdev->dev, - kbase_dma_addr(p), - PAGE_SIZE, - DMA_BIDIRECTIONAL); - - list_add(&p->lru, &new_free_list_items); - page_count++; - } - } - mutex_lock(&allocator->free_list_lock); - list_splice(&new_free_list_items, &allocator->free_list_head); - atomic_add(page_count, &allocator->free_list_size); - mutex_unlock(&allocator->free_list_lock); -} -KBASE_EXPORT_TEST_API(kbase_mem_allocator_free); - diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_alloc.h b/drivers/gpu/arm/midgard/mali_kbase_mem_alloc.h deleted file mode 100644 index ab7b25953f2f..000000000000 --- a/drivers/gpu/arm/midgard/mali_kbase_mem_alloc.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * - * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * A copy of the licence is included with the program, and can also be obtained - * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. - * - */ - - - - - -#include -#include -#include - -/* raw page handling */ -struct kbase_mem_allocator { - struct kbase_device *kbdev; - atomic_t free_list_size; - unsigned int free_list_max_size; - struct mutex free_list_lock; - struct list_head free_list_head; - struct shrinker free_list_reclaimer; -}; diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_alloc_carveout.c b/drivers/gpu/arm/midgard/mali_kbase_mem_alloc_carveout.c deleted file mode 100644 index df9761b1e2e7..000000000000 --- a/drivers/gpu/arm/midgard/mali_kbase_mem_alloc_carveout.c +++ /dev/null @@ -1,402 +0,0 @@ -/* - * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * A copy of the licence is included with the program, and can also be obtained - * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. 
- * - */ - - - -/** - * @file mali_kbase_mem.c - * Base kernel memory APIs - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -/* This code does not support having multiple kbase devices, or rmmod/insmod */ - -static unsigned long kbase_carveout_start_pfn = ~0UL; -static unsigned long kbase_carveout_end_pfn; -static LIST_HEAD(kbase_carveout_free_list); -static DEFINE_MUTEX(kbase_carveout_free_list_lock); -static unsigned int kbase_carveout_pages; -static atomic_t kbase_carveout_used_pages; -static atomic_t kbase_carveout_system_pages; - -static struct page *kbase_carveout_get_page(struct kbase_mem_allocator *allocator) -{ - struct page *p = NULL; - gfp_t gfp; - - mutex_lock(&kbase_carveout_free_list_lock); - if (!list_empty(&kbase_carveout_free_list)) { - p = list_first_entry(&kbase_carveout_free_list, struct page, lru); - list_del(&p->lru); - atomic_inc(&kbase_carveout_used_pages); - } - mutex_unlock(&kbase_carveout_free_list_lock); - - if (!p) { - dma_addr_t dma_addr; -#if defined(CONFIG_ARM) && !defined(CONFIG_HAVE_DMA_ATTRS) && LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0) - /* DMA cache sync fails for HIGHMEM before 3.5 on ARM */ - gfp = GFP_USER | __GFP_ZERO; -#else - gfp = GFP_HIGHUSER | __GFP_ZERO; -#endif - - if (current->flags & PF_KTHREAD) { - /* Don't trigger OOM killer from kernel threads, e.g. - * when growing memory on GPU page fault */ - gfp |= __GFP_NORETRY; - } - - p = alloc_page(gfp); - if (!p) - goto out; - - dma_addr = dma_map_page(allocator->kbdev->dev, p, 0, PAGE_SIZE, - DMA_BIDIRECTIONAL); - if (dma_mapping_error(allocator->kbdev->dev, dma_addr)) { - __free_page(p); - p = NULL; - goto out; - } - - kbase_set_dma_addr(p, dma_addr); - BUG_ON(dma_addr != PFN_PHYS(page_to_pfn(p))); - atomic_inc(&kbase_carveout_system_pages); - } -out: - return p; -} - -static void kbase_carveout_put_page(struct page *p, - struct kbase_mem_allocator *allocator) -{ - if (page_to_pfn(p) >= kbase_carveout_start_pfn && - page_to_pfn(p) <= kbase_carveout_end_pfn) { - mutex_lock(&kbase_carveout_free_list_lock); - list_add(&p->lru, &kbase_carveout_free_list); - atomic_dec(&kbase_carveout_used_pages); - mutex_unlock(&kbase_carveout_free_list_lock); - } else { - dma_unmap_page(allocator->kbdev->dev, kbase_dma_addr(p), - PAGE_SIZE, - DMA_BIDIRECTIONAL); - ClearPagePrivate(p); - __free_page(p); - atomic_dec(&kbase_carveout_system_pages); - } -} - -static int kbase_carveout_seq_show(struct seq_file *s, void *data) -{ - seq_printf(s, "carveout pages: %u\n", kbase_carveout_pages); - seq_printf(s, "used carveout pages: %u\n", - atomic_read(&kbase_carveout_used_pages)); - seq_printf(s, "used system pages: %u\n", - atomic_read(&kbase_carveout_system_pages)); - return 0; -} - -static int kbasep_carveout_debugfs_open(struct inode *inode, struct file *file) -{ - return single_open(file, kbase_carveout_seq_show, NULL); -} - -static const struct file_operations kbase_carveout_debugfs_fops = { - .open = kbasep_carveout_debugfs_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_private, -}; - -static int kbase_carveout_init(struct device *dev) -{ - unsigned long pfn; - static int once; - - mutex_lock(&kbase_carveout_free_list_lock); - BUG_ON(once); - once = 1; - - for (pfn = kbase_carveout_start_pfn; pfn <= kbase_carveout_end_pfn; pfn++) { - struct page *p = pfn_to_page(pfn); - dma_addr_t dma_addr; - - dma_addr = dma_map_page(dev, p, 0, PAGE_SIZE, - DMA_BIDIRECTIONAL); - if (dma_mapping_error(dev, dma_addr)) - goto out_rollback; - - 
kbase_set_dma_addr(p, dma_addr); - BUG_ON(dma_addr != PFN_PHYS(page_to_pfn(p))); - - list_add_tail(&p->lru, &kbase_carveout_free_list); - } - - mutex_unlock(&kbase_carveout_free_list_lock); - - debugfs_create_file("kbase_carveout", S_IRUGO, NULL, NULL, - &kbase_carveout_debugfs_fops); - - return 0; - -out_rollback: - while (!list_empty(&kbase_carveout_free_list)) { - struct page *p; - - p = list_first_entry(&kbase_carveout_free_list, struct page, lru); - dma_unmap_page(dev, kbase_dma_addr(p), - PAGE_SIZE, - DMA_BIDIRECTIONAL); - ClearPagePrivate(p); - list_del(&p->lru); - } - - mutex_unlock(&kbase_carveout_free_list_lock); - return -ENOMEM; -} - -int __init kbase_carveout_mem_reserve(phys_addr_t size) -{ - phys_addr_t mem; - -#if defined(CONFIG_ARM) && !defined(CONFIG_HAVE_DMA_ATTRS) \ - && LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0) - /* DMA cache sync fails for HIGHMEM before 3.5 on ARM */ - mem = memblock_alloc_base(size, PAGE_SIZE, MEMBLOCK_ALLOC_ACCESSIBLE); -#else - mem = memblock_alloc_base(size, PAGE_SIZE, MEMBLOCK_ALLOC_ANYWHERE); -#endif - if (mem == 0) { - pr_warn("%s: Failed to allocate %d for kbase carveout\n", - __func__, size); - return -ENOMEM; - } - - kbase_carveout_start_pfn = PFN_DOWN(mem); - kbase_carveout_end_pfn = PFN_DOWN(mem + size - 1); - kbase_carveout_pages = kbase_carveout_end_pfn - kbase_carveout_start_pfn + 1; - - return 0; -} - -int kbase_mem_lowlevel_init(struct kbase_device *kbdev) -{ - return kbase_carveout_init(kbdev->dev); -} - -void kbase_mem_lowlevel_term(struct kbase_device *kbdev) -{ -} - -static int kbase_mem_allocator_shrink(struct shrinker *s, struct shrink_control *sc) -{ - struct kbase_mem_allocator *allocator; - int i; - int freed; - - allocator = container_of(s, struct kbase_mem_allocator, free_list_reclaimer); - - if (sc->nr_to_scan == 0) - return atomic_read(&allocator->free_list_size); - - might_sleep(); - - mutex_lock(&allocator->free_list_lock); - i = MIN(atomic_read(&allocator->free_list_size), sc->nr_to_scan); - freed = i; - - atomic_sub(i, &allocator->free_list_size); - - while (i--) { - struct page *p; - - BUG_ON(list_empty(&allocator->free_list_head)); - p = list_first_entry(&allocator->free_list_head, struct page, lru); - list_del(&p->lru); - kbase_carveout_put_page(p, allocator); - } - mutex_unlock(&allocator->free_list_lock); - return atomic_read(&allocator->free_list_size); -} - -int kbase_mem_allocator_init(struct kbase_mem_allocator * const allocator, - unsigned int max_size, struct kbase_device *kbdev) -{ - KBASE_DEBUG_ASSERT(NULL != allocator); - KBASE_DEBUG_ASSERT(kbdev); - - INIT_LIST_HEAD(&allocator->free_list_head); - - allocator->kbdev = kbdev; - - mutex_init(&allocator->free_list_lock); - - atomic_set(&allocator->free_list_size, 0); - - allocator->free_list_max_size = max_size; - allocator->free_list_reclaimer.shrink = kbase_mem_allocator_shrink; - allocator->free_list_reclaimer.seeks = DEFAULT_SEEKS; -#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0) /* Kernel versions prior to 3.1 : struct shrinker does not define batch */ - allocator->free_list_reclaimer.batch = 0; -#endif - - register_shrinker(&allocator->free_list_reclaimer); - - return 0; -} - -void kbase_mem_allocator_term(struct kbase_mem_allocator *allocator) -{ - KBASE_DEBUG_ASSERT(NULL != allocator); - - unregister_shrinker(&allocator->free_list_reclaimer); - - while (!list_empty(&allocator->free_list_head)) { - struct page *p; - - p = list_first_entry(&allocator->free_list_head, struct page, - lru); - list_del(&p->lru); - - kbase_carveout_put_page(p, 
allocator); - } - mutex_destroy(&allocator->free_list_lock); -} - - -int kbase_mem_allocator_alloc(struct kbase_mem_allocator *allocator, size_t nr_pages, phys_addr_t *pages) -{ - struct page *p; - int i; - int num_from_free_list; - struct list_head from_free_list = LIST_HEAD_INIT(from_free_list); - - might_sleep(); - - KBASE_DEBUG_ASSERT(NULL != allocator); - - /* take from the free list first */ - mutex_lock(&allocator->free_list_lock); - num_from_free_list = MIN(nr_pages, atomic_read(&allocator->free_list_size)); - atomic_sub(num_from_free_list, &allocator->free_list_size); - for (i = 0; i < num_from_free_list; i++) { - BUG_ON(list_empty(&allocator->free_list_head)); - p = list_first_entry(&allocator->free_list_head, struct page, lru); - list_move(&p->lru, &from_free_list); - } - mutex_unlock(&allocator->free_list_lock); - i = 0; - - /* Allocate as many pages from the pool of already allocated pages. */ - list_for_each_entry(p, &from_free_list, lru) { - pages[i] = PFN_PHYS(page_to_pfn(p)); - i++; - } - - if (i == nr_pages) - return 0; - - /* If not all pages were sourced from the pool, request new ones. */ - for (; i < nr_pages; i++) { - p = kbase_carveout_get_page(allocator); - if (NULL == p) - goto err_out_roll_back; - - kbase_sync_single_for_device(allocator->kbdev, - kbase_dma_addr(p), - PAGE_SIZE, - DMA_BIDIRECTIONAL); - - pages[i] = PFN_PHYS(page_to_pfn(p)); - } - - return 0; - -err_out_roll_back: - while (i--) { - struct page *p; - - p = pfn_to_page(PFN_DOWN(pages[i])); - pages[i] = (phys_addr_t)0; - kbase_carveout_put_page(p, allocator); - } - - return -ENOMEM; -} - -void kbase_mem_allocator_free(struct kbase_mem_allocator *allocator, u32 nr_pages, phys_addr_t *pages, bool sync_back) -{ - int i = 0; - int page_count = 0; - int tofree; - - LIST_HEAD(new_free_list_items); - - KBASE_DEBUG_ASSERT(NULL != allocator); - - might_sleep(); - - /* Starting by just freeing the overspill. - * As we do this outside of the lock we might spill too many pages - * or get too many on the free list, but the max_size is just a ballpark so it is ok - * providing that tofree doesn't exceed nr_pages - */ - tofree = MAX((int)allocator->free_list_max_size - atomic_read(&allocator->free_list_size), 0); - tofree = nr_pages - MIN(tofree, nr_pages); - for (; i < tofree; i++) { - if (likely(0 != pages[i])) { - struct page *p; - - p = pfn_to_page(PFN_DOWN(pages[i])); - pages[i] = (phys_addr_t)0; - kbase_carveout_put_page(p, allocator); - } - } - - for (; i < nr_pages; i++) { - if (likely(0 != pages[i])) { - struct page *p; - - p = pfn_to_page(PFN_DOWN(pages[i])); - pages[i] = (phys_addr_t)0; - /* Sync back the memory to ensure that future cache - * invalidations don't trample on memory. 
- */ - if (sync_back) - kbase_sync_single_for_cpu(allocator->kbdev, - kbase_dma_addr(p), - PAGE_SIZE, - DMA_BIDIRECTIONAL); - list_add(&p->lru, &new_free_list_items); - page_count++; - } - } - mutex_lock(&allocator->free_list_lock); - list_splice(&new_free_list_items, &allocator->free_list_head); - atomic_add(page_count, &allocator->free_list_size); - mutex_unlock(&allocator->free_list_lock); -} -KBASE_EXPORT_TEST_API(kbase_mem_allocator_free); - diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c index 1ddfe5c9737b..3e4481a77e15 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c +++ b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c @@ -118,7 +118,7 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages goto no_region; } - kbase_update_region_flags(reg, *flags); + kbase_update_region_flags(kctx, reg, *flags); if (kbase_reg_prepare_native(reg, kctx) != 0) { dev_err(dev, "Failed to prepare region"); @@ -130,7 +130,7 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages else reg->extent = 0; - if (kbase_alloc_phy_pages(reg, va_pages, commit_pages)) { + if (kbase_alloc_phy_pages(reg, va_pages, commit_pages) != 0) { dev_warn(dev, "Failed to allocate %lld pages (va_pages=%lld)", (unsigned long long)commit_pages, (unsigned long long)va_pages); @@ -188,6 +188,7 @@ bad_flags: bad_size: return NULL; } +KBASE_EXPORT_TEST_API(kbase_mem_alloc); int kbase_mem_query(struct kbase_context *kctx, u64 gpu_addr, int query, u64 * const out) { @@ -532,7 +533,7 @@ static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx, in /* no read or write permission given on import, only on run do we give the right permissions */ - reg->gpu_alloc->type = BASE_TMEM_IMPORT_TYPE_UMM; + reg->gpu_alloc->type = BASE_MEM_IMPORT_TYPE_UMM; reg->gpu_alloc->imported.umm.sgt = NULL; reg->gpu_alloc->imported.umm.dma_buf = dma_buf; reg->gpu_alloc->imported.umm.dma_attachment = dma_attachment; @@ -619,7 +620,7 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); - kbase_update_region_flags(reg, *flags); + kbase_update_region_flags(kctx, reg, *flags); reg->gpu_alloc->imported.alias.nents = nents; reg->gpu_alloc->imported.alias.stride = stride; @@ -1090,8 +1091,7 @@ static int kbase_cpu_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) locked_bad_fault: kbase_gpu_vm_unlock(map->kctx); - send_sig(SIGSEGV, current, 1); - return VM_FAULT_NOPAGE; + return VM_FAULT_SIGBUS; } static const struct vm_operations_struct kbase_vm_ops = { @@ -1144,7 +1144,7 @@ static int kbase_cpu_mmap(struct kbase_va_region *reg, struct vm_area_struct *vm (reg->flags & (KBASE_REG_CPU_WR|KBASE_REG_CPU_RD))) { /* We can't map vmalloc'd memory uncached. * Other memory will have been returned from - * kbase_mem_allocator_alloc which would be + * kbase_mem_pool which would be * suitable for mapping uncached. 
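/*
 * Generic skeleton (not the kbase implementation) showing the fault-return
 * convention adopted in the kbase_cpu_vm_fault() hunk above: a failed CPU
 * fault is reported back to the core mm as VM_FAULT_SIGBUS instead of the
 * handler sending SIGSEGV itself and returning VM_FAULT_NOPAGE. The
 * pre-4.11 fault signature used by this driver is assumed, and the lookup
 * helper is a placeholder.
 */
#include <linux/mm.h>

/* Hypothetical stand-in for the driver's backing-page lookup. */
static struct page *example_lookup_backing_page(struct vm_area_struct *vma,
						pgoff_t pgoff)
{
	return NULL;	/* placeholder: real code walks the region's pages */
}

static int example_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct page *page = example_lookup_backing_page(vma, vmf->pgoff);

	if (!page)
		return VM_FAULT_SIGBUS;	/* core mm delivers the signal */

	get_page(page);
	vmf->page = page;
	return 0;
}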
*/ BUG_ON(kaddr); @@ -1152,13 +1152,20 @@ static int kbase_cpu_mmap(struct kbase_va_region *reg, struct vm_area_struct *vm } if (!kaddr) { + unsigned long addr = vma->vm_start + aligned_offset; + vma->vm_flags |= VM_PFNMAP; for (i = 0; i < nr_pages; i++) { - err = vm_insert_pfn(vma, vma->vm_start + (i << PAGE_SHIFT), page_array[i + start_off] >> PAGE_SHIFT); + unsigned long pfn = PFN_DOWN(page_array[i + start_off]); + + err = vm_insert_pfn(vma, addr, pfn); if (WARN_ON(err)) break; + + addr += PAGE_SIZE; } } else { + WARN_ON(aligned_offset); /* MIXEDMAP so we can vfree the kaddr early and not track it after map time */ vma->vm_flags |= VM_MIXEDMAP; /* vmalloc remaping is easy... */ @@ -1346,6 +1353,44 @@ void kbase_os_mem_map_unlock(struct kbase_context *kctx) up_read(&mm->mmap_sem); } +#if defined(CONFIG_DMA_SHARED_BUFFER) && defined(CONFIG_MALI_TRACE_TIMELINE) +/* This section is required only for instrumentation. */ + +static void kbase_dma_buf_vm_open(struct vm_area_struct *vma) +{ + struct kbase_cpu_mapping *map = vma->vm_private_data; + + KBASE_DEBUG_ASSERT(map); + KBASE_DEBUG_ASSERT(map->count > 0); + /* Non-atomic as we're under Linux's mm lock. */ + map->count++; +} + +static void kbase_dma_buf_vm_close(struct vm_area_struct *vma) +{ + struct kbase_cpu_mapping *map = vma->vm_private_data; + + KBASE_DEBUG_ASSERT(map); + KBASE_DEBUG_ASSERT(map->count > 0); + + /* Non-atomic as we're under Linux's mm lock. */ + if (--map->count) + return; + + KBASE_DEBUG_ASSERT(map->kctx); + + kbase_gpu_vm_lock(map->kctx); + list_del(&map->mappings_list); + kbase_gpu_vm_unlock(map->kctx); + kfree(map); +} + +static const struct vm_operations_struct kbase_dma_mmap_ops = { + .open = kbase_dma_buf_vm_open, + .close = kbase_dma_buf_vm_close, +}; +#endif /* CONFIG_DMA_SHARED_BUFFER && CONFIG_MALI_TRACE_TIMELINE */ + int kbase_mmap(struct file *file, struct vm_area_struct *vma) { struct kbase_context *kctx = file->private_data; @@ -1422,76 +1467,94 @@ int kbase_mmap(struct file *file, struct vm_area_struct *vma) gpu_pc_bits = kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size; reg = kctx->pending_regions[cookie]; - if (NULL != reg) { - if (reg->flags & KBASE_REG_ALIGNED) { - /* nr_pages must be able to hold alignment pages - * plus actual pages */ - if (nr_pages != ((1UL << gpu_pc_bits >> - PAGE_SHIFT) + - reg->nr_pages)) { - /* incorrect mmap size */ - /* leave the cookie for a potential - * later mapping, or to be reclaimed - * later when the context is freed */ - err = -ENOMEM; - goto out_unlock; - } + if (!reg) { + err = -ENOMEM; + goto out_unlock; + } + + if (reg->flags & KBASE_REG_ALIGNED) { + /* nr_pages must be able to hold alignment pages + * plus actual pages */ + unsigned long align = 1ULL << gpu_pc_bits; + unsigned long extra_pages = 3 * PFN_DOWN(align); + unsigned long aligned_addr; + unsigned long aligned_addr_end; + unsigned long nr_bytes = reg->nr_pages << PAGE_SHIFT; + + if (kctx->api_version < KBASE_API_VERSION(8, 5)) + /* Maintain compatibility with old userspace */ + extra_pages = PFN_DOWN(align); - aligned_offset = (vma->vm_start + - (1UL << gpu_pc_bits) - 1) & - ~((1UL << gpu_pc_bits) - 1); - aligned_offset -= vma->vm_start; - } else if (reg->nr_pages != nr_pages) { + if (nr_pages != reg->nr_pages + extra_pages) { /* incorrect mmap size */ - /* leave the cookie for a potential later - * mapping, or to be reclaimed later when the - * context is freed */ + /* leave the cookie for a potential + * later mapping, or to be reclaimed + * later when the context is freed */ err = 
-ENOMEM; goto out_unlock; } - if ((vma->vm_flags & VM_READ && - !(reg->flags & KBASE_REG_CPU_RD)) || - (vma->vm_flags & VM_WRITE && - !(reg->flags & KBASE_REG_CPU_WR))) { - /* VM flags inconsistent with region flags */ - err = -EPERM; - dev_err(dev, "%s:%d inconsistent VM flags\n", - __FILE__, __LINE__); - goto out_unlock; + aligned_addr = ALIGN(vma->vm_start, align); + aligned_addr_end = aligned_addr + nr_bytes; + + if (kctx->api_version >= KBASE_API_VERSION(8, 5)) { + if ((aligned_addr_end & BASE_MEM_MASK_4GB) == 0) { + /* Can't end at 4GB boundary */ + aligned_addr += 2 * align; + } else if ((aligned_addr & BASE_MEM_MASK_4GB) == 0) { + /* Can't start at 4GB boundary */ + aligned_addr += align; + } } - /* adjust down nr_pages to what we have physically */ - nr_pages = kbase_reg_current_backed_size(reg); + aligned_offset = aligned_addr - vma->vm_start; + } else if (reg->nr_pages != nr_pages) { + /* incorrect mmap size */ + /* leave the cookie for a potential later + * mapping, or to be reclaimed later when the + * context is freed */ + err = -ENOMEM; + goto out_unlock; + } + + if ((vma->vm_flags & VM_READ && + !(reg->flags & KBASE_REG_CPU_RD)) || + (vma->vm_flags & VM_WRITE && + !(reg->flags & KBASE_REG_CPU_WR))) { + /* VM flags inconsistent with region flags */ + err = -EPERM; + dev_err(dev, "%s:%d inconsistent VM flags\n", + __FILE__, __LINE__); + goto out_unlock; + } + + /* adjust down nr_pages to what we have physically */ + nr_pages = kbase_reg_current_backed_size(reg); - if (kbase_gpu_mmap(kctx, reg, + if (kbase_gpu_mmap(kctx, reg, vma->vm_start + aligned_offset, reg->nr_pages, 1) != 0) { - dev_err(dev, "%s:%d\n", __FILE__, __LINE__); - /* Unable to map in GPU space. */ - WARN_ON(1); - err = -ENOMEM; - goto out_unlock; - } - - /* no need for the cookie anymore */ - kctx->pending_regions[cookie] = NULL; - kctx->cookies |= (1UL << cookie); + dev_err(dev, "%s:%d\n", __FILE__, __LINE__); + /* Unable to map in GPU space. */ + WARN_ON(1); + err = -ENOMEM; + goto out_unlock; + } - /* - * Overwrite the offset with the - * region start_pfn, so we effectively - * map from offset 0 in the region. - */ - vma->vm_pgoff = reg->start_pfn; + /* no need for the cookie anymore */ + kctx->pending_regions[cookie] = NULL; + kctx->cookies |= (1UL << cookie); - /* free the region on munmap */ - free_on_close = 1; - goto map; - } + /* + * Overwrite the offset with the + * region start_pfn, so we effectively + * map from offset 0 in the region. + */ + vma->vm_pgoff = reg->start_pfn; - err = -ENOMEM; - goto out_unlock; + /* free the region on munmap */ + free_on_close = 1; + goto map; } default: { reg = kbase_region_tracker_find_region_enclosing_address(kctx, (u64)vma->vm_pgoff << PAGE_SHIFT); @@ -1546,7 +1609,36 @@ map: #ifdef CONFIG_DMA_SHARED_BUFFER dma_map: err = dma_buf_mmap(reg->cpu_alloc->imported.umm.dma_buf, vma, vma->vm_pgoff - reg->start_pfn); -#endif /* CONFIG_DMA_SHARED_BUFFER */ +#if defined(CONFIG_MALI_TRACE_TIMELINE) + /* This section is required only for instrumentation. */ + /* Add created mapping to imported region mapping list. + * It is important to make it visible to dumping infrastructure. + * Add mapping only if vm_ops structure is not used by memory owner. 
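/*
 * Stand-alone, user-space worked example of the alignment fix-up above: an
 * executable-region mapping is first aligned to the GPU program-counter
 * range and then nudged so that it neither starts nor ends exactly on a
 * 4 GiB boundary. BASE_MEM_MASK_4GB is assumed here to be the low 32-bit
 * mask, and ALIGN() is reproduced for user space; the numbers only make
 * the arithmetic concrete.
 */
#include <stdio.h>
#include <stdint.h>

#define EX_ALIGN(x, a)	(((x) + ((uint64_t)(a) - 1)) & ~((uint64_t)(a) - 1))
#define EX_MASK_4GB	((1ULL << 32) - 1)	/* assumed value */

static uint64_t example_fixup(uint64_t vm_start, uint64_t align,
			      uint64_t nr_bytes)
{
	uint64_t aligned_addr = EX_ALIGN(vm_start, align);
	uint64_t aligned_end = aligned_addr + nr_bytes;

	if ((aligned_end & EX_MASK_4GB) == 0)
		aligned_addr += 2 * align;	/* can't end on 4 GiB */
	else if ((aligned_addr & EX_MASK_4GB) == 0)
		aligned_addr += align;		/* can't start on 4 GiB */

	return aligned_addr;
}

int main(void)
{
	/* 16 MiB alignment and a 16 MiB region whose naive placement would
	 * end exactly at the 4 GiB boundary. */
	uint64_t align = 1ULL << 24;
	uint64_t start = example_fixup(0xFF000000ULL, align, 1ULL << 24);

	printf("adjusted start: 0x%llx\n", (unsigned long long)start);
	/* prints 0x101000000: shifted up by 2 * align */
	return 0;
}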
*/ + WARN_ON(vma->vm_ops); + WARN_ON(vma->vm_private_data); + if (!err && !vma->vm_ops && !vma->vm_private_data) { + struct kbase_cpu_mapping *map = kzalloc( + sizeof(*map), + GFP_KERNEL); + + if (map) { + map->kctx = reg->kctx; + map->region = NULL; + map->page_off = vma->vm_pgoff; + map->vm_start = vma->vm_start; + map->vm_end = vma->vm_end; + map->count = 1; /* start with one ref */ + + vma->vm_ops = &kbase_dma_mmap_ops; + vma->vm_private_data = map; + + list_add( + &map->mappings_list, + ®->cpu_alloc->mappings); + } + } +#endif /* CONFIG_MALI_TRACE_TIMELINE */ +#endif /* CONFIG_DMA_SHARED_BUFFER */ out_unlock: kbase_gpu_vm_unlock(kctx); out: @@ -1662,6 +1754,7 @@ out_unlock: kbase_gpu_vm_unlock(kctx); return NULL; } +KBASE_EXPORT_TEST_API(kbase_vmap); void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map) { @@ -1708,6 +1801,7 @@ void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map) map->size = 0; map->is_cached = false; } +KBASE_EXPORT_TEST_API(kbase_vunmap); void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages) { @@ -1835,7 +1929,7 @@ void *kbase_va_alloc(struct kbase_context *kctx, u32 size, struct kbase_hwc_dma_ goto no_reg; reg->flags &= ~KBASE_REG_FREE; - kbase_update_region_flags(reg, flags); + kbase_update_region_flags(kctx, reg, flags); reg->cpu_alloc = kbase_alloc_create(pages, KBASE_MEM_TYPE_RAW); if (IS_ERR_OR_NULL(reg->cpu_alloc)) diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c b/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c new file mode 100644 index 000000000000..a049205d0e90 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c @@ -0,0 +1,574 @@ +/* + * + * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +#include +#include +#include +#include +#include +#include +#include +#include + +/* Backwards compatibility with kernels using the old carveout allocator */ +int __init kbase_carveout_mem_reserve(phys_addr_t size) +{ + return 0; +} + +#define pool_dbg(pool, format, ...) \ + dev_dbg(pool->kbdev->dev, "%s-pool [%zu/%zu]: " format, \ + (pool->next_pool) ? 
"kctx" : "kbdev", \ + kbase_mem_pool_size(pool), \ + kbase_mem_pool_max_size(pool), \ + ##__VA_ARGS__) + +static inline void kbase_mem_pool_lock(struct kbase_mem_pool *pool) +{ + spin_lock(&pool->pool_lock); +} + +static inline void kbase_mem_pool_unlock(struct kbase_mem_pool *pool) +{ + spin_unlock(&pool->pool_lock); +} + +static size_t kbase_mem_pool_capacity(struct kbase_mem_pool *pool) +{ + ssize_t max_size = kbase_mem_pool_max_size(pool); + ssize_t cur_size = kbase_mem_pool_size(pool); + + return max(max_size - cur_size, (ssize_t)0); +} + +static bool kbase_mem_pool_is_full(struct kbase_mem_pool *pool) +{ + return kbase_mem_pool_size(pool) >= kbase_mem_pool_max_size(pool); +} + +static bool kbase_mem_pool_is_empty(struct kbase_mem_pool *pool) +{ + return kbase_mem_pool_size(pool) == 0; +} + +static void kbase_mem_pool_add_locked(struct kbase_mem_pool *pool, + struct page *p) +{ + lockdep_assert_held(&pool->pool_lock); + + list_add(&p->lru, &pool->page_list); + pool->cur_size++; + + pool_dbg(pool, "added page\n"); +} + +static void kbase_mem_pool_add(struct kbase_mem_pool *pool, struct page *p) +{ + kbase_mem_pool_lock(pool); + kbase_mem_pool_add_locked(pool, p); + kbase_mem_pool_unlock(pool); +} + +static void kbase_mem_pool_add_list_locked(struct kbase_mem_pool *pool, + struct list_head *page_list, size_t nr_pages) +{ + lockdep_assert_held(&pool->pool_lock); + + list_splice(page_list, &pool->page_list); + pool->cur_size += nr_pages; + + pool_dbg(pool, "added %zu pages\n", nr_pages); +} + +static void kbase_mem_pool_add_list(struct kbase_mem_pool *pool, + struct list_head *page_list, size_t nr_pages) +{ + kbase_mem_pool_lock(pool); + kbase_mem_pool_add_list_locked(pool, page_list, nr_pages); + kbase_mem_pool_unlock(pool); +} + +static struct page *kbase_mem_pool_remove_locked(struct kbase_mem_pool *pool) +{ + struct page *p; + + lockdep_assert_held(&pool->pool_lock); + + if (kbase_mem_pool_is_empty(pool)) + return NULL; + + p = list_first_entry(&pool->page_list, struct page, lru); + list_del_init(&p->lru); + pool->cur_size--; + + pool_dbg(pool, "removed page\n"); + + return p; +} + +static struct page *kbase_mem_pool_remove(struct kbase_mem_pool *pool) +{ + struct page *p; + + kbase_mem_pool_lock(pool); + p = kbase_mem_pool_remove_locked(pool); + kbase_mem_pool_unlock(pool); + + return p; +} + +static void kbase_mem_pool_sync_page(struct kbase_mem_pool *pool, + struct page *p) +{ + struct device *dev = pool->kbdev->dev; + + dma_sync_single_for_device(dev, kbase_dma_addr(p), + PAGE_SIZE, DMA_BIDIRECTIONAL); +} + +static void kbase_mem_pool_zero_page(struct kbase_mem_pool *pool, + struct page *p) +{ + clear_highpage(p); + kbase_mem_pool_sync_page(pool, p); +} + +static void kbase_mem_pool_spill(struct kbase_mem_pool *next_pool, + struct page *p) +{ + /* Zero page before spilling */ + kbase_mem_pool_zero_page(next_pool, p); + + kbase_mem_pool_add(next_pool, p); +} + +static struct page *kbase_mem_pool_alloc_page(struct kbase_mem_pool *pool) +{ + struct page *p; + gfp_t gfp; + struct device *dev = pool->kbdev->dev; + dma_addr_t dma_addr; + +#if defined(CONFIG_ARM) && !defined(CONFIG_HAVE_DMA_ATTRS) && \ + LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0) + /* DMA cache sync fails for HIGHMEM before 3.5 on ARM */ + gfp = GFP_USER | __GFP_ZERO; +#else + gfp = GFP_HIGHUSER | __GFP_ZERO; +#endif + + if (current->flags & PF_KTHREAD) { + /* Don't trigger OOM killer from kernel threads, e.g. 
when + * growing memory on GPU page fault */ + gfp |= __GFP_NORETRY; + } + + p = alloc_page(gfp); + if (!p) + return NULL; + + dma_addr = dma_map_page(dev, p, 0, PAGE_SIZE, DMA_BIDIRECTIONAL); + if (dma_mapping_error(dev, dma_addr)) { + __free_page(p); + return NULL; + } + + WARN_ON(dma_addr != page_to_phys(p)); + + kbase_set_dma_addr(p, dma_addr); + + pool_dbg(pool, "alloced page from kernel\n"); + + return p; +} + +static void kbase_mem_pool_free_page(struct kbase_mem_pool *pool, + struct page *p) +{ + struct device *dev = pool->kbdev->dev; + dma_addr_t dma_addr = kbase_dma_addr(p); + + dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + kbase_clear_dma_addr(p); + __free_page(p); + + pool_dbg(pool, "freed page to kernel\n"); +} + +static size_t kbase_mem_pool_shrink_locked(struct kbase_mem_pool *pool, + size_t nr_to_shrink) +{ + struct page *p; + size_t i; + + lockdep_assert_held(&pool->pool_lock); + + for (i = 0; i < nr_to_shrink && !kbase_mem_pool_is_empty(pool); i++) { + p = kbase_mem_pool_remove_locked(pool); + kbase_mem_pool_free_page(pool, p); + } + + return i; +} + +static size_t kbase_mem_pool_shrink(struct kbase_mem_pool *pool, + size_t nr_to_shrink) +{ + size_t nr_freed; + + kbase_mem_pool_lock(pool); + nr_freed = kbase_mem_pool_shrink_locked(pool, nr_to_shrink); + kbase_mem_pool_unlock(pool); + + return nr_freed; +} + +static size_t kbase_mem_pool_grow(struct kbase_mem_pool *pool, + size_t nr_to_grow) +{ + struct page *p; + size_t i; + + for (i = 0; i < nr_to_grow && !kbase_mem_pool_is_full(pool); i++) { + p = kbase_mem_pool_alloc_page(pool); + kbase_mem_pool_add(pool, p); + } + + return i; +} + +size_t kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size) +{ + size_t cur_size; + + cur_size = kbase_mem_pool_size(pool); + + if (new_size < cur_size) + kbase_mem_pool_shrink(pool, cur_size - new_size); + else if (new_size > cur_size) + kbase_mem_pool_grow(pool, new_size - cur_size); + + cur_size = kbase_mem_pool_size(pool); + + return cur_size; +} + +void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size) +{ + size_t cur_size; + size_t nr_to_shrink; + + kbase_mem_pool_lock(pool); + + pool->max_size = max_size; + + cur_size = kbase_mem_pool_size(pool); + if (max_size < cur_size) { + nr_to_shrink = cur_size - max_size; + kbase_mem_pool_shrink_locked(pool, nr_to_shrink); + } + + kbase_mem_pool_unlock(pool); +} + + +static unsigned long kbase_mem_pool_reclaim_count_objects(struct shrinker *s, + struct shrink_control *sc) +{ + struct kbase_mem_pool *pool; + + pool = container_of(s, struct kbase_mem_pool, reclaim); + pool_dbg(pool, "reclaim count: %zu\n", kbase_mem_pool_size(pool)); + return kbase_mem_pool_size(pool); +} + +static unsigned long kbase_mem_pool_reclaim_scan_objects(struct shrinker *s, + struct shrink_control *sc) +{ + struct kbase_mem_pool *pool; + unsigned long freed; + + pool = container_of(s, struct kbase_mem_pool, reclaim); + + pool_dbg(pool, "reclaim scan %ld:\n", sc->nr_to_scan); + + freed = kbase_mem_pool_shrink(pool, sc->nr_to_scan); + + pool_dbg(pool, "reclaim freed %ld pages\n", freed); + + return freed; +} + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) +static int kbase_mem_pool_reclaim_shrink(struct shrinker *s, + struct shrink_control *sc) +{ + if (sc->nr_to_scan == 0) + return kbase_mem_pool_reclaim_count_objects(s, sc); + + return kbase_mem_pool_reclaim_scan_objects(s, sc); +} +#endif + +int kbase_mem_pool_init(struct kbase_mem_pool *pool, + size_t max_size, + struct kbase_device *kbdev, + struct 
kbase_mem_pool *next_pool) +{ + pool->cur_size = 0; + pool->max_size = max_size; + pool->kbdev = kbdev; + pool->next_pool = next_pool; + + spin_lock_init(&pool->pool_lock); + INIT_LIST_HEAD(&pool->page_list); + + /* Register shrinker */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) + pool->reclaim.shrink = kbase_mem_pool_reclaim_shrink; +#else + pool->reclaim.count_objects = kbase_mem_pool_reclaim_count_objects; + pool->reclaim.scan_objects = kbase_mem_pool_reclaim_scan_objects; +#endif + pool->reclaim.seeks = DEFAULT_SEEKS; + /* Kernel versions prior to 3.1 : + * struct shrinker does not define batch */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0) + pool->reclaim.batch = 0; +#endif + register_shrinker(&pool->reclaim); + + pool_dbg(pool, "initialized\n"); + + return 0; +} + +void kbase_mem_pool_term(struct kbase_mem_pool *pool) +{ + struct kbase_mem_pool *next_pool = pool->next_pool; + struct page *p; + size_t nr_to_spill = 0; + LIST_HEAD(spill_list); + int i; + + pool_dbg(pool, "terminate()\n"); + + unregister_shrinker(&pool->reclaim); + + kbase_mem_pool_lock(pool); + pool->max_size = 0; + + if (next_pool && !kbase_mem_pool_is_full(next_pool)) { + /* Spill to next pool (may overspill) */ + nr_to_spill = kbase_mem_pool_capacity(next_pool); + nr_to_spill = min(kbase_mem_pool_size(pool), nr_to_spill); + + /* Zero pages first without holding the next_pool lock */ + for (i = 0; i < nr_to_spill; i++) { + p = kbase_mem_pool_remove_locked(pool); + kbase_mem_pool_zero_page(pool, p); + list_add(&p->lru, &spill_list); + } + } + + while (!kbase_mem_pool_is_empty(pool)) { + /* Free remaining pages to kernel */ + p = kbase_mem_pool_remove_locked(pool); + kbase_mem_pool_free_page(pool, p); + } + + kbase_mem_pool_unlock(pool); + + if (next_pool && nr_to_spill) { + /* Add new page list to next_pool */ + kbase_mem_pool_add_list(next_pool, &spill_list, nr_to_spill); + + pool_dbg(pool, "terminate() spilled %zu pages\n", nr_to_spill); + } + + pool_dbg(pool, "terminated\n"); +} + +struct page *kbase_mem_pool_alloc(struct kbase_mem_pool *pool) +{ + struct page *p; + + pool_dbg(pool, "alloc()\n"); + + p = kbase_mem_pool_remove(pool); + + if (!p && pool->next_pool) { + /* Allocate via next pool */ + return kbase_mem_pool_alloc(pool->next_pool); + } + + if (!p) { + /* Get page from kernel */ + p = kbase_mem_pool_alloc_page(pool); + } + + return p; +} + +void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *p, + bool dirty) +{ + struct kbase_mem_pool *next_pool = pool->next_pool; + + pool_dbg(pool, "free()\n"); + + if (!kbase_mem_pool_is_full(pool)) { + /* Add to our own pool */ + if (dirty) + kbase_mem_pool_sync_page(pool, p); + + kbase_mem_pool_add(pool, p); + } else if (next_pool && !kbase_mem_pool_is_full(next_pool)) { + /* Spill to next pool */ + kbase_mem_pool_spill(next_pool, p); + } else { + /* Free page */ + kbase_mem_pool_free_page(pool, p); + } +} + +int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_pages, + phys_addr_t *pages) +{ + struct page *p; + size_t nr_from_pool; + size_t i; + int err = -ENOMEM; + + pool_dbg(pool, "alloc_pages(%zu):\n", nr_pages); + + /* Get pages from this pool */ + kbase_mem_pool_lock(pool); + nr_from_pool = min(nr_pages, kbase_mem_pool_size(pool)); + for (i = 0; i < nr_from_pool; i++) { + p = kbase_mem_pool_remove_locked(pool); + pages[i] = page_to_phys(p); + } + kbase_mem_pool_unlock(pool); + + if (i != nr_pages && pool->next_pool) { + /* Allocate via next pool */ + err = kbase_mem_pool_alloc_pages(pool->next_pool, + nr_pages - i, 
pages + i); + + if (err) + goto err_rollback; + + i += nr_pages - i; + } + + /* Get any remaining pages from kernel */ + for (; i < nr_pages; i++) { + p = kbase_mem_pool_alloc_page(pool); + if (!p) + goto err_rollback; + pages[i] = page_to_phys(p); + } + + pool_dbg(pool, "alloc_pages(%zu) done\n", nr_pages); + + return 0; + +err_rollback: + kbase_mem_pool_free_pages(pool, i, pages, false); + return err; +} + +static void kbase_mem_pool_add_array(struct kbase_mem_pool *pool, + size_t nr_pages, phys_addr_t *pages, bool zero, bool sync) +{ + struct page *p; + size_t nr_to_pool = 0; + LIST_HEAD(new_page_list); + size_t i; + + if (!nr_pages) + return; + + pool_dbg(pool, "add_array(%zu, zero=%d, sync=%d):\n", + nr_pages, zero, sync); + + /* Zero/sync pages first without holding the pool lock */ + for (i = 0; i < nr_pages; i++) { + if (unlikely(!pages[i])) + continue; + + p = phys_to_page(pages[i]); + + if (zero) + kbase_mem_pool_zero_page(pool, p); + else if (sync) + kbase_mem_pool_sync_page(pool, p); + + list_add(&p->lru, &new_page_list); + nr_to_pool++; + pages[i] = 0; + } + + /* Add new page list to pool */ + kbase_mem_pool_add_list(pool, &new_page_list, nr_to_pool); + + pool_dbg(pool, "add_array(%zu) added %zu pages\n", + nr_pages, nr_to_pool); +} + +void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages, + phys_addr_t *pages, bool dirty) +{ + struct kbase_mem_pool *next_pool = pool->next_pool; + struct page *p; + size_t nr_to_pool; + LIST_HEAD(to_pool_list); + size_t i = 0; + + pool_dbg(pool, "free_pages(%zu):\n", nr_pages); + + /* Add to this pool */ + nr_to_pool = kbase_mem_pool_capacity(pool); + nr_to_pool = min(nr_pages, nr_to_pool); + + kbase_mem_pool_add_array(pool, nr_to_pool, pages, false, dirty); + + i += nr_to_pool; + + if (i != nr_pages && next_pool) { + /* Spill to next pool (may overspill) */ + nr_to_pool = kbase_mem_pool_capacity(next_pool); + nr_to_pool = min(nr_pages - i, nr_to_pool); + + kbase_mem_pool_add_array(next_pool, nr_to_pool, pages + i, + true, dirty); + i += nr_to_pool; + } + + /* Free any remaining pages to kernel */ + for (; i < nr_pages; i++) { + if (unlikely(!pages[i])) + continue; + + p = phys_to_page(pages[i]); + kbase_mem_pool_free_page(pool, p); + pages[i] = 0; + } + + pool_dbg(pool, "free_pages(%zu) done\n", nr_pages); +} diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c b/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c new file mode 100644 index 000000000000..493665b7e607 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c @@ -0,0 +1,81 @@ +/* + * + * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + + + +#include +#include + +#include + +#ifdef CONFIG_DEBUG_FS + +static int kbase_mem_pool_debugfs_size_get(void *data, u64 *val) +{ + struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data; + + *val = kbase_mem_pool_size(pool); + + return 0; +} + +static int kbase_mem_pool_debugfs_size_set(void *data, u64 val) +{ + struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data; + + kbase_mem_pool_trim(pool, val); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(kbase_mem_pool_debugfs_size_fops, + kbase_mem_pool_debugfs_size_get, + kbase_mem_pool_debugfs_size_set, + "%llu\n"); + +static int kbase_mem_pool_debugfs_max_size_get(void *data, u64 *val) +{ + struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data; + + *val = kbase_mem_pool_max_size(pool); + + return 0; +} + +static int kbase_mem_pool_debugfs_max_size_set(void *data, u64 val) +{ + struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data; + + kbase_mem_pool_set_max_size(pool, val); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(kbase_mem_pool_debugfs_max_size_fops, + kbase_mem_pool_debugfs_max_size_get, + kbase_mem_pool_debugfs_max_size_set, + "%llu\n"); + +void kbase_mem_pool_debugfs_add(struct dentry *parent, + struct kbase_mem_pool *pool) +{ + debugfs_create_file("mem_pool_size", S_IRUGO | S_IWUSR, parent, + pool, &kbase_mem_pool_debugfs_size_fops); + + debugfs_create_file("mem_pool_max_size", S_IRUGO | S_IWUSR, parent, + pool, &kbase_mem_pool_debugfs_max_size_fops); +} + +#endif /* CONFIG_DEBUG_FS */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h b/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h new file mode 100644 index 000000000000..458f3f09e697 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h @@ -0,0 +1,36 @@ +/* + * + * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +#ifndef _KBASE_MEM_POOL_DEBUGFS_H +#define _KBASE_MEM_POOL_DEBUGFS_H + +#include + +/** + * kbase_mem_pool_debugfs_add - add debugfs knobs for @pool + * @parent: Parent debugfs dentry + * @pool: Memory pool to control + * + * Adds two debugfs files under @parent: + * - mem_pool_size: get/set the current size of @pool + * - mem_pool_max_size: get/set the max size of @pool + */ +void kbase_mem_pool_debugfs_add(struct dentry *parent, + struct kbase_mem_pool *pool); + +#endif /*_KBASE_MEM_POOL_DEBUGFS_H*/ + diff --git a/drivers/gpu/arm/midgard/mali_kbase_mmu.c b/drivers/gpu/arm/midgard/mali_kbase_mmu.c index 49cbf2e4e06e..2666cdbf4a5e 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_mmu.c +++ b/drivers/gpu/arm/midgard/mali_kbase_mmu.c @@ -56,6 +56,7 @@ static void kbase_mmu_sync_pgd(struct device *dev, dma_addr_t handle, size_t size) { + dma_sync_single_for_device(dev, handle, size, DMA_TO_DEVICE); } @@ -104,29 +105,9 @@ void page_fault_worker(struct work_struct *data) /* Grab the context that was already refcounted in kbase_mmu_interrupt(). 
* Therefore, it cannot be scheduled out of this AS until we explicitly release it - * - * NOTE: NULL can be returned here if we're gracefully handling a spurious interrupt */ + */ kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as_no); - - if (kctx == NULL) { - /* Only handle this if not already suspended */ - if (!kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) { - /* Address space has no context, terminate the work */ - - /* AS transaction begin */ - mutex_lock(&faulting_as->transaction_mutex); - - kbase_mmu_disable_as(kbdev, as_no); - - mutex_unlock(&faulting_as->transaction_mutex); - /* AS transaction end */ - - kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx, - KBASE_MMU_FAULT_TYPE_PAGE); - kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx, - KBASE_MMU_FAULT_TYPE_PAGE); - kbase_pm_context_idle(kbdev); - } + if (WARN_ON(!kctx)) { atomic_dec(&kbdev->faults_pending); return; } @@ -332,7 +313,6 @@ fault_done: phys_addr_t kbase_mmu_alloc_pgd(struct kbase_context *kctx) { - phys_addr_t pgd; u64 *page; int i; struct page *p; @@ -341,10 +321,10 @@ phys_addr_t kbase_mmu_alloc_pgd(struct kbase_context *kctx) kbase_atomic_add_pages(1, &kctx->used_pages); kbase_atomic_add_pages(1, &kctx->kbdev->memdev.used_pages); - if (kbase_mem_allocator_alloc(kctx->pgd_allocator, 1, &pgd) != 0) + p = kbase_mem_pool_alloc(&kctx->mem_pool); + if (!p) goto sub_pages; - p = pfn_to_page(PFN_DOWN(pgd)); page = kmap(p); if (NULL == page) goto alloc_free; @@ -356,11 +336,11 @@ phys_addr_t kbase_mmu_alloc_pgd(struct kbase_context *kctx) kbase_mmu_sync_pgd(kctx->kbdev->dev, kbase_dma_addr(p), PAGE_SIZE); - kunmap(pfn_to_page(PFN_DOWN(pgd))); - return pgd; + kunmap(p); + return page_to_phys(p); alloc_free: - kbase_mem_allocator_free(kctx->pgd_allocator, 1, &pgd, false); + kbase_mem_pool_free(&kctx->mem_pool, p, false); sub_pages: kbase_atomic_sub_pages(1, &kctx->used_pages); kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages); @@ -985,7 +965,9 @@ static void mmu_teardown_level(struct kbase_context *kctx, phys_addr_t pgd, int beenthere(kctx, "pte %lx level %d", (unsigned long)target_pgd, level + 1); if (zap) { - kbase_mem_allocator_free(kctx->pgd_allocator, 1, &target_pgd, true); + struct page *p = phys_to_page(target_pgd); + + kbase_mem_pool_free(&kctx->mem_pool, p, true); kbase_process_page_usage_dec(kctx, 1); kbase_atomic_sub_pages(1, &kctx->used_pages); kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages); @@ -1027,7 +1009,7 @@ void kbase_mmu_free_pgd(struct kbase_context *kctx) mmu_teardown_level(kctx, kctx->pgd, MIDGARD_MMU_TOPLEVEL, 1, kctx->mmu_teardown_pages); beenthere(kctx, "pgd %lx", (unsigned long)kctx->pgd); - kbase_mem_allocator_free(kctx->pgd_allocator, 1, &kctx->pgd, true); + kbase_mem_pool_free(&kctx->mem_pool, phys_to_page(kctx->pgd), true); kbase_process_page_usage_dec(kctx, 1); kbase_atomic_sub_pages(1, &kctx->used_pages); kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages); @@ -1098,7 +1080,7 @@ void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages) lockdep_assert_held(&kctx->reg_lock); if (0 == nr_pages) { - /* can't find in a 0 sized buffer, early out */ + /* can't dump in a 0 sized buffer, early out */ return NULL; } @@ -1109,8 +1091,33 @@ void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages) if (kaddr) { u64 end_marker = 0xFFULL; - char *buffer = (char *)kaddr; - size_t size = kbasep_mmu_dump_level(kctx, kctx->pgd, MIDGARD_MMU_TOPLEVEL, &buffer, &size_left); + char *buffer; + char *mmu_dump_buffer; + u64 
config[3]; + size_t size; + + buffer = (char *)kaddr; + mmu_dump_buffer = buffer; + + if (kctx->api_version >= KBASE_API_VERSION(8, 4)) { + struct kbase_mmu_setup as_setup; + + kctx->kbdev->mmu_mode->get_as_setup(kctx, &as_setup); + config[0] = as_setup.transtab; + config[1] = as_setup.memattr; + config[2] = 0; + memcpy(buffer, &config, sizeof(config)); + mmu_dump_buffer += sizeof(config); + size_left -= sizeof(config); + } + + + + size = kbasep_mmu_dump_level(kctx, + kctx->pgd, + MIDGARD_MMU_TOPLEVEL, + &mmu_dump_buffer, + &size_left); if (!size) { vfree(kaddr); @@ -1119,6 +1126,10 @@ void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages) /* Add on the size for the end marker */ size += sizeof(u64); + /* Add on the size for the config */ + if (kctx->api_version >= KBASE_API_VERSION(8, 4)) + size += sizeof(config); + if (size > nr_pages * PAGE_SIZE || size_left < sizeof(u64)) { /* The buffer isn't big enough - free the memory and return failure */ @@ -1127,7 +1138,7 @@ void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages) } /* Add the end marker */ - memcpy(buffer, &end_marker, sizeof(u64)); + memcpy(mmu_dump_buffer, &end_marker, sizeof(u64)); } return kaddr; @@ -1152,9 +1163,13 @@ void bus_fault_worker(struct work_struct *data) /* Grab the context that was already refcounted in kbase_mmu_interrupt(). * Therefore, it cannot be scheduled out of this AS until we explicitly release it - * - * NOTE: NULL can be returned here if we're gracefully handling a spurious interrupt */ + */ kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as_no); + if (WARN_ON(!kctx)) { + atomic_dec(&kbdev->faults_pending); + return; + } + #if KBASE_GPU_RESET_EN if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) { /* Due to H/W issue 8245 we need to reset the GPU after using UNMAPPED mode. 
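
Note on the kbase_mmu_dump() hunks above: for clients with api_version >= KBASE_API_VERSION(8, 4) the dump buffer now begins with a three-entry u64 config header (transtab, memattr, 0), followed by the page-table levels emitted by kbasep_mmu_dump_level() and the existing 0xFF end marker, and the reported size grows by sizeof(config) to match. A minimal consumer-side sketch follows; the struct and helper names are hypothetical and not part of the driver.

#include <linux/types.h>

/*
 * Hypothetical view of the config[3] prefix written by kbase_mmu_dump();
 * names are illustrative only.
 */
struct mmu_dump_config {
	u64 transtab;	/* config[0] */
	u64 memattr;	/* config[1] */
	u64 reserved;	/* config[2], always written as 0 */
};

/* Skip the config prefix only when the client negotiated API >= 8.4 */
static const void *mmu_dump_payload(const void *buf, bool has_config_header)
{
	if (has_config_header)
		return (const char *)buf + sizeof(struct mmu_dump_config);
	return buf;
}
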
@@ -1179,19 +1194,19 @@ void bus_fault_worker(struct work_struct *data) /* AS transaction end */ kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx, - KBASE_MMU_FAULT_TYPE_BUS); + KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx, - KBASE_MMU_FAULT_TYPE_BUS); + KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); kbase_pm_context_idle(kbdev); } + #if KBASE_GPU_RESET_EN if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245) && reset_status) kbase_reset_gpu(kbdev); #endif /* KBASE_GPU_RESET_EN */ - /* By this point, the fault was handled in some way, so release the ctx refcount */ - if (kctx != NULL) - kbasep_js_runpool_release_ctx(kbdev, kctx); + + kbasep_js_runpool_release_ctx(kbdev, kctx); atomic_dec(&kbdev->faults_pending); } @@ -1327,7 +1342,7 @@ static const char *access_type_name(struct kbase_device *kbdev, case AS_FAULTSTATUS_ACCESS_TYPE_EX: return "EXECUTE"; default: - KBASE_DEBUG_ASSERT(0); + WARN_ON(1); return NULL; } } @@ -1420,8 +1435,10 @@ static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, mutex_unlock(&as->transaction_mutex); /* AS transaction end */ /* Clear down the fault */ - kbase_mmu_hw_clear_fault(kbdev, as, kctx, KBASE_MMU_FAULT_TYPE_PAGE); - kbase_mmu_hw_enable_fault(kbdev, as, kctx, KBASE_MMU_FAULT_TYPE_PAGE); + kbase_mmu_hw_clear_fault(kbdev, as, kctx, + KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); + kbase_mmu_hw_enable_fault(kbdev, as, kctx, + KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); #if KBASE_GPU_RESET_EN if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245) && reset_status) @@ -1593,35 +1610,65 @@ void kbase_mmu_interrupt_process(struct kbase_device *kbdev, struct kbase_contex lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); - if (kctx == NULL) { + if (!kctx) { dev_warn(kbdev->dev, "%s in AS%d at 0x%016llx with no context present! Suprious IRQ or SW Design Error?\n", kbase_as_has_bus_fault(as) ? "Bus error" : "Page fault", as->number, as->fault_addr); - } - if (kbase_as_has_bus_fault(as)) { - if (kctx) { - /* - * hw counters dumping in progress, signal the - * other thread that it failed - */ - if ((kbdev->hwcnt.kctx == kctx) && - (kbdev->hwcnt.backend.state == - KBASE_INSTR_STATE_DUMPING)) - kbdev->hwcnt.backend.state = - KBASE_INSTR_STATE_FAULT; + /* Since no ctx was found, the MMU must be disabled. */ + WARN_ON(as->current_setup.transtab); + + if (kbase_as_has_bus_fault(as)) { + kbase_mmu_hw_clear_fault(kbdev, as, kctx, + KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); + kbase_mmu_hw_enable_fault(kbdev, as, kctx, + KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); + } else if (kbase_as_has_page_fault(as)) { + kbase_mmu_hw_clear_fault(kbdev, as, kctx, + KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); + kbase_mmu_hw_enable_fault(kbdev, as, kctx, + KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); + } +#if KBASE_GPU_RESET_EN + if (kbase_as_has_bus_fault(as) && + kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) { + bool reset_status; /* - * Stop the kctx from submitting more jobs and cause it - * to be scheduled out/rescheduled when all references - * to it are released + * Reset the GPU, like in bus_fault_worker, in case an + * earlier error hasn't been properly cleared by this + * point. */ - kbasep_js_clear_submit_allowed(js_devdata, kctx); + dev_err(kbdev->dev, "GPU bus error occurred. 
For this GPU version we now soft-reset as part of bus error recovery\n"); + reset_status = kbase_prepare_to_reset_gpu_locked(kbdev); + if (reset_status) + kbase_reset_gpu_locked(kbdev); + } +#endif /* KBASE_GPU_RESET_EN */ - dev_warn(kbdev->dev, "Bus error in AS%d at 0x%016llx\n", - as->number, as->fault_addr); + return; + } - } + if (kbase_as_has_bus_fault(as)) { + /* + * hw counters dumping in progress, signal the + * other thread that it failed + */ + if ((kbdev->hwcnt.kctx == kctx) && + (kbdev->hwcnt.backend.state == + KBASE_INSTR_STATE_DUMPING)) + kbdev->hwcnt.backend.state = + KBASE_INSTR_STATE_FAULT; + + /* + * Stop the kctx from submitting more jobs and cause it + * to be scheduled out/rescheduled when all references + * to it are released + */ + kbasep_js_clear_submit_allowed(js_devdata, kctx); + + dev_warn(kbdev->dev, "Bus error in AS%d at 0x%016llx\n", + as->number, as->fault_addr); /* * We need to switch to UNMAPPED mode - but we do this in a @@ -1630,14 +1677,12 @@ void kbase_mmu_interrupt_process(struct kbase_device *kbdev, struct kbase_contex kbdev->kbase_group_error++; KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&as->work_busfault)); WARN_ON(work_pending(&as->work_busfault)); - INIT_WORK(&as->work_busfault, bus_fault_worker); queue_work(as->pf_wq, &as->work_busfault); atomic_inc(&kbdev->faults_pending); } else { kbdev->kbase_group_error++; KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&as->work_pagefault)); WARN_ON(work_pending(&as->work_pagefault)); - INIT_WORK(&as->work_pagefault, page_fault_worker); queue_work(as->pf_wq, &as->work_pagefault); atomic_inc(&kbdev->faults_pending); } diff --git a/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h b/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h index c6478b2e0a6d..986e959e9a0c 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h +++ b/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -53,7 +53,9 @@ struct kbase_context; enum kbase_mmu_fault_type { KBASE_MMU_FAULT_TYPE_UNKNOWN = 0, KBASE_MMU_FAULT_TYPE_PAGE, - KBASE_MMU_FAULT_TYPE_BUS + KBASE_MMU_FAULT_TYPE_BUS, + KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED, + KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED }; /** @brief Configure an address space for use. 
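
The mali_kbase_mmu_mode.h and mali_kbase_mmu_mode_lpae.c changes below factor the address-space register computation out of mmu_update() into a new get_as_setup() operation; kbase_mmu_dump() above uses it to fill the config header for API >= 8.4 clients without touching as->current_setup. A minimal sketch of the calling pattern, assuming the kbase_context and kbase_mmu_setup definitions from mali_kbase_defs.h; the wrapper name is hypothetical.

/* Read the AS configuration for a context via the new mmu_mode hook */
static void example_get_as_regs(struct kbase_context *kctx,
		u64 *transtab, u64 *memattr)
{
	struct kbase_mmu_setup setup;

	kctx->kbdev->mmu_mode->get_as_setup(kctx, &setup);

	*transtab = setup.transtab;
	*memattr = setup.memattr;
}
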
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mmu_mode.h b/drivers/gpu/arm/midgard/mali_kbase_mmu_mode.h index 26e683e29607..2449c60a92fd 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_mmu_mode.h +++ b/drivers/gpu/arm/midgard/mali_kbase_mmu_mode.h @@ -26,9 +26,12 @@ struct kbase_context; struct kbase_device; struct kbase_as; +struct kbase_mmu_setup; struct kbase_mmu_mode { void (*update)(struct kbase_context *kctx); + void (*get_as_setup)(struct kbase_context *kctx, + struct kbase_mmu_setup * const setup); void (*disable_as)(struct kbase_device *kbdev, int as_nr); phys_addr_t (*pte_to_phy_addr)(u64 entry); int (*ate_is_valid)(u64 ate); diff --git a/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c b/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c index 2211a1dcf01b..079ef81d06d1 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c +++ b/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c @@ -68,15 +68,12 @@ static inline void page_table_entry_set(u64 *pte, u64 phy) #endif } -static void mmu_update(struct kbase_context *kctx) +static void mmu_get_as_setup(struct kbase_context *kctx, + struct kbase_mmu_setup * const setup) { - struct kbase_device * const kbdev = kctx->kbdev; - struct kbase_as * const as = &kbdev->as[kctx->as_nr]; - struct kbase_mmu_setup * const current_setup = &as->current_setup; - /* Set up the required caching policies at the correct indices * in the memattr register. */ - current_setup->memattr = + setup->memattr = (AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY << (AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY * 8)) | (AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL << @@ -85,12 +82,21 @@ static void mmu_update(struct kbase_context *kctx) (AS_MEMATTR_INDEX_WRITE_ALLOC * 8)) | 0; /* The other indices are unused for now */ - current_setup->transtab = (u64)kctx->pgd & + setup->transtab = (u64)kctx->pgd & ((0xFFFFFFFFULL << 32) | AS_TRANSTAB_LPAE_ADDR_SPACE_MASK); - current_setup->transtab |= AS_TRANSTAB_LPAE_ADRMODE_TABLE; - current_setup->transtab |= AS_TRANSTAB_LPAE_READ_INNER; + setup->transtab |= AS_TRANSTAB_LPAE_ADRMODE_TABLE; + setup->transtab |= AS_TRANSTAB_LPAE_READ_INNER; + +} + +static void mmu_update(struct kbase_context *kctx) +{ + struct kbase_device * const kbdev = kctx->kbdev; + struct kbase_as * const as = &kbdev->as[kctx->as_nr]; + struct kbase_mmu_setup * const current_setup = &as->current_setup; + mmu_get_as_setup(kctx, current_setup); /* Apply the address space setting */ kbase_mmu_hw_configure(kbdev, as, kctx); @@ -173,6 +179,7 @@ static void entry_invalidate(u64 *entry) static struct kbase_mmu_mode const lpae_mode = { .update = mmu_update, + .get_as_setup = mmu_get_as_setup, .disable_as = mmu_disable_as, .pte_to_phy_addr = pte_to_phy_addr, .ate_is_valid = ate_is_valid, diff --git a/drivers/gpu/arm/midgard/mali_kbase_replay.c b/drivers/gpu/arm/midgard/mali_kbase_replay.c index b5c7758e1bb9..71f005e32521 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_replay.c +++ b/drivers/gpu/arm/midgard/mali_kbase_replay.c @@ -932,7 +932,7 @@ out: if (katom->event_code != BASE_JD_EVENT_DONE) { kbase_disjoint_state_down(kctx->kbdev); - need_to_try_schedule_context |= jd_done_nolock(katom); + need_to_try_schedule_context |= jd_done_nolock(katom, NULL); } if (need_to_try_schedule_context) @@ -1079,28 +1079,41 @@ bool kbase_replay_process(struct kbase_jd_atom *katom) { struct kbase_context *kctx = katom->kctx; struct kbase_jd_context *jctx = &kctx->jctx; + struct kbase_device *kbdev = kctx->kbdev; + + /* Don't replay this atom if these issues are not present in the + * hardware */ + 
if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_11020) && + !kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_11024)) { + dev_dbg(kbdev->dev, "Hardware does not need replay workaround"); + + /* Signal failure to userspace */ + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + + return false; + } if (katom->event_code == BASE_JD_EVENT_DONE) { - dev_dbg(kctx->kbdev->dev, "Previous job succeeded - not replaying\n"); + dev_dbg(kbdev->dev, "Previous job succeeded - not replaying\n"); if (katom->retry_count) - kbase_disjoint_state_down(kctx->kbdev); + kbase_disjoint_state_down(kbdev); return false; } if (jctx->sched_info.ctx.is_dying) { - dev_dbg(kctx->kbdev->dev, "Not replaying; context is dying\n"); + dev_dbg(kbdev->dev, "Not replaying; context is dying\n"); if (katom->retry_count) - kbase_disjoint_state_down(kctx->kbdev); + kbase_disjoint_state_down(kbdev); return false; } /* Check job exception type and source before replaying. */ if (!kbase_replay_fault_check(katom)) { - dev_dbg(kctx->kbdev->dev, + dev_dbg(kbdev->dev, "Replay cancelled on event %x\n", katom->event_code); /* katom->event_code is already set to the failure code of the * previous job. @@ -1108,15 +1121,15 @@ bool kbase_replay_process(struct kbase_jd_atom *katom) return false; } - dev_warn(kctx->kbdev->dev, "Replaying jobs retry=%d\n", + dev_warn(kbdev->dev, "Replaying jobs retry=%d\n", katom->retry_count); katom->retry_count++; if (katom->retry_count > BASEP_JD_REPLAY_LIMIT) { - dev_err(kctx->kbdev->dev, "Replay exceeded limit - failing jobs\n"); + dev_err(kbdev->dev, "Replay exceeded limit - failing jobs\n"); - kbase_disjoint_state_down(kctx->kbdev); + kbase_disjoint_state_down(kbdev); /* katom->event_code is already set to the failure code of the previous job */ @@ -1125,7 +1138,7 @@ bool kbase_replay_process(struct kbase_jd_atom *katom) /* only enter the disjoint state once for the whole time while the replay is ongoing */ if (katom->retry_count == 1) - kbase_disjoint_state_up(kctx->kbdev); + kbase_disjoint_state_up(kbdev); INIT_WORK(&katom->work, kbase_replay_process_worker); queue_work(kctx->event_workq, &katom->work); diff --git a/drivers/gpu/arm/midgard/mali_kbase_smc.c b/drivers/gpu/arm/midgard/mali_kbase_smc.c index aa2da3ab7e98..17455fe00d79 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_smc.c +++ b/drivers/gpu/arm/midgard/mali_kbase_smc.c @@ -20,44 +20,35 @@ #include #include +#include -static noinline u32 invoke_smc_fid(u32 function_id, u64 arg0, u64 arg1, - u64 arg2, u64 *res0, u64 *res1, u64 *res2) +static noinline u64 invoke_smc_fid(u64 function_id, + u64 arg0, u64 arg1, u64 arg2) { - /* 3 args and 3 returns are chosen arbitrarily, - see SMC calling convention for limits */ asm volatile( - "mov x0, %[fid]\n" - "mov x1, %[a0]\n" - "mov x2, %[a1]\n" - "mov x3, %[a2]\n" - "smc #0\n" - "str x0, [%[re0]]\n" - "str x1, [%[re1]]\n" - "str x2, [%[re2]]\n" - : [fid] "+r" (function_id), [a0] "+r" (arg0), - [a1] "+r" (arg1), [a2] "+r" (arg2) - : [re0] "r" (res0), [re1] "r" (res1), [re2] "r" (res2) - : "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", - "x8", "x9", "x10", "x11", "x12", "x13", - "x14", "x15", "x16", "x17"); + __asmeq("%0", "x0") + __asmeq("%1", "x1") + __asmeq("%2", "x2") + __asmeq("%3", "x3") + "smc #0\n" + : "+r" (function_id) + : "r" (arg0), "r" (arg1), "r" (arg2)); + return function_id; } -void kbase_invoke_smc_fid(u32 fid) +u64 kbase_invoke_smc_fid(u32 fid, u64 arg0, u64 arg1, u64 arg2) { - u64 res0, res1, res2; - /* Is fast call (bit 31 set) */ KBASE_DEBUG_ASSERT(fid & ~SMC_FAST_CALL); /* bits 16-23 must be zero for 
fast calls */ KBASE_DEBUG_ASSERT((fid & (0xFF << 16)) == 0); - invoke_smc_fid(fid, 0, 0, 0, &res0, &res1, &res2); + return invoke_smc_fid(fid, arg0, arg1, arg2); } -void kbase_invoke_smc(u32 oen, u16 function_number, u64 arg0, u64 arg1, - u64 arg2, u64 *res0, u64 *res1, u64 *res2) +u64 kbase_invoke_smc(u32 oen, u16 function_number, bool smc64, + u64 arg0, u64 arg1, u64 arg2) { u32 fid = 0; @@ -65,12 +56,13 @@ void kbase_invoke_smc(u32 oen, u16 function_number, u64 arg0, u64 arg1, KBASE_DEBUG_ASSERT((oen & ~SMC_OEN_MASK) == 0); fid |= SMC_FAST_CALL; /* Bit 31: Fast call */ - /* Bit 30: 1=SMC64, 0=SMC32 */ + if (smc64) + fid |= SMC_64; /* Bit 30: 1=SMC64, 0=SMC32 */ fid |= oen; /* Bit 29:24: OEN */ /* Bit 23:16: Must be zero for fast calls */ fid |= (function_number); /* Bit 15:0: function number */ - invoke_smc_fid(fid, arg0, arg1, arg2, res0, res1, res2); + return kbase_invoke_smc_fid(fid, arg0, arg1, arg2); } #endif /* CONFIG_ARM64 */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_smc.h b/drivers/gpu/arm/midgard/mali_kbase_smc.h index 2d0f5086f754..9bff3d2e8b4d 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_smc.h +++ b/drivers/gpu/arm/midgard/mali_kbase_smc.h @@ -25,6 +25,7 @@ #include #define SMC_FAST_CALL (1 << 31) +#define SMC_64 (1 << 30) #define SMC_OEN_OFFSET 24 #define SMC_OEN_MASK (0x3F << SMC_OEN_OFFSET) /* 6 bits */ @@ -33,25 +34,33 @@ /** - * kbase_invoke_smc_fid - Does a secure monitor call with the given function_id - * @function_id: The SMC function to call, see SMC Calling convention. + * kbase_invoke_smc_fid - Perform a secure monitor call + * @fid: The SMC function to call, see SMC Calling convention. + * @arg0: First argument to the SMC. + * @arg1: Second argument to the SMC. + * @arg2: Third argument to the SMC. + * + * See SMC Calling Convention for details. + * + * Return: the return value from the SMC. */ -void kbase_invoke_smc_fid(u32 function_id); +u64 kbase_invoke_smc_fid(u32 fid, u64 arg0, u64 arg1, u64 arg2); /** - * kbase_invoke_smc_fid - Does a secure monitor call with the given parameters. - * see SMC Calling Convention for details + * kbase_invoke_smc_fid - Perform a secure monitor call * @oen: Owning Entity number (SIP, STD etc). - * @function_number: ID specifiy which function within the OEN. - * @arg0: argument 0 to pass in the SMC call. - * @arg1: argument 1 to pass in the SMC call. - * @arg2: argument 2 to pass in the SMC call. - * @res0: result 0 returned from the SMC call. - * @res1: result 1 returned from the SMC call. - * @res2: result 2 returned from the SMC call. + * @function_number: The function number within the OEN. + * @smc64: use SMC64 calling convention instead of SMC32. + * @arg0: First argument to the SMC. + * @arg1: Second argument to the SMC. + * @arg2: Third argument to the SMC. + * + * See SMC Calling Convention for details. + * + * Return: the return value from the SMC call. 
*/ -void kbase_invoke_smc(u32 oen, u16 function_number, u64 arg0, u64 arg1, - u64 arg2, u64 *res0, u64 *res1, u64 *res2); +u64 kbase_invoke_smc(u32 oen, u16 function_number, bool smc64, + u64 arg0, u64 arg1, u64 arg2); #endif /* CONFIG_ARM64 */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_softjobs.c b/drivers/gpu/arm/midgard/mali_kbase_softjobs.c index 72b682a11e9b..108c49d9150a 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_softjobs.c +++ b/drivers/gpu/arm/midgard/mali_kbase_softjobs.c @@ -139,7 +139,7 @@ static void complete_soft_job(struct kbase_jd_atom *katom) mutex_lock(&kctx->jctx.lock); list_del(&katom->dep_item[0]); kbase_finish_soft_job(katom); - if (jd_done_nolock(katom)) + if (jd_done_nolock(katom, NULL)) kbase_js_sched_all(kctx->kbdev); mutex_unlock(&kctx->jctx.lock); } @@ -278,7 +278,7 @@ finish_softjob: kbase_finish_soft_job(katom); - if (jd_done_nolock(katom)) + if (jd_done_nolock(katom, NULL)) kbase_js_sched_all(katom->kctx->kbdev); } #endif /* CONFIG_SYNC */ @@ -443,7 +443,7 @@ void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev) if (kbase_process_soft_job(katom_iter) == 0) { kbase_finish_soft_job(katom_iter); - resched |= jd_done_nolock(katom_iter); + resched |= jd_done_nolock(katom_iter, NULL); } else { /* The job has not completed */ KBASE_DEBUG_ASSERT((katom_iter->core_req & diff --git a/drivers/gpu/arm/midgard/mali_kbase_tlstream.c b/drivers/gpu/arm/midgard/mali_kbase_tlstream.c index 871dc316c99c..99428d1e660e 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_tlstream.c +++ b/drivers/gpu/arm/midgard/mali_kbase_tlstream.c @@ -126,15 +126,26 @@ enum tl_msg_id { KBASE_TL_NEW_GPU, KBASE_TL_NEW_LPU, KBASE_TL_NEW_ATOM, + KBASE_TL_NEW_AS, KBASE_TL_DEL_CTX, KBASE_TL_DEL_ATOM, KBASE_TL_LIFELINK_LPU_GPU, + KBASE_TL_LIFELINK_AS_GPU, KBASE_TL_RET_GPU_CTX, KBASE_TL_RET_ATOM_CTX, KBASE_TL_RET_ATOM_LPU, KBASE_TL_NRET_GPU_CTX, KBASE_TL_NRET_ATOM_CTX, KBASE_TL_NRET_ATOM_LPU, + KBASE_TL_RET_AS_CTX, + KBASE_TL_NRET_AS_CTX, + KBASE_TL_RET_ATOM_AS, + KBASE_TL_NRET_ATOM_AS, + KBASE_TL_ATTRIB_ATOM_CONFIG, + KBASE_TL_ATTRIB_AS_CONFIG, + + /* Job dump specific events (part of timeline stream). */ + KBASE_JD_GPU_SOFT_RESET, /* Timeline non-object events. 
*/ KBASE_AUX_PM_STATE, @@ -279,12 +290,19 @@ static const struct tp_desc tp_desc_obj[] = { "@pI", "atom,atom_nr" }, + { + KBASE_TL_NEW_AS, + __stringify(KBASE_TL_NEW_AS), + "address space object is created", + "@pI", + "address_space,as_nr" + }, { KBASE_TL_DEL_CTX, __stringify(KBASE_TL_DEL_CTX), "context is destroyed", "@p", - "context" + "ctx" }, { KBASE_TL_DEL_ATOM, @@ -300,6 +318,13 @@ static const struct tp_desc tp_desc_obj[] = { "@pp", "lpu,gpu" }, + { + KBASE_TL_LIFELINK_AS_GPU, + __stringify(KBASE_TL_LIFELINK_AS_GPU), + "address space is deleted with gpu", + "@pp", + "address_space,gpu" + }, { KBASE_TL_RET_GPU_CTX, __stringify(KBASE_TL_RET_GPU_CTX), @@ -342,6 +367,55 @@ static const struct tp_desc tp_desc_obj[] = { "@pp", "atom,lpu" }, + { + KBASE_TL_RET_AS_CTX, + __stringify(KBASE_TL_RET_AS_CTX), + "address space is retained by context", + "@pp", + "address_space,ctx" + }, + { + KBASE_TL_NRET_AS_CTX, + __stringify(KBASE_TL_NRET_AS_CTX), + "address space is released by context", + "@pp", + "address_space,ctx" + }, + { + KBASE_TL_RET_ATOM_AS, + __stringify(KBASE_TL_RET_ATOM_AS), + "atom is retained by address space", + "@pp", + "atom,address_space" + }, + { + KBASE_TL_NRET_ATOM_AS, + __stringify(KBASE_TL_NRET_ATOM_AS), + "atom is released by address space", + "@pp", + "atom,address_space" + }, + { + KBASE_TL_ATTRIB_ATOM_CONFIG, + __stringify(KBASE_TL_ATTRIB_ATOM_CONFIG), + "atom job slot attributes", + "@pLLI", + "atom,descriptor,affinity,config" + }, + { + KBASE_TL_ATTRIB_AS_CONFIG, + __stringify(KBASE_TL_ATTRIB_AS_CONFIG), + "address space attributes", + "@pLLL", + "address_space,transtab,memattr,transcfg" + }, + { + KBASE_JD_GPU_SOFT_RESET, + __stringify(KBASE_JD_GPU_SOFT_RESET), + "gpu soft reset", + "@p", + "gpu" + }, }; /* Descriptors of timeline messages transmitted in auxiliary events stream. 
*/ @@ -1349,6 +1423,56 @@ void kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags); } +void kbase_tlstream_tl_summary_new_as(void *as, u32 nr) +{ + const u32 msg_id = KBASE_TL_NEW_AS; + const size_t msg_size = + sizeof(msg_id) + sizeof(u64) + sizeof(as) + sizeof(nr); + unsigned long flags; + char *buffer; + size_t pos = 0; + + buffer = kbasep_tlstream_msgbuf_acquire( + TL_STREAM_TYPE_OBJ_SUMMARY, + msg_size, &flags); + KBASE_DEBUG_ASSERT(buffer); + + pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_tlstream_write_timestamp(buffer, pos); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &as, sizeof(as)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &nr, sizeof(nr)); + KBASE_DEBUG_ASSERT(msg_size == pos); + + kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags); +} + +void kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu) +{ + const u32 msg_id = KBASE_TL_LIFELINK_AS_GPU; + const size_t msg_size = + sizeof(msg_id) + sizeof(u64) + sizeof(as) + sizeof(gpu); + unsigned long flags; + char *buffer; + size_t pos = 0; + + buffer = kbasep_tlstream_msgbuf_acquire( + TL_STREAM_TYPE_OBJ_SUMMARY, + msg_size, &flags); + KBASE_DEBUG_ASSERT(buffer); + + pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_tlstream_write_timestamp(buffer, pos); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &as, sizeof(as)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &gpu, sizeof(gpu)); + KBASE_DEBUG_ASSERT(msg_size == pos); + + kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags); +} + /*****************************************************************************/ void kbase_tlstream_tl_new_ctx(void *context, u32 nr) @@ -1597,6 +1721,191 @@ void kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } +void kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx) +{ + const u32 msg_id = KBASE_TL_RET_AS_CTX; + const size_t msg_size = + sizeof(msg_id) + sizeof(u64) + sizeof(as) + sizeof(ctx); + unsigned long flags; + char *buffer; + size_t pos = 0; + + buffer = kbasep_tlstream_msgbuf_acquire( + TL_STREAM_TYPE_OBJ, + msg_size, &flags); + KBASE_DEBUG_ASSERT(buffer); + + pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_tlstream_write_timestamp(buffer, pos); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &as, sizeof(as)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &ctx, sizeof(ctx)); + KBASE_DEBUG_ASSERT(msg_size == pos); + + kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); +} + +void kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx) +{ + const u32 msg_id = KBASE_TL_NRET_AS_CTX; + const size_t msg_size = + sizeof(msg_id) + sizeof(u64) + sizeof(as) + sizeof(ctx); + unsigned long flags; + char *buffer; + size_t pos = 0; + + buffer = kbasep_tlstream_msgbuf_acquire( + TL_STREAM_TYPE_OBJ, + msg_size, &flags); + KBASE_DEBUG_ASSERT(buffer); + + pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_tlstream_write_timestamp(buffer, pos); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &as, sizeof(as)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &ctx, sizeof(ctx)); + KBASE_DEBUG_ASSERT(msg_size == pos); + + kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); +} + +void kbase_tlstream_tl_ret_atom_as(void *atom, void *as) +{ + const u32 msg_id = 
KBASE_TL_RET_ATOM_AS; + const size_t msg_size = + sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(as); + unsigned long flags; + char *buffer; + size_t pos = 0; + + buffer = kbasep_tlstream_msgbuf_acquire( + TL_STREAM_TYPE_OBJ, + msg_size, &flags); + KBASE_DEBUG_ASSERT(buffer); + + pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_tlstream_write_timestamp(buffer, pos); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &atom, sizeof(atom)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &as, sizeof(as)); + KBASE_DEBUG_ASSERT(msg_size == pos); + + kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); +} + +void kbase_tlstream_tl_nret_atom_as(void *atom, void *as) +{ + const u32 msg_id = KBASE_TL_NRET_ATOM_AS; + const size_t msg_size = + sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(as); + unsigned long flags; + char *buffer; + size_t pos = 0; + + buffer = kbasep_tlstream_msgbuf_acquire( + TL_STREAM_TYPE_OBJ, + msg_size, &flags); + KBASE_DEBUG_ASSERT(buffer); + + pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_tlstream_write_timestamp(buffer, pos); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &atom, sizeof(atom)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &as, sizeof(as)); + KBASE_DEBUG_ASSERT(msg_size == pos); + + kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); +} + +void kbase_tlstream_tl_attrib_atom_config( + void *atom, u64 jd, u64 affinity, u32 config) +{ + const u32 msg_id = KBASE_TL_ATTRIB_ATOM_CONFIG; + const size_t msg_size = + sizeof(msg_id) + sizeof(u64) + sizeof(atom) + + sizeof(jd) + sizeof(affinity) + sizeof(config); + unsigned long flags; + char *buffer; + size_t pos = 0; + + buffer = kbasep_tlstream_msgbuf_acquire( + TL_STREAM_TYPE_OBJ, + msg_size, &flags); + KBASE_DEBUG_ASSERT(buffer); + + pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_tlstream_write_timestamp(buffer, pos); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &atom, sizeof(atom)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &jd, sizeof(jd)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &affinity, sizeof(affinity)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &config, sizeof(config)); + KBASE_DEBUG_ASSERT(msg_size == pos); + + kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); +} + +void kbase_tlstream_tl_attrib_as_config( + void *as, u64 transtab, u64 memattr, u64 transcfg) +{ + const u32 msg_id = KBASE_TL_ATTRIB_AS_CONFIG; + const size_t msg_size = + sizeof(msg_id) + sizeof(u64) + sizeof(as) + + sizeof(transtab) + sizeof(memattr) + sizeof(transcfg); + unsigned long flags; + char *buffer; + size_t pos = 0; + + buffer = kbasep_tlstream_msgbuf_acquire( + TL_STREAM_TYPE_OBJ, + msg_size, &flags); + KBASE_DEBUG_ASSERT(buffer); + + pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_tlstream_write_timestamp(buffer, pos); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &as, sizeof(as)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &transtab, sizeof(transtab)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &memattr, sizeof(memattr)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &transcfg, sizeof(transcfg)); + KBASE_DEBUG_ASSERT(msg_size == pos); + + kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); +} + +void kbase_tlstream_jd_gpu_soft_reset(void *gpu) +{ + const u32 msg_id = KBASE_JD_GPU_SOFT_RESET; + const size_t msg_size = + sizeof(msg_id) + 
sizeof(u64) + sizeof(gpu); + unsigned long flags; + char *buffer; + size_t pos = 0; + + buffer = kbasep_tlstream_msgbuf_acquire( + TL_STREAM_TYPE_OBJ, + msg_size, &flags); + KBASE_DEBUG_ASSERT(buffer); + + pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_tlstream_write_timestamp(buffer, pos); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &gpu, sizeof(gpu)); + KBASE_DEBUG_ASSERT(msg_size == pos); + + kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); +} + /*****************************************************************************/ void kbase_tlstream_aux_pm_state(u32 core_type, u64 state) diff --git a/drivers/gpu/arm/midgard/mali_kbase_tlstream.h b/drivers/gpu/arm/midgard/mali_kbase_tlstream.h index 494f5c784f69..30171044ee1d 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_tlstream.h +++ b/drivers/gpu/arm/midgard/mali_kbase_tlstream.h @@ -151,6 +151,29 @@ void kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn); */ void kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu); +/** + * kbase_tlstream_tl_summary_new_as - create address space object in timeline summary + * @as: name of the address space object + * @nr: sequential number assigned to this address space + * + * Function emits a timeline message informing about address space creation. + * Address space is created with one attribute: number identifying this + * address space. + * This message is directed to timeline summary stream. + */ +void kbase_tlstream_tl_summary_new_as(void *as, u32 nr); + +/** + * kbase_tlstream_tl_summary_lifelink_as_gpu - lifelink address space object to GPU + * @as: name of the address space object + * @gpu: name of the GPU object + * + * Function emits a timeline message informing that address space object + * shall be deleted along with GPU object. + * This message is directed to timeline summary stream. + */ +void kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu); + /** * kbase_tlstream_tl_new_ctx - create context object in timeline * @context: name of the context object @@ -251,6 +274,79 @@ void kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context); */ void kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu); +/** + * kbase_tlstream_tl_ret_as_ctx - lifelink address space object to context + * @as: name of the address space object + * @ctx: name of the context object + * + * Function emits a timeline message informing that address space object + * is being held by the context object. + */ +void kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx); + +/** + * kbase_tlstream_tl_nret_as_ctx - release address space by context + * @as: name of the address space object + * @ctx: name of the context object + * + * Function emits a timeline message informing that address space object + * is being released by atom. + */ +void kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx); + +/** + * kbase_tlstream_tl_ret_atom_as - retain atom by address space + * @atom: name of the atom object + * @as: name of the address space object + * + * Function emits a timeline message informing that atom object is being held + * by address space and must not be deleted unless it is released. + */ +void kbase_tlstream_tl_ret_atom_as(void *atom, void *as); + +/** + * kbase_tlstream_tl_nret_atom_as - release atom by address space + * @atom: name of the atom object + * @as: name of the address space object + * + * Function emits a timeline message informing that atom object is being + * released by address space. 
+ */ +void kbase_tlstream_tl_nret_atom_as(void *atom, void *as); + +/** + * kbase_tlstream_tl_attrib_atom_config - atom job slot attributes + * @atom: name of the atom object + * @jd: job descriptor address + * @affinity: job affinity + * @config: job config + * + * Function emits a timeline message containing atom attributes. + */ +void kbase_tlstream_tl_attrib_atom_config( + void *atom, u64 jd, u64 affinity, u32 config); + +/** + * kbase_tlstream_tl_attrib_as_config - address space attributes + * @as: assigned address space + * @transtab: configuration of the TRANSTAB register + * @memattr: configuration of the MEMATTR register + * @transcfg: configuration of the TRANSCFG register (or zero if not present) + * + * Function emits a timeline message containing address space attributes. + */ +void kbase_tlstream_tl_attrib_as_config( + void *as, u64 transtab, u64 memattr, u64 transcfg); + +/** + * kbase_tlstream_jd_gpu_soft_reset - The GPU is being soft reset + * @gpu: name of the GPU object + * + * This imperative tracepoint is specific to job dumping. + * Function emits a timeline message indicating GPU soft reset. + */ +void kbase_tlstream_jd_gpu_soft_reset(void *gpu); + /** * kbase_tlstream_aux_pm_state - timeline message: power management state * @core_type: core type (shader, tiler, l2 cache, l3 cache) diff --git a/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h index 6a3cd407e4a6..d92caf054804 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h +++ b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h @@ -213,6 +213,7 @@ void kbasep_trace_timeline_debugfs_init(struct kbase_device *kbdev); js, _producerof_atom_number_completed); \ } while (0) + /** Trace beginning/end of a call to kbase_pm_check_transitions_nolock from a * certin caller */ #define KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code) \ @@ -318,6 +319,7 @@ void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev); #define KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, count) CSTD_NOP() + static inline void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom, int js) { diff --git a/drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h index a7dc3faf4c7f..156a95a67f4a 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h +++ b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -130,5 +130,11 @@ /* * HW Events */ - KBASE_TIMELINE_TRACE_CODE(HW_START_GPU_JOB_CHAIN_SW_APPROX, "HW: Job Chain start (SW approximated)", "%d,%d,%d", "_tgid,job_slot,_consumerof_atom_number_ready"), - KBASE_TIMELINE_TRACE_CODE(HW_STOP_GPU_JOB_CHAIN_SW_APPROX, "HW: Job Chain stop (SW approximated)", "%d,%d,%d", "_tgid,job_slot,_producerof_atom_number_completed") + KBASE_TIMELINE_TRACE_CODE(HW_MMU_FAULT, +"HW: MMU Fault", "%d,%d,%d", "_tgid,fault_type,fault_stage,asid"), + KBASE_TIMELINE_TRACE_CODE(HW_START_GPU_JOB_CHAIN_SW_APPROX, +"HW: Job Chain start (SW approximated)", "%d,%d,%d", +"_tgid,job_slot,_consumerof_atom_number_ready"), + KBASE_TIMELINE_TRACE_CODE(HW_STOP_GPU_JOB_CHAIN_SW_APPROX, +"HW: Job Chain stop (SW approximated)", "%d,%d,%d", +"_tgid,job_slot,_producerof_atom_number_completed") diff --git a/drivers/gpu/arm/midgard/mali_kbase_uku.h b/drivers/gpu/arm/midgard/mali_kbase_uku.h index c5ebdd21d799..5943acb19874 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_uku.h +++ b/drivers/gpu/arm/midgard/mali_kbase_uku.h @@ -41,7 +41,7 @@ #include "mali_kbase_gpuprops_types.h" -#define BASE_UK_VERSION_MAJOR 9 +#define BASE_UK_VERSION_MAJOR 10 #define BASE_UK_VERSION_MINOR 0 struct kbase_uk_mem_alloc { @@ -112,10 +112,6 @@ struct kbase_uk_post_term { union uk_header header; }; -struct kbase_uk_dump_fault_term { - union uk_header header; -}; - struct kbase_uk_sync_now { union uk_header header; @@ -139,6 +135,33 @@ struct kbase_uk_hwcnt_setup { /* OUT */ }; +/** + * struct kbase_uk_hwcnt_reader_setup - User/Kernel space data exchange structure + * @header: UK structure header + * @buffer_count: requested number of dumping buffers + * @jm_bm: counters selection bitmask (JM) + * @shader_bm: counters selection bitmask (Shader) + * @tiler_bm: counters selection bitmask (Tiler) + * @mmu_l2_bm: counters selection bitmask (MMU_L2) + * @fd: dumping notification file descriptor + * + * This structure sets up HWC dumper/reader for this context. + * Multiple instances can be created for single context. + */ +struct kbase_uk_hwcnt_reader_setup { + union uk_header header; + + /* IN */ + u32 buffer_count; + u32 jm_bm; + u32 shader_bm; + u32 tiler_bm; + u32 mmu_l2_bm; + + /* OUT */ + s32 fd; +}; + struct kbase_uk_hwcnt_dump { union uk_header header; }; @@ -164,137 +187,6 @@ struct kbase_uk_stream_create { u32 padding; }; -#ifdef BASE_LEGACY_UK7_SUPPORT -/** - * This structure is kept for the backward compatibility reasons. - * It shall be removed as soon as KBASE_FUNC_CPU_PROPS_REG_DUMP_OBSOLETE - * (previously KBASE_FUNC_CPU_PROPS_REG_DUMP) ioctl call - * is removed. Removal of KBASE_FUNC_CPU_PROPS_REG_DUMP is part of having - * the function for reading cpu properties moved from base to osu. - */ -#define BASE_CPU_PROPERTY_FLAG_LITTLE_ENDIAN ((u32)0x00000001) -struct base_cpu_id_props { - /** - * CPU ID - */ - u32 id; - - /** - * CPU Part number - */ - u16 part; - /** - * ASCII code of implementer trademark - */ - u8 implementer; - - /** - * CPU Variant - */ - u8 variant; - /** - * CPU Architecture - */ - u8 arch; - - /** - * CPU revision - */ - u8 rev; - - /** - * Validity of CPU id where 0-invalid and - * 1-valid only if ALL the cpu_id props are valid - */ - u8 valid; - - u8 padding[1]; -}; - -/** - * This structure is kept for the backward compatibility reasons. 
- * It shall be removed as soon as KBASE_FUNC_CPU_PROPS_REG_DUMP_OBSOLETE - * (previously KBASE_FUNC_CPU_PROPS_REG_DUMP) ioctl call - * is removed. Removal of KBASE_FUNC_CPU_PROPS_REG_DUMP is part of having - * the function for reading cpu properties moved from base to osu. - */ -struct base_cpu_props { - u32 nr_cores; /**< Number of CPU cores */ - - /** - * CPU page size as a Logarithm to Base 2. The compile-time - * equivalent is @ref OSU_CONFIG_CPU_PAGE_SIZE_LOG2 - */ - u32 cpu_page_size_log2; - - /** - * CPU L1 Data cache line size as a Logarithm to Base 2. The compile-time - * equivalent is @ref OSU_CONFIG_CPU_L1_DCACHE_LINE_SIZE_LOG2. - */ - u32 cpu_l1_dcache_line_size_log2; - - /** - * CPU L1 Data cache size, in bytes. The compile-time equivalient is - * @ref OSU_CONFIG_CPU_L1_DCACHE_SIZE. - * - * This CPU Property is mainly provided to implement OpenCL's - * clGetDeviceInfo(), which allows the CL_DEVICE_GLOBAL_MEM_CACHE_SIZE - * hint to be queried. - */ - u32 cpu_l1_dcache_size; - - /** - * CPU Property Flags bitpattern. - * - * This is a combination of bits as specified by the macros prefixed with - * 'BASE_CPU_PROPERTY_FLAG_'. - */ - u32 cpu_flags; - - /** - * Maximum clock speed in MHz. - * @usecase 'Maximum' CPU Clock Speed information is required by OpenCL's - * clGetDeviceInfo() function for the CL_DEVICE_MAX_CLOCK_FREQUENCY hint. - */ - u32 max_cpu_clock_speed_mhz; - - /** - * @brief Total memory, in bytes. - * - * This is the theoretical maximum memory available to the CPU. It is - * unlikely that a client will be able to allocate all of this memory for - * their own purposes, but this at least provides an upper bound on the - * memory available to the CPU. - * - * This is required for OpenCL's clGetDeviceInfo() call when - * CL_DEVICE_GLOBAL_MEM_SIZE is requested, for OpenCL CPU devices. - */ - u64 available_memory_size; - - /** - * CPU ID detailed info - */ - struct base_cpu_id_props cpu_id; - - u32 padding; -}; - -/** - * This structure is kept for the backward compatibility reasons. - * It shall be removed as soon as KBASE_FUNC_CPU_PROPS_REG_DUMP_OBSOLETE - * (previously KBASE_FUNC_CPU_PROPS_REG_DUMP) ioctl call - * is removed. Removal of KBASE_FUNC_CPU_PROPS_REG_DUMP is part of having - * the function for reading cpu properties moved from base to osu. 
- */ -struct kbase_uk_cpuprops { - union uk_header header; - - /* IN */ - struct base_cpu_props props; - /* OUT */ -}; -#endif /* BASE_LEGACY_UK7_SUPPORT */ - struct kbase_uk_gpuprops { union uk_header header; @@ -405,6 +297,14 @@ struct kbase_uk_ext_buff_kds_data { u32 padding; }; +#ifdef BASE_LEGACY_UK8_SUPPORT +struct kbase_uk_keep_gpu_powered { + union uk_header header; + u32 enabled; + u32 padding; +}; +#endif /* BASE_LEGACY_UK8_SUPPORT */ + struct kbase_uk_profiling_controls { union uk_header header; u32 profiling_controls[FBDUMP_CONTROL_MAX]; @@ -514,9 +414,6 @@ enum kbase_uk_function_id { KBASE_FUNC_HWCNT_DUMP = (UK_FUNC_ID + 11), KBASE_FUNC_HWCNT_CLEAR = (UK_FUNC_ID + 12), -#ifdef BASE_LEGACY_UK7_SUPPORT - KBASE_FUNC_CPU_PROPS_REG_DUMP_OBSOLETE = (UK_FUNC_ID + 13), -#endif /* BASE_LEGACY_UK7_SUPPORT */ KBASE_FUNC_GPU_PROPS_REG_DUMP = (UK_FUNC_ID + 14), KBASE_FUNC_FIND_CPU_OFFSET = (UK_FUNC_ID + 15), @@ -529,6 +426,10 @@ enum kbase_uk_function_id { KBASE_FUNC_INJECT_ERROR = (UK_FUNC_ID + 20), KBASE_FUNC_MODEL_CONTROL = (UK_FUNC_ID + 21), +#ifdef BASE_LEGACY_UK8_SUPPORT + KBASE_FUNC_KEEP_GPU_POWERED = (UK_FUNC_ID + 22), +#endif /* BASE_LEGACY_UK8_SUPPORT */ + KBASE_FUNC_FENCE_VALIDATE = (UK_FUNC_ID + 23), KBASE_FUNC_STREAM_CREATE = (UK_FUNC_ID + 24), KBASE_FUNC_GET_PROFILING_CONTROLS = (UK_FUNC_ID + 25), @@ -541,8 +442,6 @@ enum kbase_uk_function_id { KBASE_FUNC_JOB_SUBMIT = (UK_FUNC_ID + 28), KBASE_FUNC_DISJOINT_QUERY = (UK_FUNC_ID + 29), - KBASE_FUNC_DUMP_FAULT_TERM = (UK_FUNC_ID + 30), - KBASE_FUNC_GET_CONTEXT_ID = (UK_FUNC_ID + 31), #if (defined(MALI_KTLSTREAM_ENABLED) && MALI_KTLSTREAM_ENABLED) || \ @@ -555,6 +454,8 @@ enum kbase_uk_function_id { KBASE_FUNC_TLSTREAM_FLUSH = (UK_FUNC_ID + 35), #endif /* MALI_KTLSTREAM_ENABLED */ + KBASE_FUNC_HWCNT_READER_SETUP = (UK_FUNC_ID + 36), + KBASE_FUNC_MAX }; diff --git a/drivers/gpu/arm/midgard/mali_kbase_vinstr.c b/drivers/gpu/arm/midgard/mali_kbase_vinstr.c index d1c68702e9c6..bfa8bfa0e14f 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_vinstr.c +++ b/drivers/gpu/arm/midgard/mali_kbase_vinstr.c @@ -15,247 +15,530 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + #include +#include #include +/*****************************************************************************/ + +/* Hwcnt reader API version */ +#define HWCNT_READER_API 1 + +/* The number of nanoseconds in a second. */ +#define NSECS_IN_SEC 1000000000ull /* ns */ + +/* The time resolution of dumping service. */ +#define DUMPING_RESOLUTION 500000ull /* ns */ + +/* The maximal supported number of dumping buffers. */ +#define MAX_BUFFER_COUNT 32 + +/* Size and number of hw counters blocks. 
*/ #define NR_CNT_BLOCKS_PER_GROUP 8 #define NR_CNT_PER_BLOCK 64 #define NR_BYTES_PER_CNT 4 #define NR_BYTES_PER_HDR 16 #define PRFCNT_EN_MASK_OFFSET 0x8 +/*****************************************************************************/ + +enum { + SHADER_HWCNT_BM, + TILER_HWCNT_BM, + MMU_L2_HWCNT_BM, + JM_HWCNT_BM +}; + +/** + * struct kbase_vinstr_context - vinstr context per device + * @lock: protects the entire vinstr context + * @kbdev: pointer to kbase device + * @kctx: pointer to kbase context + * @vmap: vinstr vmap for mapping hwcnt dump buffer + * @gpu_va: GPU hwcnt dump buffer address + * @cpu_va: the CPU side mapping of the hwcnt dump buffer + * @dump_size: size of the dump buffer in bytes + * @bitmap: current set of counters monitored, not always in sync + * with hardware + * @reprogram: when true, reprogram hwcnt block with the new set of + * counters + * @suspended: when true, the context has been suspended + * @nclients: number of attached clients, pending or otherwise + * @waiting_clients: head of list of clients being periodically sampled + * @idle_clients: head of list of clients being idle + * @suspended_clients: head of list of clients being suspended + * @thread: periodic sampling thread + * @waitq: notification queue of sampling thread + * @request_pending: request for action for sampling thread + */ struct kbase_vinstr_context { - struct kbase_device *kbdev; - struct kbase_context *kctx; + struct mutex lock; + struct kbase_device *kbdev; + struct kbase_context *kctx; + struct kbase_vmap_struct vmap; - struct mutex lock; - u64 gpu_va; - void *cpu_va; - size_t dump_size; - u32 nclients; - struct list_head clients; + u64 gpu_va; + void *cpu_va; + size_t dump_size; + u32 bitmap[4]; + bool reprogram; + bool suspended; + + u32 nclients; + struct list_head waiting_clients; + struct list_head idle_clients; + struct list_head suspended_clients; + + struct task_struct *thread; + wait_queue_head_t waitq; + atomic_t request_pending; }; +/** + * struct kbase_vinstr_client - a vinstr client attached to a vinstr context + * @vinstr_ctx: vinstr context client is attached to + * @list: node used to attach this client to list in vinstr context + * @buffer_count: number of buffers this client is using + * @event_mask: events this client reacts to + * @dump_size: size of one dump buffer in bytes + * @bitmap: bitmap request for JM, TILER, SHADER and MMU counters + * @legacy_buffer: userspace hwcnt dump buffer (legacy interface) + * @accum_buffer: temporary accumulation buffer for preserving counters + * @dump_time: next time this clients shall request hwcnt dump + * @dump_interval: interval between periodic hwcnt dumps + * @dump_buffers: kernel hwcnt dump buffers allocated by this client + * @dump_buffers_meta: metadata of dump buffers + * @meta_idx: index of metadata being accessed by userspace + * @read_idx: index of buffer read by userspace + * @write_idx: index of buffer being written by dumping service + * @waitq: client's notification queue + * @pending: when true, client has attached but hwcnt not yet updated + */ struct kbase_vinstr_client { - bool kernel; - void *dump_buffer; - u32 bitmap[4]; - void *accum_buffer; - size_t dump_size; - struct list_head list; + struct kbase_vinstr_context *vinstr_ctx; + struct list_head list; + unsigned int buffer_count; + u32 event_mask; + size_t dump_size; + u32 bitmap[4]; + void __user *legacy_buffer; + void *accum_buffer; + u64 dump_time; + u32 dump_interval; + char *dump_buffers; + struct kbase_hwcnt_reader_metadata *dump_buffers_meta; + 
atomic_t meta_idx; + atomic_t read_idx; + atomic_t write_idx; + wait_queue_head_t waitq; + bool pending; }; -static int map_kernel_dump_buffer(struct kbase_vinstr_context *ctx) +/** + * struct kbasep_vinstr_wake_up_timer - vinstr service thread wake up timer + * @hrtimer: high resolution timer + * @vinstr_ctx: vinstr context + */ +struct kbasep_vinstr_wake_up_timer { + struct hrtimer hrtimer; + struct kbase_vinstr_context *vinstr_ctx; +}; + +/*****************************************************************************/ + +static int kbasep_vinstr_service_task(void *data); + +static unsigned int kbasep_vinstr_hwcnt_reader_poll( + struct file *filp, + poll_table *wait); +static long kbasep_vinstr_hwcnt_reader_ioctl( + struct file *filp, + unsigned int cmd, + unsigned long arg); +static int kbasep_vinstr_hwcnt_reader_mmap( + struct file *filp, + struct vm_area_struct *vma); +static int kbasep_vinstr_hwcnt_reader_release( + struct inode *inode, + struct file *filp); + +/* The timeline stream file operations structure. */ +static const struct file_operations vinstr_client_fops = { + .poll = kbasep_vinstr_hwcnt_reader_poll, + .unlocked_ioctl = kbasep_vinstr_hwcnt_reader_ioctl, + .compat_ioctl = kbasep_vinstr_hwcnt_reader_ioctl, + .mmap = kbasep_vinstr_hwcnt_reader_mmap, + .release = kbasep_vinstr_hwcnt_reader_release, +}; + +/*****************************************************************************/ + +static int enable_hwcnt(struct kbase_vinstr_context *vinstr_ctx) +{ + struct kbase_uk_hwcnt_setup setup; + + setup.dump_buffer = vinstr_ctx->gpu_va; + setup.jm_bm = vinstr_ctx->bitmap[JM_HWCNT_BM]; + setup.tiler_bm = vinstr_ctx->bitmap[TILER_HWCNT_BM]; + setup.shader_bm = vinstr_ctx->bitmap[SHADER_HWCNT_BM]; + setup.mmu_l2_bm = vinstr_ctx->bitmap[MMU_L2_HWCNT_BM]; + + return kbase_instr_hwcnt_enable(vinstr_ctx->kctx, &setup); +} + +static void disable_hwcnt(struct kbase_vinstr_context *vinstr_ctx) +{ + kbase_instr_hwcnt_disable(vinstr_ctx->kctx); +} + +static int reprogram_hwcnt(struct kbase_vinstr_context *vinstr_ctx) +{ + disable_hwcnt(vinstr_ctx); + return enable_hwcnt(vinstr_ctx); +} + +static void hwcnt_bitmap_set(u32 dst[4], u32 src[4]) +{ + dst[JM_HWCNT_BM] = src[JM_HWCNT_BM]; + dst[TILER_HWCNT_BM] = src[TILER_HWCNT_BM]; + dst[SHADER_HWCNT_BM] = src[SHADER_HWCNT_BM]; + dst[MMU_L2_HWCNT_BM] = src[MMU_L2_HWCNT_BM]; +} + +static void hwcnt_bitmap_union(u32 dst[4], u32 src[4]) +{ + dst[JM_HWCNT_BM] |= src[JM_HWCNT_BM]; + dst[TILER_HWCNT_BM] |= src[TILER_HWCNT_BM]; + dst[SHADER_HWCNT_BM] |= src[SHADER_HWCNT_BM]; + dst[MMU_L2_HWCNT_BM] |= src[MMU_L2_HWCNT_BM]; +} + +static size_t kbasep_vinstr_dump_size(struct kbase_vinstr_context *vinstr_ctx) +{ + struct kbase_device *kbdev = vinstr_ctx->kctx->kbdev; + size_t dump_size; + + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_V4)) { + u32 nr_cg; + + nr_cg = kbdev->gpu_props.num_core_groups; + dump_size = nr_cg * NR_CNT_BLOCKS_PER_GROUP * + NR_CNT_PER_BLOCK * + NR_BYTES_PER_CNT; + } else { + /* assume v5 for now */ + base_gpu_props *props = &kbdev->gpu_props.props; + u32 nr_l2 = props->l2_props.num_l2_slices; + u64 core_mask = props->coherency_info.group[0].core_mask; + u32 nr_blocks = fls64(core_mask); + + /* JM and tiler counter blocks are always present */ + dump_size = (2 + nr_l2 + nr_blocks) * + NR_CNT_PER_BLOCK * + NR_BYTES_PER_CNT; + } + return dump_size; +} + +static int kbasep_vinstr_map_kernel_dump_buffer( + struct kbase_vinstr_context *vinstr_ctx) { struct kbase_va_region *reg; - struct kbase_context *kctx = ctx->kctx; + struct 
kbase_context *kctx = vinstr_ctx->kctx; u64 flags, nr_pages; u16 va_align = 0; flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_WR; - ctx->dump_size = kbase_vinstr_dump_size(ctx); - nr_pages = PFN_UP(ctx->dump_size); + vinstr_ctx->dump_size = kbasep_vinstr_dump_size(vinstr_ctx); + nr_pages = PFN_UP(vinstr_ctx->dump_size); reg = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, - &ctx->gpu_va, &va_align); + &vinstr_ctx->gpu_va, &va_align); if (!reg) return -ENOMEM; - ctx->cpu_va = kbase_vmap(kctx, ctx->gpu_va, ctx->dump_size, &ctx->vmap); - if (!ctx->cpu_va) { - kbase_mem_free(kctx, ctx->gpu_va); + vinstr_ctx->cpu_va = kbase_vmap( + kctx, + vinstr_ctx->gpu_va, + vinstr_ctx->dump_size, + &vinstr_ctx->vmap); + if (!vinstr_ctx->cpu_va) { + kbase_mem_free(kctx, vinstr_ctx->gpu_va); return -ENOMEM; } return 0; } -static void unmap_kernel_dump_buffer(struct kbase_vinstr_context *ctx) +static void kbasep_vinstr_unmap_kernel_dump_buffer( + struct kbase_vinstr_context *vinstr_ctx) { - struct kbase_context *kctx = ctx->kctx; - - kbase_vunmap(kctx, &ctx->vmap); - kbase_mem_free(kctx, ctx->gpu_va); -} + struct kbase_context *kctx = vinstr_ctx->kctx; -static int map_client_accum_buffer(struct kbase_vinstr_context *ctx, - struct kbase_vinstr_client *cli) -{ - cli->dump_size = kbase_vinstr_dump_size(ctx); - cli->accum_buffer = kzalloc(cli->dump_size, GFP_KERNEL); - return !cli->accum_buffer ? -ENOMEM : 0; + kbase_vunmap(kctx, &vinstr_ctx->vmap); + kbase_mem_free(kctx, vinstr_ctx->gpu_va); } -static void unmap_client_accum_buffer(struct kbase_vinstr_context *ctx, - struct kbase_vinstr_client *cli) -{ - kfree(cli->accum_buffer); -} - -static int create_vinstr_kctx(struct kbase_vinstr_context *ctx) +/** + * kbasep_vinstr_create_kctx - create kernel context for vinstr + * @vinstr_ctx: vinstr context + * Return: zero on success + */ +static int kbasep_vinstr_create_kctx(struct kbase_vinstr_context *vinstr_ctx) { - struct kbase_uk_hwcnt_setup setup; int err; - ctx->kctx = kbase_create_context(ctx->kbdev, true); - if (!ctx->kctx) + vinstr_ctx->kctx = kbase_create_context(vinstr_ctx->kbdev, true); + if (!vinstr_ctx->kctx) return -ENOMEM; /* Map the master kernel dump buffer. The HW dumps the counters * into this memory region. */ - err = map_kernel_dump_buffer(ctx); + err = kbasep_vinstr_map_kernel_dump_buffer(vinstr_ctx); if (err) { - kbase_destroy_context(ctx->kctx); + kbase_destroy_context(vinstr_ctx->kctx); + vinstr_ctx->kctx = NULL; return err; } - setup.dump_buffer = ctx->gpu_va; - /* The GPU requires us to disable the prfcnt collection block for - * reprogramming it. This introduces jitter and disrupts existing - * clients. Therefore we enable all of them. 
*/ - setup.jm_bm = 0xffffffff; - setup.tiler_bm = 0xffffffff; - setup.shader_bm = 0xffffffff; - setup.mmu_l2_bm = 0xffffffff; - - err = kbase_instr_hwcnt_enable(ctx->kctx, &setup); + err = enable_hwcnt(vinstr_ctx); if (err) { - unmap_kernel_dump_buffer(ctx); - kbase_destroy_context(ctx->kctx); + kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx); + kbase_destroy_context(vinstr_ctx->kctx); + vinstr_ctx->kctx = NULL; return err; } - return 0; -} + vinstr_ctx->thread = kthread_run( + kbasep_vinstr_service_task, + vinstr_ctx, + "mali_vinstr_service"); + if (!vinstr_ctx->thread) { + disable_hwcnt(vinstr_ctx); + kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx); + kbase_destroy_context(vinstr_ctx->kctx); + vinstr_ctx->kctx = NULL; + return -EFAULT; + } -static void destroy_vinstr_kctx(struct kbase_vinstr_context *ctx) -{ - kbase_instr_hwcnt_disable(ctx->kctx); - unmap_kernel_dump_buffer(ctx); - kbase_destroy_context(ctx->kctx); - ctx->kctx = NULL; + return 0; } -struct kbase_vinstr_context *kbase_vinstr_init(struct kbase_device *kbdev) +/** + * kbasep_vinstr_destroy_kctx - destroy vinstr's kernel context + * @vinstr_ctx: vinstr context + */ +static void kbasep_vinstr_destroy_kctx(struct kbase_vinstr_context *vinstr_ctx) { - struct kbase_vinstr_context *ctx; - - ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); - if (!ctx) - return NULL; - INIT_LIST_HEAD(&ctx->clients); - mutex_init(&ctx->lock); - ctx->kbdev = kbdev; - return ctx; + /* Release hw counters dumping resources. */ + vinstr_ctx->thread = NULL; + disable_hwcnt(vinstr_ctx); + kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx); + kbase_destroy_context(vinstr_ctx->kctx); + vinstr_ctx->kctx = NULL; } -void kbase_vinstr_term(struct kbase_vinstr_context *ctx) +/** + * kbasep_vinstr_attach_client - Attach a client to the vinstr core + * @vinstr_ctx: vinstr context + * @buffer_count: requested number of dump buffers + * @bitmap: bitmaps describing which counters should be enabled + * @argp: pointer where notification descriptor shall be stored + * + * Return: vinstr opaque client handle or NULL on failure + */ +static struct kbase_vinstr_client *kbasep_vinstr_attach_client( + struct kbase_vinstr_context *vinstr_ctx, u32 buffer_count, + u32 bitmap[4], void *argp) { + struct task_struct *thread = NULL; struct kbase_vinstr_client *cli; - while (!list_empty(&ctx->clients)) { - cli = list_first_entry(&ctx->clients, - struct kbase_vinstr_client, list); - list_del(&cli->list); - unmap_client_accum_buffer(ctx, cli); - kfree(cli); - ctx->nclients--; - } - if (ctx->kctx) - destroy_vinstr_kctx(ctx); - kfree(ctx); -} + KBASE_DEBUG_ASSERT(vinstr_ctx); + KBASE_DEBUG_ASSERT(argp); + KBASE_DEBUG_ASSERT(buffer_count >= 0); + KBASE_DEBUG_ASSERT(buffer_count <= MAX_BUFFER_COUNT); + KBASE_DEBUG_ASSERT(!(buffer_count & (buffer_count - 1))); -struct kbase_vinstr_client *kbase_vinstr_attach_client(struct kbase_vinstr_context *ctx, - bool kernel, u64 dump_buffer, u32 bitmap[4]) -{ - struct kbase_vinstr_client *cli; - - cli = kmalloc(sizeof(*cli), GFP_KERNEL); + cli = kzalloc(sizeof(*cli), GFP_KERNEL); if (!cli) return NULL; - cli->kernel = kernel; - cli->dump_buffer = (void *)(uintptr_t)dump_buffer; - cli->bitmap[SHADER_HWCNT_BM] = bitmap[SHADER_HWCNT_BM]; - cli->bitmap[TILER_HWCNT_BM] = bitmap[TILER_HWCNT_BM]; - cli->bitmap[MMU_L2_HWCNT_BM] = bitmap[MMU_L2_HWCNT_BM]; - cli->bitmap[JM_HWCNT_BM] = bitmap[JM_HWCNT_BM]; + cli->vinstr_ctx = vinstr_ctx; + cli->buffer_count = buffer_count; + cli->event_mask = + (1 << BASE_HWCNT_READER_EVENT_MANUAL) | + (1 << 
BASE_HWCNT_READER_EVENT_PERIODIC); + cli->pending = true; + + hwcnt_bitmap_set(cli->bitmap, bitmap); + + mutex_lock(&vinstr_ctx->lock); + + hwcnt_bitmap_union(vinstr_ctx->bitmap, cli->bitmap); + vinstr_ctx->reprogram = true; - mutex_lock(&ctx->lock); /* If this is the first client, create the vinstr kbase - * context. This context is permanently resident until the + * context. This context is permanently resident until the * last client exits. */ - if (!ctx->nclients) { - if (create_vinstr_kctx(ctx) < 0) { - kfree(cli); - mutex_unlock(&ctx->lock); - return NULL; - } + if (!vinstr_ctx->nclients) { + hwcnt_bitmap_set(vinstr_ctx->bitmap, cli->bitmap); + if (kbasep_vinstr_create_kctx(vinstr_ctx) < 0) + goto error; + + vinstr_ctx->reprogram = false; + cli->pending = false; } /* The GPU resets the counter block every time there is a request - * to dump it. We need a per client kernel buffer for accumulating + * to dump it. We need a per client kernel buffer for accumulating * the counters. */ - if (map_client_accum_buffer(ctx, cli) < 0) { - kfree(cli); - if (!ctx->nclients) - destroy_vinstr_kctx(ctx); - mutex_unlock(&ctx->lock); - return NULL; + cli->dump_size = kbasep_vinstr_dump_size(vinstr_ctx); + cli->accum_buffer = kzalloc(cli->dump_size, GFP_KERNEL); + if (!cli->accum_buffer) + goto error; + + /* Prepare buffers. */ + if (cli->buffer_count) { + int *fd = (int *)argp; + size_t tmp; + + /* Allocate area for buffers metadata storage. */ + tmp = sizeof(struct kbase_hwcnt_reader_metadata) * + cli->buffer_count; + cli->dump_buffers_meta = kmalloc(tmp, GFP_KERNEL); + if (!cli->dump_buffers_meta) + goto error; + + /* Allocate required number of dumping buffers. */ + cli->dump_buffers = (char *)__get_free_pages( + GFP_KERNEL, + get_order(cli->dump_size * cli->buffer_count)); + if (!cli->dump_buffers) + goto error; + + /* Create descriptor for user-kernel data exchange. */ + *fd = anon_inode_getfd( + "[mali_vinstr_desc]", + &vinstr_client_fops, + cli, + O_RDONLY | O_CLOEXEC); + if (0 > *fd) + goto error; + } else { + cli->legacy_buffer = (void __user *)argp; } - ctx->nclients++; - list_add(&cli->list, &ctx->clients); - mutex_unlock(&ctx->lock); + atomic_set(&cli->read_idx, 0); + atomic_set(&cli->meta_idx, 0); + atomic_set(&cli->write_idx, 0); + init_waitqueue_head(&cli->waitq); + + vinstr_ctx->nclients++; + list_add(&cli->list, &vinstr_ctx->idle_clients); + + mutex_unlock(&vinstr_ctx->lock); return cli; + +error: + kfree(cli->dump_buffers_meta); + if (cli->dump_buffers) + free_pages( + (unsigned long)cli->dump_buffers, + get_order(cli->dump_size * cli->buffer_count)); + kfree(cli->accum_buffer); + if (!vinstr_ctx->nclients && vinstr_ctx->kctx) { + thread = vinstr_ctx->thread; + kbasep_vinstr_destroy_kctx(vinstr_ctx); + } + kfree(cli); + + mutex_unlock(&vinstr_ctx->lock); + + /* Thread must be stopped after lock is released. 
*/ + if (thread) + kthread_stop(thread); + + return NULL; } -void kbase_vinstr_detach_client(struct kbase_vinstr_context *ctx, - struct kbase_vinstr_client *cli) +/** + * kbasep_vinstr_detach_client - Detach a client from the vinstr core + * @cli: Pointer to vinstr client + */ +static void kbasep_vinstr_detach_client(struct kbase_vinstr_client *cli) { - struct kbase_vinstr_client *iter, *tmp; + struct kbase_vinstr_context *vinstr_ctx; + struct kbase_vinstr_client *iter, *tmp; + struct task_struct *thread = NULL; + u32 zerobitmap[4] = { 0 }; + int cli_found = 0; + + KBASE_DEBUG_ASSERT(cli); + vinstr_ctx = cli->vinstr_ctx; + KBASE_DEBUG_ASSERT(vinstr_ctx); + + mutex_lock(&vinstr_ctx->lock); - mutex_lock(&ctx->lock); - list_for_each_entry_safe(iter, tmp, &ctx->clients, list) { + list_for_each_entry_safe(iter, tmp, &vinstr_ctx->idle_clients, list) { if (iter == cli) { + vinstr_ctx->reprogram = true; + cli_found = 1; list_del(&iter->list); - unmap_client_accum_buffer(ctx, cli); - kfree(iter); - ctx->nclients--; - if (!ctx->nclients) - destroy_vinstr_kctx(ctx); break; } } - mutex_unlock(&ctx->lock); -} + if (!cli_found) { + list_for_each_entry_safe( + iter, tmp, &vinstr_ctx->waiting_clients, list) { + if (iter == cli) { + vinstr_ctx->reprogram = true; + cli_found = 1; + list_del(&iter->list); + break; + } + } + } + KBASE_DEBUG_ASSERT(cli_found); -size_t kbase_vinstr_dump_size(struct kbase_vinstr_context *ctx) -{ - struct kbase_device *kbdev = ctx->kctx->kbdev; - size_t dump_size; + kfree(cli->dump_buffers_meta); + free_pages( + (unsigned long)cli->dump_buffers, + get_order(cli->dump_size * cli->buffer_count)); + kfree(cli->accum_buffer); + kfree(cli); - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_V4)) { - u32 nr_cg; + vinstr_ctx->nclients--; + if (!vinstr_ctx->nclients) { + thread = vinstr_ctx->thread; + kbasep_vinstr_destroy_kctx(vinstr_ctx); + } - nr_cg = kbdev->gpu_props.num_core_groups; - dump_size = nr_cg * NR_CNT_BLOCKS_PER_GROUP * - NR_CNT_PER_BLOCK * - NR_BYTES_PER_CNT; - } else { - /* assume v5 for now */ - u32 nr_l2, nr_sc; + /* Rebuild context bitmap now that the client has detached */ + hwcnt_bitmap_set(vinstr_ctx->bitmap, zerobitmap); + list_for_each_entry(iter, &vinstr_ctx->idle_clients, list) + hwcnt_bitmap_union(vinstr_ctx->bitmap, iter->bitmap); + list_for_each_entry(iter, &vinstr_ctx->waiting_clients, list) + hwcnt_bitmap_union(vinstr_ctx->bitmap, iter->bitmap); - nr_l2 = kbdev->gpu_props.props.l2_props.num_l2_slices; - nr_sc = kbdev->gpu_props.props.coherency_info.group[0].num_cores; - /* JM and tiler counter blocks are always present */ - dump_size = (2 + nr_l2 + nr_sc) * - NR_CNT_PER_BLOCK * - NR_BYTES_PER_CNT; - } - return dump_size; + mutex_unlock(&vinstr_ctx->lock); + + /* Thread must be stopped after lock is released. */ + if (thread) + kthread_stop(thread); } /* Accumulate counters in the dump buffer */ @@ -286,13 +569,14 @@ static void accum_dump_buffer(void *dst, void *src, size_t dump_size) * of the defined blocks from the master kernel buffer and then patches up * the performance counter enable mask for each of the blocks to exclude * counters that were not requested by the client. 
*/ -static void patch_dump_buffer_hdr_v4(struct kbase_vinstr_context *ctx, +static void patch_dump_buffer_hdr_v4( + struct kbase_vinstr_context *vinstr_ctx, struct kbase_vinstr_client *cli) { u32 *mask; u8 *dst = cli->accum_buffer; - u8 *src = ctx->cpu_va; - u32 nr_cg = ctx->kctx->kbdev->gpu_props.num_core_groups; + u8 *src = vinstr_ctx->cpu_va; + u32 nr_cg = vinstr_ctx->kctx->kbdev->gpu_props.num_core_groups; size_t i, group_size, group; enum { SC0_BASE = 0 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT, @@ -359,14 +643,16 @@ static void patch_dump_buffer_hdr_v4(struct kbase_vinstr_context *ctx, * of the defined blocks from the master kernel buffer and then patches up * the performance counter enable mask for each of the blocks to exclude * counters that were not requested by the client. */ -static void patch_dump_buffer_hdr_v5(struct kbase_vinstr_context *ctx, +static void patch_dump_buffer_hdr_v5( + struct kbase_vinstr_context *vinstr_ctx, struct kbase_vinstr_client *cli) { - struct kbase_device *kbdev = ctx->kctx->kbdev; - u32 i, nr_l2, nr_sc; + struct kbase_device *kbdev = vinstr_ctx->kctx->kbdev; + u32 i, nr_l2; + u64 core_mask; u32 *mask; u8 *dst = cli->accum_buffer; - u8 *src = ctx->cpu_va; + u8 *src = vinstr_ctx->cpu_va; size_t block_size = NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT; /* copy and patch job manager header */ @@ -394,92 +680,1001 @@ static void patch_dump_buffer_hdr_v5(struct kbase_vinstr_context *ctx, } /* copy and patch shader core headers */ - nr_sc = kbdev->gpu_props.props.coherency_info.group[0].num_cores; - for (i = 0; i < nr_sc; i++) { + core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask; + while (0ull != core_mask) { memcpy(dst, src, NR_BYTES_PER_HDR); - mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET]; - *mask &= cli->bitmap[SHADER_HWCNT_BM]; + if (0ull != (core_mask & 1ull)) { + /* if block is not reserved update header */ + mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET]; + *mask &= cli->bitmap[SHADER_HWCNT_BM]; + } dst += block_size; src += block_size; + + core_mask >>= 1; } } -static void accum_clients(struct kbase_vinstr_context *ctx) +/** + * accum_clients - accumulate dumped hw counters for all known clients + * @vinstr_ctx: vinstr context + */ +static void accum_clients(struct kbase_vinstr_context *vinstr_ctx) { struct kbase_vinstr_client *iter; int v4; - v4 = kbase_hw_has_feature(ctx->kbdev, BASE_HW_FEATURE_V4); - list_for_each_entry(iter, &ctx->clients, list) { + v4 = kbase_hw_has_feature(vinstr_ctx->kbdev, BASE_HW_FEATURE_V4); + list_for_each_entry(iter, &vinstr_ctx->idle_clients, list) { + /* Don't bother accumulating clients whose hwcnt requests + * have not yet been honoured. */ + if (iter->pending) + continue; + if (v4) + patch_dump_buffer_hdr_v4(vinstr_ctx, iter); + else + patch_dump_buffer_hdr_v5(vinstr_ctx, iter); + accum_dump_buffer( + iter->accum_buffer, + vinstr_ctx->cpu_va, + iter->dump_size); + } + list_for_each_entry(iter, &vinstr_ctx->waiting_clients, list) { + /* Don't bother accumulating clients whose hwcnt requests + * have not yet been honoured. 
*/ + if (iter->pending) + continue; if (v4) - patch_dump_buffer_hdr_v4(ctx, iter); + patch_dump_buffer_hdr_v4(vinstr_ctx, iter); else - patch_dump_buffer_hdr_v5(ctx, iter); - accum_dump_buffer(iter->accum_buffer, ctx->cpu_va, + patch_dump_buffer_hdr_v5(vinstr_ctx, iter); + accum_dump_buffer( + iter->accum_buffer, + vinstr_ctx->cpu_va, iter->dump_size); } } -int kbase_vinstr_dump(struct kbase_vinstr_context *ctx, +/*****************************************************************************/ + +/** + * kbasep_vinstr_get_timestamp - return timestamp + * + * Function returns timestamp value based on raw monotonic timer. Value will + * wrap around zero in case of overflow. + * + * Return: timestamp value + */ +static u64 kbasep_vinstr_get_timestamp(void) +{ + struct timespec ts; + + getrawmonotonic(&ts); + return (u64)ts.tv_sec * NSECS_IN_SEC + ts.tv_nsec; +} + +/** + * kbasep_vinstr_add_dump_request - register client's dumping request + * @cli: requesting client + * @waiting_clients: list of pending dumping requests + */ +static void kbasep_vinstr_add_dump_request( + struct kbase_vinstr_client *cli, + struct list_head *waiting_clients) +{ + struct kbase_vinstr_client *tmp; + + if (list_empty(waiting_clients)) { + list_add(&cli->list, waiting_clients); + return; + } + list_for_each_entry(tmp, waiting_clients, list) { + if (tmp->dump_time > cli->dump_time) { + list_add_tail(&cli->list, &tmp->list); + return; + } + } + list_add_tail(&cli->list, waiting_clients); +} + +/** + * kbasep_vinstr_collect_and_accumulate - collect hw counters via low level + * dump and accumulate them for known + * clients + * @vinstr_ctx: vinstr context + * @timestamp: pointer where collection timestamp will be recorded + * + * Return: zero on success + */ +static int kbasep_vinstr_collect_and_accumulate( + struct kbase_vinstr_context *vinstr_ctx, u64 *timestamp) +{ + int rcode; + + /* Request HW counters dump. + * Disable preemption to make dump timestamp more accurate. */ + preempt_disable(); + *timestamp = kbasep_vinstr_get_timestamp(); + rcode = kbase_instr_hwcnt_request_dump(vinstr_ctx->kctx); + preempt_enable(); + + if (!rcode) + rcode = kbase_instr_hwcnt_wait_for_dump(vinstr_ctx->kctx); + WARN_ON(rcode); + + /* Accumulate values of collected counters. */ + if (!rcode) + accum_clients(vinstr_ctx); + + return rcode; +} + +/** + * kbasep_vinstr_fill_dump_buffer - copy accumulated counters to empty kernel + * buffer + * @cli: requesting client + * @timestamp: timestamp when counters were collected + * @event_id: id of event that caused triggered counters collection + * + * Return: zero on success + */ +static int kbasep_vinstr_fill_dump_buffer( + struct kbase_vinstr_client *cli, u64 timestamp, + enum base_hwcnt_reader_event event_id) +{ + unsigned int write_idx = atomic_read(&cli->write_idx); + unsigned int read_idx = atomic_read(&cli->read_idx); + + struct kbase_hwcnt_reader_metadata *meta; + void *buffer; + + /* Check if there is a place to copy HWC block into. */ + if (write_idx - read_idx == cli->buffer_count) + return -1; + write_idx %= cli->buffer_count; + + /* Fill in dump buffer and its metadata. 
*/ + buffer = &cli->dump_buffers[write_idx * cli->dump_size]; + meta = &cli->dump_buffers_meta[write_idx]; + meta->timestamp = timestamp; + meta->event_id = event_id; + meta->buffer_idx = write_idx; + memcpy(buffer, cli->accum_buffer, cli->dump_size); + return 0; +} + +/** + * kbasep_vinstr_fill_dump_buffer_legacy - copy accumulated counters to buffer + * allocated in userspace + * @cli: requesting client + * + * Return: zero on success + * + * This is part of legacy ioctl interface. + */ +static int kbasep_vinstr_fill_dump_buffer_legacy( struct kbase_vinstr_client *cli) { - int err = 0; + void __user *buffer = cli->legacy_buffer; + int rcode; + + /* Copy data to user buffer. */ + rcode = copy_to_user(buffer, cli->accum_buffer, cli->dump_size); + if (rcode) + pr_warn("error while copying buffer to user\n"); + return rcode; +} - if (!cli) +/** + * kbasep_vinstr_reprogram - reprogram hwcnt set collected by inst + * @vinstr_ctx: vinstr context + */ +static void kbasep_vinstr_reprogram( + struct kbase_vinstr_context *vinstr_ctx) +{ + if (vinstr_ctx->reprogram) { + struct kbase_vinstr_client *iter; + + if (!reprogram_hwcnt(vinstr_ctx)) { + vinstr_ctx->reprogram = false; + list_for_each_entry( + iter, + &vinstr_ctx->idle_clients, + list) + iter->pending = false; + list_for_each_entry( + iter, + &vinstr_ctx->waiting_clients, + list) + iter->pending = false; + } + } +} + +/** + * kbasep_vinstr_update_client - copy accumulated counters to user readable + * buffer and notify the user + * @cli: requesting client + * @timestamp: timestamp when counters were collected + * @event_id: id of event that caused triggered counters collection + * + * Return: zero on success + */ +static int kbasep_vinstr_update_client( + struct kbase_vinstr_client *cli, u64 timestamp, + enum base_hwcnt_reader_event event_id) +{ + int rcode = 0; + + /* Copy collected counters to user readable buffer. */ + if (cli->buffer_count) + rcode = kbasep_vinstr_fill_dump_buffer( + cli, timestamp, event_id); + else + rcode = kbasep_vinstr_fill_dump_buffer_legacy(cli); + + if (rcode) + goto exit; + + + /* Notify client. Make sure all changes to memory are visible. */ + wmb(); + atomic_inc(&cli->write_idx); + wake_up_interruptible(&cli->waitq); + + /* Prepare for next request. */ + memset(cli->accum_buffer, 0, cli->dump_size); + +exit: + return rcode; +} + +/** + * kbasep_vinstr_wake_up_callback - vinstr wake up timer wake up function + * + * @hrtimer: high resolution timer + * + * Return: High resolution timer restart enum. + */ +static enum hrtimer_restart kbasep_vinstr_wake_up_callback( + struct hrtimer *hrtimer) +{ + struct kbasep_vinstr_wake_up_timer *timer = + container_of( + hrtimer, + struct kbasep_vinstr_wake_up_timer, + hrtimer); + + KBASE_DEBUG_ASSERT(timer); + + atomic_set(&timer->vinstr_ctx->request_pending, 1); + wake_up_all(&timer->vinstr_ctx->waitq); + + return HRTIMER_NORESTART; +} + +/** + * kbasep_vinstr_service_task - HWC dumping service thread + * + * @data: Pointer to vinstr context structure. + * + * Return: Always returns zero. 
+ */ +static int kbasep_vinstr_service_task(void *data) +{ + struct kbase_vinstr_context *vinstr_ctx = data; + struct kbasep_vinstr_wake_up_timer timer; + + KBASE_DEBUG_ASSERT(vinstr_ctx); + + hrtimer_init(&timer.hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + timer.hrtimer.function = kbasep_vinstr_wake_up_callback; + timer.vinstr_ctx = vinstr_ctx; + + while (!kthread_should_stop()) { + struct kbase_vinstr_client *cli = NULL; + struct kbase_vinstr_client *tmp; + + u64 timestamp = kbasep_vinstr_get_timestamp(); + u64 dump_time = 0; + struct list_head expired_requests; + + /* Hold lock while performing operations on lists of clients. */ + mutex_lock(&vinstr_ctx->lock); + + /* Closing thread must not interact with client requests. */ + if (current == vinstr_ctx->thread) { + atomic_set(&vinstr_ctx->request_pending, 0); + + if (!list_empty(&vinstr_ctx->waiting_clients)) { + cli = list_first_entry( + &vinstr_ctx->waiting_clients, + struct kbase_vinstr_client, + list); + dump_time = cli->dump_time; + } + } + + if (!cli || ((s64)timestamp - (s64)dump_time < 0ll)) { + mutex_unlock(&vinstr_ctx->lock); + + /* Sleep until next dumping event or service request. */ + if (cli) { + u64 diff = dump_time - timestamp; + + hrtimer_start( + &timer.hrtimer, + ns_to_ktime(diff), + HRTIMER_MODE_REL); + } + wait_event( + vinstr_ctx->waitq, + atomic_read( + &vinstr_ctx->request_pending) || + kthread_should_stop()); + hrtimer_cancel(&timer.hrtimer); + continue; + } + + kbasep_vinstr_collect_and_accumulate(vinstr_ctx, ×tamp); + + INIT_LIST_HEAD(&expired_requests); + + /* Find all expired requests. */ + list_for_each_entry_safe( + cli, + tmp, + &vinstr_ctx->waiting_clients, + list) { + s64 tdiff = + (s64)(timestamp + DUMPING_RESOLUTION) - + (s64)cli->dump_time; + if (tdiff >= 0ll) { + list_del(&cli->list); + list_add(&cli->list, &expired_requests); + } else { + break; + } + } + + /* Fill data for each request found. */ + list_for_each_entry_safe(cli, tmp, &expired_requests, list) { + /* Ensure that legacy buffer will not be used from + * this kthread context. */ + BUG_ON(0 == cli->buffer_count); + /* Expect only periodically sampled clients. */ + BUG_ON(0 == cli->dump_interval); + + kbasep_vinstr_update_client( + cli, + timestamp, + BASE_HWCNT_READER_EVENT_PERIODIC); + + /* Set new dumping time. Drop missed probing times. */ + do { + cli->dump_time += cli->dump_interval; + } while (cli->dump_time < timestamp); + + list_del(&cli->list); + kbasep_vinstr_add_dump_request( + cli, + &vinstr_ctx->waiting_clients); + } + + /* Reprogram counters set if required. 
*/ + kbasep_vinstr_reprogram(vinstr_ctx); + + mutex_unlock(&vinstr_ctx->lock); + } + + return 0; +} + +/*****************************************************************************/ + +/** + * kbasep_vinstr_hwcnt_reader_buffer_ready - check if client has ready buffers + * @cli: pointer to vinstr client structure + * + * Return: non-zero if client has at least one dumping buffer filled that was + * not notified to user yet + */ +static int kbasep_vinstr_hwcnt_reader_buffer_ready( + struct kbase_vinstr_client *cli) +{ + KBASE_DEBUG_ASSERT(cli); + return atomic_read(&cli->write_idx) != atomic_read(&cli->meta_idx); +} + +/** + * kbasep_vinstr_hwcnt_reader_ioctl_get_buffer - hwcnt reader's ioctl command + * @cli: pointer to vinstr client structure + * @buffer: pointer to userspace buffer + * @size: size of buffer + * + * Return: zero on success + */ +static long kbasep_vinstr_hwcnt_reader_ioctl_get_buffer( + struct kbase_vinstr_client *cli, void __user *buffer, + size_t size) +{ + unsigned int meta_idx = atomic_read(&cli->meta_idx); + unsigned int idx = meta_idx % cli->buffer_count; + + struct kbase_hwcnt_reader_metadata *meta = &cli->dump_buffers_meta[idx]; + + /* Metadata sanity check. */ + KBASE_DEBUG_ASSERT(idx == meta->buffer_idx); + + if (sizeof(struct kbase_hwcnt_reader_metadata) != size) + return -EINVAL; + + /* Check if there is any buffer available. */ + if (atomic_read(&cli->write_idx) == meta_idx) + return -EAGAIN; + + /* Check if previously taken buffer was put back. */ + if (atomic_read(&cli->read_idx) != meta_idx) + return -EBUSY; + + /* Copy next available buffer's metadata to user. */ + if (copy_to_user(buffer, meta, size)) + return -EFAULT; + + atomic_inc(&cli->meta_idx); + + return 0; +} + +/** + * kbasep_vinstr_hwcnt_reader_ioctl_put_buffer - hwcnt reader's ioctl command + * @cli: pointer to vinstr client structure + * @buffer: pointer to userspace buffer + * @size: size of buffer + * + * Return: zero on success + */ +static long kbasep_vinstr_hwcnt_reader_ioctl_put_buffer( + struct kbase_vinstr_client *cli, void __user *buffer, + size_t size) +{ + unsigned int read_idx = atomic_read(&cli->read_idx); + unsigned int idx = read_idx % cli->buffer_count; + + struct kbase_hwcnt_reader_metadata meta; + + if (sizeof(struct kbase_hwcnt_reader_metadata) != size) + return -EINVAL; + + /* Check if any buffer was taken. */ + if (atomic_read(&cli->meta_idx) == read_idx) + return -EPERM; + + /* Check if correct buffer is put back. */ + if (copy_from_user(&meta, buffer, size)) + return -EFAULT; + if (idx != meta.buffer_idx) + return -EINVAL; + + atomic_inc(&cli->read_idx); + + return 0; +} + +/** + * kbasep_vinstr_hwcnt_reader_ioctl_set_interval - hwcnt reader's ioctl command + * @cli: pointer to vinstr client structure + * @interval: periodic dumping interval (disable periodic dumping if zero) + * + * Return: zero on success + */ +static long kbasep_vinstr_hwcnt_reader_ioctl_set_interval( + struct kbase_vinstr_client *cli, u32 interval) +{ + struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx; + + KBASE_DEBUG_ASSERT(vinstr_ctx); + + mutex_lock(&vinstr_ctx->lock); + + if (vinstr_ctx->suspended) { + mutex_unlock(&vinstr_ctx->lock); + return -EBUSY; + } + + list_del(&cli->list); + + cli->dump_interval = interval; + + /* If interval is non-zero, enable periodic dumping for this client. 
*/ + if (cli->dump_interval) { + if (DUMPING_RESOLUTION > cli->dump_interval) + cli->dump_interval = DUMPING_RESOLUTION; + cli->dump_time = + kbasep_vinstr_get_timestamp() + cli->dump_interval; + + kbasep_vinstr_add_dump_request( + cli, &vinstr_ctx->waiting_clients); + + atomic_set(&vinstr_ctx->request_pending, 1); + wake_up_all(&vinstr_ctx->waitq); + } else { + list_add(&cli->list, &vinstr_ctx->idle_clients); + } + + mutex_unlock(&vinstr_ctx->lock); + + return 0; +} + +/** + * kbasep_vinstr_hwcnt_reader_event_mask - return event mask for event id + * @event_id: id of event + * Return: event_mask or zero if event is not supported or maskable + */ +static u32 kbasep_vinstr_hwcnt_reader_event_mask( + enum base_hwcnt_reader_event event_id) +{ + u32 event_mask = 0; + + switch (event_id) { + case BASE_HWCNT_READER_EVENT_PREJOB: + case BASE_HWCNT_READER_EVENT_POSTJOB: + /* These event are maskable. */ + event_mask = (1 << event_id); + break; + + case BASE_HWCNT_READER_EVENT_MANUAL: + case BASE_HWCNT_READER_EVENT_PERIODIC: + /* These event are non-maskable. */ + default: + /* These event are not supported. */ + break; + } + + return event_mask; +} + +/** + * kbasep_vinstr_hwcnt_reader_ioctl_enable_event - hwcnt reader's ioctl command + * @cli: pointer to vinstr client structure + * @event_id: id of event to enable + * + * Return: zero on success + */ +static long kbasep_vinstr_hwcnt_reader_ioctl_enable_event( + struct kbase_vinstr_client *cli, + enum base_hwcnt_reader_event event_id) +{ + struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx; + u32 event_mask; + + KBASE_DEBUG_ASSERT(vinstr_ctx); + + event_mask = kbasep_vinstr_hwcnt_reader_event_mask(event_id); + if (!event_mask) + return -EINVAL; + + mutex_lock(&vinstr_ctx->lock); + cli->event_mask |= event_mask; + mutex_unlock(&vinstr_ctx->lock); + + return 0; +} + +/** + * kbasep_vinstr_hwcnt_reader_ioctl_disable_event - hwcnt reader's ioctl command + * @cli: pointer to vinstr client structure + * @event_id: id of event to disable + * + * Return: zero on success + */ +static long kbasep_vinstr_hwcnt_reader_ioctl_disable_event( + struct kbase_vinstr_client *cli, + enum base_hwcnt_reader_event event_id) +{ + struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx; + u32 event_mask; + + KBASE_DEBUG_ASSERT(vinstr_ctx); + + event_mask = kbasep_vinstr_hwcnt_reader_event_mask(event_id); + if (!event_mask) + return -EINVAL; + + mutex_lock(&vinstr_ctx->lock); + cli->event_mask &= ~event_mask; + mutex_unlock(&vinstr_ctx->lock); + + return 0; +} + +/** + * kbasep_vinstr_hwcnt_reader_ioctl_get_hwver - hwcnt reader's ioctl command + * @cli: pointer to vinstr client structure + * @hwver: pointer to user buffer where hw version will be stored + * + * Return: zero on success + */ +static long kbasep_vinstr_hwcnt_reader_ioctl_get_hwver( + struct kbase_vinstr_client *cli, u32 __user *hwver) +{ + struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx; + u32 ver; + + KBASE_DEBUG_ASSERT(vinstr_ctx); + + ver = 4; + if (!kbase_hw_has_feature(vinstr_ctx->kbdev, BASE_HW_FEATURE_V4)) + ver = 5; + return put_user(ver, hwver); +} + +/** + * kbasep_vinstr_hwcnt_reader_ioctl - hwcnt reader's ioctl + * @filp: pointer to file structure + * @cmd: user command + * @arg: command's argument + * + * Return: zero on success + */ +static long kbasep_vinstr_hwcnt_reader_ioctl(struct file *filp, + unsigned int cmd, unsigned long arg) +{ + long rcode = 0; + struct kbase_vinstr_client *cli; + + KBASE_DEBUG_ASSERT(filp); + + cli = filp->private_data; + KBASE_DEBUG_ASSERT(cli); + 
+ if (unlikely(KBASE_HWCNT_READER != _IOC_TYPE(cmd))) return -EINVAL; - mutex_lock(&ctx->lock); - err = kbase_instr_hwcnt_request_dump(ctx->kctx); - if (err) - goto out; + switch (cmd) { + case KBASE_HWCNT_READER_GET_API_VERSION: + rcode = put_user(HWCNT_READER_API, (u32 __user *)arg); + break; + case KBASE_HWCNT_READER_GET_HWVER: + rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_hwver( + cli, (u32 __user *)arg); + break; + case KBASE_HWCNT_READER_GET_BUFFER_SIZE: + KBASE_DEBUG_ASSERT(cli->vinstr_ctx); + rcode = put_user( + (u32)cli->vinstr_ctx->dump_size, + (u32 __user *)arg); + break; + case KBASE_HWCNT_READER_DUMP: + rcode = kbase_vinstr_hwc_dump( + cli, BASE_HWCNT_READER_EVENT_MANUAL); + break; + case KBASE_HWCNT_READER_CLEAR: + rcode = kbase_vinstr_hwc_clear(cli); + break; + case KBASE_HWCNT_READER_GET_BUFFER: + rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_buffer( + cli, (void __user *)arg, _IOC_SIZE(cmd)); + break; + case KBASE_HWCNT_READER_PUT_BUFFER: + rcode = kbasep_vinstr_hwcnt_reader_ioctl_put_buffer( + cli, (void __user *)arg, _IOC_SIZE(cmd)); + break; + case KBASE_HWCNT_READER_SET_INTERVAL: + rcode = kbasep_vinstr_hwcnt_reader_ioctl_set_interval( + cli, (u32)arg); + break; + case KBASE_HWCNT_READER_ENABLE_EVENT: + rcode = kbasep_vinstr_hwcnt_reader_ioctl_enable_event( + cli, (enum base_hwcnt_reader_event)arg); + break; + case KBASE_HWCNT_READER_DISABLE_EVENT: + rcode = kbasep_vinstr_hwcnt_reader_ioctl_disable_event( + cli, (enum base_hwcnt_reader_event)arg); + break; + default: + rcode = -EINVAL; + break; + } + + return rcode; +} + +/** + * kbasep_vinstr_hwcnt_reader_poll - hwcnt reader's poll + * @filp: pointer to file structure + * @wait: pointer to poll table + * Return: POLLIN if data can be read without blocking, otherwise zero + */ +static unsigned int kbasep_vinstr_hwcnt_reader_poll(struct file *filp, + poll_table *wait) +{ + struct kbase_vinstr_client *cli; - err = kbase_instr_hwcnt_wait_for_dump(ctx->kctx); - if (err) - goto out; + KBASE_DEBUG_ASSERT(filp); + KBASE_DEBUG_ASSERT(wait); - accum_clients(ctx); + cli = filp->private_data; + KBASE_DEBUG_ASSERT(cli); + + poll_wait(filp, &cli->waitq, wait); + if (kbasep_vinstr_hwcnt_reader_buffer_ready(cli)) + return POLLIN; + return 0; +} + +/** + * kbasep_vinstr_hwcnt_reader_mmap - hwcnt reader's mmap + * @filp: pointer to file structure + * @vma: pointer to vma structure + * Return: zero on success + */ +static int kbasep_vinstr_hwcnt_reader_mmap(struct file *filp, + struct vm_area_struct *vma) +{ + struct kbase_vinstr_client *cli; + size_t size; + + KBASE_DEBUG_ASSERT(filp); + KBASE_DEBUG_ASSERT(vma); + + cli = filp->private_data; + KBASE_DEBUG_ASSERT(cli); + + size = cli->buffer_count * cli->dump_size; + if (vma->vm_end - vma->vm_start > size) + return -ENOMEM; + + return remap_pfn_range( + vma, + vma->vm_start, + __pa((unsigned long)cli->dump_buffers) >> PAGE_SHIFT, + size, + vma->vm_page_prot); +} - if (!cli->kernel) { - if (copy_to_user((void __user *)cli->dump_buffer, - cli->accum_buffer, cli->dump_size)) { - err = -EFAULT; - goto out; +/** + * kbasep_vinstr_hwcnt_reader_release - hwcnt reader's release + * @inode: pointer to inode structure + * @filp: pointer to file structure + * Return always return zero + */ +static int kbasep_vinstr_hwcnt_reader_release(struct inode *inode, + struct file *filp) +{ + struct kbase_vinstr_client *cli; + + KBASE_DEBUG_ASSERT(inode); + KBASE_DEBUG_ASSERT(filp); + + cli = filp->private_data; + KBASE_DEBUG_ASSERT(cli); + + kbasep_vinstr_detach_client(cli); + return 0; +} + 
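The file operations above complete the reader interface that kbasep_vinstr_attach_client() exposes through the descriptor returned in kbase_uk_hwcnt_reader_setup.fd: userspace maps the ring of dump buffers, waits for POLLIN, claims a buffer with KBASE_HWCNT_READER_GET_BUFFER, reads the counters at buffer_idx * dump_size inside the mapping, and returns the buffer with KBASE_HWCNT_READER_PUT_BUFFER. The sketch below illustrates that flow from the userspace side only; it assumes the command macros and the struct kbase_hwcnt_reader_metadata layout are made available to userspace by the driver's hwcnt reader header, and the read_one_sample() wrapper plus the 100 ms interval are illustrative choices, not part of this patch.

/*
 * Illustrative userspace consumer for the hwcnt reader descriptor
 * (sketch only, not driver code). The ioctl macros and the metadata
 * layout are assumed to come from the driver's hwcnt reader header;
 * error handling is reduced to the bare minimum.
 */
#include <poll.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include "mali_kbase_hwcnt_reader.h"	/* assumed uapi header */

int read_one_sample(int fd, unsigned int buffer_count)
{
	struct kbase_hwcnt_reader_metadata meta;
	struct pollfd pfd = { .fd = fd, .events = POLLIN };
	uint32_t dump_size;
	uint8_t *buffers;
	int ret = -1;

	if (ioctl(fd, KBASE_HWCNT_READER_GET_BUFFER_SIZE, &dump_size))
		return -1;

	/* Map the whole ring of kernel dump buffers read-only. */
	buffers = mmap(NULL, (size_t)dump_size * buffer_count,
		       PROT_READ, MAP_SHARED, fd, 0);
	if (buffers == MAP_FAILED)
		return -1;

	/* Request periodic dumps every 100 ms, then wait for one. */
	if (ioctl(fd, KBASE_HWCNT_READER_SET_INTERVAL, 100000000u))
		goto out;
	if (poll(&pfd, 1, -1) <= 0)
		goto out;

	/* Claim the oldest ready buffer, consume it, hand it back. */
	if (ioctl(fd, KBASE_HWCNT_READER_GET_BUFFER, &meta))
		goto out;
	{
		const uint32_t *counters = (const uint32_t *)
			(buffers + (size_t)meta.buffer_idx * dump_size);

		printf("event %u at %llu ns, first counter %u\n",
		       (unsigned int)meta.event_id,
		       (unsigned long long)meta.timestamp, counters[0]);
	}
	ioctl(fd, KBASE_HWCNT_READER_PUT_BUFFER, &meta);
	ret = 0;

out:
	munmap(buffers, (size_t)dump_size * buffer_count);
	return ret;
}

Note that only one buffer may be outstanding at a time: a second GET_BUFFER issued before the matching PUT_BUFFER returns -EBUSY, as enforced in kbasep_vinstr_hwcnt_reader_ioctl_get_buffer() above.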
+/*****************************************************************************/ + +struct kbase_vinstr_context *kbase_vinstr_init(struct kbase_device *kbdev) +{ + struct kbase_vinstr_context *vinstr_ctx; + + vinstr_ctx = kzalloc(sizeof(*vinstr_ctx), GFP_KERNEL); + if (!vinstr_ctx) + return NULL; + + INIT_LIST_HEAD(&vinstr_ctx->idle_clients); + INIT_LIST_HEAD(&vinstr_ctx->waiting_clients); + mutex_init(&vinstr_ctx->lock); + vinstr_ctx->kbdev = kbdev; + vinstr_ctx->thread = NULL; + + atomic_set(&vinstr_ctx->request_pending, 0); + init_waitqueue_head(&vinstr_ctx->waitq); + + return vinstr_ctx; +} + +void kbase_vinstr_term(struct kbase_vinstr_context *vinstr_ctx) +{ + struct kbase_vinstr_client *cli; + + /* Stop service thread first. */ + if (vinstr_ctx->thread) + kthread_stop(vinstr_ctx->thread); + + while (1) { + struct list_head *list = &vinstr_ctx->idle_clients; + + if (list_empty(list)) { + list = &vinstr_ctx->waiting_clients; + if (list_empty(list)) + break; } + + cli = list_first_entry(list, struct kbase_vinstr_client, list); + list_del(&cli->list); + kfree(cli->accum_buffer); + kfree(cli); + vinstr_ctx->nclients--; + } + KBASE_DEBUG_ASSERT(!vinstr_ctx->nclients); + if (vinstr_ctx->kctx) + kbasep_vinstr_destroy_kctx(vinstr_ctx); + kfree(vinstr_ctx); +} + +int kbase_vinstr_hwcnt_reader_setup(struct kbase_vinstr_context *vinstr_ctx, + struct kbase_uk_hwcnt_reader_setup *setup) +{ + struct kbase_vinstr_client *cli; + u32 bitmap[4]; + + KBASE_DEBUG_ASSERT(vinstr_ctx); + KBASE_DEBUG_ASSERT(setup); + KBASE_DEBUG_ASSERT(setup->buffer_count); + + bitmap[SHADER_HWCNT_BM] = setup->shader_bm; + bitmap[TILER_HWCNT_BM] = setup->tiler_bm; + bitmap[MMU_L2_HWCNT_BM] = setup->mmu_l2_bm; + bitmap[JM_HWCNT_BM] = setup->jm_bm; + + cli = kbasep_vinstr_attach_client( + vinstr_ctx, + setup->buffer_count, + bitmap, + &setup->fd); + + if (!cli) + return -ENOMEM; + + return 0; +} + +int kbase_vinstr_legacy_hwc_setup( + struct kbase_vinstr_context *vinstr_ctx, + struct kbase_vinstr_client **cli, + struct kbase_uk_hwcnt_setup *setup) +{ + KBASE_DEBUG_ASSERT(vinstr_ctx); + KBASE_DEBUG_ASSERT(setup); + KBASE_DEBUG_ASSERT(cli); + + if (setup->dump_buffer) { + u32 bitmap[4]; + + bitmap[SHADER_HWCNT_BM] = setup->shader_bm; + bitmap[TILER_HWCNT_BM] = setup->tiler_bm; + bitmap[MMU_L2_HWCNT_BM] = setup->mmu_l2_bm; + bitmap[JM_HWCNT_BM] = setup->jm_bm; + + if (*cli) + return -EBUSY; + + *cli = kbasep_vinstr_attach_client( + vinstr_ctx, + 0, + bitmap, + (void *)(long)setup->dump_buffer); + + if (!(*cli)) + return -ENOMEM; } else { - memcpy(cli->dump_buffer, cli->accum_buffer, cli->dump_size); + if (!*cli) + return -EINVAL; + + kbasep_vinstr_detach_client(*cli); + *cli = NULL; } - memset(cli->accum_buffer, 0, cli->dump_size); -out: - mutex_unlock(&ctx->lock); - return err; + return 0; } -int kbase_vinstr_clear(struct kbase_vinstr_context *ctx, - struct kbase_vinstr_client *cli) +int kbase_vinstr_hwc_dump(struct kbase_vinstr_client *cli, + enum base_hwcnt_reader_event event_id) { - int err = 0; + int rcode = 0; + struct kbase_vinstr_context *vinstr_ctx; + u64 timestamp; + u32 event_mask; if (!cli) return -EINVAL; - mutex_lock(&ctx->lock); - err = kbase_instr_hwcnt_request_dump(ctx->kctx); - if (err) - goto out; + vinstr_ctx = cli->vinstr_ctx; + KBASE_DEBUG_ASSERT(vinstr_ctx); + + KBASE_DEBUG_ASSERT(event_id < BASE_HWCNT_READER_EVENT_COUNT); + event_mask = 1 << event_id; - err = kbase_instr_hwcnt_wait_for_dump(ctx->kctx); - if (err) - goto out; + mutex_lock(&vinstr_ctx->lock); - err = kbase_instr_hwcnt_clear(ctx->kctx); - if 
(err) - goto out; + if (vinstr_ctx->suspended) { + rcode = -EBUSY; + goto exit; + } - accum_clients(ctx); + if (event_mask & cli->event_mask) { + rcode = kbasep_vinstr_collect_and_accumulate( + vinstr_ctx, + ×tamp); + if (rcode) + goto exit; + rcode = kbasep_vinstr_update_client(cli, timestamp, event_id); + if (rcode) + goto exit; + + kbasep_vinstr_reprogram(vinstr_ctx); + } + +exit: + mutex_unlock(&vinstr_ctx->lock); + + return rcode; +} + +int kbase_vinstr_hwc_clear(struct kbase_vinstr_client *cli) +{ + struct kbase_vinstr_context *vinstr_ctx; + int rcode; + u64 unused; + + if (!cli) + return -EINVAL; + + vinstr_ctx = cli->vinstr_ctx; + KBASE_DEBUG_ASSERT(vinstr_ctx); + + mutex_lock(&vinstr_ctx->lock); + + if (vinstr_ctx->suspended) { + rcode = -EBUSY; + goto exit; + } + + rcode = kbasep_vinstr_collect_and_accumulate(vinstr_ctx, &unused); + if (rcode) + goto exit; + rcode = kbase_instr_hwcnt_clear(vinstr_ctx->kctx); + if (rcode) + goto exit; memset(cli->accum_buffer, 0, cli->dump_size); -out: - mutex_unlock(&ctx->lock); - return err; + + kbasep_vinstr_reprogram(vinstr_ctx); + +exit: + mutex_unlock(&vinstr_ctx->lock); + + return rcode; +} + +void kbase_vinstr_hwc_suspend(struct kbase_vinstr_context *vinstr_ctx) +{ + u64 unused; + + KBASE_DEBUG_ASSERT(vinstr_ctx); + + mutex_lock(&vinstr_ctx->lock); + kbasep_vinstr_collect_and_accumulate(vinstr_ctx, &unused); + vinstr_ctx->suspended = true; + vinstr_ctx->suspended_clients = vinstr_ctx->waiting_clients; + INIT_LIST_HEAD(&vinstr_ctx->waiting_clients); + mutex_unlock(&vinstr_ctx->lock); +} + +void kbase_vinstr_hwc_resume(struct kbase_vinstr_context *vinstr_ctx) +{ + KBASE_DEBUG_ASSERT(vinstr_ctx); + + mutex_lock(&vinstr_ctx->lock); + vinstr_ctx->suspended = false; + vinstr_ctx->waiting_clients = vinstr_ctx->suspended_clients; + vinstr_ctx->reprogram = true; + kbasep_vinstr_reprogram(vinstr_ctx); + atomic_set(&vinstr_ctx->request_pending, 1); + wake_up_all(&vinstr_ctx->waitq); + mutex_unlock(&vinstr_ctx->lock); } diff --git a/drivers/gpu/arm/midgard/mali_kbase_vinstr.h b/drivers/gpu/arm/midgard/mali_kbase_vinstr.h index 2e846e81a800..12340e5c647d 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_vinstr.h +++ b/drivers/gpu/arm/midgard/mali_kbase_vinstr.h @@ -18,85 +18,87 @@ #ifndef _KBASE_VINSTR_H_ #define _KBASE_VINSTR_H_ -enum { - SHADER_HWCNT_BM, - TILER_HWCNT_BM, - MMU_L2_HWCNT_BM, - JM_HWCNT_BM -}; +#include +#include + +/*****************************************************************************/ struct kbase_vinstr_context; struct kbase_vinstr_client; +/*****************************************************************************/ + /** - * kbase_vinstr_init() - Initialize the vinstr core - * @kbdev: Kbase device + * kbase_vinstr_init() - initialize the vinstr core + * @kbdev: kbase device * - * Return: A pointer to the vinstr context on success or NULL on failure + * Return: pointer to the vinstr context on success or NULL on failure */ struct kbase_vinstr_context *kbase_vinstr_init(struct kbase_device *kbdev); /** - * kbase_vinstr_term() - Terminate the vinstr core - * @ctx: Vinstr context + * kbase_vinstr_term() - terminate the vinstr core + * @vinstr_ctx: vinstr context */ -void kbase_vinstr_term(struct kbase_vinstr_context *ctx); +void kbase_vinstr_term(struct kbase_vinstr_context *vinstr_ctx); /** - * kbase_vinstr_attach_client - Attach a client to the vinstr core - * @ctx: Vinstr context - * @kernel: True if this client is a kernel-side client, false - * otherwise - * @dump_buffer: Client's dump buffer - * @bitmap: Bitmaps 
describing which counters should be enabled + * kbase_vinstr_hwcnt_reader_setup - configure hw counters reader + * @vinstr_ctx: vinstr context + * @setup: reader's configuration * - * Return: A vinstr opaque client handle or NULL or failure + * Return: zero on success */ -struct kbase_vinstr_client *kbase_vinstr_attach_client(struct kbase_vinstr_context *ctx, - bool kernel, - u64 dump_buffer, - u32 bitmap[4]); +int kbase_vinstr_hwcnt_reader_setup( + struct kbase_vinstr_context *vinstr_ctx, + struct kbase_uk_hwcnt_reader_setup *setup); /** - * kbase_vinstr_detach_client - Detach a client from the vinstr core - * @ctx: Vinstr context - * @cli: Pointer to vinstr client + * kbase_vinstr_legacy_hwc_setup - configure hw counters for dumping + * @vinstr_ctx: vinstr context + * @cli: pointer where to store pointer to new vinstr client structure + * @setup: hwc configuration + * + * Return: zero on success */ -void kbase_vinstr_detach_client(struct kbase_vinstr_context *ctx, - struct kbase_vinstr_client *cli); +int kbase_vinstr_legacy_hwc_setup( + struct kbase_vinstr_context *vinstr_ctx, + struct kbase_vinstr_client **cli, + struct kbase_uk_hwcnt_setup *setup); /** - * kbase_vinstr_dump_size - Get the size of the dump buffer - * @ctx: Vinstr context + * kbase_vinstr_hwc_dump - issue counter dump for vinstr client + * @cli: pointer to vinstr client + * @event_id: id of event that triggered hwcnt dump * - * This is only useful for kernel-side clients to know how much - * memory they need to allocate to receive the performance counter - * memory block. - * - * Return: Returns the size of the client side buffer + * Return: zero on success */ -size_t kbase_vinstr_dump_size(struct kbase_vinstr_context *ctx); +int kbase_vinstr_hwc_dump( + struct kbase_vinstr_client *cli, + enum base_hwcnt_reader_event event_id); /** - * kbase_vinstr_dump - Performs a synchronous hardware counter dump for a given - * kbase context - * @ctx: Vinstr context - * @cli: Pointer to vinstr client + * kbase_vinstr_hwc_clear - performs a reset of the hardware counters for + * a given kbase context + * @cli: pointer to vinstr client * - * Return: 0 on success + * Return: zero on success */ -int kbase_vinstr_dump(struct kbase_vinstr_context *ctx, - struct kbase_vinstr_client *cli); +int kbase_vinstr_hwc_clear(struct kbase_vinstr_client *cli); /** - * kbase_vinstr_clear - Performs a reset of the hardware counters for a given - * kbase context - * @ctx: Vinstr context - * @cli: Pointer to vinstr client - * - * Return: 0 on success + * kbase_vinstr_hwc_suspend - suspends hardware counter collection for + * a given kbase context + * @vinstr_ctx: vinstr context */ -int kbase_vinstr_clear(struct kbase_vinstr_context *ctx, - struct kbase_vinstr_client *cli); +void kbase_vinstr_hwc_suspend(struct kbase_vinstr_context *vinstr_ctx); + +/** + * kbase_vinstr_hwc_resume - resumes hardware counter collection for + * a given kbase context + * @vinstr_ctx: vinstr context + */ +void kbase_vinstr_hwc_resume(struct kbase_vinstr_context *vinstr_ctx); #endif /* _KBASE_VINSTR_H_ */ + diff --git a/drivers/gpu/arm/midgard/mali_midg_regmap.h b/drivers/gpu/arm/midgard/mali_midg_regmap.h index 180fea1dfd1f..c3def83dba24 100644 --- a/drivers/gpu/arm/midgard/mali_midg_regmap.h +++ b/drivers/gpu/arm/midgard/mali_midg_regmap.h @@ -169,6 +169,7 @@ #define L2_PWRACTIVE_HI 0x264 /* (RO) Level 2 cache active bitmap, high word */ +#define JM_CONFIG 0xF00 /* (RW) Job Manager configuration register (Implementation specific register) */ #define SHADER_CONFIG 0xF04 /* (RW) 
Shader core configuration settings (Implementation specific register) */ #define TILER_CONFIG 0xF08 /* (RW) Tiler core configuration settings (Implementation specific register) */ #define L2_MMU_CONFIG 0xF0C /* (RW) Configuration of the L2 cache and MMU (Implementation specific register) */ @@ -424,7 +425,10 @@ #define AS_MEMATTR_LPAE_OUTER_WA 0x8Dull /* Symbol for default MEMATTR to use */ + +/* Default is - HW implementation defined caching */ #define AS_MEMATTR_INDEX_DEFAULT 0 +#define AS_MEMATTR_INDEX_DEFAULT_ACE 3 /* HW implementation defined caching */ #define AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY 0 @@ -519,7 +523,7 @@ /* COHERENCY_* values*/ #define COHERENCY_ACE_LITE 0 #define COHERENCY_ACE 1 -#define COHERENCY_NONE 0xFFFF +#define COHERENCY_NONE 31 #define COHERENCY_FEATURE_BIT(x) (1 << (x)) /* End COHERENCY_* values */ diff --git a/drivers/gpu/arm/midgard/mali_timeline.h b/drivers/gpu/arm/midgard/mali_timeline.h index d8286a36009a..c3563723cb63 100644 --- a/drivers/gpu/arm/midgard/mali_timeline.h +++ b/drivers/gpu/arm/midgard/mali_timeline.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -386,6 +386,7 @@ TRACE_EVENT(mali_timeline_context_active, __entry->count) ); + #endif /* _MALI_TIMELINE_H */ #undef TRACE_INCLUDE_PATH diff --git a/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c b/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c index 6f8a72555e3d..0b7e39502b2b 100644 --- a/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c +++ b/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c @@ -91,17 +91,17 @@ struct kbase_pm_callback_conf pm_callbacks = { .power_off_callback = pm_callback_power_off, .power_suspend_callback = pm_callback_suspend, .power_resume_callback = pm_callback_resume, -#ifdef CONFIG_PM_RUNTIME +#ifdef KBASE_PM_RUNTIME .power_runtime_init_callback = kbase_device_runtime_init, .power_runtime_term_callback = kbase_device_runtime_disable, .power_runtime_on_callback = pm_callback_runtime_on, .power_runtime_off_callback = pm_callback_runtime_off, -#else /* CONFIG_PM_RUNTIME */ +#else /* KBASE_PM_RUNTIME */ .power_runtime_init_callback = NULL, .power_runtime_term_callback = NULL, .power_runtime_on_callback = NULL, .power_runtime_off_callback = NULL, -#endif /* CONFIG_PM_RUNTIME */ +#endif /* KBASE_PM_RUNTIME */ }; diff --git a/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_juno_soc.c b/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_juno_soc.c index 86f2bb50caf0..82669510e941 100644 --- a/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_juno_soc.c +++ b/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_juno_soc.c @@ -23,6 +23,7 @@ #include #include #include +#include /* Versatile Express (VE) Juno Development Platform */ @@ -135,18 +136,44 @@ struct devfreq_cooling_ops juno_model_ops = { #endif /* CONFIG_DEVFREQ_THERMAL */ +/* + * Juno Secure Mode integration + */ + +/* SMC Function Numbers */ +#define JUNO_SMC_SECURE_ENABLE_FUNC 0xff06 +#define JUNO_SMC_SECURE_DISABLE_FUNC 0xff07 + static int juno_secure_mode_enable(struct kbase_device *kbdev) { - /* TODO: enable secure mode */ - /*dev_err(kbdev->dev, "SWITCHING TO SECURE\n");*/ - return 0; /* all ok */ + u32 gpu_id = 
diff --git a/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_juno_soc.c b/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_juno_soc.c
index 86f2bb50caf0..82669510e941 100644
--- a/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_juno_soc.c
+++ b/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_juno_soc.c
@@ -23,6 +23,7 @@
 #include
 #include
 #include
+#include
 
 /* Versatile Express (VE) Juno Development Platform */
 
@@ -135,18 +136,44 @@ struct devfreq_cooling_ops juno_model_ops = {
 
 #endif /* CONFIG_DEVFREQ_THERMAL */
 
+/*
+ * Juno Secure Mode integration
+ */
+
+/* SMC Function Numbers */
+#define JUNO_SMC_SECURE_ENABLE_FUNC  0xff06
+#define JUNO_SMC_SECURE_DISABLE_FUNC 0xff07
+
 static int juno_secure_mode_enable(struct kbase_device *kbdev)
 {
-	/* TODO: enable secure mode */
-	/*dev_err(kbdev->dev, "SWITCHING TO SECURE\n");*/
-	return 0; /* all ok */
+	u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+
+	if (gpu_id == GPU_ID_MAKE(GPU_ID_PI_T62X, 0, 1, 0) &&
+			kbdev->reg_start == 0x2d000000) {
+		/* T62X in SoC detected */
+		u64 ret = kbase_invoke_smc(SMC_OEN_SIP,
+			JUNO_SMC_SECURE_ENABLE_FUNC, false,
+			0, 0, 0);
+		return ret;
+	}
+
+	return -EINVAL; /* Not supported */
 }
 
 static int juno_secure_mode_disable(struct kbase_device *kbdev)
 {
-	/* TODO: Turn off secure mode and reset GPU */
-	/*dev_err(kbdev->dev, "SWITCHING TO NON-SECURE\n");*/
-	return 0; /* all ok */
+	u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+
+	if (gpu_id == GPU_ID_MAKE(GPU_ID_PI_T62X, 0, 1, 0) &&
+			kbdev->reg_start == 0x2d000000) {
+		/* T62X in SoC detected */
+		u64 ret = kbase_invoke_smc(SMC_OEN_SIP,
+			JUNO_SMC_SECURE_DISABLE_FUNC, false,
+			0, 0, 0);
+		return ret;
+	}
+
+	return -EINVAL; /* Not supported */
 }
 
 struct kbase_secure_ops juno_secure_ops = {
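Reviewer aid, not part of the patch: the trailing context above stops at the opening brace of juno_secure_ops, so the initializer itself is not part of the diff. A plausible wiring, with the kbase_secure_ops field names assumed rather than taken from this patch, is:

/* Illustrative only -- the kbase_secure_ops field names are assumed. */
struct kbase_secure_ops juno_secure_ops = {
	.secure_mode_enable = juno_secure_mode_enable,
	.secure_mode_disable = juno_secure_mode_disable,
};

Both callbacks return the SMC result on the supported T62X-on-Juno configuration (GPU registers at 0x2d000000) and -EINVAL otherwise, so the core driver can tell when secure mode is unavailable.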
diff --git a/drivers/gpu/arm/midgard/sconscript b/drivers/gpu/arm/midgard/sconscript
index a3aecc4a1d54..7e41a438ac53 100644
--- a/drivers/gpu/arm/midgard/sconscript
+++ b/drivers/gpu/arm/midgard/sconscript
@@ -42,9 +42,12 @@ kbase_src = [Glob('#kernel/drivers/gpu/arm/midgard/*.c'),
 	     Glob('#kernel/drivers/gpu/arm/midgard/platform/%s/*.c' % (env['platform_config'])),
 	     Glob('#kernel/drivers/gpu/arm/midgard/*.h'),
 	     Glob('#kernel/drivers/gpu/arm/midgard/*.h'),
+	     Glob('#kernel/drivers/gpu/arm/midgard/Makefile',
+	     Glob('#kernel/drivers/gpu/arm/midgard/K*'))
 	    ]
 
-kbase_src += [Glob('#kernel/drivers/gpu/arm/midgard/internal/*/*.c')]
+kbase_src += [Glob('#kernel/drivers/gpu/arm/midgard/internal/*/*.c'),
+	Glob('#kernel/drivers/gpu/arm/midgard/internal/*/*/*.c')]
 
 if Glob('#kernel/drivers/gpu/arm/midgard/tests/internal/src/mock') and env['unit'] == '1':
 	kbase_src += [Glob('#kernel/drivers/gpu/arm/midgard/tests/internal/src/mock/*.c')]
@@ -70,8 +73,8 @@ if fake_platform_device==1:
 
 # Note: cleaning via the Linux kernel build system does not yet work
 if env.GetOption('clean') :
-	makeAction=Action("cd ${SOURCE.dir} && make clean", '$MAKECOMSTR')
-	cmd = env.Command(['$STATIC_LIB_PATH/mali_kbase.ko', '$STATIC_LIB_PATH/mali_platform_fake.ko'], kbase_src, [makeAction])
+	env.Execute(Action("make clean", '[clean] kbase'))
+	cmd = env.Command(['$STATIC_LIB_PATH/mali_kbase.ko', '$STATIC_LIB_PATH/mali_platform_fake.ko'], kbase_src, [])
 else:
 	if env['os'] == 'android':
 		env['android'] = 1
@@ -83,7 +86,25 @@ else:
 	else:
 		env['kernel_test'] = 0
 
-	makeAction=Action("cd ${SOURCE.dir} && make -j%d PLATFORM=${platform} MALI_ERROR_INJECT_ON=${error_inject} MALI_ANDROID=${android} MALI_KERNEL_TEST_API=${kernel_test} MALI_UNIT_TEST=${unit} MALI_RELEASE_NAME=\"${mali_release_name}\" MALI_MOCK_TEST=%s MALI_CUSTOMER_RELEASE=${release} MALI_INSTRUMENTATION_LEVEL=${instr} MALI_COVERAGE=${coverage} %s && cp mali_kbase.ko $STATIC_LIB_PATH/mali_kbase.ko" % (GetOption('num_jobs'), mock_test, env.kernel_get_config_defines(fake_platform_device)), '$MAKECOMSTR')
+	#Extract environment options, note the trailing spaces are important
+	env_options = \
+	"PLATFORM=${platform} " +\
+	"MALI_ERROR_INJECT_ON=${error_inject} " +\
+	"MALI_ANDROID=${android} " +\
+	"MALI_KERNEL_TEST_API=${kernel_test} " +\
+	"MALI_UNIT_TEST=${unit} " +\
+	"MALI_RELEASE_NAME=\"${mali_release_name}\" "+\
+	"MALI_MOCK_TEST=%s " % mock_test +\
+	"MALI_CUSTOMER_RELEASE=${release} " +\
+	"MALI_INSTRUMENTATION_LEVEL=${instr} " +\
+	"MALI_COVERAGE=${coverage} " +\
+	"MALI_BUS_LOG=${buslog} "
+
+	make_action_start = "cd ${SOURCE.dir} && make -j%d " % GetOption('num_jobs')
+	make_action_end = "%s && cp mali_kbase.ko $STATIC_LIB_PATH/mali_kbase.ko" % env.kernel_get_config_defines(fake_platform_device)
+	make_action = make_action_start + env_options + make_action_end
+
+	makeAction=Action(make_action, '$MAKECOMSTR')
 	cmd = env.Command('$STATIC_LIB_PATH/mali_kbase.ko', kbase_src, [makeAction])
 
 # Add a dependency on kds.ko.
@@ -104,20 +125,10 @@ if env['os'] != 'android':
 	if int(env['ump']) == 1:
 		env.Depends('$STATIC_LIB_PATH/mali_kbase.ko', '$STATIC_LIB_PATH/ump.ko')
 
-# need Module.symvers from kutf.ko build
-if env['unit'] == '1':
-	env.Depends('$STATIC_LIB_PATH/mali_kbase.ko', '$STATIC_LIB_PATH/kutf.ko')
+if Glob('internal/sconsfrag'):
+	execfile('internal/sconsfrag')
+	get_internal(env)
 
-# Until we fathom out how the invoke the Linux build system to clean, we can use Clean
-# to remove generated files.
-patterns = ['*.mod.c', '*.o', '*.ko', '*.a', '.*.cmd', 'modules.order', '.tmp_versions', 'Module.symvers']
-
-for p in patterns:
-	Clean(cmd, Glob('#kernel/drivers/gpu/arm/midgard/%s' % p))
-	Clean(cmd, Glob('#kernel/drivers/gpu/arm/midgard/config/%s' % p))
-	Clean(cmd, Glob('#kernel/drivers/gpu/arm/midgard/tests/internal/src/mock/%s' % p))
-	Clean(cmd, Glob('#kernel/drivers/gpu/arm/midgard/platform/%s/%s' % ((env['platform_config']), p) ))
-	Clean(cmd, Glob('#kernel/drivers/gpu/arm/midgard/internal/*/%s' % p))
 env.ProgTarget('kbase', cmd)
 
 env.AppendUnique(BASE=['cutils_linked_list'])
-- 
2.34.1