From: chenzhen
Date: Tue, 5 Apr 2016 08:53:38 +0000 (+0800)
Subject: MALI: rockchip: upgrade midgard DDK to r11p0-00rel0
X-Git-Tag: firefly_0821_release~2586
X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=3adc6eb16b2bdfb47fa83e8608ee11b25b6c2908;p=firefly-linux-kernel-4.4.55.git

MALI: rockchip: upgrade midgard DDK to r11p0-00rel0

Conflicts:

	drivers/gpu/arm/midgard/mali_kbase_core_linux.c
	drivers/gpu/arm/midgard/mali_kbase_jd.c

Change-Id: I9c910f2b08ffd2e9101fbe85958030ac7bca1642
Signed-off-by: chenzhen
---

diff --git a/Documentation/devicetree/bindings/arm/mali-midgard.txt b/Documentation/devicetree/bindings/arm/mali-midgard.txt
old mode 100755
new mode 100644
diff --git a/drivers/gpu/arm/midgard/Kbuild b/drivers/gpu/arm/midgard/Kbuild
index 84d7341ea3b3..fbfc618a526d 100644
--- a/drivers/gpu/arm/midgard/Kbuild
+++ b/drivers/gpu/arm/midgard/Kbuild
@@ -15,7 +15,7 @@
 # Driver version string which is returned to userspace via an ioctl
-MALI_RELEASE_NAME ?= "r9p0-05rel0"
+MALI_RELEASE_NAME ?= "r11p0-00rel0"
 
 # Paths required for build
 KBASE_PATH = $(src)
@@ -110,13 +110,11 @@ SRC := \
 	mali_kbase_debug_job_fault.c \
 	mali_kbase_smc.c \
 	mali_kbase_mem_pool.c \
-	mali_kbase_mem_pool_debugfs.c
+	mali_kbase_mem_pool_debugfs.c \
+	mali_kbase_tlstream.c
 
-ifeq ($(CONFIG_MALI_MIPE_ENABLED),y)
-	SRC += mali_kbase_tlstream.c
-	ifeq ($(MALI_UNIT_TEST),1)
-		SRC += mali_kbase_tlstream_test.c
-	endif
+ifeq ($(MALI_UNIT_TEST),1)
+	SRC += mali_kbase_tlstream_test.c
 endif
 
 ifeq ($(MALI_CUSTOMER_RELEASE),0)
diff --git a/drivers/gpu/arm/midgard/Kconfig b/drivers/gpu/arm/midgard/Kconfig
index 4f70e31d7b25..8a33841af5a5 100644
--- a/drivers/gpu/arm/midgard/Kconfig
+++ b/drivers/gpu/arm/midgard/Kconfig
@@ -16,6 +16,7 @@
 menuconfig MALI_MIDGARD
 	tristate "Mali Midgard series support"
+	select GPU_TRACEPOINTS if ANDROID
 	default n
 	help
 	  Enable this option to build support for a ARM Mali Midgard GPU.
@@ -23,38 +24,15 @@ menuconfig MALI_MIDGARD
 	  To compile this driver as a module, choose M here:
 	  this will generate a single module, called mali_kbase.
 
-choice
-	prompt "Streamline support"
-	depends on MALI_MIDGARD
-	default MALI_TIMELINE_DISABLED
-	help
-	  Select streamline support configuration.
-
-config MALI_TIMELINE_DISABLED
-	bool "Streamline support disabled"
-	help
-	  Disable support for ARM Streamline Performance Analyzer.
-
-	  Timeline support will not be included in
-	  kernel code.
-	  Debug stream will not be generated.
-
 config MALI_GATOR_SUPPORT
 	bool "Streamline support via Gator"
+	depends on MALI_MIDGARD
+	default n
 	help
 	  Adds diagnostic support for use with the ARM Streamline Performance Analyzer.
 	  You will need the Gator device driver already loaded before loading this driver when enabling Streamline debug support.
-
-config MALI_MIPE_ENABLED
-	bool "Streamline support via MIPE"
-	help
-	  Adds diagnostic support for use with the ARM Streamline Performance Analyzer.
-
-	  Stream will be transmitted directly to Mali GPU library.
-	  Compatible version of the library is required to read debug stream generated by kernel.
-
-endchoice
+	  This is a legacy interface required by older versions of Streamline.
 
 config MALI_MIDGARD_DVFS
 	bool "Enable legacy DVFS"
@@ -92,6 +70,19 @@ menuconfig MALI_EXPERT
 	  Enabling this option and modifying the default settings may produce a driver with performance or other limitations.
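[Illustrative aside, not part of the patch: the Kbuild and Kconfig hunks above are what allow the rest of this series to delete its CONFIG_MALI_MIPE_ENABLED preprocessor guards. Because mali_kbase_tlstream.c is now always listed in SRC, the timeline tracepoint calls no longer need to be conditional. A minimal sketch of that pattern, using kbase_tlstream_jd_gpu_soft_reset(), one of the calls unguarded later in this diff:

	/* r9p0-05rel0: timeline calls were compiled only when MIPE was selected */
	#if defined(CONFIG_MALI_MIPE_ENABLED)
		kbase_tlstream_jd_gpu_soft_reset(kbdev);
	#endif

	/* r11p0-00rel0: tlstream is always built, so the same call is made
	 * unconditionally and the guard is removed. */
	kbase_tlstream_jd_gpu_soft_reset(kbdev);
]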
+config MALI_PRFCNT_SET_SECONDARY + bool "Use secondary set of performance counters" + depends on MALI_MIDGARD && MALI_EXPERT + default n + help + Select this option to use secondary set of performance counters. Kernel + features that depend on an access to the primary set of counters may + become unavailable. Enabling this option will prevent power management + from working optimally and may cause instrumentation tools to return + bogus results. + + If unsure, say N. + config MALI_DEBUG_SHADER_SPLIT_FS bool "Allow mapping of shader cores via sysfs" depends on MALI_MIDGARD && MALI_EXPERT @@ -198,11 +189,4 @@ config MALI_SYSTEM_TRACE minimal overhead when not in use. Enable only if you know what you are doing. -config MALI_GPU_TRACEPOINTS - bool "Enable GPU tracepoints" - depends on MALI_MIDGARD && ANDROID - select GPU_TRACEPOINTS - help - Enables GPU tracepoints using Android trace event definitions. - source "drivers/gpu/arm/midgard/platform/Kconfig" diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c index 92a14fa1bae1..2f3c41a55807 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -19,4 +19,10 @@ #include #include +void kbase_cache_set_coherency_mode(struct kbase_device *kbdev, + u32 mode) +{ + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG)) + kbase_reg_write(kbdev, COHERENCY_ENABLE, mode, NULL); +} diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h index 42069fc88a1f..fe9869109a82 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -22,5 +22,13 @@ #include "mali_kbase.h" #include "mali_base_kernel.h" +/** + * kbase_cache_set_coherency_mode() - Sets the system coherency mode + * in the GPU. + * @kbdev: Device pointer + * @mode: Coherency mode. COHERENCY_ACE/ACE_LITE + */ +void kbase_cache_set_coherency_mode(struct kbase_device *kbdev, + u32 mode); #endif /* _KBASE_CACHE_POLICY_H_ */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c index 705b1ebfa87f..d410cd297889 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -86,8 +86,20 @@ void kbase_backend_gpuprops_get(struct kbase_device *kbdev, void kbase_backend_gpuprops_get_features(struct kbase_device *kbdev, struct kbase_gpuprops_regdump *regdump) { + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG)) { + /* Ensure we can access the GPU registers */ + kbase_pm_register_access_enable(kbdev); + + regdump->coherency_features = kbase_reg_read(kbdev, + GPU_CONTROL_REG(COHERENCY_FEATURES), NULL); + + /* We're done accessing the GPU registers for now. */ + kbase_pm_register_access_disable(kbdev); + } else { + /* Pre COHERENCY_FEATURES we only supported ACE_LITE */ regdump->coherency_features = COHERENCY_FEATURE_BIT(COHERENCY_NONE) | COHERENCY_FEATURE_BIT(COHERENCY_ACE_LITE); + } } diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c index 2c987071a77c..4e70b34ffaa6 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -22,6 +22,7 @@ */ #include +#include #include #include #include @@ -78,6 +79,7 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, u32 irq_mask; int ret; u64 shader_cores_needed; + u32 prfcnt_config; KBASE_DEBUG_ASSERT(NULL == kbdev->hwcnt.suspended_kctx); @@ -151,9 +153,22 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, kbase_pm_request_l2_caches(kbdev); /* Configure */ + prfcnt_config = kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT; +#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY + { + u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; + u32 product_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) + >> GPU_ID_VERSION_PRODUCT_ID_SHIFT; + int arch_v6 = GPU_ID_IS_NEW_FORMAT(product_id); + + if (arch_v6) + prfcnt_config |= 1 << PRFCNT_CONFIG_SETSELECT_SHIFT; + } +#endif + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), - (kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT) - | PRFCNT_CONFIG_MODE_OFF, kctx); + prfcnt_config | PRFCNT_CONFIG_MODE_OFF, kctx); + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), setup->dump_buffer & 0xFFFFFFFF, kctx); kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI), @@ -174,8 +189,7 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, setup->tiler_bm, kctx); kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), - (kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT) | - PRFCNT_CONFIG_MODE_MANUAL, kctx); + prfcnt_config | PRFCNT_CONFIG_MODE_MANUAL, kctx); /* If HW has PRLAM-8186 we can now re-enable the tiler HW counters dump */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c index 33d6aef0ec72..ddaae344951f 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -15,8 +15,6 @@ - - /* * Base kernel job manager APIs */ @@ -27,9 +25,7 @@ #if defined(CONFIG_MALI_GATOR_SUPPORT) #include #endif -#if defined(CONFIG_MALI_MIPE_ENABLED) #include -#endif #include #include #include @@ -86,6 +82,8 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, /* start MMU, medium priority, cache clean/flush on end, clean/flush on * start */ cfg = kctx->as_nr; + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION)) + cfg |= JS_CONFIG_ENABLE_FLUSH_REDUCTION; #ifndef CONFIG_MALI_COH_GPU cfg |= JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE; @@ -95,6 +93,10 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, cfg |= JS_CONFIG_START_MMU; cfg |= JS_CONFIG_THREAD_PRI(8); + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE) && + (katom->atom_flags & KBASE_KATOM_FLAG_SECURE)) + cfg |= JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK; + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) { if (!kbdev->hwaccess.backend.slot_rb[js].job_chain_flag) { @@ -111,6 +113,9 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_CONFIG_NEXT), cfg, kctx); + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION)) + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_FLUSH_ID_NEXT), + katom->flush_id, kctx); /* Write an approximate start timestamp. * It's approximate because there might be a job in the HEAD register. @@ -130,7 +135,6 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, GATOR_MAKE_EVENT(GATOR_JOB_SLOT_START, js), kctx, kbase_jd_atom_id(kctx, katom)); #endif -#if defined(CONFIG_MALI_MIPE_ENABLED) kbase_tlstream_tl_attrib_atom_config(katom, jc_head, katom->affinity, cfg); kbase_tlstream_tl_ret_ctx_lpu( @@ -142,7 +146,6 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, katom, &kbdev->gpu_props.props.raw_props.js_features[js], "ctx_nr,atom_nr"); -#endif #ifdef CONFIG_GPU_TRACEPOINTS if (!kbase_backend_nr_atoms_submitted(kbdev, js)) { /* If this is the only job on the slot, trace it as starting */ @@ -204,6 +207,27 @@ static void kbasep_job_slot_update_head_start_timestamp( } } +#if (defined(MALI_MIPE_ENABLED) && MALI_MIPE_ENABLED) || \ + !defined(MALI_MIPE_ENABLED) +/** + * kbasep_trace_tl_nret_atom_lpu - Call nret_atom_lpu timeline tracepoint + * @kbdev: kbase device + * @i: job slot + * + * Get kbase atom by calling kbase_gpu_inspect for given job slot. + * Then use obtained katom and name of slot associated with the given + * job slot number in tracepoint call to the instrumentation module + * informing that given atom is no longer executed on given lpu (job slot). 
+ */ +static void kbasep_trace_tl_nret_atom_lpu(struct kbase_device *kbdev, int i) +{ + struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, i, 0); + + kbase_tlstream_tl_nret_atom_lpu(katom, + &kbdev->gpu_props.props.raw_props.js_features[i]); +} +#endif + void kbase_job_done(struct kbase_device *kbdev, u32 done) { unsigned long flags; @@ -264,9 +288,15 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) GATOR_JOB_SLOT_SOFT_STOPPED, i), NULL, 0); #endif -#if defined(CONFIG_MALI_MIPE_ENABLED) + kbase_tlstream_aux_job_softstop(i); + +#if (defined(MALI_MIPE_ENABLED) && MALI_MIPE_ENABLED) || \ + !defined(MALI_MIPE_ENABLED) + kbasep_trace_tl_nret_atom_lpu( + kbdev, i); #endif + /* Soft-stopped job - read the value of * JS_TAIL so that the job chain can * be resumed */ @@ -437,12 +467,14 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) KBASE_EXPORT_TEST_API(kbase_job_done); static bool kbasep_soft_stop_allowed(struct kbase_device *kbdev, - u16 core_reqs) + struct kbase_jd_atom *katom) { bool soft_stops_allowed = true; - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408)) { - if ((core_reqs & BASE_JD_REQ_T) != 0) + if (kbase_jd_katom_is_secure(katom)) { + soft_stops_allowed = false; + } else if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408)) { + if ((katom->core_req & BASE_JD_REQ_T) != 0) soft_stops_allowed = false; } return soft_stops_allowed; @@ -486,12 +518,13 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, if (action == JS_COMMAND_SOFT_STOP) { bool soft_stop_allowed = kbasep_soft_stop_allowed(kbdev, - core_reqs); + target_katom); if (!soft_stop_allowed) { #ifdef CONFIG_MALI_DEBUG - dev_dbg(kbdev->dev, "Attempt made to soft-stop a job that cannot be soft-stopped. core_reqs = 0x%X", - (unsigned int)core_reqs); + dev_dbg(kbdev->dev, + "Attempt made to soft-stop a job that cannot be soft-stopped. core_reqs = 0x%X", + (unsigned int)core_reqs); #endif /* CONFIG_MALI_DEBUG */ return; } @@ -499,9 +532,51 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, /* We are about to issue a soft stop, so mark the atom as having * been soft stopped */ target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED; - } - if (action == JS_COMMAND_HARD_STOP) { + /* Mark the point where we issue the soft-stop command */ + kbase_tlstream_aux_issue_job_softstop(target_katom); + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) { + int i; + + for (i = 0; + i < kbase_backend_nr_atoms_submitted(kbdev, js); + i++) { + struct kbase_jd_atom *katom; + + katom = kbase_gpu_inspect(kbdev, js, i); + + KBASE_DEBUG_ASSERT(katom); + + /* For HW_ISSUE_8316, only 'bad' jobs attacking + * the system can cause this issue: normally, + * all memory should be allocated in multiples + * of 4 pages, and growable memory should be + * changed size in multiples of 4 pages. + * + * Whilst such 'bad' jobs can be cleared by a + * GPU reset, the locking up of a uTLB entry + * caused by the bad job could also stall other + * ASs, meaning that other ASs' jobs don't + * complete in the 'grace' period before the + * reset. We don't want to lose other ASs' jobs + * when they would normally complete fine, so we + * must 'poke' the MMU regularly to help other + * ASs complete */ + kbase_as_poking_timer_retain_atom( + kbdev, katom->kctx, katom); + } + } + + if (kbase_hw_has_feature( + kbdev, + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) { + action = (target_katom->atom_flags & + KBASE_KATOM_FLAGS_JOBCHAIN) ? 
+ JS_COMMAND_SOFT_STOP_1 : + JS_COMMAND_SOFT_STOP_0; + } + } else if (action == JS_COMMAND_HARD_STOP) { bool hard_stop_allowed = kbasep_hard_stop_allowed(kbdev, core_reqs); @@ -525,55 +600,21 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, * hard-stop fails, so it is safe to just return and * ignore the hard-stop request. */ - dev_warn(kbdev->dev, "Attempt made to hard-stop a job that cannot be hard-stopped. core_reqs = 0x%X", - (unsigned int)core_reqs); + dev_warn(kbdev->dev, + "Attempt made to hard-stop a job that cannot be hard-stopped. core_reqs = 0x%X", + (unsigned int)core_reqs); return; } target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_HARD_STOPPED; - } - - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316) && - action == JS_COMMAND_SOFT_STOP) { - int i; - - for (i = 0; i < kbase_backend_nr_atoms_submitted(kbdev, js); - i++) { - struct kbase_jd_atom *katom; - katom = kbase_gpu_inspect(kbdev, js, i); - - KBASE_DEBUG_ASSERT(katom); - - /* For HW_ISSUE_8316, only 'bad' jobs attacking the - * system can cause this issue: normally, all memory - * should be allocated in multiples of 4 pages, and - * growable memory should be changed size in multiples - * of 4 pages. - * - * Whilst such 'bad' jobs can be cleared by a GPU reset, - * the locking up of a uTLB entry caused by the bad job - * could also stall other ASs, meaning that other ASs' - * jobs don't complete in the 'grace' period before the - * reset. We don't want to lose other ASs' jobs when - * they would normally complete fine, so we must 'poke' - * the MMU regularly to help other ASs complete */ - kbase_as_poking_timer_retain_atom(kbdev, katom->kctx, - katom); - } - } - - if (kbase_hw_has_feature(kbdev, + if (kbase_hw_has_feature( + kbdev, BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) { - if (action == JS_COMMAND_SOFT_STOP) - action = (target_katom->atom_flags & - KBASE_KATOM_FLAGS_JOBCHAIN) ? - JS_COMMAND_SOFT_STOP_1 : - JS_COMMAND_SOFT_STOP_0; - else action = (target_katom->atom_flags & - KBASE_KATOM_FLAGS_JOBCHAIN) ? - JS_COMMAND_HARD_STOP_1 : - JS_COMMAND_HARD_STOP_0; + KBASE_KATOM_FLAGS_JOBCHAIN) ? 
+ JS_COMMAND_HARD_STOP_1 : + JS_COMMAND_HARD_STOP_0; + } } kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND), action, kctx); @@ -825,6 +866,13 @@ u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev) { u32 flush_id = 0; + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION)) { + mutex_lock(&kbdev->pm.lock); + if (kbdev->pm.backend.gpu_powered) + flush_id = kbase_reg_read(kbdev, + GPU_CONTROL_REG(LATEST_FLUSH), NULL); + mutex_unlock(&kbdev->pm.lock); + } return flush_id; } @@ -1044,7 +1092,7 @@ void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action, /* For soft-stop, don't enter if soft-stop not allowed, or isn't * causing disjoint */ if (hw_action == JS_COMMAND_SOFT_STOP && - !(kbasep_soft_stop_allowed(kbdev, core_reqs) && + !(kbasep_soft_stop_allowed(kbdev, target_katom) && (action & JS_COMMAND_SW_CAUSES_DISJOINT))) return; @@ -1283,10 +1331,21 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) /* Restore the HW counters setup */ if (restore_hwc) { struct kbase_context *kctx = kbdev->hwcnt.kctx; + u32 prfcnt_config = kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT; + +#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY + u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; + u32 product_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) + >> GPU_ID_VERSION_PRODUCT_ID_SHIFT; + int arch_v6 = GPU_ID_IS_NEW_FORMAT(product_id); + + if (arch_v6) + prfcnt_config |= 1 << PRFCNT_CONFIG_SETSELECT_SHIFT; +#endif kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), - (kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT) | - PRFCNT_CONFIG_MODE_OFF, kctx); + prfcnt_config | PRFCNT_CONFIG_MODE_OFF, kctx); + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), hwcnt_setup.dump_buffer & 0xFFFFFFFF, kctx); kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI), @@ -1308,8 +1367,8 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) hwcnt_setup.tiler_bm, kctx); kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), - (kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT) | - PRFCNT_CONFIG_MODE_MANUAL, kctx); + prfcnt_config | PRFCNT_CONFIG_MODE_MANUAL, + kctx); /* If HW has PRLAM-8186 we can now re-enable the tiler HW * counters dump */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c index c0168c74f815..af6cddcdb043 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -739,6 +739,26 @@ void kbase_gpu_slot_update(struct kbase_device *kbdev) /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE: + + if (katom[idx]->will_fail_event_code) { + kbase_gpu_mark_atom_for_return(kbdev, + katom[idx]); + /* Set EVENT_DONE so this atom will be + completed, not unpulled. */ + katom[idx]->event_code = + BASE_JD_EVENT_DONE; + /* Only return if head atom or previous + * atom already removed - as atoms must + * be returned in order. 
*/ + if (idx == 0 || katom[0]->gpu_rb_state == + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { + kbase_gpu_dequeue_atom(kbdev, js, NULL); + kbase_jm_return_atom_to_js(kbdev, katom[idx]); + } + break; + } + + cores_ready = kbasep_js_job_check_ref_cores(kbdev, js, katom[idx]); @@ -770,6 +790,13 @@ void kbase_gpu_slot_update(struct kbase_device *kbdev) /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ case KBASE_ATOM_GPU_RB_WAITING_SECURE_MODE: + /* Only submit if head atom or previous atom + * already submitted */ + if (idx == 1 && + (katom[0]->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED && + katom[0]->gpu_rb_state != KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB)) + break; + if (kbase_gpu_in_secure_mode(kbdev) != kbase_jd_katom_is_secure(katom[idx])) { int err = 0; @@ -813,11 +840,6 @@ void kbase_gpu_slot_update(struct kbase_device *kbdev) kbase_jd_katom_is_secure(katom[idx]) == kbase_gpu_in_secure_mode(kbdev), "Secure mode of atom (%d) doesn't match secure mode of GPU (%d)", kbase_jd_katom_is_secure(katom[idx]), kbase_gpu_in_secure_mode(kbdev)); - KBASE_DEBUG_ASSERT_MSG( - (kbase_jd_katom_is_secure(katom[idx]) && js == 0) || - !kbase_jd_katom_is_secure(katom[idx]), - "Secure atom on JS%d not supported", js); - katom[idx]->gpu_rb_state = KBASE_ATOM_GPU_RB_READY; diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c index 4fd13e2de63e..c6c7b89712da 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -20,9 +20,7 @@ #include #include #include -#if defined(CONFIG_MALI_MIPE_ENABLED) #include -#endif #include #include @@ -165,6 +163,15 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) KBASE_MMU_FAULT_TYPE_BUS : KBASE_MMU_FAULT_TYPE_PAGE; +#ifdef CONFIG_MALI_GPU_MMU_AARCH64 + as->fault_extra_addr = kbase_reg_read(kbdev, + MMU_AS_REG(as_no, AS_FAULTEXTRA_HI), + kctx); + as->fault_extra_addr <<= 32; + as->fault_extra_addr |= kbase_reg_read(kbdev, + MMU_AS_REG(as_no, AS_FAULTEXTRA_LO), + kctx); +#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */ if (kbase_as_has_bus_fault(as)) { /* Mark bus fault as handled. 
@@ -203,10 +210,36 @@ void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as, struct kbase_context *kctx) { struct kbase_mmu_setup *current_setup = &as->current_setup; -#ifdef CONFIG_MALI_MIPE_ENABLED u32 transcfg = 0; -#endif +#ifdef CONFIG_MALI_GPU_MMU_AARCH64 + transcfg = current_setup->transcfg & 0xFFFFFFFFUL; + + /* Set flag AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK */ + /* Clear PTW_MEMATTR bits */ + transcfg &= ~AS_TRANSCFG_PTW_MEMATTR_MASK; + /* Enable correct PTW_MEMATTR bits */ + transcfg |= AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK; + + if (kbdev->system_coherency == COHERENCY_ACE) { + /* Set flag AS_TRANSCFG_PTW_SH_OS (outer shareable) */ + /* Clear PTW_SH bits */ + transcfg = (transcfg & ~AS_TRANSCFG_PTW_SH_MASK); + /* Enable correct PTW_SH bits */ + transcfg = (transcfg | AS_TRANSCFG_PTW_SH_OS); + } + + kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_LO), + transcfg, kctx); + kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_HI), + (current_setup->transcfg >> 32) & 0xFFFFFFFFUL, kctx); + +#else /* CONFIG_MALI_GPU_MMU_AARCH64 */ + + if (kbdev->system_coherency == COHERENCY_ACE) + current_setup->transtab |= AS_TRANSTAB_LPAE_SHARE_OUTER; + +#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */ kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_LO), current_setup->transtab & 0xFFFFFFFFUL, kctx); @@ -218,12 +251,10 @@ void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as, kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_HI), (current_setup->memattr >> 32) & 0xFFFFFFFFUL, kctx); -#if defined(CONFIG_MALI_MIPE_ENABLED) kbase_tlstream_tl_attrib_as_config(as, current_setup->transtab, current_setup->memattr, transcfg); -#endif write_cmd(kbdev, as->number, AS_COMMAND_UPDATE, kctx); } diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c index 947a7ed285d6..5805efea1125 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h index 60e40915869c..6eb1b1c2bf0d 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -219,9 +219,6 @@ union kbase_pm_ca_policy_data { * &struct kbase_pm_callback_conf * @callback_power_runtime_idle: Optional callback when the GPU may be idle. See * &struct kbase_pm_callback_conf - * @callback_cci_snoop_ctrl: Callback when the GPU L2 power may transition. 
- * If enable is set then snoops should be enabled - * otherwise snoops should be disabled * * Note: * During an IRQ, @ca_current_policy or @pm_current_policy can be NULL when the @@ -288,7 +285,6 @@ struct kbase_pm_backend_data { int (*callback_power_runtime_on)(struct kbase_device *kbdev); void (*callback_power_runtime_off)(struct kbase_device *kbdev); int (*callback_power_runtime_idle)(struct kbase_device *kbdev); - }; diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c index 73b6cff641f6..2c27ad93f2be 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -27,9 +27,7 @@ #if defined(CONFIG_MALI_GATOR_SUPPORT) #include #endif -#if defined(CONFIG_MALI_MIPE_ENABLED) #include -#endif #include #include #include @@ -99,6 +97,39 @@ static u32 core_type_to_reg(enum kbase_pm_core_type core_type, return (u32)core_type + (u32)action; } +#ifdef CONFIG_ARM64 +static void mali_cci_flush_l2(struct kbase_device *kbdev) +{ + const u32 mask = CLEAN_CACHES_COMPLETED | RESET_COMPLETED; + u32 loops = KBASE_CLEAN_CACHE_MAX_LOOPS; + u32 raw; + + /* + * Note that we don't take the cache flush mutex here since + * we expect to be the last user of the L2, all other L2 users + * would have dropped their references, to initiate L2 power + * down, L2 power down being the only valid place for this + * to be called from. + */ + + kbase_reg_write(kbdev, + GPU_CONTROL_REG(GPU_COMMAND), + GPU_COMMAND_CLEAN_INV_CACHES, + NULL); + + raw = kbase_reg_read(kbdev, + GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), + NULL); + + /* Wait for cache flush to complete before continuing, exit on + * gpu resets or loop expiry. */ + while (((raw & mask) == 0) && --loops) { + raw = kbase_reg_read(kbdev, + GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), + NULL); + } +} +#endif /** * kbase_pm_invoke - Invokes an action on a core set @@ -134,7 +165,7 @@ static void kbase_pm_invoke(struct kbase_device *kbdev, kbase_trace_mali_pm_power_off(core_type, cores); } #endif -#if defined(CONFIG_MALI_MIPE_ENABLED) + if (cores) { u64 state = kbase_pm_get_state(kbdev, core_type, ACTION_READY); @@ -144,7 +175,7 @@ static void kbase_pm_invoke(struct kbase_device *kbdev, state &= ~cores; kbase_tlstream_aux_pm_state(core_type, state); } -#endif + /* Tracing */ if (cores) { if (action == ACTION_PWRON) @@ -177,6 +208,8 @@ static void kbase_pm_invoke(struct kbase_device *kbdev, case KBASE_PM_CORE_L2: KBASE_TRACE_ADD(kbdev, PM_PWROFF_L2, NULL, NULL, 0u, lo); + /* disable snoops before L2 is turned off */ + kbase_pm_cache_snoop_disable(kbdev); break; default: break; @@ -404,6 +437,12 @@ static bool kbase_pm_transition_core_type(struct kbase_device *kbdev, /* All are ready, none will be turned off, and none are * transitioning */ kbdev->pm.backend.l2_powered = 1; + /* + * Ensure snoops are enabled after L2 is powered up, + * note that kbase keeps track of the snoop state, so + * safe to repeatedly call. 
+ */ + kbase_pm_cache_snoop_enable(kbdev); if (kbdev->l2_users_count > 0) { /* Notify any registered l2 cache users * (optimized out when no users waiting) */ @@ -665,7 +704,7 @@ MOCKABLE(kbase_pm_check_transitions_nolock) (struct kbase_device *kbdev) kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_TILER)); #endif -#if defined(CONFIG_MALI_MIPE_ENABLED) + kbase_tlstream_aux_pm_state( KBASE_PM_CORE_L2, kbase_pm_get_ready_cores( @@ -679,7 +718,6 @@ MOCKABLE(kbase_pm_check_transitions_nolock) (struct kbase_device *kbdev) kbase_pm_get_ready_cores( kbdev, KBASE_PM_CORE_TILER)); -#endif KBASE_TRACE_ADD(kbdev, PM_DESIRED_REACHED, NULL, NULL, kbdev->pm.backend.gpu_in_desired_state, @@ -976,6 +1014,7 @@ bool kbase_pm_clock_off(struct kbase_device *kbdev, bool is_suspend) return false; } + kbase_pm_cache_snoop_disable(kbdev); /* The GPU power may be turned off from this point */ kbdev->pm.backend.gpu_powered = false; @@ -1058,9 +1097,11 @@ static void kbase_pm_hw_issues_detect(struct kbase_device *kbdev) if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10327)) kbdev->hw_quirks_sc |= SC_SDC_DISABLE_OQ_DISCARD; +#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY /* Enable alternative hardware counter selection if configured. */ - if (DEFAULT_ALTERNATIVE_HWC) + if (!GPU_ID_IS_NEW_FORMAT(prod_id)) kbdev->hw_quirks_sc |= SC_ALT_COUNTERS; +#endif /* Needed due to MIDBASE-2795. ENABLE_TEXGRD_FLAGS. See PRLAM-10797. */ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10797)) @@ -1094,6 +1135,12 @@ static void kbase_pm_hw_issues_detect(struct kbase_device *kbdev) kbdev->hw_quirks_mmu |= (DEFAULT_AWID_LIMIT & 0x3) << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT; + if (kbdev->system_coherency == COHERENCY_ACE) { + /* Allow memory configuration disparity to be ignored, we + * optimize the use of shared memory and thus we expect + * some disparity in the memory configuration */ + kbdev->hw_quirks_mmu |= L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY; + } /* Only for T86x/T88x-based products after r2p0 */ if (prod_id >= 0x860 && prod_id <= 0x880 && major >= 2) { @@ -1158,6 +1205,33 @@ static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev) } +void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev) +{ + if ((kbdev->system_coherency == COHERENCY_ACE) && + !kbdev->cci_snoop_enabled) { +#ifdef CONFIG_ARM64 + if (kbdev->snoop_enable_smc != 0) + kbase_invoke_smc_fid(kbdev->snoop_enable_smc, 0, 0, 0); +#endif /* CONFIG_ARM64 */ + dev_dbg(kbdev->dev, "MALI - CCI Snoops - Enabled\n"); + kbdev->cci_snoop_enabled = true; + } +} + +void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev) +{ + if ((kbdev->system_coherency == COHERENCY_ACE) && + kbdev->cci_snoop_enabled) { +#ifdef CONFIG_ARM64 + if (kbdev->snoop_disable_smc != 0) { + mali_cci_flush_l2(kbdev); + kbase_invoke_smc_fid(kbdev->snoop_disable_smc, 0, 0, 0); + } +#endif /* CONFIG_ARM64 */ + dev_dbg(kbdev->dev, "MALI - CCI Snoops Disabled\n"); + kbdev->cci_snoop_enabled = false; + } +} int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) { @@ -1182,6 +1256,8 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) /* Ensure interrupts are off to begin with, this also clears any * outstanding interrupts */ kbase_pm_disable_interrupts(kbdev); + /* Ensure cache snoops are disabled before reset. 
*/ + kbase_pm_cache_snoop_disable(kbdev); /* Prepare for the soft-reset */ kbdev->pm.backend.reset_done = false; @@ -1200,9 +1276,9 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) /* Soft reset the GPU */ KBASE_TRACE_ADD(kbdev, CORE_GPU_SOFT_RESET, NULL, NULL, 0u, 0); -#if defined(CONFIG_MALI_MIPE_ENABLED) + kbase_tlstream_jd_gpu_soft_reset(kbdev); -#endif + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_SOFT_RESET, NULL); @@ -1282,6 +1358,15 @@ out: kbase_pm_hw_issues_apply(kbdev); + kbase_cache_set_coherency_mode(kbdev, kbdev->system_coherency); + + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) { + u32 gpu_status = kbase_reg_read(kbdev, + GPU_CONTROL_REG(GPU_STATUS), NULL); + + kbdev->secure_mode = (gpu_status & + GPU_STATUS_PROTECTED_MODE_ACTIVE) != 0; + } /* If cycle counter was in use re-enable it, enable_irqs will only be * false when called from kbase_pm_powerup */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h index 943eda567cb5..aa51b8cdef8f 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -501,5 +501,23 @@ void kbase_pm_power_changed(struct kbase_device *kbdev); void kbase_pm_metrics_update(struct kbase_device *kbdev, ktime_t *now); +/** + * kbase_pm_cache_snoop_enable - Allow CPU snoops on the GPU + * If the GPU does not have coherency this is a no-op + * @kbdev: Device pointer + * + * This function should be called after L2 power up. + */ + +void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev); + +/** + * kbase_pm_cache_snoop_disable - Prevent CPU snoops on the GPU + * If the GPU does not have coherency this is a no-op + * @kbdev: Device pointer + * + * This function should be called before L2 power off. + */ +void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev); #endif /* _KBASE_BACKEND_PM_INTERNAL_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h b/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h index 2102f43348cb..29c78c2ba6d8 100644 --- a/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h +++ b/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -46,6 +46,9 @@ enum base_hw_feature { BASE_HW_FEATURE_BRNDOUT_KILL, BASE_HW_FEATURE_WARPING, BASE_HW_FEATURE_V4, + BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_PROTECTED_MODE, + BASE_HW_FEATURE_COHERENCY_REG, BASE_HW_FEATURE_END }; @@ -159,5 +162,28 @@ static const enum base_hw_feature base_hw_features_t82x[] = { BASE_HW_FEATURE_END }; +static const enum base_hw_feature base_hw_features_tMIx[] = { + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, + BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_WARPING, + BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, + BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, + BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, + BASE_HW_FEATURE_BRNDOUT_CC, + BASE_HW_FEATURE_BRNDOUT_KILL, + BASE_HW_FEATURE_LD_ST_LEA_TEX, + BASE_HW_FEATURE_LD_ST_TILEBUFFER, + BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, + BASE_HW_FEATURE_MRT, + BASE_HW_FEATURE_MSAA_16X, + BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, + BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, + BASE_HW_FEATURE_T7XX_PAIRING_RULES, + BASE_HW_FEATURE_TEST4_DATUM_MODE, + BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_PROTECTED_MODE, + BASE_HW_FEATURE_COHERENCY_REG, + BASE_HW_FEATURE_END +}; #endif /* _BASE_HWCONFIG_FEATURES_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h b/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h index 66c2dc76fdb3..e111b0777f1f 100644 --- a/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h +++ b/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -100,7 +100,14 @@ enum base_hw_issue { BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3960, + BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_TMIX_7891, + BASE_HW_ISSUE_TMIX_7940, + BASE_HW_ISSUE_TMIX_8042, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TMIX_8138, + BASE_HW_ISSUE_TMIX_8206, GPUCORE_1619, BASE_HW_ISSUE_END }; @@ -167,6 +174,7 @@ static const enum base_hw_issue base_hw_issues_t60x_r0p0_15dev0[] = { BASE_HW_ISSUE_11035, BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1909, + BASE_HW_ISSUE_T76X_3964, GPUCORE_1619, BASE_HW_ISSUE_END }; @@ -204,6 +212,7 @@ static const enum base_hw_issue base_hw_issues_t60x_r0p0_eac[] = { BASE_HW_ISSUE_11035, BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1909, + BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_END }; @@ -238,6 +247,7 @@ static const enum base_hw_issue base_hw_issues_t60x_r0p1[] = { BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, + BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_END }; @@ -290,6 +300,7 @@ static const enum base_hw_issue base_hw_issues_t62x_r1p0[] = { BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, + BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_END }; @@ -331,7 +342,9 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p0[] = { BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3960, + BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -354,7 +367,9 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p1[] = { BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3960, + BASE_HW_ISSUE_T76X_3964, 
BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -375,7 +390,9 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p1_50rel0[] = { BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3960, + BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -398,7 +415,9 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p2[] = { BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3960, + BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -419,7 +438,9 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p3[] = { BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3960, + BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -437,7 +458,9 @@ static const enum base_hw_issue base_hw_issues_t76x_r1p0[] = { BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3960, + BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -454,6 +477,7 @@ static const enum base_hw_issue base_hw_issues_t72x_r0p0[] = { BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, + BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_END }; @@ -470,6 +494,7 @@ static const enum base_hw_issue base_hw_issues_t72x_r1p0[] = { BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, + BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_END }; @@ -486,6 +511,7 @@ static const enum base_hw_issue base_hw_issues_t72x_r1p1[] = { BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, + BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_END }; @@ -499,6 +525,7 @@ static const enum base_hw_issue base_hw_issues_model_t72x[] = { BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, + BASE_HW_ISSUE_T76X_3964, GPUCORE_1619, BASE_HW_ISSUE_END }; @@ -515,6 +542,8 @@ static const enum base_hw_issue base_hw_issues_model_t76x[] = { BASE_HW_ISSUE_T76X_3086, BASE_HW_ISSUE_T76X_3700, BASE_HW_ISSUE_T76X_3793, + BASE_HW_ISSUE_T76X_3964, + BASE_HW_ISSUE_TMIX_7891, GPUCORE_1619, BASE_HW_ISSUE_END }; @@ -532,6 +561,7 @@ static const enum base_hw_issue base_hw_issues_model_t60x[] = { BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, + BASE_HW_ISSUE_T76X_3964, GPUCORE_1619, BASE_HW_ISSUE_END }; @@ -549,6 +579,7 @@ static const enum base_hw_issue base_hw_issues_model_t62x[] = { BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, + BASE_HW_ISSUE_T76X_3964, GPUCORE_1619, BASE_HW_ISSUE_END }; @@ -566,7 +597,9 @@ static const enum base_hw_issue base_hw_issues_tFRx_r0p1[] = { BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3960, + BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -582,7 +615,9 @@ static const enum base_hw_issue base_hw_issues_tFRx_r0p2[] = { BASE_HW_ISSUE_T76X_3700, BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, + BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -598,6 +633,7 @@ static const enum base_hw_issue base_hw_issues_tFRx_r1p0[] = { BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -613,6 +649,7 @@ static const enum base_hw_issue base_hw_issues_tFRx_r2p0[] = { BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -624,6 
+661,8 @@ static const enum base_hw_issue base_hw_issues_model_tFRx[] = { BASE_HW_ISSUE_T76X_3086, BASE_HW_ISSUE_T76X_3700, BASE_HW_ISSUE_T76X_3793, + BASE_HW_ISSUE_T76X_3964, + BASE_HW_ISSUE_TMIX_7891, GPUCORE_1619, BASE_HW_ISSUE_END }; @@ -640,7 +679,9 @@ static const enum base_hw_issue base_hw_issues_t86x_r0p2[] = { BASE_HW_ISSUE_T76X_3700, BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, + BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -656,6 +697,7 @@ static const enum base_hw_issue base_hw_issues_t86x_r1p0[] = { BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -671,6 +713,7 @@ static const enum base_hw_issue base_hw_issues_t86x_r2p0[] = { BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -682,6 +725,7 @@ static const enum base_hw_issue base_hw_issues_model_t86x[] = { BASE_HW_ISSUE_T76X_3086, BASE_HW_ISSUE_T76X_3700, BASE_HW_ISSUE_T76X_3793, + BASE_HW_ISSUE_TMIX_7891, GPUCORE_1619, BASE_HW_ISSUE_END }; @@ -699,6 +743,7 @@ static const enum base_hw_issue base_hw_issues_t83x_r0p1[] = { BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3960, + BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -714,6 +759,7 @@ static const enum base_hw_issue base_hw_issues_t83x_r1p0[] = { BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3960, + BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -726,6 +772,8 @@ static const enum base_hw_issue base_hw_issues_model_t83x[] = { BASE_HW_ISSUE_T76X_3086, BASE_HW_ISSUE_T76X_3700, BASE_HW_ISSUE_T76X_3793, + BASE_HW_ISSUE_T76X_3964, + BASE_HW_ISSUE_TMIX_7891, GPUCORE_1619, BASE_HW_ISSUE_END }; @@ -743,6 +791,8 @@ static const enum base_hw_issue base_hw_issues_t82x_r0p0[] = { BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3960, + BASE_HW_ISSUE_T76X_3964, + BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -759,6 +809,7 @@ static const enum base_hw_issue base_hw_issues_t82x_r0p1[] = { BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3960, + BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -774,6 +825,7 @@ static const enum base_hw_issue base_hw_issues_t82x_r1p0[] = { BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3960, + BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -786,10 +838,50 @@ static const enum base_hw_issue base_hw_issues_model_t82x[] = { BASE_HW_ISSUE_T76X_3086, BASE_HW_ISSUE_T76X_3700, BASE_HW_ISSUE_T76X_3793, + BASE_HW_ISSUE_TMIX_7891, GPUCORE_1619, BASE_HW_ISSUE_END }; +static const enum base_hw_issue base_hw_issues_tMIx_r0p0_05dev0[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_10682, + BASE_HW_ISSUE_10821, + BASE_HW_ISSUE_T76X_3700, + BASE_HW_ISSUE_T76X_3953, + BASE_HW_ISSUE_TMIX_7891, + BASE_HW_ISSUE_TMIX_8042, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TMIX_8138, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tMIx_r0p0[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_10682, + BASE_HW_ISSUE_10821, + BASE_HW_ISSUE_T76X_3700, + BASE_HW_ISSUE_TMIX_7891, + BASE_HW_ISSUE_TMIX_7940, + BASE_HW_ISSUE_TMIX_8042, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TMIX_8138, + BASE_HW_ISSUE_TMIX_8206, + BASE_HW_ISSUE_END +}; +static const enum base_hw_issue base_hw_issues_model_tMIx[] = { + BASE_HW_ISSUE_5736, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_T76X_3700, + BASE_HW_ISSUE_TMIX_7891, + BASE_HW_ISSUE_TMIX_7940, + BASE_HW_ISSUE_TMIX_8042, + 
BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TMIX_8138, + BASE_HW_ISSUE_TMIX_8206, + GPUCORE_1619, + BASE_HW_ISSUE_END +}; #endif /* _BASE_HWCONFIG_ISSUES_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_base_kernel.h b/drivers/gpu/arm/midgard/mali_base_kernel.h index 212100dbff63..c59e8b2c04ca 100644 --- a/drivers/gpu/arm/midgard/mali_base_kernel.h +++ b/drivers/gpu/arm/midgard/mali_base_kernel.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -42,7 +42,11 @@ /* Support UK9 IOCTLS */ #define BASE_LEGACY_UK9_SUPPORT 1 -typedef u64 base_mem_handle; +typedef struct base_mem_handle { + struct { + u64 handle; + } basep; +} base_mem_handle; #include "mali_base_mem_priv.h" #include "mali_kbase_profiling_gator_api.h" @@ -62,6 +66,10 @@ typedef u64 base_mem_handle; #define BASEP_JD_SEM_MASK_IN_WORD(x) (1 << ((x) & (BASEP_JD_SEM_PER_WORD - 1))) #define BASEP_JD_SEM_ARRAY_SIZE BASEP_JD_SEM_WORD_NR(BASE_JD_ATOM_COUNT) +/* Set/reset values for a software event */ +#define BASE_JD_SOFT_EVENT_SET ((unsigned char)1) +#define BASE_JD_SOFT_EVENT_RESET ((unsigned char)0) + #define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 3 #define BASE_MAX_COHERENT_GROUPS 16 @@ -162,7 +170,9 @@ enum { /* IN */ BASE_MEM_COHERENT_SYSTEM_REQUIRED = (1U << 15), /**< Page coherence Outer shareable, required. */ - BASE_MEM_SECURE = (1U << 16) /**< Secure memory */ + BASE_MEM_SECURE = (1U << 16), /**< Secure memory */ + BASE_MEM_DONT_NEED = (1U << 17), /**< Not needed physical + memory */ }; @@ -171,7 +181,7 @@ enum { * * Must be kept in sync with the ::base_mem_alloc_flags flags */ -#define BASE_MEM_FLAGS_NR_BITS 17 +#define BASE_MEM_FLAGS_NR_BITS 18 /** * A mask for all output bits, excluding IN/OUT bits. @@ -184,6 +194,13 @@ enum { #define BASE_MEM_FLAGS_INPUT_MASK \ (((1 << BASE_MEM_FLAGS_NR_BITS) - 1) & ~BASE_MEM_FLAGS_OUTPUT_MASK) +/** + * A mask for all the flags which are modifiable via the base_mem_set_flags + * interface. + */ +#define BASE_MEM_FLAGS_MODIFIABLE \ + (BASE_MEM_DONT_NEED | BASE_MEM_COHERENT_SYSTEM | \ + BASE_MEM_COHERENT_LOCAL) /** * enum base_mem_import_type - Memory types supported by @a base_mem_import @@ -224,14 +241,32 @@ struct base_mem_import_user_buffer { }; /** - * @brief Invalid memory handle type. - * Return value from functions returning @a base_mem_handle on error. + * @brief Invalid memory handle. + * + * Return value from functions returning @ref base_mem_handle on error. + * + * @warning @ref base_mem_handle_new_invalid must be used instead of this macro + * in C++ code or other situations where compound literals cannot be used. */ -#define BASE_MEM_INVALID_HANDLE (0ull << 12) +#define BASE_MEM_INVALID_HANDLE ((base_mem_handle) { {BASEP_MEM_INVALID_HANDLE} }) + +/** + * @brief Special write-alloc memory handle. + * + * A special handle is used to represent a region where a special page is mapped + * with a write-alloc cache setup, typically used when the write result of the + * GPU isn't needed, but the GPU must write anyway. + * + * @warning @ref base_mem_handle_new_write_alloc must be used instead of this macro + * in C++ code or other situations where compound literals cannot be used. 
+ */ +#define BASE_MEM_WRITE_ALLOC_PAGES_HANDLE ((base_mem_handle) { {BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE} }) + +#define BASEP_MEM_INVALID_HANDLE (0ull << 12) #define BASE_MEM_MMU_DUMP_HANDLE (1ull << 12) #define BASE_MEM_TRACE_BUFFER_HANDLE (2ull << 12) #define BASE_MEM_MAP_TRACKING_HANDLE (3ull << 12) -#define BASE_MEM_WRITE_ALLOC_PAGES_HANDLE (4ull << 12) +#define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE (4ull << 12) /* reserved handles ..-64< for future special handles */ #define BASE_MEM_COOKIE_BASE (64ul << 12) #define BASE_MEM_FIRST_FREE_ADDRESS ((BITS_PER_LONG << 12) + \ @@ -240,6 +275,7 @@ struct base_mem_import_user_buffer { /* Mask to detect 4GB boundary alignment */ #define BASE_MEM_MASK_4GB 0xfffff000UL + /* Bit mask of cookies used for for memory allocation setup */ #define KBASE_COOKIE_MASK ~1UL /* bit 0 is reserved */ @@ -355,6 +391,28 @@ struct base_mem_aliasing_info { u64 length; }; +/** + * struct base_jit_alloc_info - Structure which describes a JIT allocation + * request. + * @gpu_alloc_addr: The GPU virtual address to write the JIT + * allocated GPU virtual address to. + * @va_pages: The minimum number of virtual pages required. + * @commit_pages: The minimum number of physical pages which + * should back the allocation. + * @extent: Granularity of physical pages to grow the + * allocation by during a fault. + * @id: Unique ID provided by the caller, this is used + * to pair allocation and free requests. + * Zero is not a valid value. + */ +struct base_jit_alloc_info { + u64 gpu_alloc_addr; + u64 va_pages; + u64 commit_pages; + u64 extent; + u8 id; +}; + /** * @brief Job dependency type. * @@ -407,6 +465,14 @@ typedef u16 base_jd_core_req; /* Requires fragment job with AFBC encoding */ #define BASE_JD_REQ_FS_AFBC (1U << 13) +/** + * SW-only requirement: coalesce completion events. + * If this bit is set then completion of this atom will not cause an event to + * be sent to userspace, whether successful or not; completion events will be + * deferred until an atom completes which does not have this bit set. + */ +#define BASE_JD_REQ_EVENT_COALESCE (1U << 5) + /** * SW Only requirement: the job chain requires a coherent core group. We don't * mind which coherent core group is used. @@ -476,6 +542,66 @@ typedef u16 base_jd_core_req; * - Priority is inherited from the replay job. */ #define BASE_JD_REQ_SOFT_REPLAY (BASE_JD_REQ_SOFT_JOB | 0x4) +/** + * SW only requirement: event wait/trigger job. + * + * - BASE_JD_REQ_SOFT_EVENT_WAIT: this job will block until the event is set. + * - BASE_JD_REQ_SOFT_EVENT_SET: this job sets the event, thus unblocks the + * other waiting jobs. It completes immediately. + * - BASE_JD_REQ_SOFT_EVENT_RESET: this job resets the event, making it + * possible for other jobs to wait upon. It completes immediately. + */ +#define BASE_JD_REQ_SOFT_EVENT_WAIT (BASE_JD_REQ_SOFT_JOB | 0x5) +#define BASE_JD_REQ_SOFT_EVENT_SET (BASE_JD_REQ_SOFT_JOB | 0x6) +#define BASE_JD_REQ_SOFT_EVENT_RESET (BASE_JD_REQ_SOFT_JOB | 0x7) + +#define BASE_JD_REQ_SOFT_DEBUG_COPY (BASE_JD_REQ_SOFT_JOB | 0x8) + +/** + * SW only requirement: Just In Time allocation + * + * This job requests a JIT allocation based on the request in the + * @base_jit_alloc_info structure which is passed via the jc element of + * the atom. + * + * It should be noted that the id entry in @base_jit_alloc_info must not + * be reused until it has been released via @BASE_JD_REQ_SOFT_JIT_FREE. 
+ * + * Should this soft job fail it is expected that a @BASE_JD_REQ_SOFT_JIT_FREE + * soft job to free the JIT allocation is still made. + * + * The job will complete immediately. + */ +#define BASE_JD_REQ_SOFT_JIT_ALLOC (BASE_JD_REQ_SOFT_JOB | 0x9) +/** + * SW only requirement: Just In Time free + * + * This job requests a JIT allocation created by @BASE_JD_REQ_SOFT_JIT_ALLOC + * to be freed. The ID of the JIT allocation is passed via the jc element of + * the atom. + * + * The job will complete immediately. + */ +#define BASE_JD_REQ_SOFT_JIT_FREE (BASE_JD_REQ_SOFT_JOB | 0xa) + +/** + * SW only requirement: Map external resource + * + * This job requests external resource(s) are mapped once the dependencies + * of the job have been satisfied. The list of external resources are + * passed via the jc element of the atom which is a pointer to a + * @base_external_resource_list. + */ +#define BASE_JD_REQ_SOFT_EXT_RES_MAP (BASE_JD_REQ_SOFT_JOB | 0xb) +/** + * SW only requirement: Unmap external resource + * + * This job requests external resource(s) are unmapped once the dependencies + * of the job has been satisfied. The list of external resources are + * passed via the jc element of the atom which is a pointer to a + * @base_external_resource_list. + */ +#define BASE_JD_REQ_SOFT_EXT_RES_UNMAP (BASE_JD_REQ_SOFT_JOB | 0xc) /** * HW Requirement: Requires Compute shaders (but not Vertex or Geometry Shaders) @@ -513,26 +639,21 @@ typedef u16 base_jd_core_req; #define BASEP_JD_REQ_EVENT_NEVER (1U << 14) /** -* These requirement bits are currently unused in base_jd_core_req (currently a u16) -*/ - -#define BASEP_JD_REQ_RESERVED_BIT5 (1U << 5) -#define BASEP_JD_REQ_RESERVED_BIT15 (1U << 15) - -/** -* Mask of all the currently unused requirement bits in base_jd_core_req. -*/ + * These requirement bits are currently unused in base_jd_core_req (currently a u16) + */ -#define BASEP_JD_REQ_RESERVED (BASEP_JD_REQ_RESERVED_BIT5 | \ - BASEP_JD_REQ_RESERVED_BIT15) +#define BASEP_JD_REQ_RESERVED (1U << 15) /** * Mask of all bits in base_jd_core_req that control the type of the atom. * * This allows dependency only atoms to have flags set */ -#define BASEP_JD_REQ_ATOM_TYPE (~(BASEP_JD_REQ_RESERVED | BASE_JD_REQ_EVENT_ONLY_ON_FAILURE |\ - BASE_JD_REQ_EXTERNAL_RESOURCES | BASEP_JD_REQ_EVENT_NEVER)) +#define BASEP_JD_REQ_ATOM_TYPE (~(BASEP_JD_REQ_RESERVED |\ + BASE_JD_REQ_EVENT_ONLY_ON_FAILURE |\ + BASE_JD_REQ_EXTERNAL_RESOURCES |\ + BASEP_JD_REQ_EVENT_NEVER |\ + BASE_JD_REQ_EVENT_COALESCE)) /** * @brief States to model state machine processed by kbasep_js_job_check_ref_cores(), which @@ -674,6 +795,30 @@ typedef struct base_external_resource { u64 ext_resource; } base_external_resource; + +/** + * The maximum number of external resources which can be mapped/unmapped + * in a single request. + */ +#define BASE_EXT_RES_COUNT_MAX 10 + +/** + * struct base_external_resource_list - Structure which describes a list of + * external resources. + * @count: The number of resources. + * @ext_res: Array of external resources which is + * sized at allocation time. 
+ */ +struct base_external_resource_list { + u64 count; + struct base_external_resource ext_res[1]; +}; + +struct base_jd_debug_copy_buffer { + u64 address; + u64 size; +}; + /** * @brief Setter for a dependency structure * diff --git a/drivers/gpu/arm/midgard/mali_kbase.h b/drivers/gpu/arm/midgard/mali_kbase.h index fe58341fdef8..3483acfcfaaf 100644 --- a/drivers/gpu/arm/midgard/mali_kbase.h +++ b/drivers/gpu/arm/midgard/mali_kbase.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -139,7 +139,6 @@ void kbase_jd_done_worker(struct work_struct *data); void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr, ktime_t *end_timestamp, kbasep_js_atom_done_code done_code); void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom); -void kbase_jd_evict(struct kbase_device *kbdev, struct kbase_jd_atom *katom); void kbase_jd_zap_context(struct kbase_context *kctx); bool jd_done_nolock(struct kbase_jd_atom *katom, struct list_head *completed_jobs_ctx); @@ -191,9 +190,17 @@ int kbase_prepare_soft_job(struct kbase_jd_atom *katom); void kbase_finish_soft_job(struct kbase_jd_atom *katom); void kbase_cancel_soft_job(struct kbase_jd_atom *katom); void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev); +void kbasep_add_waiting_soft_job(struct kbase_jd_atom *katom); bool kbase_replay_process(struct kbase_jd_atom *katom); +enum hrtimer_restart kbasep_soft_event_timeout_worker(struct hrtimer *timer); +void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt); +int kbasep_read_soft_event_status( + struct kbase_context *kctx, u64 evt, unsigned char *status); +int kbasep_write_soft_event_status( + struct kbase_context *kctx, u64 evt, unsigned char new_status); + /* api used internally for register access. Contains validation and tracing */ void kbase_device_trace_register_access(struct kbase_context *kctx, enum kbase_reg_access_type type, u16 reg_offset, u32 reg_value); int kbase_device_trace_buffer_install( diff --git a/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c b/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c index 2fb5e3edf49f..c67b3e97f1af 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c +++ b/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -43,6 +43,11 @@ u32 kbase_cache_enabled(u32 flags, u32 nr_pages) void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle, size_t size, enum dma_data_direction dir) { +/* Check if kernel is using coherency with GPU */ +#ifdef CONFIG_MALI_COH_KERN + if (kbdev->system_coherency == COHERENCY_ACE) + return; +#endif /* CONFIG_MALI_COH_KERN */ dma_sync_single_for_device(kbdev->dev, handle, size, dir); } @@ -50,5 +55,10 @@ void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle, void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle, size_t size, enum dma_data_direction dir) { +/* Check if kernel is using coherency with GPU */ +#ifdef CONFIG_MALI_COH_KERN + if (kbdev->system_coherency == COHERENCY_ACE) + return; +#endif /* CONFIG_MALI_COH_KERN */ dma_sync_single_for_cpu(kbdev->dev, handle, size, dir); } diff --git a/drivers/gpu/arm/midgard/mali_kbase_config.h b/drivers/gpu/arm/midgard/mali_kbase_config.h index 816e45c4d02d..356d52bcd774 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_config.h +++ b/drivers/gpu/arm/midgard/mali_kbase_config.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -45,13 +45,6 @@ * @{ */ -#if !MALI_CUSTOMER_RELEASE -/* This flag is set for internal builds so we can run tests without credentials. */ -#define KBASE_HWCNT_DUMP_BYPASS_ROOT 1 -#else -#define KBASE_HWCNT_DUMP_BYPASS_ROOT 0 -#endif - #include /* Forward declaration of struct kbase_device */ @@ -105,7 +98,7 @@ struct kbase_pm_callback_conf { * The system integrator can decide whether to either do nothing, just switch off * the clocks to the GPU, or to completely power down the GPU. * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the - * platform \em callbacks responsiblity to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf). + * platform \em callbacks responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf). */ void (*power_off_callback)(struct kbase_device *kbdev); @@ -115,7 +108,7 @@ struct kbase_pm_callback_conf { * succeed. The return value specifies whether the GPU was powered down since the call to power_off_callback. * If the GPU state has been lost then this function must return 1, otherwise it should return 0. * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the - * platform \em callbacks responsiblity to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf). + * platform \em callbacks responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf). * * The return value of the first call to this function is ignored. * @@ -160,7 +153,7 @@ struct kbase_pm_callback_conf { * The runtime calls can be triggered by calls from @ref power_off_callback and @ref power_on_callback. * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature. * - * @return 0 on success, else int erro code. + * @return 0 on success, else int error code. 
*/ int (*power_runtime_init_callback)(struct kbase_device *kbdev); @@ -203,8 +196,8 @@ struct kbase_pm_callback_conf { * Returning 0 will cause the runtime PM core to conduct a regular * autosuspend. * - * This callback is optional and if not provided regular ausosuspend - * will triggered. + * This callback is optional and if not provided regular autosuspend + * will be triggered. * * Note: The Linux kernel must have CONFIG_PM_RUNTIME enabled to use * this feature. diff --git a/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h b/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h index bd48ed96e962..7cabbecea23f 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h +++ b/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h @@ -148,11 +148,6 @@ enum { */ #define DEFAULT_AWID_LIMIT KBASE_AID_32 -/** - * Default setting for using alternative hardware counters. - */ -#define DEFAULT_ALTERNATIVE_HWC false - /** * Default UMP device mapping. A UMP_DEVICE__SHIFT value which * defines which UMP device this GPU should be mapped to. @@ -220,6 +215,12 @@ enum { */ #define DEFAULT_JS_HARD_STOP_TICKS_DUMPING (15000) /* 1500s */ +/* + * Default timeout for software event jobs, after which these jobs will be + * cancelled. + */ +#define DEFAULT_JS_SOFT_EVENT_TIMEOUT ((u32)3000) /* 3s */ + /* * Default minimum number of scheduling ticks before the GPU is reset to clear a * "stuck" job diff --git a/drivers/gpu/arm/midgard/mali_kbase_context.c b/drivers/gpu/arm/midgard/mali_kbase_context.c index 798979963937..d53f728d08e8 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_context.c +++ b/drivers/gpu/arm/midgard/mali_kbase_context.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -24,7 +24,7 @@ #include #include #include - +#include /** * kbase_create_context() - Create a kernel base context. 
@@ -72,11 +72,15 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat) if (err) goto free_kctx; + err = kbase_mem_evictable_init(kctx); + if (err) + goto free_pool; + atomic_set(&kctx->used_pages, 0); err = kbase_jd_init(kctx); if (err) - goto free_pool; + goto deinit_evictable; err = kbasep_js_kctx_init(kctx); if (err) @@ -89,6 +93,7 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat) mutex_init(&kctx->reg_lock); INIT_LIST_HEAD(&kctx->waiting_soft_jobs); + spin_lock_init(&kctx->waiting_soft_jobs_lock); #ifdef CONFIG_KDS INIT_LIST_HEAD(&kctx->waiting_kds_resource); #endif @@ -115,6 +120,14 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat) err = kbase_region_tracker_init(kctx); if (err) goto no_region_tracker; + + err = kbase_sticky_resource_init(kctx); + if (err) + goto no_sticky; + + err = kbase_jit_init(kctx); + if (err) + goto no_jit; #ifdef CONFIG_GPU_TRACEPOINTS atomic_set(&kctx->jctx.work_id, 0); #endif @@ -126,8 +139,18 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat) mutex_init(&kctx->vinstr_cli_lock); + hrtimer_init(&kctx->soft_event_timeout, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); + kctx->soft_event_timeout.function = &kbasep_soft_event_timeout_worker; + return kctx; +no_jit: + kbase_gpu_vm_lock(kctx); + kbase_sticky_resource_term(kctx); + kbase_gpu_vm_unlock(kctx); +no_sticky: + kbase_region_tracker_term(kctx); no_region_tracker: kbase_mem_pool_free(&kctx->mem_pool, kctx->aliasing_sink_page, false); no_sink_page: @@ -143,6 +166,8 @@ free_jd: /* Safe to call this one even when didn't initialize (assuming kctx was sufficiently zeroed) */ kbasep_js_kctx_term(kctx); kbase_jd_exit(kctx); +deinit_evictable: + kbase_mem_evictable_deinit(kctx); free_pool: kbase_mem_pool_term(&kctx->mem_pool); free_kctx: @@ -188,8 +213,18 @@ void kbase_destroy_context(struct kbase_context *kctx) kbase_jd_zap_context(kctx); kbase_event_cleanup(kctx); + /* + * JIT must be terminated before the code below as it must be called + * without the region lock being held. + * The code above ensures no new JIT allocations can be made by + * by the time we get to this point of context tear down. + */ + kbase_jit_term(kctx); + kbase_gpu_vm_lock(kctx); + kbase_sticky_resource_term(kctx); + /* MMU is disabled as part of scheduling out the context */ kbase_mmu_free_pgd(kctx); @@ -225,6 +260,7 @@ void kbase_destroy_context(struct kbase_context *kctx) if (pages != 0) dev_warn(kbdev->dev, "%s: %d pages in use!\n", __func__, pages); + kbase_mem_evictable_deinit(kctx); kbase_mem_pool_term(&kctx->mem_pool); WARN_ON(atomic_read(&kctx->nonmapped_pages) != 0); diff --git a/drivers/gpu/arm/midgard/mali_kbase_core_linux.c b/drivers/gpu/arm/midgard/mali_kbase_core_linux.c index ab6f5e45eacc..061c8df68bfb 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_core_linux.c +++ b/drivers/gpu/arm/midgard/mali_kbase_core_linux.c @@ -1,7 +1,6 @@ - /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -98,9 +97,7 @@ #include #endif -#if defined(CONFIG_MALI_MIPE_ENABLED) #include -#endif /* GPU IRQ Tags */ #define JOB_IRQ_TAG 0 @@ -357,7 +354,6 @@ out: } #endif /* CONFIG_KDS */ -#ifdef CONFIG_MALI_MIPE_ENABLED static void kbase_create_timeline_objects(struct kbase_context *kctx) { struct kbase_device *kbdev = kctx->kbdev; @@ -397,7 +393,8 @@ static void kbase_create_timeline_objects(struct kbase_context *kctx) list_for_each_entry(element, &kbdev->kctx_list, link) { kbase_tlstream_tl_summary_new_ctx( element->kctx, - (u32)(element->kctx->id)); + (u32)(element->kctx->id), + (u32)(element->kctx->tgid)); } /* Before releasing the lock, reset body stream buffers. * This will prevent context creation message to be directed to both @@ -409,7 +406,6 @@ static void kbase_create_timeline_objects(struct kbase_context *kctx) * user space. */ kbase_tlstream_flush_streams(); } -#endif static void kbase_api_handshake(struct uku_version_check_args *version) { @@ -476,6 +472,34 @@ enum mali_error { MALI_ERROR_FUNCTION_FAILED, }; +enum { + inited_mem = (1u << 0), + inited_js = (1u << 1), + inited_pm_runtime_init = (1u << 2), +#ifdef CONFIG_MALI_DEVFREQ + inited_devfreq = (1u << 3), +#endif /* CONFIG_MALI_DEVFREQ */ + inited_tlstream = (1u << 4), + inited_backend_early = (1u << 5), + inited_backend_late = (1u << 6), + inited_device = (1u << 7), + inited_vinstr = (1u << 8), +#ifndef CONFIG_MALI_PRFCNT_SET_SECONDARY + inited_ipa = (1u << 9), +#endif /* CONFIG_MALI_PRFCNT_SET_SECONDARY */ + inited_job_fault = (1u << 10), + inited_misc_register = (1u << 11), + inited_get_device = (1u << 12), + inited_sysfs_group = (1u << 13), + inited_dev_list = (1u << 14), + inited_debugfs = (1u << 15), + inited_gpu_device = (1u << 16), + inited_registers_map = (1u << 17), + inited_power_control = (1u << 19), + inited_buslogger = (1u << 20) +}; + + #ifdef CONFIG_MALI_DEBUG #define INACTIVE_WAIT_MS (5000) @@ -561,6 +585,18 @@ static int kbase_dispatch(struct kbase_context *kctx, void * const args, u32 arg /* setup complete, perform normal operation */ switch (id) { + case KBASE_FUNC_MEM_JIT_INIT: + { + struct kbase_uk_mem_jit_init *jit_init = args; + + if (sizeof(*jit_init) != args_size) + goto bad_size; + + if (kbase_region_tracker_init_jit(kctx, + jit_init->va_pages)) + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + break; + } case KBASE_FUNC_MEM_ALLOC: { struct kbase_uk_mem_alloc *mem = args; @@ -569,6 +605,13 @@ static int kbase_dispatch(struct kbase_context *kctx, void * const args, u32 arg if (sizeof(*mem) != args_size) goto bad_size; +#if defined(CONFIG_64BIT) + if (!kctx->is_compat) { + /* force SAME_VA if a 64-bit client */ + mem->flags |= BASE_MEM_SAME_VA; + } +#endif + reg = kbase_mem_alloc(kctx, mem->va_pages, mem->commit_pages, mem->extent, &mem->flags, &mem->gpu_va, @@ -779,7 +822,7 @@ copy_failed: if (sizeof(*sn) != args_size) goto bad_size; - if (sn->sset.basep_sset.mem_handle & ~PAGE_MASK) { + if (sn->sset.basep_sset.mem_handle.basep.handle & ~PAGE_MASK) { dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_SYNC: sn->sset.basep_sset.mem_handle: passed parameter is invalid"); ukh->ret = MALI_ERROR_FUNCTION_FAILED; break; @@ -1075,7 +1118,7 @@ copy_failed: goto bad_size; if (add_data->len > KBASE_MEM_PROFILE_MAX_BUF_SIZE) { - dev_err(kbdev->dev, "buffer too big"); + dev_err(kbdev->dev, "buffer too big\n"); goto out_bad; } @@ -1119,7 +1162,6 @@ copy_failed: } 
#endif /* CONFIG_MALI_NO_MALI */ -#ifdef CONFIG_MALI_MIPE_ENABLED case KBASE_FUNC_TLSTREAM_ACQUIRE: { struct kbase_uk_tlstream_acquire *tlstream_acquire = @@ -1179,7 +1221,6 @@ copy_failed: break; } #endif /* MALI_UNIT_TEST */ -#endif /* CONFIG_MALI_MIPE_ENABLED */ case KBASE_FUNC_GET_CONTEXT_ID: { @@ -1189,8 +1230,34 @@ copy_failed: break; } + case KBASE_FUNC_SOFT_EVENT_UPDATE: + { + struct kbase_uk_soft_event_update *update = args; + + if (sizeof(*update) != args_size) + goto bad_size; + + if (((update->new_status != BASE_JD_SOFT_EVENT_SET) && + (update->new_status != BASE_JD_SOFT_EVENT_RESET)) || + (update->flags != 0)) + goto out_bad; + + if (kbasep_write_soft_event_status( + kctx, update->evt, + update->new_status) != 0) { + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + break; + } + + if (update->new_status == BASE_JD_SOFT_EVENT_SET) + kbasep_complete_triggered_soft_events( + kctx, update->evt); + + break; + } + default: - dev_err(kbdev->dev, "unknown ioctl %u", id); + dev_err(kbdev->dev, "unknown ioctl %u\n", id); goto out_bad; } @@ -1207,6 +1274,47 @@ static struct kbase_device *to_kbase_device(struct device *dev) return dev_get_drvdata(dev); } +static int assign_irqs(struct platform_device *pdev) +{ + struct kbase_device *kbdev = to_kbase_device(&pdev->dev); + int i; + + if (!kbdev) + return -ENODEV; + + /* 3 IRQ resources */ + for (i = 0; i < 3; i++) { + struct resource *irq_res; + int irqtag; + + irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, i); + if (!irq_res) { + dev_err(kbdev->dev, "No IRQ resource at index %d\n", i); + return -ENOENT; + } + +#ifdef CONFIG_OF + if (!strcmp(irq_res->name, "JOB")) { + irqtag = JOB_IRQ_TAG; + } else if (!strcmp(irq_res->name, "MMU")) { + irqtag = MMU_IRQ_TAG; + } else if (!strcmp(irq_res->name, "GPU")) { + irqtag = GPU_IRQ_TAG; + } else { + dev_err(&pdev->dev, "Invalid irq res name: '%s'\n", + irq_res->name); + return -EINVAL; + } +#else + irqtag = i; +#endif /* CONFIG_OF */ + kbdev->irqs[irqtag].irq = irq_res->start; + kbdev->irqs[irqtag].flags = irq_res->flags & IRQF_TRIGGER_MASK; + } + + return 0; +} + /* * API to acquire device list mutex and * return pointer to the device list head @@ -1309,6 +1417,7 @@ static int kbase_open(struct inode *inode, struct file *filp) kbase_mem_pool_debugfs_add(kctx->kctx_dentry, &kctx->mem_pool); + kbase_jit_debugfs_add(kctx); #endif /* CONFIG_DEBUGFS */ dev_dbg(kbdev->dev, "created base context\n"); @@ -1321,11 +1430,10 @@ static int kbase_open(struct inode *inode, struct file *filp) mutex_lock(&kbdev->kctx_list_lock); element->kctx = kctx; list_add(&element->link, &kbdev->kctx_list); -#ifdef CONFIG_MALI_MIPE_ENABLED kbase_tlstream_tl_new_ctx( element->kctx, - (u32)(element->kctx->id)); -#endif + (u32)(element->kctx->id), + (u32)(element->kctx->tgid)); mutex_unlock(&kbdev->kctx_list_lock); } else { /* we don't treat this as a fail - just warn about it */ @@ -1346,9 +1454,7 @@ static int kbase_release(struct inode *inode, struct file *filp) struct kbasep_kctx_list_element *element, *tmp; bool found_element = false; -#ifdef CONFIG_MALI_MIPE_ENABLED kbase_tlstream_tl_del_ctx(kctx); -#endif #ifdef CONFIG_DEBUG_FS debugfs_remove_recursive(kctx->kctx_dentry); @@ -1613,11 +1719,12 @@ static unsigned long kbase_get_unmapped_area(struct file *filp, flags); if (kbase_hw_has_feature(kctx->kbdev, BASE_HW_FEATURE_33BIT_VA)) { - info.high_limit = 1ul << 33; + info.high_limit = kctx->same_va_end << PAGE_SHIFT; info.align_mask = 0; info.align_offset = 0; } else { - info.high_limit = mm->mmap_base; + info.high_limit = 
min_t(unsigned long, mm->mmap_base, + (kctx->same_va_end << PAGE_SHIFT)); if (len >= SZ_2M) { info.align_offset = SZ_2M; info.align_mask = SZ_2M - 1; @@ -1984,6 +2091,70 @@ static ssize_t set_core_mask(struct device *dev, struct device_attribute *attr, */ static DEVICE_ATTR(core_mask, S_IRUGO | S_IWUSR, show_core_mask, set_core_mask); +/** + * set_soft_event_timeout() - Store callback for the soft_event_timeout sysfs + * file. + * + * @dev: The device this sysfs file is for. + * @attr: The attributes of the sysfs file. + * @buf: The value written to the sysfs file. + * @count: The number of bytes written to the sysfs file. + * + * This allows setting the timeout for software event jobs. Waiting jobs will + * be cancelled after this period expires. This is expressed in milliseconds. + * + * Return: count if the function succeeded. An error code on failure. + */ +static ssize_t set_soft_event_timeout(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct kbase_device *kbdev; + int soft_event_timeout_ms; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + if ((kstrtoint(buf, 0, &soft_event_timeout_ms) != 0) || + (soft_event_timeout_ms <= 0)) + return -EINVAL; + + atomic_set(&kbdev->js_data.soft_event_timeout_ms, + soft_event_timeout_ms); + + return count; +} + +/** + * show_soft_event_timeout() - Show callback for the soft_event_timeout sysfs + * file. + * + * This will return the timeout for the software event jobs. + * + * @dev: The device this sysfs file is for. + * @attr: The attributes of the sysfs file. + * @buf: The output buffer for the sysfs file contents. + * + * Return: The number of bytes output to buf. + */ +static ssize_t show_soft_event_timeout(struct device *dev, + struct device_attribute *attr, + char * const buf) +{ + struct kbase_device *kbdev; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + return scnprintf(buf, PAGE_SIZE, "%i\n", + atomic_read(&kbdev->js_data.soft_event_timeout_ms)); +} + +static DEVICE_ATTR(soft_event_timeout, S_IRUGO | S_IWUSR, + show_soft_event_timeout, set_soft_event_timeout); + /** Store callback for the @c js_timeouts sysfs file. 
* * This function is called to get the contents of the @c js_timeouts sysfs @@ -2715,6 +2886,8 @@ static ssize_t kbase_show_gpuinfo(struct device *dev, { .id = GPU_ID_PI_T83X, .name = "Mali-T83x" }, { .id = GPU_ID_PI_T86X, .name = "Mali-T86x" }, { .id = GPU_ID_PI_TFRX, .name = "Mali-T88x" }, + { .id = GPU_ID2_PRODUCT_TMIX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, + .name = "Mali-TMIx" }, }; const char *product_name = "(Unknown Mali GPU)"; struct kbase_device *kbdev; @@ -3042,26 +3215,53 @@ static DEVICE_ATTR(mem_pool_max_size, S_IRUGO | S_IWUSR, show_mem_pool_max_size, set_mem_pool_max_size); +static int kbasep_secure_mode_enable(struct kbase_device *kbdev) +{ + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), + GPU_COMMAND_SET_PROTECTED_MODE, NULL); + return 0; +} -static int kbasep_secure_mode_init(struct kbase_device *kbdev) +static int kbasep_secure_mode_disable(struct kbase_device *kbdev) { + if (!kbase_prepare_to_reset_gpu_locked(kbdev)) + return -EBUSY; + + kbase_reset_gpu_locked(kbdev); + + return 0; +} + +static struct kbase_secure_ops kbasep_secure_ops = { + .secure_mode_enable = kbasep_secure_mode_enable, + .secure_mode_disable = kbasep_secure_mode_disable, +}; +static void kbasep_secure_mode_init(struct kbase_device *kbdev) +{ + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) { + /* Use native secure ops */ + kbdev->secure_ops = &kbasep_secure_ops; + kbdev->secure_mode_support = true; + } #ifdef SECURE_CALLBACKS - kbdev->secure_ops = SECURE_CALLBACKS; - kbdev->secure_mode_support = false; + else { + kbdev->secure_ops = SECURE_CALLBACKS; + kbdev->secure_mode_support = false; - if (kbdev->secure_ops) { - int err; + if (kbdev->secure_ops) { + int err; - /* Make sure secure mode is disabled on startup */ - err = kbdev->secure_ops->secure_mode_disable(kbdev); + /* Make sure secure mode is disabled on startup */ + err = kbdev->secure_ops->secure_mode_disable(kbdev); - /* secure_mode_disable() returns -EINVAL if not supported */ - kbdev->secure_mode_support = (err != -EINVAL); + /* secure_mode_disable() returns -EINVAL if not + * supported + */ + kbdev->secure_mode_support = (err != -EINVAL); + } } #endif - - return 0; } #ifdef CONFIG_MALI_NO_MALI @@ -3100,11 +3300,145 @@ static int kbase_common_reg_map(struct kbase_device *kbdev) static void kbase_common_reg_unmap(struct kbase_device * const kbdev) { - iounmap(kbdev->reg); - release_mem_region(kbdev->reg_start, kbdev->reg_size); + if (kbdev->reg) { + iounmap(kbdev->reg); + release_mem_region(kbdev->reg_start, kbdev->reg_size); + kbdev->reg = NULL; + kbdev->reg_start = 0; + kbdev->reg_size = 0; + } } #endif /* CONFIG_MALI_NO_MALI */ +static int registers_map(struct kbase_device * const kbdev) +{ + /* the first memory resource is the physical address of the GPU + * registers */ + struct platform_device *pdev = to_platform_device(kbdev->dev); + struct resource *reg_res; + int err; + + reg_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!reg_res) { + dev_err(kbdev->dev, "Invalid register resource\n"); + return -ENOENT; + } + + kbdev->reg_start = reg_res->start; + kbdev->reg_size = resource_size(reg_res); + + err = kbase_common_reg_map(kbdev); + if (err) { + dev_err(kbdev->dev, "Failed to map registers\n"); + return err; + } + + return 0; +} + +static void registers_unmap(struct kbase_device *kbdev) +{ + kbase_common_reg_unmap(kbdev); +} + +static int power_control_init(struct platform_device *pdev) +{ + struct kbase_device *kbdev = to_kbase_device(&pdev->dev); + int err = 0; + + if (!kbdev) + return -ENODEV; + 
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \ + && defined(CONFIG_REGULATOR) + kbdev->regulator = regulator_get_optional(kbdev->dev, "mali"); + if (IS_ERR_OR_NULL(kbdev->regulator)) { + err = PTR_ERR(kbdev->regulator); + kbdev->regulator = NULL; + if (err == -EPROBE_DEFER) { + dev_err(&pdev->dev, "Failed to get regulator\n"); + return err; + } + dev_info(kbdev->dev, + "Continuing without Mali regulator control\n"); + /* Allow probe to continue without regulator */ + } +#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */ + + kbdev->clock = clk_get(kbdev->dev, "clk_mali"); + if (IS_ERR_OR_NULL(kbdev->clock)) { + err = PTR_ERR(kbdev->clock); + kbdev->clock = NULL; + if (err == -EPROBE_DEFER) { + dev_err(&pdev->dev, "Failed to get clock\n"); + goto fail; + } + dev_info(kbdev->dev, "Continuing without Mali clock control\n"); + /* Allow probe to continue without clock. */ + } else { + err = clk_prepare_enable(kbdev->clock); + if (err) { + dev_err(kbdev->dev, + "Failed to prepare and enable clock (%d)\n", + err); + goto fail; + } + } + +#if defined(CONFIG_OF) && defined(CONFIG_PM_OPP) + /* Register the OPPs if they are available in device tree */ +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) + err = dev_pm_opp_of_add_table(kbdev->dev); +#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)) + err = of_init_opp_table(kbdev->dev); +#else + err = 0; +#endif /* LINUX_VERSION_CODE */ + if (err) + dev_dbg(kbdev->dev, "OPP table not found\n"); +#endif /* CONFIG_OF && CONFIG_PM_OPP */ + + return 0; + +fail: + +if (kbdev->clock != NULL) { + clk_put(kbdev->clock); + kbdev->clock = NULL; +} + +#ifdef CONFIG_REGULATOR + if (NULL != kbdev->regulator) { + regulator_put(kbdev->regulator); + kbdev->regulator = NULL; + } +#endif + + return err; +} + +static void power_control_term(struct kbase_device *kbdev) +{ +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) + dev_pm_opp_of_remove_table(kbdev->dev); +#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)) + of_free_opp_table(kbdev->dev); +#endif + + if (kbdev->clock) { + clk_disable_unprepare(kbdev->clock); + clk_put(kbdev->clock); + kbdev->clock = NULL; + } + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \ + && defined(CONFIG_REGULATOR) + if (kbdev->regulator) { + regulator_put(kbdev->regulator); + kbdev->regulator = NULL; + } +#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */ +} #ifdef CONFIG_DEBUG_FS @@ -3317,534 +3651,406 @@ static void kbase_logging_started_cb(void *data) #endif -static int kbase_common_device_init(struct kbase_device *kbdev) -{ - int err; - struct mali_base_gpu_core_props *core_props; - enum { - inited_mem = (1u << 0), - inited_js = (1u << 1), - inited_pm_runtime_init = (1u << 6), -#ifdef CONFIG_MALI_DEVFREQ - inited_devfreq = (1u << 9), -#endif /* CONFIG_MALI_DEVFREQ */ -#ifdef CONFIG_MALI_MIPE_ENABLED - inited_tlstream = (1u << 10), -#endif /* CONFIG_MALI_MIPE_ENABLED */ - inited_backend_early = (1u << 11), - inited_backend_late = (1u << 12), - inited_device = (1u << 13), - inited_vinstr = (1u << 19), - inited_ipa = (1u << 20), - inited_job_fault = (1u << 21) - }; - - int inited = 0; - u32 gpu_id; -#if defined(CONFIG_MALI_PLATFORM_VEXPRESS) - u32 ve_logic_tile = 0; -#endif /* CONFIG_MALI_PLATFORM_VEXPRESS */ +static struct attribute *kbase_attrs[] = { +#ifdef CONFIG_MALI_DEBUG + &dev_attr_debug_command.attr, + &dev_attr_js_softstop_always.attr, +#endif +#if !MALI_CUSTOMER_RELEASE + &dev_attr_force_replay.attr, +#endif + &dev_attr_js_timeouts.attr, + 
&dev_attr_soft_event_timeout.attr, + &dev_attr_gpuinfo.attr, + &dev_attr_dvfs_period.attr, + &dev_attr_pm_poweroff.attr, + &dev_attr_reset_timeout.attr, + &dev_attr_js_scheduling_period.attr, + &dev_attr_power_policy.attr, + &dev_attr_core_availability_policy.attr, + &dev_attr_core_mask.attr, + &dev_attr_mem_pool_size.attr, + &dev_attr_mem_pool_max_size.attr, + NULL +}; - dev_set_drvdata(kbdev->dev, kbdev); +static const struct attribute_group kbase_attr_group = { + .attrs = kbase_attrs, +}; - err = kbase_backend_early_init(kbdev); - if (err) - goto out_partial; - inited |= inited_backend_early; +static int kbase_platform_device_remove(struct platform_device *pdev) +{ + struct kbase_device *kbdev = to_kbase_device(&pdev->dev); + const struct list_head *dev_list; - scnprintf(kbdev->devname, DEVNAME_SIZE, "%s%d", kbase_drv_name, - kbase_dev_nr); - - kbase_disjoint_init(kbdev); - - /* obtain min/max configured gpu frequencies */ - core_props = &(kbdev->gpu_props.props.core_props); + if (!kbdev) + return -ENODEV; - /* For versatile express platforms, min and max values of GPU frequency - * depend on the type of the logic tile; these values may not be known - * at the build time so in some cases a platform config file with wrong - * GPU freguency values may be included; to ensure the correct value of - * min and max GPU frequency is obtained, the type of the logic tile is - * read from the corresponding register on the platform and frequency - * values assigned accordingly.*/ -#if defined(CONFIG_MALI_PLATFORM_VEXPRESS) - ve_logic_tile = kbase_get_platform_logic_tile_type(); - - switch (ve_logic_tile) { - case 0x217: - /* Virtex 6, HBI0217 */ - core_props->gpu_freq_khz_min = VE_VIRTEX6_GPU_FREQ_MIN; - core_props->gpu_freq_khz_max = VE_VIRTEX6_GPU_FREQ_MAX; - break; - case 0x247: - /* Virtex 7, HBI0247 */ - core_props->gpu_freq_khz_min = VE_VIRTEX7_GPU_FREQ_MIN; - core_props->gpu_freq_khz_max = VE_VIRTEX7_GPU_FREQ_MAX; - break; - default: - /* all other logic tiles, i.e., Virtex 5 HBI0192 - * or unsuccessful reading from the platform - - * fall back to the config_platform default */ - core_props->gpu_freq_khz_min = GPU_FREQ_KHZ_MIN; - core_props->gpu_freq_khz_max = GPU_FREQ_KHZ_MAX; - break; +#ifdef CONFIG_MALI_FPGA_BUS_LOGGER + if (kbdev->inited_subsys & inited_buslogger) { + bl_core_client_unregister(kbdev->buslogger); + kbdev->inited_subsys &= ~inited_buslogger; } -#else - core_props->gpu_freq_khz_min = GPU_FREQ_KHZ_MIN; - core_props->gpu_freq_khz_max = GPU_FREQ_KHZ_MAX; -#endif /* CONFIG_MALI_PLATFORM_VEXPRESS */ - - kbdev->gpu_props.irq_throttle_time_us = DEFAULT_IRQ_THROTTLE_TIME_US; +#endif - err = kbase_device_init(kbdev); - if (err) { - dev_err(kbdev->dev, "Can't initialize device (%d)\n", err); - goto out_partial; + if (kbdev->inited_subsys & inited_sysfs_group) { + sysfs_remove_group(&kbdev->dev->kobj, &kbase_attr_group); + kbdev->inited_subsys &= ~inited_sysfs_group; } - inited |= inited_device; - - if (kbdev->pm.callback_power_runtime_init) { - err = kbdev->pm.callback_power_runtime_init(kbdev); - if (err) - goto out_partial; - - inited |= inited_pm_runtime_init; + if (kbdev->inited_subsys & inited_dev_list) { + dev_list = kbase_dev_list_get(); + list_del(&kbdev->entry); + kbase_dev_list_put(dev_list); + kbdev->inited_subsys &= ~inited_dev_list; } - err = kbase_mem_init(kbdev); - if (err) - goto out_partial; - - inited |= inited_mem; - - gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; - gpu_id &= GPU_ID_VERSION_PRODUCT_ID; - gpu_id = gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT; - - 
kbase_device_coherency_init(kbdev, gpu_id); - - err = kbasep_secure_mode_init(kbdev); - if (err) - goto out_partial; - - err = kbasep_js_devdata_init(kbdev); - if (err) - goto out_partial; - - inited |= inited_js; - -#ifdef CONFIG_MALI_MIPE_ENABLED - err = kbase_tlstream_init(); - if (err) { - dev_err(kbdev->dev, "Couldn't initialize timeline stream\n"); - goto out_partial; + if (kbdev->inited_subsys & inited_misc_register) { + misc_deregister(&kbdev->mdev); + kbdev->inited_subsys &= ~inited_misc_register; } - inited |= inited_tlstream; -#endif /* CONFIG_MALI_MIPE_ENABLED */ - err = kbase_backend_late_init(kbdev); - if (err) - goto out_partial; - inited |= inited_backend_late; - -#ifdef CONFIG_MALI_DEVFREQ - err = kbase_devfreq_init(kbdev); - if (err) { - dev_err(kbdev->dev, "Couldn't initialize devfreq\n"); - goto out_partial; + if (kbdev->inited_subsys & inited_get_device) { + put_device(kbdev->dev); + kbdev->inited_subsys &= ~inited_get_device; } - inited |= inited_devfreq; -#endif /* CONFIG_MALI_DEVFREQ */ - kbdev->vinstr_ctx = kbase_vinstr_init(kbdev); - if (!kbdev->vinstr_ctx) { - dev_err(kbdev->dev, "Can't initialize virtual instrumentation core\n"); - goto out_partial; + if (kbdev->inited_subsys & inited_debugfs) { + kbase_device_debugfs_term(kbdev); + kbdev->inited_subsys &= ~inited_debugfs; } - inited |= inited_vinstr; - - kbdev->ipa_ctx = kbase_ipa_init(kbdev); - if (!kbdev->ipa_ctx) { - dev_err(kbdev->dev, "Can't initialize IPA\n"); - goto out_partial; + if (kbdev->inited_subsys & inited_job_fault) { + kbase_debug_job_fault_dev_term(kbdev); + kbdev->inited_subsys &= ~inited_job_fault; } - inited |= inited_ipa; - - err = kbase_debug_job_fault_dev_init(kbdev); - if (err) - goto out_partial; - - inited |= inited_job_fault; - - err = kbase_device_debugfs_init(kbdev); - if (err) - goto out_partial; - - /* intialise the kctx list */ - mutex_init(&kbdev->kctx_list_lock); - INIT_LIST_HEAD(&kbdev->kctx_list); - - kbdev->mdev.minor = MISC_DYNAMIC_MINOR; - kbdev->mdev.name = kbdev->devname; - kbdev->mdev.fops = &kbase_fops; - kbdev->mdev.parent = get_device(kbdev->dev); - - err = misc_register(&kbdev->mdev); - if (err) { - dev_err(kbdev->dev, "Couldn't register misc dev %s\n", kbdev->devname); - goto out_misc; +#ifndef CONFIG_MALI_PRFCNT_SET_SECONDARY + if (kbdev->inited_subsys & inited_ipa) { + kbase_ipa_term(kbdev->ipa_ctx); + kbdev->inited_subsys &= ~inited_ipa; } +#endif /* CONFIG_MALI_PRFCNT_SET_SECONDARY */ - { - const struct list_head *dev_list = kbase_dev_list_get(); - - list_add(&kbdev->entry, &kbase_dev_list); - kbase_dev_list_put(dev_list); + if (kbdev->inited_subsys & inited_vinstr) { + kbase_vinstr_term(kbdev->vinstr_ctx); + kbdev->inited_subsys &= ~inited_vinstr; } - dev_info(kbdev->dev, "Probed as %s\n", dev_name(kbdev->mdev.this_device)); - - kbase_dev_nr++; - - return 0; - -out_misc: - put_device(kbdev->dev); - kbase_device_debugfs_term(kbdev); -out_partial: - if (inited & inited_job_fault) - kbase_debug_job_fault_dev_term(kbdev); - if (inited & inited_ipa) - kbase_ipa_term(kbdev->ipa_ctx); - if (inited & inited_vinstr) - kbase_vinstr_term(kbdev->vinstr_ctx); #ifdef CONFIG_MALI_DEVFREQ - if (inited & inited_devfreq) + if (kbdev->inited_subsys & inited_devfreq) { kbase_devfreq_term(kbdev); -#endif /* CONFIG_MALI_DEVFREQ */ - if (inited & inited_backend_late) + kbdev->inited_subsys &= ~inited_devfreq; + } +#endif + + if (kbdev->inited_subsys & inited_backend_late) { kbase_backend_late_term(kbdev); -#ifdef CONFIG_MALI_MIPE_ENABLED - if (inited & inited_tlstream) + 
kbdev->inited_subsys &= ~inited_backend_late; + } + + if (kbdev->inited_subsys & inited_tlstream) { kbase_tlstream_term(); -#endif /* CONFIG_MALI_MIPE_ENABLED */ + kbdev->inited_subsys &= ~inited_tlstream; + } + + /* Bring job and mem sys to a halt before we continue termination */ - if (inited & inited_js) + if (kbdev->inited_subsys & inited_js) kbasep_js_devdata_halt(kbdev); - if (inited & inited_mem) + if (kbdev->inited_subsys & inited_mem) kbase_mem_halt(kbdev); - if (inited & inited_js) + if (kbdev->inited_subsys & inited_js) { kbasep_js_devdata_term(kbdev); + kbdev->inited_subsys &= ~inited_js; + } - if (inited & inited_mem) + if (kbdev->inited_subsys & inited_mem) { kbase_mem_term(kbdev); + kbdev->inited_subsys &= ~inited_mem; + } - if (inited & inited_pm_runtime_init) { - if (kbdev->pm.callback_power_runtime_term) - kbdev->pm.callback_power_runtime_term(kbdev); + if (kbdev->inited_subsys & inited_pm_runtime_init) { + kbdev->pm.callback_power_runtime_term(kbdev); + kbdev->inited_subsys &= ~inited_pm_runtime_init; } - if (inited & inited_device) + if (kbdev->inited_subsys & inited_device) { kbase_device_term(kbdev); + kbdev->inited_subsys &= ~inited_device; + } - if (inited & inited_backend_early) + if (kbdev->inited_subsys & inited_backend_early) { kbase_backend_early_term(kbdev); + kbdev->inited_subsys &= ~inited_backend_early; + } - return err; -} + if (kbdev->inited_subsys & inited_power_control) { + power_control_term(kbdev); + kbdev->inited_subsys &= ~inited_power_control; + } -static struct attribute *kbase_attrs[] = { -#ifdef CONFIG_MALI_DEBUG - &dev_attr_debug_command.attr, - &dev_attr_js_softstop_always.attr, -#endif -#if !MALI_CUSTOMER_RELEASE - &dev_attr_force_replay.attr, -#endif - &dev_attr_js_timeouts.attr, - &dev_attr_gpuinfo.attr, - &dev_attr_dvfs_period.attr, - &dev_attr_pm_poweroff.attr, - &dev_attr_reset_timeout.attr, - &dev_attr_js_scheduling_period.attr, - &dev_attr_power_policy.attr, - &dev_attr_core_availability_policy.attr, - &dev_attr_core_mask.attr, - &dev_attr_mem_pool_size.attr, - &dev_attr_mem_pool_max_size.attr, - NULL -}; + if (kbdev->inited_subsys & inited_registers_map) { + registers_unmap(kbdev); + kbdev->inited_subsys &= ~inited_registers_map; + } -static const struct attribute_group kbase_attr_group = { - .attrs = kbase_attrs, -}; +#ifdef CONFIG_MALI_NO_MALI + if (kbdev->inited_subsys & inited_gpu_device) { + gpu_device_destroy(kbdev); + kbdev->inited_subsys &= ~inited_gpu_device; + } +#endif /* CONFIG_MALI_NO_MALI */ + + if (kbdev->inited_subsys != 0) + dev_err(kbdev->dev, "Missing sub system termination\n"); + + kbase_device_free(kbdev); -static int kbase_common_device_remove(struct kbase_device *kbdev); + return 0; +} static int kbase_platform_device_probe(struct platform_device *pdev) { struct kbase_device *kbdev; - struct resource *reg_res; + struct mali_base_gpu_core_props *core_props; + u32 gpu_id; + const struct list_head *dev_list; int err = 0; - int i; #ifdef CONFIG_OF err = kbase_platform_early_init(); if (err) { dev_err(&pdev->dev, "Early platform initialization failed\n"); + kbase_platform_device_remove(pdev); return err; } #endif kbdev = kbase_device_alloc(); if (!kbdev) { - dev_err(&pdev->dev, "Can't allocate device\n"); - err = -ENOMEM; - goto out; + dev_err(&pdev->dev, "Allocate device failed\n"); + kbase_platform_device_remove(pdev); + return -ENOMEM; } + + kbdev->dev = &pdev->dev; + dev_set_drvdata(kbdev->dev, kbdev); + #ifdef CONFIG_MALI_NO_MALI err = gpu_device_create(kbdev); if (err) { - dev_err(&pdev->dev, "Can't initialize 
dummy model\n"); - goto out_midg; + dev_err(&pdev->dev, "Dummy model initialization failed\n"); + kbase_platform_device_remove(pdev); + return err; } + kbdev->inited_subsys |= inited_gpu_device; #endif /* CONFIG_MALI_NO_MALI */ - kbdev->dev = &pdev->dev; - /* 3 IRQ resources */ - for (i = 0; i < 3; i++) { - struct resource *irq_res; - int irqtag; + err = assign_irqs(pdev); + if (err) { + dev_err(&pdev->dev, "IRQ search failed\n"); + kbase_platform_device_remove(pdev); + return err; + } - irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, i); - if (!irq_res) { - dev_err(kbdev->dev, "No IRQ resource at index %d\n", i); - err = -ENOENT; - goto out_platform_irq; - } + err = registers_map(kbdev); + if (err) { + dev_err(&pdev->dev, "Register map failed\n"); + kbase_platform_device_remove(pdev); + return err; + } + kbdev->inited_subsys |= inited_registers_map; -#ifdef CONFIG_OF - if (!strcmp(irq_res->name, "JOB")) { - irqtag = JOB_IRQ_TAG; - } else if (!strcmp(irq_res->name, "MMU")) { - irqtag = MMU_IRQ_TAG; - } else if (!strcmp(irq_res->name, "GPU")) { - irqtag = GPU_IRQ_TAG; - } else { - dev_err(&pdev->dev, "Invalid irq res name: '%s'\n", - irq_res->name); - err = -EINVAL; - goto out_irq_name; - } -#else - irqtag = i; -#endif /* CONFIG_OF */ - kbdev->irqs[irqtag].irq = irq_res->start; - kbdev->irqs[irqtag].flags = (irq_res->flags & IRQF_TRIGGER_MASK); + err = power_control_init(pdev); + if (err) { + dev_err(&pdev->dev, "Power control initialization failed\n"); + kbase_platform_device_remove(pdev); + return err; } - /* the first memory resource is the physical address of the GPU - * registers */ - reg_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!reg_res) { - dev_err(kbdev->dev, "Invalid register resource\n"); - err = -ENOENT; - goto out_platform_mem; - } + kbdev->inited_subsys |= inited_power_control; - kbdev->reg_start = reg_res->start; - kbdev->reg_size = resource_size(reg_res); - err = kbase_common_reg_map(kbdev); - if (err) - goto out_reg_map; + err = kbase_backend_early_init(kbdev); + if (err) { + dev_err(kbdev->dev, "Early backend initialization failed\n"); + kbase_platform_device_remove(pdev); + return err; + } + kbdev->inited_subsys |= inited_backend_early; -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \ - && defined(CONFIG_REGULATOR) - kbdev->regulator = regulator_get_optional(kbdev->dev, "mali"); - if (IS_ERR_OR_NULL(kbdev->regulator)) { - err = PTR_ERR(kbdev->regulator); + scnprintf(kbdev->devname, DEVNAME_SIZE, "%s%d", kbase_drv_name, + kbase_dev_nr); - kbdev->regulator = NULL; - if (err == -EPROBE_DEFER) - goto out_regulator; - dev_info(kbdev->dev, "Continuing without Mali regulator control\n"); - /* Allow probe to continue without regulator */ - } -#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */ + kbase_disjoint_init(kbdev); -#ifdef CONFIG_MALI_PLATFORM_DEVICETREE - pm_runtime_enable(kbdev->dev); -#endif + /* obtain min/max configured gpu frequencies */ + core_props = &(kbdev->gpu_props.props.core_props); + core_props->gpu_freq_khz_min = GPU_FREQ_KHZ_MIN; + core_props->gpu_freq_khz_max = GPU_FREQ_KHZ_MAX; - kbdev->clock = clk_get(kbdev->dev, "clk_mali"); - if (IS_ERR_OR_NULL(kbdev->clock)) { - err = PTR_ERR(kbdev->clock); + kbdev->gpu_props.irq_throttle_time_us = DEFAULT_IRQ_THROTTLE_TIME_US; - kbdev->clock = NULL; - if (err == -EPROBE_DEFER) - goto out_clock_prepare; - dev_info(kbdev->dev, "Continuing without Mali clock control\n"); - /* Allow probe to continue without clock. 
*/ - } else { - err = clk_prepare_enable(kbdev->clock); + err = kbase_device_init(kbdev); + if (err) { + dev_err(kbdev->dev, "Device initialization failed (%d)\n", err); + kbase_platform_device_remove(pdev); + return err; + } + kbdev->inited_subsys |= inited_device; + + if (kbdev->pm.callback_power_runtime_init) { + err = kbdev->pm.callback_power_runtime_init(kbdev); if (err) { dev_err(kbdev->dev, - "Failed to prepare and enable clock (%d)\n", err); - goto out_clock_prepare; + "Runtime PM initialization failed\n"); + kbase_platform_device_remove(pdev); + return err; } + kbdev->inited_subsys |= inited_pm_runtime_init; } -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)) && defined(CONFIG_OF) \ - && defined(CONFIG_PM_OPP) - /* Register the OPPs if they are available in device tree */ - if (dev_pm_opp_of_add_table(kbdev->dev) < 0) - dev_dbg(kbdev->dev, "OPP table not found\n"); -#endif + err = kbase_mem_init(kbdev); + if (err) { + dev_err(kbdev->dev, "Memory subsystem initialization failed\n"); + kbase_platform_device_remove(pdev); + return err; + } + kbdev->inited_subsys |= inited_mem; + + gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; + gpu_id &= GPU_ID_VERSION_PRODUCT_ID; + gpu_id = gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT; + kbase_device_coherency_init(kbdev, gpu_id); - err = kbase_common_device_init(kbdev); + kbasep_secure_mode_init(kbdev); + + err = kbasep_js_devdata_init(kbdev); if (err) { - dev_err(kbdev->dev, "Failed kbase_common_device_init\n"); - goto out_common_init; + dev_err(kbdev->dev, "Job JS devdata initialization failed\n"); + kbase_platform_device_remove(pdev); + return err; } + kbdev->inited_subsys |= inited_js; - err = sysfs_create_group(&kbdev->dev->kobj, &kbase_attr_group); + err = kbase_tlstream_init(); if (err) { - dev_err(&pdev->dev, "Failed to create sysfs entries\n"); - goto out_sysfs; + dev_err(kbdev->dev, "Timeline stream initialization failed\n"); + kbase_platform_device_remove(pdev); + return err; } + kbdev->inited_subsys |= inited_tlstream; -#ifdef CONFIG_MALI_FPGA_BUS_LOGGER - err = bl_core_client_register(kbdev->devname, - kbase_logging_started_cb, - kbdev, &kbdev->buslogger, - THIS_MODULE, NULL); + err = kbase_backend_late_init(kbdev); if (err) { - dev_err(kbdev->dev, "Couldn't register bus log client\n"); - goto out_bl_core_register; + dev_err(kbdev->dev, "Late backend initialization failed\n"); + kbase_platform_device_remove(pdev); + return err; } + kbdev->inited_subsys |= inited_backend_late; - bl_core_set_threshold(kbdev->buslogger, 1024*1024*1024); -#endif - return 0; - -#ifdef CONFIG_MALI_FPGA_BUS_LOGGER -out_bl_core_register: - sysfs_remove_group(&kbdev->dev->kobj, &kbase_attr_group); -#endif - -out_sysfs: - kbase_common_device_remove(kbdev); -out_common_init: -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)) - dev_pm_opp_of_remove_table(kbdev->dev); -#endif - clk_disable_unprepare(kbdev->clock); -out_clock_prepare: - clk_put(kbdev->clock); -#ifdef CONFIG_MALI_PLATFORM_DEVICETREE - pm_runtime_disable(kbdev->dev); -#endif -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \ - && defined(CONFIG_REGULATOR) -out_regulator: - regulator_put(kbdev->regulator); -#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */ - kbase_common_reg_unmap(kbdev); -out_reg_map: -out_platform_mem: -#ifdef CONFIG_OF -out_irq_name: -#endif -out_platform_irq: -#ifdef CONFIG_MALI_NO_MALI - gpu_device_destroy(kbdev); -out_midg: -#endif /* CONFIG_MALI_NO_MALI */ - kbase_device_free(kbdev); -out: - return err; -} +#ifdef CONFIG_MALI_DEVFREQ + err = 
kbase_devfreq_init(kbdev); + if (err) { + dev_err(kbdev->dev, "Fevfreq initialization failed\n"); + kbase_platform_device_remove(pdev); + return err; + } + kbdev->inited_subsys |= inited_devfreq; +#endif /* CONFIG_MALI_DEVFREQ */ -static int kbase_common_device_remove(struct kbase_device *kbdev) -{ - kbase_debug_job_fault_dev_term(kbdev); - kbase_ipa_term(kbdev->ipa_ctx); - kbase_vinstr_term(kbdev->vinstr_ctx); - sysfs_remove_group(&kbdev->dev->kobj, &kbase_attr_group); + kbdev->vinstr_ctx = kbase_vinstr_init(kbdev); + if (!kbdev->vinstr_ctx) { + dev_err(kbdev->dev, + "Virtual instrumentation initialization failed\n"); + kbase_platform_device_remove(pdev); + return -EINVAL; + } + kbdev->inited_subsys |= inited_vinstr; -#ifdef CONFIG_MALI_FPGA_BUS_LOGGER - if (kbdev->buslogger) - bl_core_client_unregister(kbdev->buslogger); -#endif +#ifndef CONFIG_MALI_PRFCNT_SET_SECONDARY + kbdev->ipa_ctx = kbase_ipa_init(kbdev); + if (!kbdev->ipa_ctx) { + dev_err(kbdev->dev, "IPA initialization failed\n"); + kbase_platform_device_remove(pdev); + return -EINVAL; + } -#ifdef CONFIG_DEBUG_FS - debugfs_remove_recursive(kbdev->mali_debugfs_directory); -#endif -#ifdef CONFIG_MALI_DEVFREQ - kbase_devfreq_term(kbdev); -#endif + kbdev->inited_subsys |= inited_ipa; +#endif /* CONFIG_MALI_PRFCNT_SET_SECONDARY */ - kbase_backend_late_term(kbdev); + err = kbase_debug_job_fault_dev_init(kbdev); + if (err) { + dev_err(kbdev->dev, "Job fault debug initialization failed\n"); + kbase_platform_device_remove(pdev); + return err; + } + kbdev->inited_subsys |= inited_job_fault; - if (kbdev->pm.callback_power_runtime_term) - kbdev->pm.callback_power_runtime_term(kbdev); -#ifdef CONFIG_MALI_PLATFORM_DEVICETREE - pm_runtime_disable(kbdev->dev); -#endif + err = kbase_device_debugfs_init(kbdev); + if (err) { + dev_err(kbdev->dev, "DebugFS initialization failed"); + kbase_platform_device_remove(pdev); + return err; + } + kbdev->inited_subsys |= inited_debugfs; -#ifdef CONFIG_MALI_MIPE_ENABLED - kbase_tlstream_term(); -#endif /* CONFIG_MALI_MIPE_ENABLED */ + /* initialize the kctx list */ + mutex_init(&kbdev->kctx_list_lock); + INIT_LIST_HEAD(&kbdev->kctx_list); - kbasep_js_devdata_halt(kbdev); - kbase_mem_halt(kbdev); + kbdev->mdev.minor = MISC_DYNAMIC_MINOR; + kbdev->mdev.name = kbdev->devname; + kbdev->mdev.fops = &kbase_fops; + kbdev->mdev.parent = get_device(kbdev->dev); + kbdev->inited_subsys |= inited_get_device; - kbasep_js_devdata_term(kbdev); - kbase_mem_term(kbdev); - kbase_backend_early_term(kbdev); + err = misc_register(&kbdev->mdev); + if (err) { + dev_err(kbdev->dev, "Misc device registration failed for %s\n", + kbdev->devname); + kbase_platform_device_remove(pdev); + return err; + } + kbdev->inited_subsys |= inited_misc_register; - { - const struct list_head *dev_list = kbase_dev_list_get(); + dev_list = kbase_dev_list_get(); + list_add(&kbdev->entry, &kbase_dev_list); + kbase_dev_list_put(dev_list); + kbdev->inited_subsys |= inited_dev_list; - list_del(&kbdev->entry); - kbase_dev_list_put(dev_list); - } - misc_deregister(&kbdev->mdev); - put_device(kbdev->dev); - kbase_common_reg_unmap(kbdev); - kbase_device_term(kbdev); - if (kbdev->clock) { - clk_disable_unprepare(kbdev->clock); - clk_put(kbdev->clock); - kbdev->clock = NULL; + err = sysfs_create_group(&kbdev->dev->kobj, &kbase_attr_group); + if (err) { + dev_err(&pdev->dev, "SysFS group creation failed\n"); + kbase_platform_device_remove(pdev); + return err; } -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \ - && 
defined(CONFIG_REGULATOR) - regulator_put(kbdev->regulator); -#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */ -#ifdef CONFIG_MALI_NO_MALI - gpu_device_destroy(kbdev); -#endif /* CONFIG_MALI_NO_MALI */ - kbase_device_free(kbdev); + kbdev->inited_subsys |= inited_sysfs_group; - return 0; -} +#ifdef CONFIG_MALI_FPGA_BUS_LOGGER + err = bl_core_client_register(kbdev->devname, + kbase_logging_started_cb, + kbdev, &kbdev->buslogger, + THIS_MODULE, NULL); + if (err == 0) { + kbdev->inited_subsys |= inited_buslogger; + bl_core_set_threshold(kbdev->buslogger, 1024*1024*1024); + } else { + dev_warn(kbdev->dev, "Bus log client registration failed\n"); + err = 0; + } +#endif -static int kbase_platform_device_remove(struct platform_device *pdev) -{ - struct kbase_device *kbdev = to_kbase_device(&pdev->dev); + dev_info(kbdev->dev, + "Probed as %s\n", dev_name(kbdev->mdev.this_device)); - if (!kbdev) - return -ENODEV; + kbase_dev_nr++; - return kbase_common_device_remove(kbdev); + return err; } /** Suspend callback from the OS. diff --git a/drivers/gpu/arm/midgard/mali_kbase_defs.h b/drivers/gpu/arm/midgard/mali_kbase_defs.h index 0fc5ff95234e..c6008dbbc03c 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_defs.h +++ b/drivers/gpu/arm/midgard/mali_kbase_defs.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -187,6 +187,8 @@ #define KBASE_KATOM_FLAG_HOLDING_CTX_REF (1<<10) /* Atom requires GPU to be in secure mode */ #define KBASE_KATOM_FLAG_SECURE (1<<11) +/* Atom has been stored in linked list */ +#define KBASE_KATOM_FLAG_JSCTX_IN_LL (1<<12) /* SW related flags about types of JS_COMMAND action * NOTE: These must be masked off by JS_COMMAND_MASK */ @@ -396,6 +398,14 @@ struct kbase_jd_atom { #ifdef CONFIG_DEBUG_FS struct base_job_fault_event fault_event; #endif + + struct list_head queue; + + struct kbase_va_region *jit_addr_reg; + + /* If non-zero, this indicates that the atom will fail with the set + * event_code when the atom is processed. 
*/ + enum base_jd_event_code will_fail_event_code; }; static inline bool kbase_jd_katom_is_secure(const struct kbase_jd_atom *katom) @@ -476,6 +486,7 @@ typedef u32 kbase_as_poke_state; struct kbase_mmu_setup { u64 transtab; u64 memattr; + u64 transcfg; }; /** @@ -494,6 +505,7 @@ struct kbase_as { enum kbase_mmu_fault_type fault_type; u32 fault_status; u64 fault_addr; + u64 fault_extra_addr; struct mutex transaction_mutex; struct kbase_mmu_setup current_setup; @@ -791,9 +803,7 @@ struct kbase_device { int irq; int flags; } irqs[3]; -#ifdef CONFIG_HAVE_CLK struct clk *clock; -#endif #ifdef CONFIG_REGULATOR struct regulator *regulator; #endif @@ -879,7 +889,7 @@ struct kbase_device { s8 nr_user_address_spaces; /**< Number of address spaces available to user contexts */ /* Structure used for instrumentation and HW counters dumping */ - struct { + struct kbase_hwcnt { /* The lock should be used when accessing any of the following members */ spinlock_t lock; @@ -1017,11 +1027,23 @@ struct kbase_device { /* defaults for new context created for this device */ +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) + bool infinite_cache_active_default; +#else u32 infinite_cache_active_default; +#endif size_t mem_pool_max_size_default; /* system coherency mode */ u32 system_coherency; + /* Flag to track when cci snoops have been enabled on the interface */ + bool cci_snoop_enabled; + + /* SMC function IDs to call into Trusted firmware to enable/disable + * cache snooping. Value of 0 indicates that they are not used + */ + u32 snoop_enable_smc; + u32 snoop_disable_smc; /* Secure operations */ struct kbase_secure_ops *secure_ops; @@ -1050,14 +1072,31 @@ struct kbase_device { #endif /* Boolean indicating if an IRQ flush during reset is in progress. */ bool irq_reset_flush; + + /* list of inited sub systems. Used during terminate/error recovery */ + u32 inited_subsys; }; -/* JSCTX ringbuffer size must always be a power of 2 */ -#define JSCTX_RB_SIZE 256 -#define JSCTX_RB_MASK (JSCTX_RB_SIZE-1) +/* JSCTX ringbuffer size will always be a power of 2. The idx shift must be: + - >=2 (buffer size -> 4) + - <= 9 (buffer size 2^(9-1)=256) (technically, 10 works for the ringbuffer + but this is unnecessary as max atoms is 256) + */ +#define JSCTX_RB_IDX_SHIFT (8U) +#if ((JSCTX_RB_IDX_SHIFT < 2) || ((3 * JSCTX_RB_IDX_SHIFT) >= 32)) +#error "Invalid ring buffer size for 32bit atomic." +#endif +#define JSCTX_RB_SIZE (1U << (JSCTX_RB_IDX_SHIFT - 1U)) /* 1 bit for overflow */ +#define JSCTX_RB_SIZE_STORE (1U << JSCTX_RB_IDX_SHIFT) +#define JSCTX_RB_MASK (JSCTX_RB_SIZE - 1U) +#define JSCTX_RB_MASK_STORE (JSCTX_RB_SIZE_STORE - 1U) + +#define JSCTX_WR_OFFSET (0U) +#define JSCTX_RN_OFFSET (JSCTX_WR_OFFSET + JSCTX_RB_IDX_SHIFT) +#define JSCTX_RD_OFFSET (JSCTX_RN_OFFSET + JSCTX_RB_IDX_SHIFT) /** - * struct jsctx_rb_entry - Entry in &struct jsctx_rb ring buffer + * struct jsctx_rb_entry - Ringbuffer entry in &struct jsctx_queue. * @atom_id: Atom ID */ struct jsctx_rb_entry { @@ -1065,32 +1104,52 @@ struct jsctx_rb_entry { }; /** - * struct jsctx_rb - JS context atom ring buffer + * struct jsctx_queue - JS context atom queue, containing both ring buffer and linked list. * @entries: Array of size %JSCTX_RB_SIZE which holds the &struct * kbase_jd_atom pointers which make up the contents of the ring * buffer. - * @read_idx: Index into @entries. Indicates the next entry in @entries to - * read, and is incremented when pulling an atom, and decremented - * when unpulling. - * HW access lock must be held when accessing. 
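 *
 * (Illustrative decode only, not part of the driver sources: with
 * JSCTX_RB_IDX_SHIFT at 8 the three 8-bit indices pack into a single 32-bit
 * atomic at bit offsets 0/8/16, i.e. 3 * 8 = 24 bits used, which is what the
 * #error check above enforces. Assuming a local variable queue pointing at a
 * struct jsctx_queue, a reader of the packed word would do roughly:
 *
 *   u32 all = (u32)atomic_read(&queue->indicies);
 *   u16 wr_idx = (all >> JSCTX_WR_OFFSET) & JSCTX_RB_MASK_STORE;
 *   u16 rn_idx = (all >> JSCTX_RN_OFFSET) & JSCTX_RB_MASK_STORE;
 *   u16 rd_idx = (all >> JSCTX_RD_OFFSET) & JSCTX_RB_MASK_STORE;
 *
 * with entries[rd_idx & JSCTX_RB_MASK] being the next ring-buffer slot to
 * pull, since the stored indices keep one extra bit to distinguish a full
 * buffer from an empty one.)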
- * @write_idx: Index into @entries. Indicates the next entry to use when - * adding atoms into the ring buffer, and is incremented when - * adding a new atom. - * jctx->lock must be held when accessing. - * @running_idx: Index into @entries. Indicates the last valid entry, and is - * incremented when remving atoms from the ring buffer. - * HW access lock must be held when accessing. + * @indicies: An atomic variable containing indicies for the ring buffer. + * Indicies are of size JSCTX_RB_IDX_SHIFT. + * The following are contained: + * - WR_IDX - Write index. Index of the NEXT slot to be written. + * - RN_IDX - Running index. Index of the tail of the list. + * This is the atom that has been running the longest. + * - RD_IDX - Read index. Index of the next atom to be pulled. + * @queue_head: Head item of the linked list queue. * - * &struct jsctx_rb is a ring buffer of &struct kbase_jd_atom. + * Locking: + * The linked list assumes jctx.lock is held. + * The ringbuffer serves as an intermediary between irq context and non-irq + * context, without the need for the two to share any lock. irq context can + * pull (and unpull) and only requires the runpool_irq.lock. While non-irq + * context can add and remove and only requires holding only jctx.lock. + * Error handling affecting both, or the whole ringbuffer in general, must + * hold both locks or otherwise ensure (f.ex deschedule/kill) only that thread + * is accessing the buffer. + * This means that RD_IDX is updated by irq-context (pull and unpull) and must + * hold runpool_irq.lock. While WR_IDX (add) and RN_IDX (remove) is updated by + * non-irq context and must hold jctx.lock. + * Note that pull (or sister function peek) must also access WR_IDX to ensure + * there is free space in the buffer, this is ok as WR_IDX is only increased. + * A similar situation is apparent with unpull and RN_IDX, but only one atom + * (already pulled) can cause either remove or unpull, so this will never + * conflict. + * + * &struct jsctx_queue is a queue of &struct kbase_jd_atom, + * part ringbuffer and part linked list. */ -struct jsctx_rb { +struct jsctx_queue { struct jsctx_rb_entry entries[JSCTX_RB_SIZE]; - u16 read_idx; /* HW access lock must be held when accessing */ - u16 write_idx; /* jctx->lock must be held when accessing */ - u16 running_idx; /* HW access lock must be held when accessing */ + atomic_t indicies; + + struct list_head queue_head; }; + + + + #define KBASE_API_VERSION(major, minor) ((((major) & 0xFFF) << 20) | \ (((minor) & 0xFFF) << 8) | \ ((0 & 0xFF) << 0)) @@ -1102,10 +1161,12 @@ struct kbase_context { unsigned long api_version; phys_addr_t pgd; struct list_head event_list; + struct list_head event_coalesce_list; struct mutex event_mutex; atomic_t event_closed; struct workqueue_struct *event_workq; atomic_t event_count; + int event_coalesce_count; bool is_compat; @@ -1116,6 +1177,7 @@ struct kbase_context { struct page *aliasing_sink_page; + struct mutex mmu_lock; struct mutex reg_lock; /* To be converted to a rwlock? 
*/ struct rb_root reg_rbtree; /* Red-Black tree of GPU regions (live regions) */ @@ -1132,7 +1194,12 @@ struct kbase_context { struct kbase_mem_pool mem_pool; + struct shrinker reclaim; + struct list_head evict_list; + struct mutex evict_lock; + struct list_head waiting_soft_jobs; + spinlock_t waiting_soft_jobs_lock; #ifdef CONFIG_KDS struct list_head waiting_kds_resource; #endif @@ -1157,6 +1224,8 @@ struct kbase_context { * All other flags must be added there */ spinlock_t mm_update_lock; struct mm_struct *process_mm; + /* End of the SAME_VA zone */ + u64 same_va_end; #ifdef CONFIG_MALI_TRACE_TIMELINE struct kbase_trace_kctx_timeline timeline; @@ -1182,7 +1251,7 @@ struct kbase_context { #endif /* CONFIG_DEBUG_FS */ - struct jsctx_rb jsctx_rb + struct jsctx_queue jsctx_queue [KBASE_JS_ATOM_SCHED_PRIO_COUNT][BASE_JM_MAX_NR_SLOTS]; /* Number of atoms currently pulled from this context */ @@ -1193,7 +1262,11 @@ struct kbase_context { bool pulled; /* true if infinite cache is to be enabled for new allocations. Existing * allocations will not change. bool stored as a u32 per Linux API */ +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) + bool infinite_cache_active; +#else u32 infinite_cache_active; +#endif /* Bitmask of slots that can be pulled from */ u32 slots_pullable; @@ -1220,6 +1293,49 @@ struct kbase_context { /* true if context is counted in kbdev->js_data.nr_contexts_runnable */ bool ctx_runnable_ref; + + /* Waiting soft-jobs will fail when this timer expires */ + struct hrtimer soft_event_timeout; + + /* JIT allocation management */ + struct kbase_va_region *jit_alloc[255]; + struct list_head jit_active_head; + struct list_head jit_pool_head; + struct list_head jit_destroy_head; + struct mutex jit_lock; + struct work_struct jit_work; + + /* External sticky resource management */ + struct list_head ext_res_meta_head; +}; + +/** + * struct kbase_ctx_ext_res_meta - Structure which binds an external resource + * to a @kbase_context. + * @ext_res_node: List head for adding the metadata to a + * @kbase_context. + * @alloc: The physical memory allocation structure + * which is mapped. + * @gpu_addr: The GPU virtual address the resource is + * mapped to. + * @refcount: Refcount to keep track of the number of + * active mappings. + * + * External resources can be mapped into multiple contexts as well as the same + * context multiple times. + * As kbase_va_region itself isn't refcounted we can't attach our extra + * information to it as it could be removed under our feet leaving external + * resources pinned. + * This metadata structure binds a single external resource to a single + * context, ensuring that per context refcount is tracked separately so it can + * be overridden when needed and abuses by the application (freeing the resource + * multiple times) don't effect the refcount of the physical allocation. + */ +struct kbase_ctx_ext_res_meta { + struct list_head ext_res_node; + struct kbase_mem_phy_alloc *alloc; + u64 gpu_addr; + u64 refcount; }; enum kbase_reg_access_type { @@ -1260,4 +1376,29 @@ static inline bool kbase_device_is_cpu_coherent(struct kbase_device *kbdev) /* Maximum number of times a job can be replayed */ #define BASEP_JD_REPLAY_LIMIT 15 +/* JobDescriptorHeader - taken from the architecture specifications, the layout + * is currently identical for all GPU archs. 
*/ +struct job_descriptor_header { + u32 exception_status; + u32 first_incomplete_task; + u64 fault_pointer; + u8 job_descriptor_size : 1; + u8 job_type : 7; + u8 job_barrier : 1; + u8 _reserved_01 : 1; + u8 _reserved_1 : 1; + u8 _reserved_02 : 1; + u8 _reserved_03 : 1; + u8 _reserved_2 : 1; + u8 _reserved_04 : 1; + u8 _reserved_05 : 1; + u16 job_index; + u16 job_dependency_index_1; + u16 job_dependency_index_2; + union { + u64 _64; + u32 _32; + } next_job; +}; + #endif /* _KBASE_DEFS_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_device.c b/drivers/gpu/arm/midgard/mali_kbase_device.c index c22e099db6f9..c55779cbad4d 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_device.c +++ b/drivers/gpu/arm/midgard/mali_kbase_device.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -145,8 +145,32 @@ static void kbase_device_all_as_term(struct kbase_device *kbdev) int kbase_device_init(struct kbase_device * const kbdev) { int i, err; +#ifdef CONFIG_ARM64 + struct device_node *np = NULL; +#endif /* CONFIG_ARM64 */ spin_lock_init(&kbdev->mmu_mask_change); +#ifdef CONFIG_ARM64 + kbdev->cci_snoop_enabled = false; + np = kbdev->dev->of_node; + if (np != NULL) { + if (of_property_read_u32(np, "snoop_enable_smc", + &kbdev->snoop_enable_smc)) + kbdev->snoop_enable_smc = 0; + if (of_property_read_u32(np, "snoop_disable_smc", + &kbdev->snoop_disable_smc)) + kbdev->snoop_disable_smc = 0; + /* Either both or none of the calls should be provided. */ + if (!((kbdev->snoop_disable_smc == 0 + && kbdev->snoop_enable_smc == 0) + || (kbdev->snoop_disable_smc != 0 + && kbdev->snoop_enable_smc != 0))) { + WARN_ON(1); + err = -EINVAL; + goto fail; + } + } +#endif /* CONFIG_ARM64 */ /* Get the list of workarounds for issues on the current HW * (identified by the GPU_ID register) */ @@ -220,7 +244,11 @@ int kbase_device_init(struct kbase_device * const kbdev) kbdev->reset_timeout_ms = DEFAULT_RESET_TIMEOUT_MS; +#ifdef CONFIG_MALI_GPU_MMU_AARCH64 + kbdev->mmu_mode = kbase_mmu_mode_get_aarch64(); +#else kbdev->mmu_mode = kbase_mmu_mode_get_lpae(); +#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */ #ifdef CONFIG_MALI_DEBUG init_waitqueue_head(&kbdev->driver_inactive_wait); diff --git a/drivers/gpu/arm/midgard/mali_kbase_event.c b/drivers/gpu/arm/midgard/mali_kbase_event.c index 0d0c5258aaa4..bf8c304610eb 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_event.c +++ b/drivers/gpu/arm/midgard/mali_kbase_event.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -19,10 +19,7 @@ #include #include - -#if defined(CONFIG_MALI_MIPE_ENABLED) #include -#endif static struct base_jd_udata kbase_event_process(struct kbase_context *kctx, struct kbase_jd_atom *katom) { @@ -38,10 +35,8 @@ static struct base_jd_udata kbase_event_process(struct kbase_context *kctx, stru KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, atomic_sub_return(1, &kctx->timeline.jd_atoms_in_flight)); -#if defined(CONFIG_MALI_MIPE_ENABLED) kbase_tlstream_tl_nret_atom_ctx(katom, kctx); kbase_tlstream_tl_del_atom(katom); -#endif katom->status = KBASE_JD_ATOM_STATE_UNUSED; @@ -147,6 +142,29 @@ static void kbase_event_process_noreport(struct kbase_context *kctx, } } +/** + * kbase_event_coalesce - Move pending events to the main event list + * @kctx: Context pointer + * + * kctx->event_list and kctx->event_coalesce_count must be protected + * by a lock unless this is the last thread using them + * (and we're about to terminate the lock). + * + * Return: The number of pending events moved to the main event list + */ +static int kbase_event_coalesce(struct kbase_context *kctx) +{ + const int event_count = kctx->event_coalesce_count; + + /* Join the list of pending events onto the tail of the main list + and reset it */ + list_splice_tail_init(&kctx->event_coalesce_list, &kctx->event_list); + kctx->event_coalesce_count = 0; + + /* Return the number of events moved */ + return event_count; +} + void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *atom) { if (atom->core_req & BASE_JD_REQ_EVENT_ONLY_ON_FAILURE) { @@ -163,12 +181,24 @@ void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *atom) return; } - mutex_lock(&ctx->event_mutex); - atomic_inc(&ctx->event_count); - list_add_tail(&atom->dep_item[0], &ctx->event_list); - mutex_unlock(&ctx->event_mutex); + if (atom->core_req & BASE_JD_REQ_EVENT_COALESCE) { + /* Don't report the event until other event(s) have completed */ + mutex_lock(&ctx->event_mutex); + list_add_tail(&atom->dep_item[0], &ctx->event_coalesce_list); + ++ctx->event_coalesce_count; + mutex_unlock(&ctx->event_mutex); + } else { + /* Report the event and any pending events now */ + int event_count = 1; - kbase_event_wakeup(ctx); + mutex_lock(&ctx->event_mutex); + event_count += kbase_event_coalesce(ctx); + list_add_tail(&atom->dep_item[0], &ctx->event_list); + atomic_add(event_count, &ctx->event_count); + mutex_unlock(&ctx->event_mutex); + + kbase_event_wakeup(ctx); + } } KBASE_EXPORT_TEST_API(kbase_event_post); @@ -185,8 +215,10 @@ int kbase_event_init(struct kbase_context *kctx) KBASE_DEBUG_ASSERT(kctx); INIT_LIST_HEAD(&kctx->event_list); + INIT_LIST_HEAD(&kctx->event_coalesce_list); mutex_init(&kctx->event_mutex); atomic_set(&kctx->event_count, 0); + kctx->event_coalesce_count = 0; atomic_set(&kctx->event_closed, false); kctx->event_workq = alloc_workqueue("kbase_event", WQ_MEM_RECLAIM, 1); @@ -200,6 +232,8 @@ KBASE_EXPORT_TEST_API(kbase_event_init); void kbase_event_cleanup(struct kbase_context *kctx) { + int event_count; + KBASE_DEBUG_ASSERT(kctx); KBASE_DEBUG_ASSERT(kctx->event_workq); @@ -212,6 +246,9 @@ void kbase_event_cleanup(struct kbase_context *kctx) * Note: use of kctx->event_list without a lock is safe because this must be the last * thread using it (because we're about to terminate the lock) */ + event_count = kbase_event_coalesce(kctx); + atomic_add(event_count, &kctx->event_count); + 
while (!list_empty(&kctx->event_list)) { struct base_jd_event_v2 event; diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_api.c b/drivers/gpu/arm/midgard/mali_kbase_gator_api.c index a2174b24ac3c..061564104b67 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_gator_api.c +++ b/drivers/gpu/arm/midgard/mali_kbase_gator_api.c @@ -189,23 +189,23 @@ struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcn } /* If we are using any other device */ } else { - uint32_t nr_l2, nr_sc, j; + uint32_t nr_l2, nr_sc_bits, j; uint64_t core_mask; nr_l2 = hand->kbdev->gpu_props.props.l2_props.num_l2_slices; core_mask = hand->kbdev->gpu_props.props.coherency_info.group[0].core_mask; - nr_sc = hand->kbdev->gpu_props.props.coherency_info.group[0].num_cores; + nr_sc_bits = fls64(core_mask); /* The job manager and tiler sets of counters * are always present */ - in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) * (2 + nr_sc + nr_l2), GFP_KERNEL); + in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) * (2 + nr_sc_bits + nr_l2), GFP_KERNEL); if (!in_out_info->hwc_layout) goto destroy_context; - dump_size = (2 + nr_sc + nr_l2) * MALI_COUNTERS_PER_BLOCK * MALI_BYTES_PER_COUNTER; + dump_size = (2 + nr_sc_bits + nr_l2) * MALI_COUNTERS_PER_BLOCK * MALI_BYTES_PER_COUNTER; in_out_info->hwc_layout[i++] = JM_BLOCK; in_out_info->hwc_layout[i++] = TILER_BLOCK; diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h b/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h index dc8af2d6e794..a962ecb3f9c6 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h +++ b/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -77,8 +77,9 @@ /* Helper macro to create a complete GPU_ID (new format) */ #define GPU_ID2_MAKE(arch_major, arch_minor, arch_rev, product_major, \ version_major, version_minor, version_status) \ - (GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev) | \ - GPU_ID2_VERSION_MAKE(version_major, version_minor, \ + (GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, \ + product_major) | \ + GPU_ID2_VERSION_MAKE(version_major, version_minor, \ version_status)) /* Helper macro to create a partial GPU_ID (new format) that identifies @@ -94,6 +95,7 @@ (((product_id) << GPU_ID2_PRODUCT_MAJOR_SHIFT) & \ GPU_ID2_PRODUCT_MODEL) +#define GPU_ID2_PRODUCT_TMIX GPU_ID2_MODEL_MAKE(6, 0) /* Values for GPU_ID_VERSION_STATUS field for PRODUCT_ID GPU_ID_PI_T60X */ #define GPU_ID_S_15DEV0 0x1 diff --git a/drivers/gpu/arm/midgard/mali_kbase_hw.c b/drivers/gpu/arm/midgard/mali_kbase_hw.c index f2f93de9d2e8..de2461fb8de4 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_hw.c +++ b/drivers/gpu/arm/midgard/mali_kbase_hw.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -37,6 +37,16 @@ void kbase_hw_set_features_mask(struct kbase_device *kbdev) product_id = gpu_id & GPU_ID_VERSION_PRODUCT_ID; product_id >>= GPU_ID_VERSION_PRODUCT_ID_SHIFT; + if (GPU_ID_IS_NEW_FORMAT(product_id)) { + switch (gpu_id & GPU_ID2_PRODUCT_MODEL) { + case GPU_ID2_PRODUCT_TMIX: + features = base_hw_features_tMIx; + break; + default: + features = base_hw_features_generic; + break; + } + } else { switch (product_id) { case GPU_ID_PI_TFRX: /* FALLTHROUGH */ @@ -65,7 +75,7 @@ void kbase_hw_set_features_mask(struct kbase_device *kbdev) features = base_hw_features_generic; break; } - + } for (; *features != BASE_HW_FEATURE_END; features++) set_bit(*features, &kbdev->hw_features_mask[0]); @@ -84,6 +94,25 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev) impl_tech = kbdev->gpu_props.props.thread_props.impl_tech; if (impl_tech != IMPLEMENTATION_MODEL) { + if (GPU_ID_IS_NEW_FORMAT(product_id)) { + switch (gpu_id) { + case GPU_ID2_MAKE(6, 0, 10, 0, 0, 0, 1): + issues = base_hw_issues_tMIx_r0p0_05dev0; + break; + case GPU_ID2_MAKE(6, 0, 10, 0, 0, 0, 2): + issues = base_hw_issues_tMIx_r0p0; + break; + default: + if ((gpu_id & GPU_ID2_PRODUCT_MODEL) == + GPU_ID2_PRODUCT_TMIX) { + issues = base_hw_issues_tMIx_r0p0; + } else { + dev_err(kbdev->dev, + "Unknown GPU ID %x", gpu_id); + return -EINVAL; + } + } + } else { switch (gpu_id) { case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 0, GPU_ID_S_15DEV0): issues = base_hw_issues_t60x_r0p0_15dev0; @@ -174,11 +203,24 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev) issues = base_hw_issues_t82x_r1p0; break; default: - dev_err(kbdev->dev, "Unknown GPU ID %x", gpu_id); + dev_err(kbdev->dev, + "Unknown GPU ID %x", gpu_id); return -EINVAL; } + } } else { /* Software model */ + if (GPU_ID_IS_NEW_FORMAT(product_id)) { + switch (gpu_id & GPU_ID2_PRODUCT_MODEL) { + case GPU_ID2_PRODUCT_TMIX: + issues = base_hw_issues_model_tMIx; + break; + default: + dev_err(kbdev->dev, + "Unknown GPU ID %x", gpu_id); + return -EINVAL; + } + } else { switch (product_id) { case GPU_ID_PI_T60X: issues = base_hw_issues_model_t60x; @@ -209,6 +251,7 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev) gpu_id); return -EINVAL; } + } } dev_info(kbdev->dev, "GPU identified as 0x%04x r%dp%d status %d", (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, (gpu_id & GPU_ID_VERSION_MAJOR) >> GPU_ID_VERSION_MAJOR_SHIFT, (gpu_id & GPU_ID_VERSION_MINOR) >> GPU_ID_VERSION_MINOR_SHIFT, (gpu_id & GPU_ID_VERSION_STATUS) >> GPU_ID_VERSION_STATUS_SHIFT); diff --git a/drivers/gpu/arm/midgard/mali_kbase_ipa.c b/drivers/gpu/arm/midgard/mali_kbase_ipa.c index 6ac97eb7937c..c579d0a589f7 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_ipa.c +++ b/drivers/gpu/arm/midgard/mali_kbase_ipa.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -135,7 +135,7 @@ static void init_ipa_groups(struct kbase_ipa_context *ctx) memcpy(ctx->groups, ipa_groups_def, sizeof(ctx->groups)); } -#if defined(CONFIG_OF) && (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 6, 0)) +#if defined(CONFIG_OF) && (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)) static int update_ipa_groups_from_dt(struct kbase_ipa_context *ctx) { struct kbase_device *kbdev = ctx->kbdev; @@ -145,7 +145,7 @@ static int update_ipa_groups_from_dt(struct kbase_ipa_context *ctx) size_t i; int err; - np = of_find_node_by_name(kbdev->dev->of_node, "ipa-groups"); + np = of_get_child_by_name(kbdev->dev->of_node, "ipa-groups"); if (!np) return 0; diff --git a/drivers/gpu/arm/midgard/mali_kbase_jd.c b/drivers/gpu/arm/midgard/mali_kbase_jd.c index 4b11861063d7..c091ffef643f 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_jd.c +++ b/drivers/gpu/arm/midgard/mali_kbase_jd.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -25,9 +25,6 @@ #endif #include #include -#ifdef CONFIG_UMP -#include -#endif /* CONFIG_UMP */ #include #include #include @@ -35,10 +32,7 @@ #include #include - -#if defined(CONFIG_MALI_MIPE_ENABLED) #include -#endif #define beenthere(kctx, f, a...) dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a) @@ -91,6 +85,10 @@ static int jd_run_atom(struct kbase_jd_atom *katom) return 0; } else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) { /* Soft-job */ + if (katom->will_fail_event_code) { + katom->status = KBASE_JD_ATOM_STATE_COMPLETED; + return 0; + } if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_SOFT_REPLAY) { if (!kbase_replay_process(katom)) @@ -98,9 +96,6 @@ static int jd_run_atom(struct kbase_jd_atom *katom) } else if (kbase_process_soft_job(katom) == 0) { kbase_finish_soft_job(katom); katom->status = KBASE_JD_ATOM_STATE_COMPLETED; - } else { - /* The job has not completed */ - list_add_tail(&katom->dep_item[0], &kctx->waiting_soft_jobs); } return 0; } @@ -199,208 +194,6 @@ static void kbase_cancel_kds_wait_job(struct kbase_jd_atom *katom) } #endif /* CONFIG_KDS */ -static int kbase_jd_user_buf_map(struct kbase_context *kctx, - struct kbase_va_region *reg) -{ - long pinned_pages; - struct kbase_mem_phy_alloc *alloc; - struct page **pages; - phys_addr_t *pa; - long i; - int err = -ENOMEM; - unsigned long address; - struct task_struct *owner; - struct device *dev; - unsigned long offset; - unsigned long local_size; - - alloc = reg->gpu_alloc; - pa = kbase_get_gpu_phy_pages(reg); - address = alloc->imported.user_buf.address; - owner = alloc->imported.user_buf.owner; - - KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF); - - pages = alloc->imported.user_buf.pages; - - down_read(&owner->mm->mmap_sem); - pinned_pages = get_user_pages(owner, owner->mm, - address, - alloc->imported.user_buf.nr_pages, - reg->flags & KBASE_REG_GPU_WR, - 0, pages, NULL); - up_read(&owner->mm->mmap_sem); - - if (pinned_pages <= 0) - return pinned_pages; - - if (pinned_pages != alloc->imported.user_buf.nr_pages) { - for (i = 0; i < pinned_pages; i++) - put_page(pages[i]); - return -ENOMEM; - } - - dev = kctx->kbdev->dev; - offset = address & ~PAGE_MASK; - local_size = 
alloc->imported.user_buf.size; - - for (i = 0; i < pinned_pages; i++) { - dma_addr_t dma_addr; - unsigned long min; - - min = MIN(PAGE_SIZE - offset, local_size); - dma_addr = dma_map_page(dev, pages[i], - offset, min, - DMA_BIDIRECTIONAL); - if (dma_mapping_error(dev, dma_addr)) - goto unwind; - - alloc->imported.user_buf.dma_addrs[i] = dma_addr; - pa[i] = page_to_phys(pages[i]); - - local_size -= min; - offset = 0; - } - - alloc->nents = pinned_pages; - - err = kbase_mmu_insert_pages(kctx, reg->start_pfn, pa, - kbase_reg_current_backed_size(reg), - reg->flags); - if (err == 0) - return 0; - - alloc->nents = 0; - /* fall down */ -unwind: - while (i--) { - dma_unmap_page(kctx->kbdev->dev, - alloc->imported.user_buf.dma_addrs[i], - PAGE_SIZE, DMA_BIDIRECTIONAL); - put_page(pages[i]); - pages[i] = NULL; - } - - return err; -} - -static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, - struct kbase_mem_phy_alloc *alloc, bool writeable) -{ - long i; - struct page **pages; - unsigned long size = alloc->imported.user_buf.size; - - KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF); - pages = alloc->imported.user_buf.pages; - for (i = 0; i < alloc->imported.user_buf.nr_pages; i++) { - unsigned long local_size; - dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i]; - - local_size = MIN(size, PAGE_SIZE - (dma_addr & ~PAGE_MASK)); - dma_unmap_page(kctx->kbdev->dev, dma_addr, local_size, - DMA_BIDIRECTIONAL); - if (writeable) - set_page_dirty_lock(pages[i]); - put_page(pages[i]); - pages[i] = NULL; - - size -= local_size; - } - alloc->nents = 0; -} - -/* not to use sg_dma_len. */ -#define MALI_SG_DMA_LEN(sg) ((sg)->length) - -#ifdef CONFIG_DMA_SHARED_BUFFER -static int kbase_jd_umm_map(struct kbase_context *kctx, struct kbase_va_region *reg) -{ - struct sg_table *sgt; /* scatterlist_table */ - struct scatterlist *s; - int i; - phys_addr_t *pa; - int err; - size_t count = 0; - struct kbase_mem_phy_alloc *alloc; - - alloc = reg->gpu_alloc; - - KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM); - KBASE_DEBUG_ASSERT(NULL == alloc->imported.umm.sgt); - sgt = dma_buf_map_attachment(alloc->imported.umm.dma_attachment, DMA_BIDIRECTIONAL); - - if (IS_ERR_OR_NULL(sgt)) - return -EINVAL; - - /* save for later */ - alloc->imported.umm.sgt = sgt; - - pa = kbase_get_gpu_phy_pages(reg); - KBASE_DEBUG_ASSERT(pa); - - for_each_sg(sgt->sgl, s, sgt->nents, i) { - int j; - /* size_t pages = PFN_UP(sg_dma_len(s)); */ - size_t pages = PFN_UP(MALI_SG_DMA_LEN(s)); - - WARN_ONCE(MALI_SG_DMA_LEN(s) & (PAGE_SIZE-1), - "MALI_SG_DMA_LEN(s)=%u is not a multiple of PAGE_SIZE\n", - MALI_SG_DMA_LEN(s)); - /* - WARN_ONCE(sg_dma_len(s) & (PAGE_SIZE-1), - "sg_dma_len(s)=%u is not a multiple of PAGE_SIZE\n", - sg_dma_len(s)); - */ - - WARN_ONCE(sg_dma_address(s) & (PAGE_SIZE-1), - "sg_dma_address(s)=%llx is not aligned to PAGE_SIZE\n", - (unsigned long long) sg_dma_address(s)); - - for (j = 0; (j < pages) && (count < reg->nr_pages); j++, count++) - *pa++ = sg_dma_address(s) + (j << PAGE_SHIFT); - - WARN_ONCE(j < pages, - "sg list from dma_buf_map_attachment > dma_buf->size=%zu\n", - alloc->imported.umm.dma_buf->size); - } - - if (WARN_ONCE(count < reg->nr_pages, - "sg list from dma_buf_map_attachment < dma_buf->size=%zu, count : %lu, reg->nr_pages : %lu. 
\n", - alloc->imported.umm.dma_buf->size, - count, - reg->nr_pages)) { - err = -EINVAL; - goto out; - } - - /* Update nents as we now have pages to map */ - alloc->nents = count; - - err = kbase_mmu_insert_pages(kctx, reg->start_pfn, kbase_get_gpu_phy_pages(reg), kbase_reg_current_backed_size(reg), reg->flags | KBASE_REG_GPU_WR | KBASE_REG_GPU_RD); - -out: - if (err) { - dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment, alloc->imported.umm.sgt, DMA_BIDIRECTIONAL); - alloc->imported.umm.sgt = NULL; - } - - return err; -} - -static void kbase_jd_umm_unmap(struct kbase_context *kctx, struct kbase_mem_phy_alloc *alloc) -{ - KBASE_DEBUG_ASSERT(kctx); - KBASE_DEBUG_ASSERT(alloc); - KBASE_DEBUG_ASSERT(alloc->imported.umm.dma_attachment); - KBASE_DEBUG_ASSERT(alloc->imported.umm.sgt); - dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment, - alloc->imported.umm.sgt, DMA_BIDIRECTIONAL); - alloc->imported.umm.sgt = NULL; - alloc->nents = 0; -} -#endif /* CONFIG_DMA_SHARED_BUFFER */ - void kbase_jd_free_external_resources(struct kbase_jd_atom *katom) { #ifdef CONFIG_KDS @@ -441,56 +234,12 @@ static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom) res_no = katom->nr_extres; while (res_no-- > 0) { struct kbase_mem_phy_alloc *alloc = katom->extres[res_no].alloc; + struct kbase_va_region *reg; - switch (alloc->type) { -#ifdef CONFIG_DMA_SHARED_BUFFER - case KBASE_MEM_TYPE_IMPORTED_UMM: { - alloc->imported.umm.current_mapping_usage_count--; - - if (0 == alloc->imported.umm.current_mapping_usage_count) { - struct kbase_va_region *reg; - - reg = kbase_region_tracker_find_region_base_address( - katom->kctx, - katom->extres[res_no].gpu_address); - - if (reg && reg->gpu_alloc == alloc) - kbase_mmu_teardown_pages( - katom->kctx, - reg->start_pfn, - kbase_reg_current_backed_size(reg)); - - kbase_jd_umm_unmap(katom->kctx, alloc); - } - } - break; -#endif /* CONFIG_DMA_SHARED_BUFFER */ - case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { - alloc->imported.user_buf.current_mapping_usage_count--; - - if (0 == alloc->imported.user_buf.current_mapping_usage_count) { - struct kbase_va_region *reg; - - reg = kbase_region_tracker_find_region_base_address( - katom->kctx, - katom->extres[res_no].gpu_address); - - if (reg && reg->gpu_alloc == alloc) - kbase_mmu_teardown_pages( - katom->kctx, - reg->start_pfn, - kbase_reg_current_backed_size(reg)); - - kbase_jd_user_buf_unmap(katom->kctx, - alloc, - reg->flags & KBASE_REG_GPU_WR); - } - } - break; - default: - break; - } - kbase_mem_phy_alloc_put(katom->extres[res_no].alloc); + reg = kbase_region_tracker_find_region_base_address( + katom->kctx, + katom->extres[res_no].gpu_address); + kbase_unmap_external_resource(katom->kctx, reg, alloc); } kfree(katom->extres); katom->extres = NULL; @@ -498,24 +247,6 @@ static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom) kbase_gpu_vm_unlock(katom->kctx); } -#if (defined(CONFIG_KDS) && defined(CONFIG_UMP)) || defined(CONFIG_DMA_SHARED_BUFFER_USES_KDS) -static void add_kds_resource(struct kds_resource *kds_res, struct kds_resource **kds_resources, u32 *kds_res_count, unsigned long *kds_access_bitmap, bool exclusive) -{ - u32 i; - - for (i = 0; i < *kds_res_count; i++) { - /* Duplicate resource, ignore */ - if (kds_resources[i] == kds_res) - return; - } - - kds_resources[*kds_res_count] = kds_res; - if (exclusive) - set_bit(*kds_res_count, kds_access_bitmap); - (*kds_res_count)++; -} -#endif - /* * Set up external resources needed by this job. 
* @@ -580,13 +311,20 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st } #endif /* CONFIG_KDS */ + /* Take the processes mmap lock */ + down_read(&current->mm->mmap_sem); + /* need to keep the GPU VM locked while we set up UMM buffers */ kbase_gpu_vm_lock(katom->kctx); for (res_no = 0; res_no < katom->nr_extres; res_no++) { struct base_external_resource *res; struct kbase_va_region *reg; + struct kbase_mem_phy_alloc *alloc; + bool exclusive; res = &input_extres[res_no]; + exclusive = (res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE) + ? true : false; reg = kbase_region_tracker_find_region_enclosing_address( katom->kctx, res->ext_resource & ~BASE_EXT_RES_ACCESS_EXCLUSIVE); @@ -599,76 +337,17 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st if (!(katom->core_req & BASE_JD_REQ_SOFT_JOB) && (reg->flags & KBASE_REG_SECURE)) { katom->atom_flags |= KBASE_KATOM_FLAG_SECURE; - if ((katom->core_req & BASE_JD_REQ_FS) == 0) { - WARN_RATELIMIT(1, "Secure non-fragment jobs not supported"); - goto failed_loop; - } } - /* decide what needs to happen for this resource */ - switch (reg->gpu_alloc->type) { - case BASE_MEM_IMPORT_TYPE_USER_BUFFER: { - reg->gpu_alloc->imported.user_buf.current_mapping_usage_count++; - if (1 == reg->gpu_alloc->imported.user_buf.current_mapping_usage_count) { - /* use a local variable to not pollute - * err_ret_val with a potential success - * value as some other gotos depend on - * the default error code stored in - * err_ret_val */ - int tmp; - - tmp = kbase_jd_user_buf_map(katom->kctx, - reg); - if (0 != tmp) { - /* failed to map this buffer, - * roll back */ - err_ret_val = tmp; - reg->gpu_alloc->imported.user_buf.current_mapping_usage_count--; - goto failed_loop; - } - } - } - break; - case BASE_MEM_IMPORT_TYPE_UMP: { -#if defined(CONFIG_KDS) && defined(CONFIG_UMP) - struct kds_resource *kds_res; - - kds_res = ump_dd_kds_resource_get(reg->gpu_alloc->imported.ump_handle); - if (kds_res) - add_kds_resource(kds_res, kds_resources, &kds_res_count, - kds_access_bitmap, - res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE); -#endif /*defined(CONFIG_KDS) && defined(CONFIG_UMP) */ - break; - } -#ifdef CONFIG_DMA_SHARED_BUFFER - case BASE_MEM_IMPORT_TYPE_UMM: { -#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS - struct kds_resource *kds_res; - - kds_res = get_dma_buf_kds_resource(reg->gpu_alloc->imported.umm.dma_buf); - if (kds_res) - add_kds_resource(kds_res, kds_resources, &kds_res_count, kds_access_bitmap, res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE); -#endif - reg->gpu_alloc->imported.umm.current_mapping_usage_count++; - if (1 == reg->gpu_alloc->imported.umm.current_mapping_usage_count) { - /* use a local variable to not pollute err_ret_val - * with a potential success value as some other gotos depend - * on the default error code stored in err_ret_val */ - int tmp; - - tmp = kbase_jd_umm_map(katom->kctx, reg); - if (tmp) { - /* failed to map this buffer, roll back */ - err_ret_val = tmp; - reg->gpu_alloc->imported.umm.current_mapping_usage_count--; - goto failed_loop; - } - } - break; - } + alloc = kbase_map_external_resource(katom->kctx, reg, + current->mm +#ifdef CONFIG_KDS + , &kds_res_count, kds_resources, + kds_access_bitmap, exclusive #endif - default: + ); + if (!alloc) { + err_ret_val = -EINVAL; goto failed_loop; } @@ -679,12 +358,15 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st * until the last read for an element. 
* */ katom->extres[res_no].gpu_address = reg->start_pfn << PAGE_SHIFT; /* save the start_pfn (as an address, not pfn) to use fast lookup later */ - katom->extres[res_no].alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); + katom->extres[res_no].alloc = alloc; } /* successfully parsed the extres array */ /* drop the vm lock before we call into kds */ kbase_gpu_vm_unlock(katom->kctx); + /* Release the processes mmap lock */ + up_read(&current->mm->mmap_sem); + #ifdef CONFIG_KDS if (kds_res_count) { int wait_failed; @@ -716,6 +398,8 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st #ifdef CONFIG_KDS failed_kds_setup: + /* Lock the processes mmap lock */ + down_read(&current->mm->mmap_sem); /* lock before we unmap */ kbase_gpu_vm_lock(katom->kctx); @@ -725,30 +409,14 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st /* undo the loop work */ while (res_no-- > 0) { struct kbase_mem_phy_alloc *alloc = katom->extres[res_no].alloc; -#ifdef CONFIG_DMA_SHARED_BUFFER - if (alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) { - alloc->imported.umm.current_mapping_usage_count--; - - if (0 == alloc->imported.umm.current_mapping_usage_count) { - struct kbase_va_region *reg; - reg = kbase_region_tracker_find_region_base_address( - katom->kctx, - katom->extres[res_no].gpu_address); - - if (reg && reg->gpu_alloc == alloc) - kbase_mmu_teardown_pages(katom->kctx, - reg->start_pfn, - kbase_reg_current_backed_size(reg)); - - kbase_jd_umm_unmap(katom->kctx, alloc); - } - } -#endif /* CONFIG_DMA_SHARED_BUFFER */ - kbase_mem_phy_alloc_put(alloc); + kbase_unmap_external_resource(katom->kctx, NULL, alloc); } kbase_gpu_vm_unlock(katom->kctx); + /* Release the processes mmap lock */ + up_read(&current->mm->mmap_sem); + early_err_out: kfree(katom->extres); katom->extres = NULL; @@ -761,8 +429,7 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st static inline void jd_resolve_dep(struct list_head *out_list, struct kbase_jd_atom *katom, - u8 d, - bool ctx_is_dying) + u8 d) { u8 other_d = !d; @@ -779,12 +446,7 @@ static inline void jd_resolve_dep(struct list_head *out_list, kbase_jd_katom_dep_clear(&dep_atom->dep[d]); if (katom->event_code != BASE_JD_EVENT_DONE && - (dep_type != BASE_JD_DEP_TYPE_ORDER || ctx_is_dying)) { - /* Atom failed, so remove the other dependencies and immediately fail the atom */ - if (kbase_jd_katom_dep_atom(&dep_atom->dep[other_d])) { - list_del(&dep_atom->dep_item[other_d]); - kbase_jd_katom_dep_clear(&dep_atom->dep[other_d]); - } + (dep_type != BASE_JD_DEP_TYPE_ORDER)) { #ifdef CONFIG_KDS if (!dep_atom->kds_dep_satisfied) { /* Just set kds_dep_satisfied to true. 
If the callback happens after this then it will early out and @@ -797,10 +459,17 @@ static inline void jd_resolve_dep(struct list_head *out_list, dep_atom->event_code = katom->event_code; KBASE_DEBUG_ASSERT(dep_atom->status != KBASE_JD_ATOM_STATE_UNUSED); - dep_atom->status = KBASE_JD_ATOM_STATE_COMPLETED; - list_add_tail(&dep_atom->dep_item[0], out_list); - } else if (!kbase_jd_katom_dep_atom(&dep_atom->dep[other_d])) { + if ((dep_atom->core_req & BASE_JD_REQ_SOFT_REPLAY) + != BASE_JD_REQ_SOFT_REPLAY) { + dep_atom->will_fail_event_code = + dep_atom->event_code; + } else { + dep_atom->status = + KBASE_JD_ATOM_STATE_COMPLETED; + } + } + if (!kbase_jd_katom_dep_atom(&dep_atom->dep[other_d])) { #ifdef CONFIG_KDS if (dep_atom->kds_dep_satisfied) #endif @@ -873,7 +542,6 @@ bool jd_done_nolock(struct kbase_jd_atom *katom, { struct kbase_context *kctx = katom->kctx; struct kbase_device *kbdev = kctx->kbdev; - struct kbasep_js_kctx_info *js_kctx_info = &kctx->jctx.sched_info; struct list_head completed_jobs; struct list_head runnable_jobs; bool need_to_try_schedule_context = false; @@ -888,7 +556,6 @@ bool jd_done_nolock(struct kbase_jd_atom *katom, jd_check_force_failure(katom); #endif - /* This is needed in case an atom is failed due to being invalid, this * can happen *before* the jobs that the atom depends on have completed */ for (i = 0; i < 2; i++) { @@ -924,8 +591,7 @@ bool jd_done_nolock(struct kbase_jd_atom *katom, KBASE_DEBUG_ASSERT(katom->status == KBASE_JD_ATOM_STATE_COMPLETED); for (i = 0; i < 2; i++) - jd_resolve_dep(&runnable_jobs, katom, i, - js_kctx_info->ctx.is_dying); + jd_resolve_dep(&runnable_jobs, katom, i); if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) kbase_jd_post_external_resources(katom); @@ -940,7 +606,8 @@ bool jd_done_nolock(struct kbase_jd_atom *katom, KBASE_DEBUG_ASSERT(node->status != KBASE_JD_ATOM_STATE_UNUSED); - if (node->status != KBASE_JD_ATOM_STATE_COMPLETED) { + if (node->status != KBASE_JD_ATOM_STATE_COMPLETED && + !kctx->jctx.sched_info.ctx.is_dying) { need_to_try_schedule_context |= jd_run_atom(node); } else { node->event_code = katom->event_code; @@ -965,6 +632,12 @@ bool jd_done_nolock(struct kbase_jd_atom *katom, list_add_tail(&node->dep_item[0], &completed_jobs); } + /* Completing an atom might have freed up space + * in the ringbuffer, but only on that slot. */ + jsctx_ll_flush_to_rb(kctx, + katom->sched_priority, + katom->slot_nr); + /* Register a completed job as a disjoint event when the GPU * is in a disjoint state (ie. being reset or replaying jobs). */ @@ -1052,6 +725,7 @@ bool jd_submit_atom(struct kbase_context *kctx, int i; int sched_prio; bool ret; + bool will_fail = false; /* Update the TOTAL number of jobs. This includes those not tracked by * the scheduler: 'not ready to run' and 'dependency-only' jobs. */ @@ -1075,6 +749,7 @@ bool jd_submit_atom(struct kbase_context *kctx, katom->need_cache_flush_cores_retained = 0; katom->x_pre_dep = NULL; katom->x_post_dep = NULL; + katom->will_fail_event_code = 0; #ifdef CONFIG_KDS /* Start by assuming that the KDS dependencies are satisfied, * kbase_jd_pre_external_resources will correct this if there are dependencies */ @@ -1096,7 +771,7 @@ bool jd_submit_atom(struct kbase_context *kctx, dep_atom_type != BASE_JD_DEP_TYPE_DATA) { katom->event_code = BASE_JD_EVENT_JOB_CONFIG_FAULT; katom->status = KBASE_JD_ATOM_STATE_COMPLETED; -#if defined(CONFIG_MALI_MIPE_ENABLED) + /* Wrong dependency setup. Atom will be sent * back to user space. Do not record any * dependencies. 
*/ @@ -1105,7 +780,7 @@ bool jd_submit_atom(struct kbase_context *kctx, kbase_jd_atom_id(kctx, katom)); kbase_tlstream_tl_ret_atom_ctx( katom, kctx); -#endif + ret = jd_done_nolock(katom, NULL); goto out; } @@ -1138,16 +813,10 @@ bool jd_submit_atom(struct kbase_context *kctx, continue; } - if (i == 1 && kbase_jd_katom_dep_atom(&katom->dep[0])) { - /* Remove the previous dependency */ - list_del(&katom->dep_item[0]); - kbase_jd_katom_dep_clear(&katom->dep[0]); - } - /* Atom has completed, propagate the error code if any */ katom->event_code = dep_atom->event_code; katom->status = KBASE_JD_ATOM_STATE_QUEUED; -#if defined(CONFIG_MALI_MIPE_ENABLED) + /* This atom is going through soft replay or * will be sent back to user space. Do not record any * dependencies. */ @@ -1155,7 +824,7 @@ bool jd_submit_atom(struct kbase_context *kctx, katom, kbase_jd_atom_id(kctx, katom)); kbase_tlstream_tl_ret_atom_ctx(katom, kctx); -#endif + if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_SOFT_REPLAY) { if (kbase_replay_process(katom)) { @@ -1163,9 +832,8 @@ bool jd_submit_atom(struct kbase_context *kctx, goto out; } } - ret = jd_done_nolock(katom, NULL); + will_fail = true; - goto out; } else { /* Atom is in progress, add this atom to the list */ list_add_tail(&katom->dep_item[i], &dep_atom->dep_head[i]); @@ -1174,12 +842,25 @@ bool jd_submit_atom(struct kbase_context *kctx, } } - /* These must occur after the above loop to ensure that an atom that - * depends on a previous atom with the same number behaves as expected */ - katom->event_code = BASE_JD_EVENT_DONE; - katom->status = KBASE_JD_ATOM_STATE_QUEUED; + if (will_fail) { + if (!queued) { + ret = jd_done_nolock(katom, NULL); + + goto out; + } else { + katom->will_fail_event_code = katom->event_code; + ret = false; + + goto out; + } + } else { + /* These must occur after the above loop to ensure that an atom + * that depends on a previous atom with the same number behaves + * as expected */ + katom->event_code = BASE_JD_EVENT_DONE; + katom->status = KBASE_JD_ATOM_STATE_QUEUED; + } -#if defined(CONFIG_MALI_MIPE_ENABLED) /* Create a new atom recording all dependencies it was set up with. 
*/ kbase_tlstream_tl_new_atom( katom, @@ -1192,7 +873,6 @@ bool jd_submit_atom(struct kbase_context *kctx, (void *)kbase_jd_katom_dep_atom( &katom->dep[i]), (void *)katom); -#endif /* Reject atoms with job chain = NULL, as these cause issues with soft-stop */ if (!katom->jc && (katom->core_req & BASEP_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) { @@ -1283,8 +963,7 @@ bool jd_submit_atom(struct kbase_context *kctx, ret = jd_done_nolock(katom, NULL); goto out; } - /* The job has not yet completed */ - list_add_tail(&katom->dep_item[0], &kctx->waiting_soft_jobs); + ret = false; } else if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) { katom->status = KBASE_JD_ATOM_STATE_IN_JS; @@ -1522,7 +1201,6 @@ void kbase_jd_done_worker(struct work_struct *data) mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); mutex_unlock(&js_devdata->queue_mutex); - mutex_unlock(&jctx->lock); spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); @@ -1530,6 +1208,7 @@ void kbase_jd_done_worker(struct work_struct *data) kbase_js_unpull(kctx, katom); spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + mutex_unlock(&jctx->lock); return; } @@ -1714,51 +1393,6 @@ static void jd_cancel_worker(struct work_struct *data) kbase_js_sched_all(kbdev); } -/** - * jd_evict_worker - Work queue job evict function - * @data: a &struct work_struct - * - * Only called as part of evicting failed jobs. This is only called on jobs that - * were never submitted to HW Access. Jobs that were submitted are handled - * through kbase_jd_done_worker(). - * Operates serially with the kbase_jd_done_worker() on the work queue. - * - * We don't need to release most of the resources that would occur on - * kbase_jd_done() or kbase_jd_done_worker(), because the atoms here must not be - * running (by virtue of having not been submitted to HW Access). - */ -static void jd_evict_worker(struct work_struct *data) -{ - struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, - work); - struct kbase_jd_context *jctx; - struct kbase_context *kctx; - struct kbasep_js_kctx_info *js_kctx_info; - struct kbase_device *kbdev; - - /* Soft jobs should never reach this function */ - KBASE_DEBUG_ASSERT((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0); - - kctx = katom->kctx; - kbdev = kctx->kbdev; - jctx = &kctx->jctx; - js_kctx_info = &kctx->jctx.sched_info; - - KBASE_TRACE_ADD(kbdev, JD_CANCEL_WORKER, kctx, katom, katom->jc, 0); - - /* Scheduler: Remove the job from the system */ - mutex_lock(&js_kctx_info->ctx.jsctx_mutex); - kbasep_js_remove_cancelled_job(kbdev, kctx, katom); - mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); - - mutex_lock(&jctx->lock); - jd_done_nolock(katom, NULL); - /* katom may have been freed now, do not use! 
*/ - mutex_unlock(&jctx->lock); - - kbase_js_sched_all(kbdev); -} - /** * kbase_jd_done - Complete a job that has been removed from the Hardware * @katom: atom which has been completed @@ -1802,7 +1436,8 @@ void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr, #ifdef CONFIG_DEBUG_FS /* a failed job happened and is waiting for dumping*/ - if (kbase_debug_job_fault_process(katom, katom->event_code)) + if (!katom->will_fail_event_code && + kbase_debug_job_fault_process(katom, katom->event_code)) return; #endif @@ -1840,30 +1475,6 @@ void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom) queue_work(kctx->jctx.job_done_wq, &katom->work); } -void kbase_jd_evict(struct kbase_device *kbdev, struct kbase_jd_atom *katom) -{ - struct kbase_context *kctx; - struct kbasep_js_kctx_info *js_kctx_info; - - KBASE_DEBUG_ASSERT(NULL != kbdev); - KBASE_DEBUG_ASSERT(NULL != katom); - kctx = katom->kctx; - KBASE_DEBUG_ASSERT(NULL != kctx); - - js_kctx_info = &kctx->jctx.sched_info; - - KBASE_TRACE_ADD(kbdev, JD_CANCEL, kctx, katom, katom->jc, 0); - - /* This should only be done from a context that is currently scheduled - */ - KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled); - - WARN_ON(work_pending(&katom->work)); - - KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work)); - INIT_WORK(&katom->work, jd_evict_worker); - queue_work(kctx->jctx.job_done_wq, &katom->work); -} void kbase_jd_zap_context(struct kbase_context *kctx) { @@ -1886,6 +1497,7 @@ void kbase_jd_zap_context(struct kbase_context *kctx) * queued outside the job scheduler. */ + hrtimer_cancel(&kctx->soft_event_timeout); list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) { katom = list_entry(entry, struct kbase_jd_atom, dep_item[0]); kbase_cancel_soft_job(katom); diff --git a/drivers/gpu/arm/midgard/mali_kbase_js.c b/drivers/gpu/arm/midgard/mali_kbase_js.c index 78761e69d04d..83228c05883b 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_js.c +++ b/drivers/gpu/arm/midgard/mali_kbase_js.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -25,9 +25,7 @@ #if defined(CONFIG_MALI_GATOR_SUPPORT) #include #endif -#if defined(CONFIG_MALI_MIPE_ENABLED) #include -#endif #include #include @@ -79,13 +77,6 @@ static int kbase_js_get_slot(struct kbase_device *kbdev, static void kbase_js_foreach_ctx_job(struct kbase_context *kctx, kbasep_js_policy_ctx_job_cb callback); -static bool kbase_js_evict_atom(struct kbase_context *kctx, - struct kbase_jd_atom *katom_evict, - struct kbase_jd_atom *start_katom, - struct kbase_jd_atom *head_katom, - struct list_head *evict_list, - struct jsctx_rb *rb, int idx); - /* Helper for trace subcodes */ #if KBASE_TRACE_ENABLE static int kbasep_js_trace_get_refcnt(struct kbase_device *kbdev, @@ -239,25 +230,31 @@ bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev, return result; } -/** - * jsctx_rb_is_empty_prio(): - Check if ring buffer is empty - * @kctx: Pointer to kbase context with ring buffer. - * @js: Job slot id to check. - * @prio: Priority to check. - * - * Caller must hold runpool_irq.lock - * - * Return: true if the ring buffer is empty, false otherwise. 
- */ -static inline bool -jsctx_rb_is_empty_prio(struct kbase_context *kctx, int js, int prio) -{ - struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js]; +/* Helper macros to access and modify jsctx_queue.indicies */ +#define JSCTX_GET(offset, var, mask) \ + ((var >> offset) & mask) - lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); +/* This wraps around to correct integer size automatically. */ +#define JSCTX_SET(var, offset, value, mask) \ + (var = ((var & ~(mask << offset)) /*Clear old bits */ \ + | (((value) & mask) << offset))) /* Set (after masking) new bits */ - return rb->running_idx == rb->write_idx; -} +#define JSCTX_GET_WR_IDX(var) \ + JSCTX_GET(JSCTX_WR_OFFSET, var, JSCTX_RB_MASK_STORE) +#define JSCTX_GET_RN_IDX(var) \ + JSCTX_GET(JSCTX_RN_OFFSET, var, JSCTX_RB_MASK_STORE) +#define JSCTX_GET_RD_IDX(var) \ + JSCTX_GET(JSCTX_RD_OFFSET, var, JSCTX_RB_MASK_STORE) + +#define JSCTX_GET_IDX_DIFF(lower, upper) \ + ((upper >= lower) ? (upper - lower) : (upper+JSCTX_RB_SIZE_STORE-lower)) + +#define JSCTX_SET_WR_IDX(var, value) \ + JSCTX_SET(var, JSCTX_WR_OFFSET, value, JSCTX_RB_MASK_STORE) +#define JSCTX_SET_RN_IDX(var, value) \ + JSCTX_SET(var, JSCTX_RN_OFFSET, value, JSCTX_RB_MASK_STORE) +#define JSCTX_SET_RD_IDX(var, value) \ + JSCTX_SET(var, JSCTX_RD_OFFSET, value, JSCTX_RB_MASK_STORE) /** * jsctx_rb_none_to_pull_prio(): - Check if there are no pullable atoms @@ -270,18 +267,15 @@ jsctx_rb_is_empty_prio(struct kbase_context *kctx, int js, int prio) * ring buffer to be full (with running atoms) when this functions returns * true. * - * Caller must hold runpool_irq.lock - * * Return: true if there are no atoms to pull, false otherwise. */ static inline bool jsctx_rb_none_to_pull_prio(struct kbase_context *kctx, int js, int prio) { - struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js]; - - lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); + struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; + unsigned int var = atomic_read(&rb->indicies); - return rb->read_idx == rb->write_idx; + return JSCTX_GET_RD_IDX(var) == JSCTX_GET_WR_IDX(var); } /** @@ -311,55 +305,29 @@ jsctx_rb_none_to_pull(struct kbase_context *kctx, int js) } /** - * jsctx_rb_compact_prio(): - Compact a ring buffer - * @kctx: Pointer to kbase context with ring buffer. - * @js: Job slot id to compact. - * @prio: Priority id to compact. - */ -static inline void -jsctx_rb_compact_prio(struct kbase_context *kctx, int js, int prio) + * jsctx_rb_is_full(): - Check if the given ringbuffer is full. + * @queue: Pointer to the queue containing the ringbuffer. + * + * No locks explicitly required, result will always be consistent. + * But depending on usage, the caller should consider jctx.lock, + * for the result to remain correct. + * + * Return: true if the ringbuffer is full, false otherwise. 
+ */ +static inline bool +jsctx_rb_is_full(struct jsctx_queue *queue) { - struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js]; - u16 compact_idx = rb->write_idx - 1; - u16 end_idx = rb->running_idx - 1; - u16 i; + unsigned int var = atomic_read(&queue->indicies); + u16 rn_idx = JSCTX_GET_RN_IDX(var); + u16 wr_idx = JSCTX_GET_WR_IDX(var); - lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); - lockdep_assert_held(&kctx->jctx.lock); - - for (i = compact_idx; i != end_idx; i--) { - if (rb->entries[i & JSCTX_RB_MASK].atom_id != - KBASEP_ATOM_ID_INVALID) { - WARN_ON(compact_idx < rb->running_idx); - rb->entries[compact_idx & JSCTX_RB_MASK].atom_id = - rb->entries[i & JSCTX_RB_MASK].atom_id; - - compact_idx--; - } - if (rb->read_idx == i) - rb->read_idx = compact_idx + 1; - } - - rb->running_idx = compact_idx + 1; + return JSCTX_GET_IDX_DIFF(rn_idx, wr_idx) >= JSCTX_RB_SIZE; } -/** - * jsctx_rb_compact(): - Compact all priority ring buffers - * @kctx: Pointer to kbase context with ring buffer. - * @js: Job slot id to compact. - */ -static inline void -jsctx_rb_compact(struct kbase_context *kctx, int js) -{ - int prio; - - for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) - jsctx_rb_compact_prio(kctx, js, prio); -} /** - * jsctx_rb_foreach_prio(): - Execute callback for each entry in ring buffer - * @kctx: Pointer to kbase context with ring buffer. + * jsctx_queue_foreach_prio(): - Execute callback for each entry in the queue. + * @kctx: Pointer to kbase context with the queue. * @js: Job slot id to iterate. * @prio: Priority id to iterate. * @callback: Function pointer to callback. @@ -376,50 +344,64 @@ jsctx_rb_compact(struct kbase_context *kctx, int js) * calling this function. */ static void -jsctx_rb_foreach_prio(struct kbase_context *kctx, int js, int prio, +jsctx_queue_foreach_prio(struct kbase_context *kctx, int js, int prio, kbasep_js_policy_ctx_job_cb callback) { - struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js]; + struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js]; struct kbase_jd_atom *katom; - u16 write_idx = ACCESS_ONCE(rb->write_idx); + + struct list_head *pos, *q; + + unsigned int var = atomic_read(&queue->indicies); + u16 running_idx = JSCTX_GET_RN_IDX(var); + u16 read_idx = JSCTX_GET_RD_IDX(var); + u16 wr_idx = JSCTX_GET_WR_IDX(var); + u16 i; + const u16 count = JSCTX_GET_IDX_DIFF(running_idx, wr_idx); lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); /* There must be no jobs currently in HW access */ - WARN_ON(rb->read_idx != rb->running_idx); + WARN_ON(read_idx != JSCTX_GET_RN_IDX(var)); /* Invoke callback on all kbase_jd_atoms in the ring buffer, and * removes them from the buffer */ - while (rb->read_idx != write_idx) { - int id = rb->entries[rb->read_idx & JSCTX_RB_MASK].atom_id; + for (i = 0; i < count; i++) { + int id = queue->entries[read_idx & JSCTX_RB_MASK].atom_id; katom = kbase_jd_atom_from_id(kctx, id); + read_idx++; + callback(kctx->kbdev, katom); + } + atomic_set(&queue->indicies, 0); - rb->read_idx++; - rb->running_idx++; + list_for_each_safe(pos, q, &queue->queue_head) { + struct kbase_jd_atom *entry; - callback(kctx->kbdev, katom); + entry = list_entry(pos, struct kbase_jd_atom, queue); + list_del(pos); + callback(kctx->kbdev, entry); } } /** - * jsctx_rb_foreach(): - Execute callback for each entry in all priority rb - * @kctx: Pointer to kbase context with ring buffer. + * jsctx_queue_foreach(): - Execute callback for each entry in every queue + * @kctx: Pointer to kbase context with queue. * @js: Job slot id to iterate. 
* @callback: Function pointer to callback. * * Iterate over all the different priorities, and for each call - * jsctx_rb_foreach_prio() to iterate over the ring buffer and invoke @callback - * for each entry in buffer, and remove the entry from the buffer. + * jsctx_queue_foreach_prio() to iterate over the queue and invoke @callback + * for each entry, and remove the entry from the queue. */ static inline void -jsctx_rb_foreach(struct kbase_context *kctx, int js, +jsctx_queue_foreach(struct kbase_context *kctx, int js, kbasep_js_policy_ctx_job_cb callback) { int prio; for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) - jsctx_rb_foreach_prio(kctx, js, prio, callback); + jsctx_queue_foreach_prio(kctx, js, prio, callback); } /** @@ -436,15 +418,16 @@ jsctx_rb_foreach(struct kbase_context *kctx, int js, static inline struct kbase_jd_atom * jsctx_rb_peek_prio(struct kbase_context *kctx, int js, int prio) { - struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js]; + struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; int id; + unsigned int var = atomic_read(&rb->indicies); lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); - if (jsctx_rb_none_to_pull_prio(kctx, js, prio)) + if (JSCTX_GET_RD_IDX(var) == JSCTX_GET_WR_IDX(var)) return NULL; - id = rb->entries[rb->read_idx & JSCTX_RB_MASK].atom_id; + id = rb->entries[JSCTX_GET_RD_IDX(var) & JSCTX_RB_MASK].atom_id; return kbase_jd_atom_from_id(kctx, id); } @@ -457,6 +440,8 @@ jsctx_rb_peek_prio(struct kbase_context *kctx, int js, int prio) * KBASE_JS_ATOM_SCHED_PRIO_HIGH, for the specified @js and @prio and return a * pointer to the next atom, unless all the priority's ring buffers are empty. * + * Caller must hold the runpool_irq.lock. + * * Return: Pointer to next atom in buffer, or NULL if there is no atom. */ static inline struct kbase_jd_atom * @@ -464,6 +449,8 @@ jsctx_rb_peek(struct kbase_context *kctx, int js) { int prio; + lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); + for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { struct kbase_jd_atom *katom; @@ -491,16 +478,21 @@ jsctx_rb_peek(struct kbase_context *kctx, int js) static inline struct kbase_jd_atom * jsctx_rb_peek_last(struct kbase_context *kctx, int js, int prio) { - struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js]; + struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; + unsigned int var = atomic_read(&rb->indicies); int id; - lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); lockdep_assert_held(&kctx->jctx.lock); - if (jsctx_rb_is_empty_prio(kctx, js, prio)) + if (!list_empty(&rb->queue_head)) { + return list_entry(rb->queue_head.prev, + struct kbase_jd_atom, queue); + } + + if (JSCTX_GET_RN_IDX(var) == JSCTX_GET_WR_IDX(var)) return NULL; - id = rb->entries[(rb->write_idx - 1) & JSCTX_RB_MASK].atom_id; + id = rb->entries[(JSCTX_GET_WR_IDX(var) - 1) & JSCTX_RB_MASK].atom_id; return kbase_jd_atom_from_id(kctx, id); } @@ -518,14 +510,23 @@ jsctx_rb_pull(struct kbase_context *kctx, struct kbase_jd_atom *katom) { int prio = katom->sched_priority; int js = katom->slot_nr; - struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js]; + unsigned int oldvar, var; + struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); /* Atoms must be pulled in the correct order. 
*/ WARN_ON(katom != jsctx_rb_peek_prio(kctx, js, prio)); - rb->read_idx++; + do { + u16 rd_idx; + + oldvar = atomic_read(&rb->indicies); + var = oldvar; + rd_idx = JSCTX_GET_RD_IDX(var); + + JSCTX_SET_RD_IDX(var, rd_idx+1); + } while (atomic_cmpxchg(&rb->indicies, oldvar, var) != oldvar); } /** @@ -543,15 +544,27 @@ jsctx_rb_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom) { int prio = katom->sched_priority; int js = katom->slot_nr; - struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js]; + unsigned int oldvar, var; + struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; + lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); - /* Atoms must be unpulled in correct order. */ - WARN_ON(rb->entries[(rb->read_idx - 1) & JSCTX_RB_MASK].atom_id != - kbase_jd_atom_id(kctx, katom)); + do { + u16 rd_idx; + + oldvar = atomic_read(&rb->indicies); + var = oldvar; + + + rd_idx = JSCTX_GET_RD_IDX(var)-1; - rb->read_idx--; + /* Atoms must be unpulled in correct order. */ + WARN_ON(rb->entries[rd_idx & JSCTX_RB_MASK].atom_id != + kbase_jd_atom_id(kctx, katom)); + + JSCTX_SET_RD_IDX(var, rd_idx); + } while (atomic_cmpxchg(&rb->indicies, oldvar, var) != oldvar); } /** @@ -571,18 +584,36 @@ jsctx_rb_add_atom(struct kbase_context *kctx, struct kbase_jd_atom *katom) { int prio = katom->sched_priority; int js = katom->slot_nr; - struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js]; + struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; + unsigned int oldvar, var; + u16 wr_idx, running_idx, count; lockdep_assert_held(&kctx->jctx.lock); + oldvar = atomic_read(&rb->indicies); + var = oldvar; + + running_idx = JSCTX_GET_RN_IDX(var); + wr_idx = JSCTX_GET_WR_IDX(var); + count = JSCTX_GET_IDX_DIFF(running_idx, wr_idx); + /* Check if the ring buffer is full */ - if ((rb->write_idx - rb->running_idx) >= JSCTX_RB_SIZE) + if (count >= JSCTX_RB_SIZE) return -EBUSY; - rb->entries[rb->write_idx & JSCTX_RB_MASK].atom_id = - kbase_jd_atom_id(kctx, katom); - rb->write_idx++; + rb->entries[wr_idx & JSCTX_RB_MASK].atom_id = + kbase_jd_atom_id(kctx, katom); + + wr_idx++; + JSCTX_SET_WR_IDX(var, wr_idx); + while (atomic_cmpxchg(&rb->indicies, oldvar, var) != oldvar) { + oldvar = atomic_read(&rb->indicies); + var = oldvar; + wr_idx = JSCTX_GET_WR_IDX(var)+1; + + JSCTX_SET_WR_IDX(var, wr_idx); + } return 0; } @@ -602,73 +633,129 @@ jsctx_rb_remove(struct kbase_context *kctx, struct kbase_jd_atom *katom) { int prio = katom->sched_priority; int js = katom->slot_nr; - struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js]; + unsigned int oldvar, var; + struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; - lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kctx->jctx.lock); - /* Atoms must be completed in order. */ - WARN_ON(rb->entries[rb->running_idx & JSCTX_RB_MASK].atom_id != - kbase_jd_atom_id(kctx, katom)); - rb->running_idx++; + do { + unsigned int rn_idx; + + oldvar = atomic_read(&rb->indicies); + var = oldvar; + + rn_idx = JSCTX_GET_RN_IDX(var); + + JSCTX_SET_RN_IDX(var, rn_idx+1); + } while (atomic_cmpxchg(&rb->indicies, oldvar, var) != oldvar); } -/** - * jsctx_rb_evict(): - Evict atom, and dependents, from ring buffer - * @kctx: Pointer to kbase context with ring buffer. - * @start_katom: Pointer to the first katom to evict. - * @head_katom: Pointer to head katom. - * @evict_list: Pointer to head of list where evicted atoms are added. - * - * Iterate over the ring buffer starting at @start_katom and evict @start_atom - * and dependent atoms in ring buffer. 
- * - * @evict_list and @head_katom is passed on to kbase_js_evict_atom() which will - * examine the atom dependencies. - * - * jsctx_rb_evict() is only called by kbase_js_evict_deps(). - */ + static void -jsctx_rb_evict(struct kbase_context *kctx, - struct kbase_jd_atom *start_katom, - struct kbase_jd_atom *head_katom, - struct list_head *evict_list) +jsctx_ll_add(struct kbase_context *kctx, struct kbase_jd_atom *katom) { - int prio = start_katom->sched_priority; - int js = start_katom->slot_nr; - struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js]; - bool atom_in_rb = false; - u16 i, start_idx; + int prio = katom->sched_priority; + int js = katom->slot_nr; + struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js]; - lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); lockdep_assert_held(&kctx->jctx.lock); - for (i = rb->running_idx; i != rb->write_idx; i++) { - if (rb->entries[i & JSCTX_RB_MASK].atom_id == - kbase_jd_atom_id(kctx, start_katom)) { - start_idx = i; - atom_in_rb = true; + list_add_tail(&katom->queue, &queue->queue_head); +} + +static bool kbase_js_ctx_pullable(struct kbase_context *kctx, + int js, + bool is_scheduled); +static bool kbase_js_ctx_list_add_pullable(struct kbase_device *kbdev, + struct kbase_context *kctx, + int js); +static bool kbase_js_ctx_list_add_unpullable(struct kbase_device *kbdev, + struct kbase_context *kctx, + int js); + +void +jsctx_ll_flush_to_rb(struct kbase_context *kctx, int prio, int js) +{ + unsigned long flags; + struct list_head *pos, *q; + struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js]; + bool flushed_any = false; + struct kbasep_js_device_data *js_devdata = &kctx->kbdev->js_data; + bool enqueue_required = false; + + lockdep_assert_held(&kctx->jctx.lock); + + + /* Early out for common case */ + if (list_empty(&queue->queue_head) || jsctx_rb_is_full(queue)) + return; + + + mutex_lock(&js_devdata->queue_mutex); + mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex); + mutex_lock(&js_devdata->runpool_mutex); + + + spin_lock_irqsave(&kctx->kbdev->js_data.runpool_irq.lock, flags); + /* If slot will transition from unpullable to pullable then add to + * pullable list */ + if (jsctx_rb_none_to_pull(kctx, js)) + enqueue_required = true; + else + enqueue_required = false; + + list_for_each_safe(pos, q, &queue->queue_head) { + struct kbase_jd_atom *katom; + + katom = list_entry(pos, struct kbase_jd_atom, queue); + + KBASE_DEBUG_ASSERT(katom); + + if (jsctx_rb_add_atom(kctx, katom)) break; - } + + katom->atom_flags &= ~KBASE_KATOM_FLAG_JSCTX_IN_LL; + katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED; + flushed_any = true; + + list_del(pos); } - /* start_katom must still be in ring buffer. */ - if (i == rb->write_idx || !atom_in_rb) - return; - /* Evict all dependencies on same slot. 
*/ - for (i = start_idx; i != rb->write_idx; i++) { - u8 katom_evict; + if (flushed_any) { + bool timer_sync = false; - katom_evict = rb->entries[i & JSCTX_RB_MASK].atom_id; - if (katom_evict != KBASEP_ATOM_ID_INVALID) { - if (!kbase_js_evict_atom(kctx, - &kctx->jctx.atoms[katom_evict], - start_katom, head_katom, - evict_list, rb, i)) - break; + if (enqueue_required) { + if (kbase_js_ctx_pullable(kctx, js, false)) + timer_sync = kbase_js_ctx_list_add_pullable( + kctx->kbdev, kctx, js); + else + timer_sync = kbase_js_ctx_list_add_unpullable( + kctx->kbdev, kctx, js); + /* If this context is active and the atom is the first + * on its slot, kick the job manager to attempt to + * fast-start the atom */ + if (kctx == kctx->kbdev->hwaccess.active_kctx) + kbase_jm_try_kick(kctx->kbdev, 1 << js); + + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, + flags); + + if (timer_sync) + kbase_backend_ctx_count_changed(kctx->kbdev); + + } else { + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, + flags); } + } else { + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); } + mutex_unlock(&js_devdata->runpool_mutex); + mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex); + mutex_unlock(&js_devdata->queue_mutex); + } /* @@ -742,6 +829,7 @@ int kbasep_js_devdata_init(struct kbase_device * const kbdev) DEFAULT_JS_CFS_CTX_RUNTIME_INIT_SLICES; jsdd->cfs_ctx_runtime_min_slices = DEFAULT_JS_CFS_CTX_RUNTIME_MIN_SLICES; + atomic_set(&jsdd->soft_event_timeout_ms, DEFAULT_JS_SOFT_EVENT_TIMEOUT); dev_dbg(kbdev->dev, "JS Config Attribs: "); dev_dbg(kbdev->dev, "\tscheduling_period_ns:%u", @@ -768,6 +856,8 @@ int kbasep_js_devdata_init(struct kbase_device * const kbdev) jsdd->cfs_ctx_runtime_init_slices); dev_dbg(kbdev->dev, "\tcfs_ctx_runtime_min_slices:%u", jsdd->cfs_ctx_runtime_min_slices); + dev_dbg(kbdev->dev, "\tsoft_event_timeout:%i", + atomic_read(&jsdd->soft_event_timeout_ms)); if (!(jsdd->soft_stop_ticks < jsdd->hard_stop_ticks_ss && jsdd->hard_stop_ticks_ss < jsdd->gpu_reset_ticks_ss && @@ -872,7 +962,7 @@ int kbasep_js_kctx_init(struct kbase_context * const kctx) struct kbase_device *kbdev; struct kbasep_js_kctx_info *js_kctx_info; int err; - int i; + int i, j; KBASE_DEBUG_ASSERT(kctx != NULL); @@ -912,6 +1002,13 @@ int kbasep_js_kctx_init(struct kbase_context * const kctx) if (js_kctx_info->init_status != JS_KCTX_INIT_ALL) return -EINVAL; + for (i = 0; i < KBASE_JS_ATOM_SCHED_PRIO_COUNT; i++) { + for (j = 0; j < BASE_JM_MAX_NR_SLOTS; j++) { + INIT_LIST_HEAD(&kctx->jsctx_queue[i][j].queue_head); + atomic_set(&kctx->jsctx_queue[i][j].indicies, 0); + } + } + return 0; } @@ -1207,7 +1304,8 @@ static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js, return false; /* next atom blocked */ if (katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) { if (katom->x_pre_dep->gpu_rb_state == - KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB || + katom->x_pre_dep->will_fail_event_code) return false; if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) && kbase_backend_nr_atoms_on_slot(kctx->kbdev, js)) @@ -1236,7 +1334,8 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, /* Dependent atom must already have been submitted */ if (!(dep_atom->atom_flags & - KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED)) { + (KBASE_KATOM_FLAG_JSCTX_IN_LL | + KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED))){ ret = false; break; } @@ -1410,17 +1509,7 @@ bool kbasep_js_add_job(struct kbase_context *kctx, KBASE_TIMELINE_ATOM_READY(kctx, kbase_jd_atom_id(kctx, atom)); - if 
(kbase_js_dep_resolved_submit(kctx, atom, &enqueue_required) != 0) { - /* Ringbuffer was full (should be impossible) - fail the job */ - --(js_kctx_info->ctx.nr_jobs); - - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); - mutex_unlock(&js_devdata->runpool_mutex); - - atom->event_code = BASE_JD_EVENT_JOB_CANCELLED; - - goto out_unlock; - } + enqueue_required = kbase_js_dep_resolved_submit(kctx, atom); KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_ADD_JOB, kctx, atom, atom->jc, kbasep_js_trace_get_refcnt_nolock(kbdev, kctx)); @@ -1762,9 +1851,7 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( #if defined(CONFIG_MALI_GATOR_SUPPORT) kbase_trace_mali_mmu_as_released(kctx->as_nr); #endif -#if defined(CONFIG_MALI_MIPE_ENABLED) kbase_tlstream_tl_nret_as_ctx(&kbdev->as[kctx->as_nr], kctx); -#endif kbase_backend_release_ctx_irq(kbdev, kctx); @@ -2109,9 +2196,7 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, #if defined(CONFIG_MALI_GATOR_SUPPORT) kbase_trace_mali_mmu_as_in_use(kctx->as_nr); #endif -#if defined(CONFIG_MALI_MIPE_ENABLED) kbase_tlstream_tl_ret_as_ctx(&kbdev->as[kctx->as_nr], kctx); -#endif /* Cause any future waiter-on-termination to wait until the context is * descheduled */ @@ -2397,37 +2482,78 @@ static int kbase_js_get_slot(struct kbase_device *kbdev, return 1; } -int kbase_js_dep_resolved_submit(struct kbase_context *kctx, - struct kbase_jd_atom *katom, - bool *enqueue_required) +bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, + struct kbase_jd_atom *katom) { + bool enqueue_required; + katom->slot_nr = kbase_js_get_slot(kctx->kbdev, katom); lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kctx->jctx.lock); /* If slot will transition from unpullable to pullable then add to * pullable list */ if (jsctx_rb_none_to_pull(kctx, katom->slot_nr)) { - *enqueue_required = true; + enqueue_required = true; } else { - *enqueue_required = false; + enqueue_required = false; } /* Check if there are lower priority jobs to soft stop */ kbase_job_slot_ctx_priority_check_locked(kctx, katom); /* Add atom to ring buffer. */ - if (unlikely(jsctx_rb_add_atom(kctx, katom))) { - /* The ring buffer is full. This should be impossible as the - * job dispatcher can not submit enough atoms to exceed the - * ring buffer size. Fail the job. - */ - WARN(1, "Job submit while JSCTX ringbuffer already full\n"); - return -EINVAL; + if (jsctx_rb_add_atom(kctx, katom)) { + jsctx_ll_add(kctx, katom); + enqueue_required = false; + katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_LL; + } else { + katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED; } + return enqueue_required; +} - katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED; +/** + * kbase_js_evict_deps - Evict dependencies of a failed atom. + * @kctx: Context pointer + * @katom: Pointer to the atom that has failed. + * @js: The job slot the katom was run on. + * @prio: Priority of the katom. + * + * Remove all post dependencies of an atom from the context ringbuffers. + * + * The original atom's event_code will be propogated to all dependent atoms. 
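With this change a full ring buffer is no longer treated as a submission failure: the atom is parked on the per-queue linked list (KBASE_KATOM_FLAG_JSCTX_IN_LL) and migrated later by jsctx_ll_flush_to_rb(), and the enqueue decision is now the bool return value instead of an out-parameter. A condensed model of that decision, with simplified stand-in types and callbacks rather than the driver's real signatures:

#include <stdbool.h>

enum { IN_LL = 1 << 0, RB_SUBMITTED = 1 << 1 };

struct atom_model { unsigned int flags; };

/* Returns true when the caller must enqueue the context (the queue went
 * from empty to non-empty), mirroring the new bool return value. */
static bool submit_resolved(struct atom_model *atom, bool queue_was_empty,
                            bool (*rb_add)(struct atom_model *),
                            void (*ll_add)(struct atom_model *))
{
        if (!rb_add(atom)) {            /* ring buffer full */
                ll_add(atom);
                atom->flags |= IN_LL;
                return false;           /* parked atoms are not yet pullable */
        }

        atom->flags |= RB_SUBMITTED;
        return queue_was_empty;
}
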
+ * + * Context: Caller must hold the HW access lock + */ +static void kbase_js_evict_deps(struct kbase_context *kctx, + struct kbase_jd_atom *katom, int js, int prio) +{ + struct kbase_jd_atom *x_dep = katom->x_post_dep; + struct kbase_jd_atom *next_katom = jsctx_rb_peek_prio(kctx, js, prio); - return 0; + lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); + + if (next_katom && + (next_katom->atom_flags & KBASE_KATOM_FLAG_FAIL_PREV)) { + KBASE_DEBUG_ASSERT(next_katom->status != + KBASE_JD_ATOM_STATE_HW_COMPLETED); + + next_katom->will_fail_event_code = katom->event_code; + + } + + /* Has cross slot depenency. */ + if (x_dep && (x_dep->atom_flags & (KBASE_KATOM_FLAG_JSCTX_IN_LL | + KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED))) { + /* Remove dependency.*/ + x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED; + + /* Fail if it had a data dependency. */ + if (x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) { + x_dep->will_fail_event_code = katom->event_code; + } + } } struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js) @@ -2467,7 +2593,8 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js) if (katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) { if (katom->x_pre_dep->gpu_rb_state == - KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB || + katom->x_pre_dep->will_fail_event_code) return NULL; if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) && kbase_backend_nr_atoms_on_slot(kctx->kbdev, js)) @@ -2510,6 +2637,8 @@ static void js_return_worker(struct work_struct *data) u64 affinity = katom->affinity; enum kbase_atom_coreref_state coreref_state = katom->coreref_state; + kbase_tlstream_aux_job_softstop_ex(katom); + kbase_backend_complete_wq(kbdev, katom); if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) @@ -2601,112 +2730,6 @@ void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom) queue_work(kctx->jctx.job_done_wq, &katom->work); } -static bool kbase_js_evict_atom(struct kbase_context *kctx, - struct kbase_jd_atom *katom_evict, - struct kbase_jd_atom *start_katom, - struct kbase_jd_atom *head_katom, - struct list_head *evict_list, - struct jsctx_rb *rb, int idx) -{ - struct kbase_jd_atom *x_dep = katom_evict->x_post_dep; - - if (!(katom_evict->atom_flags & KBASE_KATOM_FLAG_FAIL_PREV) && - katom_evict != start_katom) - return false; - - if (katom_evict->gpu_rb_state != KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { - WARN_ON(katom_evict->event_code != head_katom->event_code); - - return false; - } - - if (katom_evict->status == KBASE_JD_ATOM_STATE_HW_COMPLETED && - katom_evict != head_katom) - return false; - - /* Evict cross dependency if present */ - if (x_dep && (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED) - && (x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER)) - list_add_tail(&x_dep->dep_item[0], evict_list); - - /* If cross dependency is present and does not have a data dependency - * then unblock */ - if (x_dep && (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED) - && !(x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER)) - x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED; - - if (katom_evict != head_katom) { - rb->entries[idx & JSCTX_RB_MASK].atom_id = - KBASEP_ATOM_ID_INVALID; - - katom_evict->event_code = head_katom->event_code; - katom_evict->atom_flags &= - ~KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED; - - if (katom_evict->atom_flags & KBASE_KATOM_FLAG_HOLDING_CTX_REF) - kbase_jd_done(katom_evict, katom_evict->slot_nr, NULL, - 0); - else - kbase_jd_evict(kctx->kbdev, katom_evict); 
- } - - return true; -} - -/** - * kbase_js_evict_deps - Evict dependencies - * @kctx: Context pointer - * @head_katom: Pointer to the atom to evict - * - * Remove all post dependencies of an atom from the context ringbuffers. - * - * The original atom's event_code will be propogated to all dependent atoms. - * - * Context: Caller must hold both jctx and HW access locks - */ -static void kbase_js_evict_deps(struct kbase_context *kctx, - struct kbase_jd_atom *head_katom) -{ - struct list_head evict_list; - - lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex); - lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); - - INIT_LIST_HEAD(&evict_list); - - list_add_tail(&head_katom->dep_item[0], &evict_list); - - while (!list_empty(&evict_list)) { - struct kbase_jd_atom *start_katom; - - start_katom = list_entry(evict_list.prev, struct kbase_jd_atom, - dep_item[0]); - list_del(evict_list.prev); - - jsctx_rb_evict(kctx, start_katom, head_katom, &evict_list); - } -} - -/** - * kbase_js_compact - Compact JSCTX ringbuffers - * @kctx: Context pointer - * - * Compact the JSCTX ringbuffers, removing any NULL entries - * - * Context: Caller must hold both jctx and HW access locks - */ -static void kbase_js_compact(struct kbase_context *kctx) -{ - struct kbase_device *kbdev = kctx->kbdev; - int js; - - lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex); - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); - - for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) - jsctx_rb_compact(kctx, js); -} - bool kbase_js_complete_atom_wq(struct kbase_context *kctx, struct kbase_jd_atom *katom) { @@ -2730,9 +2753,6 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); if (katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED) { - if (katom->event_code != BASE_JD_EVENT_DONE) - kbase_js_evict_deps(kctx, katom); - jsctx_rb_remove(kctx, katom); context_idle = !atomic_dec_return(&kctx->atoms_pulled); @@ -2744,10 +2764,8 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, kctx->ctx_runnable_ref = false; atomic_dec(&kbdev->js_data.nr_contexts_runnable); } - - if (katom->event_code != BASE_JD_EVENT_DONE) - kbase_js_compact(kctx); } + WARN_ON(katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_LL); if (!atomic_read(&kctx->atoms_pulled_slot[atom_slot]) && jsctx_rb_none_to_pull(kctx, atom_slot)) @@ -2811,13 +2829,21 @@ void kbase_js_complete_atom(struct kbase_jd_atom *katom, ktime_t *end_timestamp) lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); + if (katom->will_fail_event_code) + katom->event_code = katom->will_fail_event_code; + katom->status = KBASE_JD_ATOM_STATE_HW_COMPLETED; + if (katom->event_code != BASE_JD_EVENT_DONE) { + kbase_js_evict_deps(kctx, katom, katom->slot_nr, + katom->sched_priority); + } + #if defined(CONFIG_MALI_GATOR_SUPPORT) kbase_trace_mali_job_slots_event(GATOR_MAKE_EVENT(GATOR_JOB_SLOT_STOP, katom->slot_nr), NULL, 0); #endif -#if defined(CONFIG_MALI_MIPE_ENABLED) + kbase_tlstream_tl_nret_atom_lpu( katom, &kbdev->gpu_props.props.raw_props.js_features[ @@ -2827,7 +2853,7 @@ void kbase_js_complete_atom(struct kbase_jd_atom *katom, ktime_t *end_timestamp) kctx, &kbdev->gpu_props.props.raw_props.js_features[ katom->slot_nr]); -#endif + /* Calculate the job's time used */ if (end_timestamp != NULL) { /* Only calculating it for jobs that really run on the HW (e.g. 
@@ -3194,7 +3220,6 @@ static void kbase_js_foreach_ctx_job(struct kbase_context *kctx, u32 js; kbdev = kctx->kbdev; - js_devdata = &kbdev->js_data; spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); @@ -3204,7 +3229,7 @@ static void kbase_js_foreach_ctx_job(struct kbase_context *kctx, /* Invoke callback on jobs on each slot in turn */ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) - jsctx_rb_foreach(kctx, js, callback); + jsctx_queue_foreach(kctx, js, callback); spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); } diff --git a/drivers/gpu/arm/midgard/mali_kbase_js.h b/drivers/gpu/arm/midgard/mali_kbase_js.h index 868c6808d628..bdb820a7e952 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_js.h +++ b/drivers/gpu/arm/midgard/mali_kbase_js.h @@ -491,15 +491,22 @@ void kbasep_js_resume(struct kbase_device *kbdev); * @param[in] kctx Context pointer * @param[in] atom Pointer to the atom to submit * - * @return 0 if submit succeeded - * error code if the atom can not be submitted at this - * time, due to insufficient space in the ringbuffer, or dependencies - * that can not be represented. - */ -int kbase_js_dep_resolved_submit(struct kbase_context *kctx, - struct kbase_jd_atom *katom, - bool *enqueue_required); + * @return Whether the context requires to be enqueued. */ +bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, + struct kbase_jd_atom *katom); +/** + * jsctx_ll_flush_to_rb() - Pushes atoms from the linked list to ringbuffer. + * @kctx: Context Pointer + * @prio: Priority (specifies the queue together with js). + * @js: Job slot (specifies the queue together with prio). + * + * Pushes all possible atoms from the linked list to the ringbuffer. + * Number of atoms are limited to free space in the ringbuffer and + * number of available atoms in the linked list. + * + */ +void jsctx_ll_flush_to_rb(struct kbase_context *kctx, int prio, int js); /** * @brief Pull an atom from a context in the job scheduler for execution. * diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_defs.h b/drivers/gpu/arm/midgard/mali_kbase_js_defs.h index 04f7809f79d3..75d4b98b24a5 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_js_defs.h +++ b/drivers/gpu/arm/midgard/mali_kbase_js_defs.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -348,6 +348,9 @@ struct kbasep_js_device_data { u32 cfs_ctx_runtime_init_slices; /**< Value for DEFAULT_JS_CFS_CTX_RUNTIME_INIT_SLICES */ u32 cfs_ctx_runtime_min_slices; /**< Value for DEFAULT_JS_CFS_CTX_RUNTIME_MIN_SLICES */ + /**< Value for JS_SOFT_EVENT_TIMEOUT */ + atomic_t soft_event_timeout_ms; + /** List of suspended soft jobs */ struct list_head suspended_soft_jobs_list; @@ -402,7 +405,7 @@ struct kbasep_js_kctx_info { * * You may not access any of these members from IRQ context. */ - struct { + struct kbase_jsctx { struct mutex jsctx_mutex; /**< Job Scheduler Context lock */ /** Number of jobs ready to run - does \em not include the jobs waiting in diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem.c b/drivers/gpu/arm/midgard/mali_kbase_mem.c index ffc12a538af6..385d56a8f53d 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_mem.c +++ b/drivers/gpu/arm/midgard/mali_kbase_mem.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. 
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -24,7 +24,9 @@ #ifdef CONFIG_DMA_SHARED_BUFFER #include #endif /* CONFIG_DMA_SHARED_BUFFER */ - +#ifdef CONFIG_UMP +#include +#endif /* CONFIG_UMP */ #include #include #include @@ -36,10 +38,7 @@ #include #include #include - -#if defined(CONFIG_MALI_MIPE_ENABLED) #include -#endif /** * @brief Check the zone compatibility of two regions. @@ -392,13 +391,33 @@ int kbase_add_va_region(struct kbase_context *kctx, { u64 start_pfn; - tmp = kbase_region_tracker_find_region_meeting_reqs(kctx, reg, nr_pages, align); - if (!tmp) { + /* + * Depending on the zone the allocation request is for + * we might need to retry it. + */ + do { + tmp = kbase_region_tracker_find_region_meeting_reqs( + kctx, reg, nr_pages, align); + if (tmp) { + start_pfn = (tmp->start_pfn + align - 1) & + ~(align - 1); + err = kbase_insert_va_region_nolock(kctx, reg, + tmp, start_pfn, nr_pages); + break; + } + + /* + * If the allocation is not from the same zone as JIT + * then don't retry, we're out of VA and there is + * nothing which can be done about it. + */ + if ((reg->flags & KBASE_REG_ZONE_MASK) != + KBASE_REG_ZONE_CUSTOM_VA) + break; + } while (kbase_jit_evict(kctx)); + + if (!tmp) err = -ENOMEM; - goto exit; - } - start_pfn = (tmp->start_pfn + align - 1) & ~(align - 1); - err = kbase_insert_va_region_nolock(kctx, reg, tmp, start_pfn, nr_pages); } exit: @@ -410,7 +429,10 @@ KBASE_EXPORT_TEST_API(kbase_add_va_region); /** * @brief Initialize the internal region tracker data structure. */ -static void kbase_region_tracker_ds_init(struct kbase_context *kctx, struct kbase_va_region *same_va_reg, struct kbase_va_region *exec_reg, struct kbase_va_region *custom_va_reg) +static void kbase_region_tracker_ds_init(struct kbase_context *kctx, + struct kbase_va_region *same_va_reg, + struct kbase_va_region *exec_reg, + struct kbase_va_region *custom_va_reg) { kctx->reg_rbtree = RB_ROOT; kbase_region_tracker_insert(kctx, same_va_reg); @@ -448,6 +470,11 @@ int kbase_region_tracker_init(struct kbase_context *kctx) size_t same_va_bits = sizeof(void *) * BITS_PER_BYTE; u64 custom_va_size = KBASE_REG_ZONE_CUSTOM_VA_SIZE; u64 gpu_va_limit = (1ULL << kctx->kbdev->gpu_props.mmu.va_bits) >> PAGE_SHIFT; + u64 same_va_pages; + int err; + + /* Take the lock as kbase_free_alloced_region requires it */ + kbase_gpu_vm_lock(kctx); #if defined(CONFIG_ARM64) same_va_bits = VA_BITS; @@ -464,24 +491,29 @@ int kbase_region_tracker_init(struct kbase_context *kctx) same_va_bits = 33; #endif - if (kctx->kbdev->gpu_props.mmu.va_bits < same_va_bits) - return -EINVAL; + if (kctx->kbdev->gpu_props.mmu.va_bits < same_va_bits) { + err = -EINVAL; + goto fail_unlock; + } + same_va_pages = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1; /* all have SAME_VA */ same_va_reg = kbase_alloc_free_region(kctx, 1, - (1ULL << (same_va_bits - PAGE_SHIFT)) - 1, + same_va_pages, KBASE_REG_ZONE_SAME_VA); - if (!same_va_reg) - return -ENOMEM; + if (!same_va_reg) { + err = -ENOMEM; + goto fail_unlock; + } #ifdef CONFIG_64BIT - /* only 32-bit clients have the other two zones */ + /* 32-bit clients have exec and custom VA zones */ if (kctx->is_compat) { #endif if (gpu_va_limit <= KBASE_REG_ZONE_CUSTOM_VA_BASE) { - kbase_free_alloced_region(same_va_reg); - return -EINVAL; + err = -EINVAL; + goto fail_free_same_va; } /* If the current size of TMEM is out of range of the * 
virtual address space addressable by the MMU then @@ -496,8 +528,8 @@ int kbase_region_tracker_init(struct kbase_context *kctx) KBASE_REG_ZONE_EXEC); if (!exec_reg) { - kbase_free_alloced_region(same_va_reg); - return -ENOMEM; + err = -ENOMEM; + goto fail_free_same_va; } custom_va_reg = kbase_alloc_free_region(kctx, @@ -505,9 +537,8 @@ int kbase_region_tracker_init(struct kbase_context *kctx) custom_va_size, KBASE_REG_ZONE_CUSTOM_VA); if (!custom_va_reg) { - kbase_free_alloced_region(same_va_reg); - kbase_free_alloced_region(exec_reg); - return -ENOMEM; + err = -ENOMEM; + goto fail_free_exec; } #ifdef CONFIG_64BIT } @@ -515,7 +546,102 @@ int kbase_region_tracker_init(struct kbase_context *kctx) kbase_region_tracker_ds_init(kctx, same_va_reg, exec_reg, custom_va_reg); + kctx->same_va_end = same_va_pages + 1; + + kbase_gpu_vm_unlock(kctx); return 0; + +fail_free_exec: + kbase_free_alloced_region(exec_reg); +fail_free_same_va: + kbase_free_alloced_region(same_va_reg); +fail_unlock: + kbase_gpu_vm_unlock(kctx); + return err; +} + +int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages) +{ +#ifdef CONFIG_64BIT + struct kbase_va_region *same_va; + struct kbase_va_region *custom_va_reg; + u64 same_va_bits; + u64 total_va_size; + int err; + + /* + * Nothing to do for 32-bit clients, JIT uses the existing + * custom VA zone. + */ + if (kctx->is_compat) + return 0; + +#if defined(CONFIG_ARM64) + same_va_bits = VA_BITS; +#elif defined(CONFIG_X86_64) + same_va_bits = 47; +#elif defined(CONFIG_64BIT) +#error Unsupported 64-bit architecture +#endif + + if (kbase_hw_has_feature(kctx->kbdev, BASE_HW_FEATURE_33BIT_VA)) + same_va_bits = 33; + + total_va_size = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1; + + kbase_gpu_vm_lock(kctx); + + /* + * Modify the same VA free region after creation. Be careful to ensure + * that allocations haven't been made as they could cause an overlap + * to happen with existing same VA allocations and the custom VA zone. + */ + same_va = kbase_region_tracker_find_region_base_address(kctx, + PAGE_SIZE); + if (!same_va) { + err = -ENOMEM; + goto fail_unlock; + } + + /* The region flag or region size has changed since creation so bail. */ + if ((!(same_va->flags & KBASE_REG_FREE)) || + (same_va->nr_pages != total_va_size)) { + err = -ENOMEM; + goto fail_unlock; + } + + /* It's safe to adjust the same VA zone now */ + same_va->nr_pages -= jit_va_pages; + kctx->same_va_end -= jit_va_pages; + + /* + * Create a custom VA zone at the end of the VA for allocations which + * JIT can use so it doesn't have to allocate VA from the kernel. + */ + custom_va_reg = kbase_alloc_free_region(kctx, + kctx->same_va_end, + jit_va_pages, + KBASE_REG_ZONE_CUSTOM_VA); + if (!custom_va_reg) { + /* + * The context will be destroyed if we fail here so no point + * reverting the change we made to same_va. + */ + err = -ENOMEM; + goto fail_unlock; + } + + kbase_region_tracker_insert(kctx, custom_va_reg); + + kbase_gpu_vm_unlock(kctx); + return 0; + +fail_unlock: + kbase_gpu_vm_unlock(kctx); + return err; +#else + return 0; +#endif } int kbase_mem_init(struct kbase_device *kbdev) @@ -613,8 +739,46 @@ KBASE_EXPORT_TEST_API(kbase_alloc_free_region); */ void kbase_free_alloced_region(struct kbase_va_region *reg) { - KBASE_DEBUG_ASSERT(NULL != reg); if (!(reg->flags & KBASE_REG_FREE)) { + /* + * The physical allocation should have been removed from the + * eviction list before this function is called. 
However, in the + * case of abnormal process termination or the app leaking the + * memory kbase_mem_free_region is not called so it can still be + * on the list at termination time of the region tracker. + */ + if (!list_empty(®->gpu_alloc->evict_node)) { + /* + * Unlink the physical allocation before unmaking it + * evictable so that the allocation isn't grown back to + * its last backed size as we're going to unmap it + * anyway. + */ + reg->cpu_alloc->reg = NULL; + if (reg->cpu_alloc != reg->gpu_alloc) + reg->gpu_alloc->reg = NULL; + + /* + * If a region has been made evictable then we must + * unmake it before trying to free it. + * If the memory hasn't been reclaimed it will be + * unmapped and freed below, if it has been reclaimed + * then the operations below are no-ops. + */ + if (reg->flags & KBASE_REG_DONT_NEED) { + KBASE_DEBUG_ASSERT(reg->cpu_alloc->type == + KBASE_MEM_TYPE_NATIVE); + kbase_mem_evictable_unmake(reg->gpu_alloc); + } + } + + /* + * Remove the region from the sticky resource metadata + * list should it be there. + */ + kbase_sticky_resource_release(reg->kctx, NULL, + reg->start_pfn << PAGE_SHIFT, true); + kbase_mem_phy_alloc_put(reg->cpu_alloc); kbase_mem_phy_alloc_put(reg->gpu_alloc); /* To detect use-after-free in debug builds */ @@ -891,10 +1055,10 @@ static int kbase_do_syncset(struct kbase_context *kctx, /* find the region where the virtual address is contained */ reg = kbase_region_tracker_find_region_enclosing_address(kctx, - sset->mem_handle); + sset->mem_handle.basep.handle); if (!reg) { dev_warn(kctx->kbdev->dev, "Can't find region at VA 0x%016llX", - sset->mem_handle); + sset->mem_handle.basep.handle); err = -EINVAL; goto out_unlock; } @@ -908,7 +1072,7 @@ static int kbase_do_syncset(struct kbase_context *kctx, map = kbasep_find_enclosing_cpu_mapping_of_region(reg, start, size); if (!map) { dev_warn(kctx->kbdev->dev, "Can't find CPU mapping 0x%016lX for VA 0x%016llX", - start, sset->mem_handle); + start, sset->mem_handle.basep.handle); err = -EINVAL; goto out_unlock; } @@ -989,6 +1153,28 @@ int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *re KBASE_DEBUG_ASSERT(NULL != kctx); KBASE_DEBUG_ASSERT(NULL != reg); lockdep_assert_held(&kctx->reg_lock); + + /* + * Unlink the physical allocation before unmaking it evictable so + * that the allocation isn't grown back to its last backed size + * as we're going to unmap it anyway. + */ + reg->cpu_alloc->reg = NULL; + if (reg->cpu_alloc != reg->gpu_alloc) + reg->gpu_alloc->reg = NULL; + + /* + * If a region has been made evictable then we must unmake it + * before trying to free it. + * If the memory hasn't been reclaimed it will be unmapped and freed + * below, if it has been reclaimed then the operations below are no-ops. 
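The same three-step ordering appears in kbase_free_alloced_region() above and in kbase_mem_free_region() just below. Isolated here for clarity (the wrapper function is hypothetical; the helpers and flags are the driver's own, assuming its mali_kbase_mem.h headers are in scope):

static void unmake_evictable_before_free(struct kbase_va_region *reg)
{
        /* 1) Break the alloc->reg back-reference so unmaking evictability
         *    cannot grow the backing again while the region is torn down. */
        reg->cpu_alloc->reg = NULL;
        if (reg->cpu_alloc != reg->gpu_alloc)
                reg->gpu_alloc->reg = NULL;

        /* 2) Take evictable (DONT_NEED) allocations off the eviction list;
         *    if the pages were already reclaimed this is a no-op. */
        if (reg->flags & KBASE_REG_DONT_NEED)
                kbase_mem_evictable_unmake(reg->gpu_alloc);

        /* 3) The usual unmap + free path continues from here. */
}
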
+ */ + if (reg->flags & KBASE_REG_DONT_NEED) { + KBASE_DEBUG_ASSERT(reg->cpu_alloc->type == + KBASE_MEM_TYPE_NATIVE); + kbase_mem_evictable_unmake(reg->gpu_alloc); + } + err = kbase_gpu_munmap(kctx, reg); if (err) { dev_warn(reg->kctx->kbdev->dev, "Could not unmap from the GPU...\n"); @@ -1046,7 +1232,6 @@ int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr) kbase_free_alloced_region(reg); } else { /* A real GPU va */ - /* Validate the region */ reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr); if (!reg || (reg->flags & KBASE_REG_FREE)) { @@ -1063,7 +1248,6 @@ int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr) err = -EINVAL; goto out_unlock; } - err = kbase_mem_free_region(kctx, reg); } @@ -1145,11 +1329,9 @@ int kbase_alloc_phy_pages_helper( nr_pages_requested, alloc->pages + alloc->nents) != 0) goto no_alloc; -#if defined(CONFIG_MALI_MIPE_ENABLED) kbase_tlstream_aux_pagesalloc( (u32)alloc->imported.kctx->id, (u64)new_page_count); -#endif alloc->nents += nr_pages_requested; done: @@ -1167,11 +1349,12 @@ int kbase_free_phy_pages_helper( struct kbase_mem_phy_alloc *alloc, size_t nr_pages_to_free) { + struct kbase_context *kctx = alloc->imported.kctx; bool syncback; + bool reclaimed = (alloc->evicted != 0); phys_addr_t *start_free; int new_page_count __maybe_unused; - KBASE_DEBUG_ASSERT(alloc); KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE); KBASE_DEBUG_ASSERT(alloc->imported.kctx); KBASE_DEBUG_ASSERT(alloc->nents >= nr_pages_to_free); @@ -1184,22 +1367,29 @@ int kbase_free_phy_pages_helper( syncback = alloc->properties & KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED; - kbase_mem_pool_free_pages(&alloc->imported.kctx->mem_pool, + kbase_mem_pool_free_pages(&kctx->mem_pool, nr_pages_to_free, start_free, - syncback); + syncback, + reclaimed); alloc->nents -= nr_pages_to_free; - kbase_process_page_usage_dec(alloc->imported.kctx, nr_pages_to_free); - new_page_count = kbase_atomic_sub_pages( - nr_pages_to_free, &alloc->imported.kctx->used_pages); - kbase_atomic_sub_pages(nr_pages_to_free, &alloc->imported.kctx->kbdev->memdev.used_pages); -#if defined(CONFIG_MALI_MIPE_ENABLED) - kbase_tlstream_aux_pagesalloc( - (u32)alloc->imported.kctx->id, - (u64)new_page_count); -#endif + /* + * If the allocation was not evicted (i.e. evicted == 0) then + * the page accounting needs to be done. + */ + if (!reclaimed) { + kbase_process_page_usage_dec(kctx, nr_pages_to_free); + new_page_count = kbase_atomic_sub_pages(nr_pages_to_free, + &kctx->used_pages); + kbase_atomic_sub_pages(nr_pages_to_free, + &kctx->kbdev->memdev.used_pages); + + kbase_tlstream_aux_pagesalloc( + (u32)kctx->id, + (u64)new_page_count); + } return 0; } @@ -1212,7 +1402,12 @@ void kbase_mem_kref_free(struct kref *kref) switch (alloc->type) { case KBASE_MEM_TYPE_NATIVE: { - KBASE_DEBUG_ASSERT(alloc->imported.kctx); + WARN_ON(!alloc->imported.kctx); + /* + * The physical allocation must have been removed from the + * eviction list before trying to free it. 
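The accounting rule introduced in kbase_free_phy_pages_helper() above can be stated compactly: pages the shrinker already reclaimed (alloc->evicted != 0) were uncounted at reclaim time, so only non-reclaimed frees adjust the per-context and per-device counters. The helper below is illustrative only, built from the driver calls shown in that hunk:

static void account_freed_pages(struct kbase_context *kctx,
                                size_t nr_pages, bool reclaimed)
{
        if (reclaimed)
                return;         /* already accounted when the shrinker ran */

        kbase_process_page_usage_dec(kctx, nr_pages);
        kbase_atomic_sub_pages(nr_pages, &kctx->used_pages);
        kbase_atomic_sub_pages(nr_pages, &kctx->kbdev->memdev.used_pages);
}
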
+ */ + WARN_ON(!list_empty(&alloc->evict_node)); kbase_free_phy_pages_helper(alloc, alloc->nents); break; } @@ -1290,9 +1485,11 @@ int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size if (kbase_alloc_phy_pages_helper(reg->cpu_alloc, size) != 0) goto out_term; + reg->cpu_alloc->reg = reg; if (reg->cpu_alloc != reg->gpu_alloc) { if (kbase_alloc_phy_pages_helper(reg->gpu_alloc, size) != 0) goto out_rollback; + reg->gpu_alloc->reg = reg; } return 0; @@ -1386,3 +1583,921 @@ void kbase_gpu_vm_unlock(struct kbase_context *kctx) } KBASE_EXPORT_TEST_API(kbase_gpu_vm_unlock); + +struct kbase_jit_debugfs_data { + int (*func)(struct kbase_jit_debugfs_data *); + struct mutex lock; + struct kbase_context *kctx; + u64 active_value; + u64 pool_value; + u64 destroy_value; + char buffer[50]; +}; + +static int kbase_jit_debugfs_common_open(struct inode *inode, + struct file *file, int (*func)(struct kbase_jit_debugfs_data *)) +{ + struct kbase_jit_debugfs_data *data; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + data->func = func; + mutex_init(&data->lock); + data->kctx = (struct kbase_context *) inode->i_private; + + file->private_data = data; + + return nonseekable_open(inode, file); +} + +static ssize_t kbase_jit_debugfs_common_read(struct file *file, + char __user *buf, size_t len, loff_t *ppos) +{ + struct kbase_jit_debugfs_data *data; + size_t size; + int ret; + + data = (struct kbase_jit_debugfs_data *) file->private_data; + mutex_lock(&data->lock); + + if (*ppos) { + size = strnlen(data->buffer, sizeof(data->buffer)); + } else { + if (!data->func) { + ret = -EACCES; + goto out_unlock; + } + + if (data->func(data)) { + ret = -EACCES; + goto out_unlock; + } + + size = scnprintf(data->buffer, sizeof(data->buffer), + "%llu,%llu,%llu", data->active_value, + data->pool_value, data->destroy_value); + } + + ret = simple_read_from_buffer(buf, len, ppos, data->buffer, size); + +out_unlock: + mutex_unlock(&data->lock); + return ret; +} + +static int kbase_jit_debugfs_common_release(struct inode *inode, + struct file *file) +{ + kfree(file->private_data); + return 0; +} + +#define KBASE_JIT_DEBUGFS_DECLARE(__fops, __func) \ +static int __fops ## _open(struct inode *inode, struct file *file) \ +{ \ + return kbase_jit_debugfs_common_open(inode, file, __func); \ +} \ +static const struct file_operations __fops = { \ + .owner = THIS_MODULE, \ + .open = __fops ## _open, \ + .release = kbase_jit_debugfs_common_release, \ + .read = kbase_jit_debugfs_common_read, \ + .write = NULL, \ + .llseek = generic_file_llseek, \ +} + +static int kbase_jit_debugfs_count_get(struct kbase_jit_debugfs_data *data) +{ + struct kbase_context *kctx = data->kctx; + struct list_head *tmp; + + mutex_lock(&kctx->jit_lock); + list_for_each(tmp, &kctx->jit_active_head) { + data->active_value++; + } + + list_for_each(tmp, &kctx->jit_pool_head) { + data->pool_value++; + } + + list_for_each(tmp, &kctx->jit_destroy_head) { + data->destroy_value++; + } + mutex_unlock(&kctx->jit_lock); + + return 0; +} +KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_count_fops, + kbase_jit_debugfs_count_get); + +static int kbase_jit_debugfs_vm_get(struct kbase_jit_debugfs_data *data) +{ + struct kbase_context *kctx = data->kctx; + struct kbase_va_region *reg; + + mutex_lock(&kctx->jit_lock); + list_for_each_entry(reg, &kctx->jit_active_head, jit_node) { + data->active_value += reg->nr_pages; + } + + list_for_each_entry(reg, &kctx->jit_pool_head, jit_node) { + data->pool_value += reg->nr_pages; + } + + 
list_for_each_entry(reg, &kctx->jit_destroy_head, jit_node) { + data->destroy_value += reg->nr_pages; + } + mutex_unlock(&kctx->jit_lock); + + return 0; +} +KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_vm_fops, + kbase_jit_debugfs_vm_get); + +static int kbase_jit_debugfs_phys_get(struct kbase_jit_debugfs_data *data) +{ + struct kbase_context *kctx = data->kctx; + struct kbase_va_region *reg; + + mutex_lock(&kctx->jit_lock); + list_for_each_entry(reg, &kctx->jit_active_head, jit_node) { + data->active_value += reg->gpu_alloc->nents; + } + + list_for_each_entry(reg, &kctx->jit_pool_head, jit_node) { + data->pool_value += reg->gpu_alloc->nents; + } + + list_for_each_entry(reg, &kctx->jit_destroy_head, jit_node) { + data->destroy_value += reg->gpu_alloc->nents; + } + mutex_unlock(&kctx->jit_lock); + + return 0; +} +KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_phys_fops, + kbase_jit_debugfs_phys_get); + +void kbase_jit_debugfs_add(struct kbase_context *kctx) +{ + /* Debugfs entry for getting the number of JIT allocations. */ + debugfs_create_file("mem_jit_count", S_IRUGO, kctx->kctx_dentry, + kctx, &kbase_jit_debugfs_count_fops); + + /* + * Debugfs entry for getting the total number of virtual pages + * used by JIT allocations. + */ + debugfs_create_file("mem_jit_vm", S_IRUGO, kctx->kctx_dentry, + kctx, &kbase_jit_debugfs_vm_fops); + + /* + * Debugfs entry for getting the number of physical pages used + * by JIT allocations. + */ + debugfs_create_file("mem_jit_phys", S_IRUGO, kctx->kctx_dentry, + kctx, &kbase_jit_debugfs_phys_fops); +} + +/** + * kbase_jit_destroy_worker - Deferred worker which frees JIT allocations + * @work: Work item + * + * This function does the work of freeing JIT allocations whose physical + * backing has been released. + */ +static void kbase_jit_destroy_worker(struct work_struct *work) +{ + struct kbase_context *kctx; + struct kbase_va_region *reg; + + kctx = container_of(work, struct kbase_context, jit_work); + do { + mutex_lock(&kctx->jit_lock); + if (list_empty(&kctx->jit_destroy_head)) + reg = NULL; + else + reg = list_first_entry(&kctx->jit_destroy_head, + struct kbase_va_region, jit_node); + + if (reg) { + list_del(®->jit_node); + mutex_unlock(&kctx->jit_lock); + + kbase_gpu_vm_lock(kctx); + kbase_mem_free_region(kctx, reg); + kbase_gpu_vm_unlock(kctx); + } else + mutex_unlock(&kctx->jit_lock); + } while (reg); +} + +int kbase_jit_init(struct kbase_context *kctx) +{ + INIT_LIST_HEAD(&kctx->jit_active_head); + INIT_LIST_HEAD(&kctx->jit_pool_head); + INIT_LIST_HEAD(&kctx->jit_destroy_head); + mutex_init(&kctx->jit_lock); + INIT_WORK(&kctx->jit_work, kbase_jit_destroy_worker); + + return 0; +} + +struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, + struct base_jit_alloc_info *info) +{ + struct kbase_va_region *reg = NULL; + struct kbase_va_region *walker; + struct kbase_va_region *temp; + size_t current_diff = SIZE_MAX; + + int ret; + + mutex_lock(&kctx->jit_lock); + /* + * Scan the pool for an existing allocation which meets our + * requirements and remove it. + */ + list_for_each_entry_safe(walker, temp, &kctx->jit_pool_head, jit_node) { + + if (walker->nr_pages >= info->va_pages) { + size_t min_size, max_size, diff; + + /* + * The JIT allocations VA requirements have been + * meet, it's suitable but other allocations + * might be a better fit. 
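The pool scan in kbase_jit_allocate() is a best-fit search: any cached allocation whose VA span is large enough is a candidate, the one whose committed size is closest to the request wins, and an exact match ends the search early. A stand-alone user-space model of that selection, with simplified types:

#include <stddef.h>

struct jit_candidate {
        size_t va_pages;        /* virtual span of the cached allocation */
        size_t committed;       /* physically backed pages (gpu_alloc->nents) */
};

static struct jit_candidate *pick_best_fit(struct jit_candidate *pool,
                                           size_t n, size_t want_va,
                                           size_t want_commit)
{
        struct jit_candidate *best = NULL;
        size_t best_diff = (size_t)-1;
        size_t i;

        for (i = 0; i < n; i++) {
                size_t diff;

                if (pool[i].va_pages < want_va)
                        continue;       /* cannot satisfy the VA requirement */

                diff = pool[i].committed > want_commit ?
                        pool[i].committed - want_commit :
                        want_commit - pool[i].committed;

                if (diff < best_diff) {
                        best_diff = diff;
                        best = &pool[i];
                }
                if (best_diff == 0)
                        break;          /* exact match, stop looking */
        }

        return best;
}
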
+ */ + min_size = min_t(size_t, walker->gpu_alloc->nents, + info->commit_pages); + max_size = max_t(size_t, walker->gpu_alloc->nents, + info->commit_pages); + diff = max_size - min_size; + + if (current_diff > diff) { + current_diff = diff; + reg = walker; + } + + /* The allocation is an exact match, stop looking */ + if (current_diff == 0) + break; + } + } + + if (reg) { + /* + * Remove the found region from the pool and add it to the + * active list. + */ + list_del_init(®->jit_node); + list_add(®->jit_node, &kctx->jit_active_head); + + /* Release the jit lock before modifying the allocation */ + mutex_unlock(&kctx->jit_lock); + + kbase_gpu_vm_lock(kctx); + + /* Make the physical backing no longer reclaimable */ + if (!kbase_mem_evictable_unmake(reg->gpu_alloc)) + goto update_failed; + + /* Grow the backing if required */ + if (reg->gpu_alloc->nents < info->commit_pages) { + size_t delta; + size_t old_size = reg->gpu_alloc->nents; + + /* Allocate some more pages */ + delta = info->commit_pages - reg->gpu_alloc->nents; + if (kbase_alloc_phy_pages_helper(reg->gpu_alloc, delta) + != 0) + goto update_failed; + + if (reg->cpu_alloc != reg->gpu_alloc) { + if (kbase_alloc_phy_pages_helper( + reg->cpu_alloc, delta) != 0) { + kbase_free_phy_pages_helper( + reg->gpu_alloc, delta); + goto update_failed; + } + } + + ret = kbase_mem_grow_gpu_mapping(kctx, reg, + info->commit_pages, old_size); + /* + * The grow failed so put the allocation back in the + * pool and return failure. + */ + if (ret) + goto update_failed; + } + kbase_gpu_vm_unlock(kctx); + } else { + /* No suitable JIT allocation was found so create a new one */ + u64 flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD | + BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF; + u64 gpu_addr; + u16 alignment; + + mutex_unlock(&kctx->jit_lock); + + reg = kbase_mem_alloc(kctx, info->va_pages, info->commit_pages, + info->extent, &flags, &gpu_addr, &alignment); + if (!reg) + goto out_unlocked; + + mutex_lock(&kctx->jit_lock); + list_add(®->jit_node, &kctx->jit_active_head); + mutex_unlock(&kctx->jit_lock); + } + + return reg; + +update_failed: + /* + * An update to an allocation from the pool failed, chances + * are slim a new allocation would fair any better so return + * the allocation to the pool and return the function with failure. + */ + kbase_gpu_vm_unlock(kctx); + mutex_lock(&kctx->jit_lock); + list_del_init(®->jit_node); + list_add(®->jit_node, &kctx->jit_pool_head); + mutex_unlock(&kctx->jit_lock); +out_unlocked: + return NULL; +} + +void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg) +{ + /* The physical backing of memory in the pool is always reclaimable */ + down_read(&kctx->process_mm->mmap_sem); + kbase_gpu_vm_lock(kctx); + kbase_mem_evictable_make(reg->gpu_alloc); + kbase_gpu_vm_unlock(kctx); + up_read(&kctx->process_mm->mmap_sem); + + mutex_lock(&kctx->jit_lock); + list_del_init(®->jit_node); + list_add(®->jit_node, &kctx->jit_pool_head); + mutex_unlock(&kctx->jit_lock); +} + +void kbase_jit_backing_lost(struct kbase_va_region *reg) +{ + struct kbase_context *kctx = reg->kctx; + + /* + * JIT allocations will always be on a list, if the region + * is not on a list then it's not a JIT allocation. + */ + if (list_empty(®->jit_node)) + return; + + /* + * Freeing the allocation requires locks we might not be able + * to take now, so move the allocation to the free list and kick + * the worker which will do the freeing. 
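kbase_jit_backing_lost() runs in a path that cannot take the VM locks, so it only moves the victim onto jit_destroy_head and kicks kbase_jit_destroy_worker(), which owns the heavier locking. The defer pattern in isolation, using the same kernel list/workqueue primitives as the patch (the container type and names below are illustrative):

#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/workqueue.h>

struct deferred_destroy {
        struct list_head destroy_head;  /* guarded by @lock */
        struct mutex lock;
        struct work_struct work;        /* worker drains destroy_head */
};

static void defer_destroy(struct deferred_destroy *dd,
                          struct list_head *victim)
{
        mutex_lock(&dd->lock);
        list_move_tail(victim, &dd->destroy_head);
        mutex_unlock(&dd->lock);

        schedule_work(&dd->work);
}
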
+ */ + mutex_lock(&kctx->jit_lock); + list_del_init(®->jit_node); + list_add(®->jit_node, &kctx->jit_destroy_head); + mutex_unlock(&kctx->jit_lock); + + schedule_work(&kctx->jit_work); +} + +bool kbase_jit_evict(struct kbase_context *kctx) +{ + struct kbase_va_region *reg = NULL; + + lockdep_assert_held(&kctx->reg_lock); + + /* Free the oldest allocation from the pool */ + mutex_lock(&kctx->jit_lock); + if (!list_empty(&kctx->jit_pool_head)) { + reg = list_entry(kctx->jit_pool_head.prev, + struct kbase_va_region, jit_node); + list_del(®->jit_node); + } + mutex_unlock(&kctx->jit_lock); + + if (reg) + kbase_mem_free_region(kctx, reg); + + return (reg != NULL); +} + +void kbase_jit_term(struct kbase_context *kctx) +{ + struct kbase_va_region *walker; + + /* Free all allocations for this context */ + + /* + * Flush the freeing of allocations whose backing has been freed + * (i.e. everything in jit_destroy_head). + */ + cancel_work_sync(&kctx->jit_work); + + kbase_gpu_vm_lock(kctx); + /* Free all allocations from the pool */ + while (!list_empty(&kctx->jit_pool_head)) { + walker = list_first_entry(&kctx->jit_pool_head, + struct kbase_va_region, jit_node); + list_del(&walker->jit_node); + kbase_mem_free_region(kctx, walker); + } + + /* Free all allocations from active list */ + while (!list_empty(&kctx->jit_active_head)) { + walker = list_first_entry(&kctx->jit_active_head, + struct kbase_va_region, jit_node); + list_del(&walker->jit_node); + kbase_mem_free_region(kctx, walker); + } + kbase_gpu_vm_unlock(kctx); +} + +static int kbase_jd_user_buf_map(struct kbase_context *kctx, + struct kbase_va_region *reg) +{ + long pinned_pages; + struct kbase_mem_phy_alloc *alloc; + struct page **pages; + phys_addr_t *pa; + long i; + int err = -ENOMEM; + unsigned long address; + struct task_struct *owner; + struct device *dev; + unsigned long offset; + unsigned long local_size; + + alloc = reg->gpu_alloc; + pa = kbase_get_gpu_phy_pages(reg); + address = alloc->imported.user_buf.address; + owner = alloc->imported.user_buf.owner; + + KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF); + + pages = alloc->imported.user_buf.pages; + + pinned_pages = get_user_pages(owner, owner->mm, + address, + alloc->imported.user_buf.nr_pages, + reg->flags & KBASE_REG_GPU_WR, + 0, pages, NULL); + + if (pinned_pages <= 0) + return pinned_pages; + + if (pinned_pages != alloc->imported.user_buf.nr_pages) { + for (i = 0; i < pinned_pages; i++) + put_page(pages[i]); + return -ENOMEM; + } + + dev = kctx->kbdev->dev; + offset = address & ~PAGE_MASK; + local_size = alloc->imported.user_buf.size; + + for (i = 0; i < pinned_pages; i++) { + dma_addr_t dma_addr; + unsigned long min; + + min = MIN(PAGE_SIZE - offset, local_size); + dma_addr = dma_map_page(dev, pages[i], + offset, min, + DMA_BIDIRECTIONAL); + if (dma_mapping_error(dev, dma_addr)) + goto unwind; + + alloc->imported.user_buf.dma_addrs[i] = dma_addr; + pa[i] = page_to_phys(pages[i]); + + local_size -= min; + offset = 0; + } + + alloc->nents = pinned_pages; + + err = kbase_mmu_insert_pages(kctx, reg->start_pfn, pa, + kbase_reg_current_backed_size(reg), + reg->flags); + if (err == 0) + return 0; + + alloc->nents = 0; + /* fall down */ +unwind: + while (i--) { + dma_unmap_page(kctx->kbdev->dev, + alloc->imported.user_buf.dma_addrs[i], + PAGE_SIZE, DMA_BIDIRECTIONAL); + put_page(pages[i]); + pages[i] = NULL; + } + + return err; +} + +static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, + struct kbase_mem_phy_alloc *alloc, bool writeable) +{ + long i; + 
struct page **pages; + unsigned long size = alloc->imported.user_buf.size; + + KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF); + pages = alloc->imported.user_buf.pages; + for (i = 0; i < alloc->imported.user_buf.nr_pages; i++) { + unsigned long local_size; + dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i]; + + local_size = MIN(size, PAGE_SIZE - (dma_addr & ~PAGE_MASK)); + dma_unmap_page(kctx->kbdev->dev, dma_addr, local_size, + DMA_BIDIRECTIONAL); + if (writeable) + set_page_dirty_lock(pages[i]); + put_page(pages[i]); + pages[i] = NULL; + + size -= local_size; + } + alloc->nents = 0; +} + +#ifdef CONFIG_DMA_SHARED_BUFFER +static int kbase_jd_umm_map(struct kbase_context *kctx, + struct kbase_va_region *reg) +{ + struct sg_table *sgt; + struct scatterlist *s; + int i; + phys_addr_t *pa; + int err; + size_t count = 0; + struct kbase_mem_phy_alloc *alloc; + + alloc = reg->gpu_alloc; + + KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM); + KBASE_DEBUG_ASSERT(NULL == alloc->imported.umm.sgt); + sgt = dma_buf_map_attachment(alloc->imported.umm.dma_attachment, + DMA_BIDIRECTIONAL); + + if (IS_ERR_OR_NULL(sgt)) + return -EINVAL; + + /* save for later */ + alloc->imported.umm.sgt = sgt; + + pa = kbase_get_gpu_phy_pages(reg); + KBASE_DEBUG_ASSERT(pa); + + for_each_sg(sgt->sgl, s, sgt->nents, i) { + int j; + size_t pages = PFN_UP(sg_dma_len(s)); + + WARN_ONCE(sg_dma_len(s) & (PAGE_SIZE-1), + "sg_dma_len(s)=%u is not a multiple of PAGE_SIZE\n", + sg_dma_len(s)); + + WARN_ONCE(sg_dma_address(s) & (PAGE_SIZE-1), + "sg_dma_address(s)=%llx is not aligned to PAGE_SIZE\n", + (unsigned long long) sg_dma_address(s)); + + for (j = 0; (j < pages) && (count < reg->nr_pages); j++, + count++) + *pa++ = sg_dma_address(s) + (j << PAGE_SHIFT); + WARN_ONCE(j < pages, + "sg list from dma_buf_map_attachment > dma_buf->size=%zu\n", + alloc->imported.umm.dma_buf->size); + } + + if (WARN_ONCE(count < reg->nr_pages, + "sg list from dma_buf_map_attachment < dma_buf->size=%zu\n", + alloc->imported.umm.dma_buf->size)) { + err = -EINVAL; + goto out; + } + + /* Update nents as we now have pages to map */ + alloc->nents = count; + + err = kbase_mmu_insert_pages(kctx, reg->start_pfn, + kbase_get_gpu_phy_pages(reg), + kbase_reg_current_backed_size(reg), + reg->flags | KBASE_REG_GPU_WR | KBASE_REG_GPU_RD); + +out: + if (err) { + dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment, + alloc->imported.umm.sgt, DMA_BIDIRECTIONAL); + alloc->imported.umm.sgt = NULL; + } + + return err; +} + +static void kbase_jd_umm_unmap(struct kbase_context *kctx, + struct kbase_mem_phy_alloc *alloc) +{ + KBASE_DEBUG_ASSERT(kctx); + KBASE_DEBUG_ASSERT(alloc); + KBASE_DEBUG_ASSERT(alloc->imported.umm.dma_attachment); + KBASE_DEBUG_ASSERT(alloc->imported.umm.sgt); + dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment, + alloc->imported.umm.sgt, DMA_BIDIRECTIONAL); + alloc->imported.umm.sgt = NULL; + alloc->nents = 0; +} +#endif /* CONFIG_DMA_SHARED_BUFFER */ + +#if (defined(CONFIG_KDS) && defined(CONFIG_UMP)) \ + || defined(CONFIG_DMA_SHARED_BUFFER_USES_KDS) +static void add_kds_resource(struct kds_resource *kds_res, + struct kds_resource **kds_resources, u32 *kds_res_count, + unsigned long *kds_access_bitmap, bool exclusive) +{ + u32 i; + + for (i = 0; i < *kds_res_count; i++) { + /* Duplicate resource, ignore */ + if (kds_resources[i] == kds_res) + return; + } + + kds_resources[*kds_res_count] = kds_res; + if (exclusive) + set_bit(*kds_res_count, kds_access_bitmap); + (*kds_res_count)++; +} 
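The core of kbase_jd_umm_map() above is the scatterlist walk: every dma-buf segment is cut into PAGE_SIZE slices and each slice's DMA address is recorded as one GPU "page", with the WARN_ONCE checks guarding against unaligned segments or sg lists larger than the region. A sketch of just that walk; fill_page_array() is a hypothetical stand-alone helper using the kernel scatterlist API as in the hunk above:

#include <linux/mm.h>
#include <linux/scatterlist.h>
#include <linux/types.h>

static size_t fill_page_array(struct sg_table *sgt, phys_addr_t *pa,
                              size_t max_pages)
{
        struct scatterlist *s;
        size_t count = 0;
        int i;

        for_each_sg(sgt->sgl, s, sgt->nents, i) {
                size_t pages = PFN_UP(sg_dma_len(s));
                size_t j;

                for (j = 0; j < pages && count < max_pages; j++, count++)
                        pa[count] = sg_dma_address(s) + (j << PAGE_SHIFT);
        }

        return count;   /* GPU pages actually described by the sg list */
}
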
+#endif + +struct kbase_mem_phy_alloc *kbase_map_external_resource( + struct kbase_context *kctx, struct kbase_va_region *reg, + struct mm_struct *locked_mm +#ifdef CONFIG_KDS + , u32 *kds_res_count, struct kds_resource **kds_resources, + unsigned long *kds_access_bitmap, bool exclusive +#endif + ) +{ + int err; + + /* decide what needs to happen for this resource */ + switch (reg->gpu_alloc->type) { + case BASE_MEM_IMPORT_TYPE_USER_BUFFER: { + if (reg->gpu_alloc->imported.user_buf.owner->mm != locked_mm) + goto exit; + + reg->gpu_alloc->imported.user_buf.current_mapping_usage_count++; + if (1 == reg->gpu_alloc->imported.user_buf.current_mapping_usage_count) { + err = kbase_jd_user_buf_map(kctx, reg); + if (err) { + reg->gpu_alloc->imported.user_buf.current_mapping_usage_count--; + goto exit; + } + } + } + break; + case BASE_MEM_IMPORT_TYPE_UMP: { +#if defined(CONFIG_KDS) && defined(CONFIG_UMP) + if (kds_res_count) { + struct kds_resource *kds_res; + + kds_res = ump_dd_kds_resource_get( + reg->gpu_alloc->imported.ump_handle); + if (kds_res) + add_kds_resource(kds_res, kds_resources, + kds_res_count, + kds_access_bitmap, exclusive); + } +#endif /*defined(CONFIG_KDS) && defined(CONFIG_UMP) */ + break; + } +#ifdef CONFIG_DMA_SHARED_BUFFER + case BASE_MEM_IMPORT_TYPE_UMM: { +#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS + if (kds_res_count) { + struct kds_resource *kds_res; + + kds_res = get_dma_buf_kds_resource( + reg->gpu_alloc->imported.umm.dma_buf); + if (kds_res) + add_kds_resource(kds_res, kds_resources, + kds_res_count, + kds_access_bitmap, exclusive); + } +#endif + reg->gpu_alloc->imported.umm.current_mapping_usage_count++; + if (1 == reg->gpu_alloc->imported.umm.current_mapping_usage_count) { + err = kbase_jd_umm_map(kctx, reg); + if (err) { + reg->gpu_alloc->imported.umm.current_mapping_usage_count--; + goto exit; + } + } + break; + } +#endif + default: + goto exit; + } + + return kbase_mem_phy_alloc_get(reg->gpu_alloc); +exit: + return NULL; +} + +void kbase_unmap_external_resource(struct kbase_context *kctx, + struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc) +{ + switch (alloc->type) { +#ifdef CONFIG_DMA_SHARED_BUFFER + case KBASE_MEM_TYPE_IMPORTED_UMM: { + alloc->imported.umm.current_mapping_usage_count--; + + if (0 == alloc->imported.umm.current_mapping_usage_count) { + if (reg && reg->gpu_alloc == alloc) + kbase_mmu_teardown_pages( + kctx, + reg->start_pfn, + kbase_reg_current_backed_size(reg)); + + kbase_jd_umm_unmap(kctx, alloc); + } + } + break; +#endif /* CONFIG_DMA_SHARED_BUFFER */ + case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { + alloc->imported.user_buf.current_mapping_usage_count--; + + if (0 == alloc->imported.user_buf.current_mapping_usage_count) { + bool writeable = true; + + if (reg && reg->gpu_alloc == alloc) + kbase_mmu_teardown_pages( + kctx, + reg->start_pfn, + kbase_reg_current_backed_size(reg)); + + if (reg && ((reg->flags & KBASE_REG_GPU_WR) == 0)) + writeable = false; + + kbase_jd_user_buf_unmap(kctx, alloc, writeable); + } + } + break; + default: + break; + } + kbase_mem_phy_alloc_put(alloc); +} + +struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire( + struct kbase_context *kctx, u64 gpu_addr) +{ + struct kbase_ctx_ext_res_meta *meta = NULL; + struct kbase_ctx_ext_res_meta *walker; + + lockdep_assert_held(&kctx->reg_lock); + + /* + * Walk the per context externel resource metadata list for the + * metadata which matches the region which is being acquired. 
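The sticky-resource code below is a find-or-create cache keyed by GPU address: acquire bumps the refcount of an existing entry (refusing to wrap at UINT_MAX) or creates one with a count of 1, and release only unmaps and frees on the last reference or on a forced release at context teardown. A minimal user-space model of that refcount discipline, with simplified types:

#include <limits.h>
#include <stdbool.h>

struct sticky_meta_model {
        unsigned long long gpu_addr;
        unsigned int refcount;
};

/* Acquire an existing entry; a freshly created entry starts at 1 instead. */
static bool sticky_get(struct sticky_meta_model *m)
{
        if (m->refcount == UINT_MAX)
                return false;           /* refuse to overflow the count */
        m->refcount++;
        return true;
}

/* Release; returns true when the caller must unmap and free the entry. */
static bool sticky_put(struct sticky_meta_model *m, bool force)
{
        m->refcount--;
        return (m->refcount == 0) || force;
}
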
+ */ + list_for_each_entry(walker, &kctx->ext_res_meta_head, ext_res_node) { + if (walker->gpu_addr == gpu_addr) { + meta = walker; + break; + } + } + + /* No metadata exists so create one. */ + if (!meta) { + struct kbase_va_region *reg; + + /* Find the region */ + reg = kbase_region_tracker_find_region_enclosing_address( + kctx, gpu_addr); + if (NULL == reg || (reg->flags & KBASE_REG_FREE)) + goto failed; + + /* Allocate the metadata object */ + meta = kzalloc(sizeof(*meta), GFP_KERNEL); + if (!meta) + goto failed; + + /* + * Fill in the metadata object and acquire a reference + * for the physical resource. + */ + meta->alloc = kbase_map_external_resource(kctx, reg, NULL +#ifdef CONFIG_KDS + , NULL, NULL, + NULL, false +#endif + ); + + if (!meta->alloc) + goto fail_map; + + meta->gpu_addr = reg->start_pfn << PAGE_SHIFT; + meta->refcount = 1; + + list_add(&meta->ext_res_node, &kctx->ext_res_meta_head); + } else { + if (meta->refcount == UINT_MAX) + goto failed; + + meta->refcount++; + } + + return meta; + +fail_map: + kfree(meta); +failed: + return NULL; +} + +bool kbase_sticky_resource_release(struct kbase_context *kctx, + struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr, bool force) +{ + struct kbase_ctx_ext_res_meta *walker; + + lockdep_assert_held(&kctx->reg_lock); + + /* Search of the metadata if one isn't provided. */ + if (!meta) { + /* + * Walk the per context externel resource metadata list for the + * metadata which matches the region which is being released. + */ + list_for_each_entry(walker, &kctx->ext_res_meta_head, + ext_res_node) { + if (walker->gpu_addr == gpu_addr) { + meta = walker; + break; + } + } + } + + /* No metadata so just return. */ + if (!meta) + return false; + + meta->refcount--; + if ((meta->refcount == 0) || force) { + /* + * Last reference to the metadata, drop the physical memory + * reference and free the metadata. + */ + struct kbase_va_region *reg; + + reg = kbase_region_tracker_find_region_enclosing_address( + kctx, + meta->gpu_addr); + + kbase_unmap_external_resource(kctx, reg, meta->alloc); + list_del(&meta->ext_res_node); + kfree(meta); + } + + return true; +} + +int kbase_sticky_resource_init(struct kbase_context *kctx) +{ + INIT_LIST_HEAD(&kctx->ext_res_meta_head); + + return 0; +} + +void kbase_sticky_resource_term(struct kbase_context *kctx) +{ + struct kbase_ctx_ext_res_meta *walker; + + lockdep_assert_held(&kctx->reg_lock); + + /* + * Free any sticky resources which haven't been unmapped. + * + * Note: + * We don't care about refcounts at this point as no future + * references to the meta data will be made. + * Region termination would find these if we didn't free them + * here, but it's more efficient if we do the clean up here. + */ + while (!list_empty(&kctx->ext_res_meta_head)) { + walker = list_first_entry(&kctx->ext_res_meta_head, + struct kbase_ctx_ext_res_meta, ext_res_node); + + kbase_sticky_resource_release(kctx, walker, 0, true); + } +} diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem.h b/drivers/gpu/arm/midgard/mali_kbase_mem.h index 7372e1088bd4..8f7629a3ce5c 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_mem.h +++ b/drivers/gpu/arm/midgard/mali_kbase_mem.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -30,7 +30,9 @@ #endif #include - +#ifdef CONFIG_KDS +#include +#endif /* CONFIG_KDS */ #ifdef CONFIG_UMP #include #endif /* CONFIG_UMP */ @@ -41,6 +43,8 @@ #if defined(CONFIG_MALI_GATOR_SUPPORT) #include "mali_kbase_gator.h" #endif +/* Required for kbase_mem_evictable_unmake */ +#include "mali_kbase_mem_linux.h" /* Part of the workaround for uTLB invalid pages is to ensure we grow/shrink tmem by 4 pages at a time */ #define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316 (2) /* round to 4 pages */ @@ -112,6 +116,16 @@ struct kbase_mem_phy_alloc { /* kbase_cpu_mappings */ struct list_head mappings; + /* Node used to store this allocation on the eviction list */ + struct list_head evict_node; + /* Physical backing size when the pages where evicted */ + size_t evicted; + /* + * Back reference to the region structure which created this + * allocation, or NULL if it has been freed. + */ + struct kbase_va_region *reg; + /* type of buffer */ enum kbase_memory_type type; @@ -242,6 +256,8 @@ struct kbase_va_region { #define KBASE_REG_SECURE (1ul << 19) +#define KBASE_REG_DONT_NEED (1ul << 20) + #define KBASE_REG_ZONE_SAME_VA KBASE_REG_ZONE(0) /* only used with 32-bit clients */ @@ -276,6 +292,8 @@ struct kbase_va_region { /* non-NULL if this memory object is a kds_resource */ struct kds_resource *kds_res; + /* List head used to store the region in the JIT allocation pool */ + struct list_head jit_node; }; /* Common functions */ @@ -378,14 +396,17 @@ static inline int kbase_reg_prepare_native(struct kbase_va_region *reg, else if (!reg->cpu_alloc) return -ENOMEM; reg->cpu_alloc->imported.kctx = kctx; + INIT_LIST_HEAD(®->cpu_alloc->evict_node); if (kctx->infinite_cache_active && (reg->flags & KBASE_REG_CPU_CACHED)) { reg->gpu_alloc = kbase_alloc_create(reg->nr_pages, KBASE_MEM_TYPE_NATIVE); reg->gpu_alloc->imported.kctx = kctx; + INIT_LIST_HEAD(®->gpu_alloc->evict_node); } else { reg->gpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc); } + INIT_LIST_HEAD(®->jit_node); reg->flags &= ~KBASE_REG_FREE; return 0; } @@ -505,11 +526,13 @@ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_pages, * @pages: Pointer to array holding the physical addresses of the pages to * free. * @dirty: Whether any pages may be dirty in the cache. + * @reclaimed: Whether the pages where reclaimable and thus should bypass + * the pool and go straight to the kernel. * * Like kbase_mem_pool_free() but optimized for freeing many pages. 
*/ void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages, - phys_addr_t *pages, bool dirty); + phys_addr_t *pages, bool dirty, bool reclaimed); /** * kbase_mem_pool_size - Get number of free pages in memory pool @@ -561,6 +584,7 @@ size_t kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size); int kbase_region_tracker_init(struct kbase_context *kctx); +int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages); void kbase_region_tracker_term(struct kbase_context *kctx); struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address(struct kbase_context *kctx, u64 gpu_addr); @@ -854,4 +878,134 @@ void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle, void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle, size_t size, enum dma_data_direction dir); +/** + * kbase_jit_debugfs_add - Add per context debugfs entry for JIT. + * @kctx: kbase context + */ +void kbase_jit_debugfs_add(struct kbase_context *kctx); + +/** + * kbase_jit_init - Initialize the JIT memory pool management + * @kctx: kbase context + * + * Returns zero on success or negative error number on failure. + */ +int kbase_jit_init(struct kbase_context *kctx); + +/** + * kbase_jit_allocate - Allocate JIT memory + * @kctx: kbase context + * @info: JIT allocation information + * + * Return: JIT allocation on success or NULL on failure. + */ +struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, + struct base_jit_alloc_info *info); + +/** + * kbase_jit_free - Free a JIT allocation + * @kctx: kbase context + * @reg: JIT allocation + * + * Frees a JIT allocation and places it into the free pool for later reuse. + */ +void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg); + +/** + * kbase_jit_backing_lost - Inform JIT that an allocation has lost backing + * @reg: JIT allocation + */ +void kbase_jit_backing_lost(struct kbase_va_region *reg); + +/** + * kbase_jit_evict - Evict a JIT allocation from the pool + * @kctx: kbase context + * + * Evict the least recently used JIT allocation from the pool. This can be + * required if normal VA allocations are failing due to VA exhaustion. + * + * Return: True if a JIT allocation was freed, false otherwise. + */ +bool kbase_jit_evict(struct kbase_context *kctx); + +/** + * kbase_jit_term - Terminate the JIT memory pool management + * @kctx: kbase context + */ +void kbase_jit_term(struct kbase_context *kctx); + +/** + * kbase_map_external_resource - Map an external resource to the GPU. + * @kctx: kbase context. + * @reg: The region to map. + * @locked_mm: The mm_struct which has been locked for this operation. + * @kds_res_count: The number of KDS resources. + * @kds_resources: Array of KDS resources. + * @kds_access_bitmap: Access bitmap for KDS. + * @exclusive: If the KDS resource requires exclusive access. + * + * Return: The physical allocation which backs the region on success or NULL + * on failure. + */ +struct kbase_mem_phy_alloc *kbase_map_external_resource( + struct kbase_context *kctx, struct kbase_va_region *reg, + struct mm_struct *locked_mm +#ifdef CONFIG_KDS + , u32 *kds_res_count, struct kds_resource **kds_resources, + unsigned long *kds_access_bitmap, bool exclusive +#endif + ); + +/** + * kbase_unmap_external_resource - Unmap an external resource from the GPU. + * @kctx: kbase context. + * @reg: The region to unmap or NULL if it has already been released. + * @alloc: The physical allocation being unmapped. 
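The JIT helpers documented earlier in this header are declaration-only here; the sketch below shows the lifecycle they imply. example_jit_roundtrip() is illustrative, base_jit_alloc_info is treated as opaque because its fields are not part of this hunk, and the evict-and-retry step is only an assumption based on the kbase_jit_evict() description.

static int example_jit_roundtrip(struct kbase_context *kctx,
		struct base_jit_alloc_info *info)
{
	struct kbase_va_region *reg;

	reg = kbase_jit_allocate(kctx, info);
	if (!reg) {
		/* Assumed recovery path: evict the least recently used
		 * pool entry and try once more. */
		if (!kbase_jit_evict(kctx))
			return -ENOMEM;
		reg = kbase_jit_allocate(kctx, info);
		if (!reg)
			return -ENOMEM;
	}

	/* ... GPU work uses the allocation ... */

	/* Return the region to the JIT pool for later reuse. */
	kbase_jit_free(kctx, reg);
	return 0;
}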
+ */ +void kbase_unmap_external_resource(struct kbase_context *kctx, + struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc); + +/** + * kbase_sticky_resource_init - Initialize sticky resource management. + * @kctx: kbase context + * + * Returns zero on success or negative error number on failure. + */ +int kbase_sticky_resource_init(struct kbase_context *kctx); + +/** + * kbase_sticky_resource_acquire - Acquire a reference on a sticky resource. + * @kctx: kbase context. + * @gpu_addr: The GPU address of the external resource. + * + * Return: The metadata object which represents the binding between the + * external resource and the kbase context on success or NULL on failure. + */ +struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire( + struct kbase_context *kctx, u64 gpu_addr); + +/** + * kbase_sticky_resource_release - Release a reference on a sticky resource. + * @kctx: kbase context. + * @meta: Binding metadata. + * @gpu_addr: GPU address of the external resource. + * @force: If the release is being forced. + * + * If meta is NULL then gpu_addr will be used to scan the metadata list and + * find the matching metadata (if any), otherwise the provided meta will be + * used and gpu_addr will be ignored. + * + * If force is true then the refcount in the metadata is ignored and the + * resource will be forced freed. + * + * Return: True if the release found the metadata and the reference was dropped. + */ +bool kbase_sticky_resource_release(struct kbase_context *kctx, + struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr, bool force); + +/** + * kbase_sticky_resource_term - Terminate sticky resource management. + * @kctx: kbase context + */ +void kbase_sticky_resource_term(struct kbase_context *kctx); #endif /* _KBASE_MEM_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c index b359f4d94148..0abe0e6ab39c 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c +++ b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -36,15 +36,55 @@ #ifdef CONFIG_DMA_SHARED_BUFFER #include #endif /* defined(CONFIG_DMA_SHARED_BUFFER) */ +#include #include #include #include #include +#if defined(CONFIG_MALI_MIPE_ENABLED) +#include +#endif + static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma); static const struct vm_operations_struct kbase_vm_ops; +/** + * kbase_mem_shrink_cpu_mapping - Shrink the CPU mapping(s) of an allocation + * @kctx: Context the region belongs to + * @reg: The GPU region + * @new_pages: The number of pages after the shrink + * @old_pages: The number of pages before the shrink + * + * Return: 0 on success, -errno on error. + * + * Shrink (or completely remove) all CPU mappings which reference the shrunk + * part of the allocation. + * + * Note: Caller must be holding the processes mmap_sem lock. 
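A sketch of the map/unmap pair documented above, assuming a build without CONFIG_KDS so the KDS parameters are compiled out; example_use_import() is illustrative. Passing NULL for locked_mm mirrors kbase_sticky_resource_acquire() and is enough for UMP/UMM imports, while user-buffer imports additionally need the owning mm to be supplied.

static int example_use_import(struct kbase_context *kctx,
		struct kbase_va_region *reg)
{
	struct kbase_mem_phy_alloc *alloc;

	/* Map the import and take a reference on its physical backing. */
	alloc = kbase_map_external_resource(kctx, reg, NULL);
	if (!alloc)
		return -EINVAL;

	/* ... the GPU may now access the imported memory ... */

	/* Drop the mapping count and the physical reference. */
	kbase_unmap_external_resource(kctx, reg, alloc);
	return 0;
}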
+ */ +static int kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx, + struct kbase_va_region *reg, + u64 new_pages, u64 old_pages); + +/** + * kbase_mem_shrink_gpu_mapping - Shrink the GPU mapping of an allocation + * @kctx: Context the region belongs to + * @reg: The GPU region or NULL if there isn't one + * @new_pages: The number of pages after the shrink + * @old_pages: The number of pages before the shrink + * + * Return: 0 on success, negative -errno on error + * + * Unmap the shrunk pages from the GPU mapping. Note that the size of the region + * itself is unmodified as we still need to reserve the VA, only the page tables + * will be modified by this function. + */ +static int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx, + struct kbase_va_region *reg, + u64 new_pages, u64 old_pages); + struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages, u64 commit_pages, u64 extent, u64 *flags, u64 *gpu_va, u16 *va_alignment) { int zone; @@ -77,9 +117,6 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages #if defined(CONFIG_64BIT) if (kctx->is_compat) cpu_va_bits = 32; - else - /* force SAME_VA if a 64-bit client */ - *flags |= BASE_MEM_SAME_VA; #endif if (!kbase_check_alloc_flags(*flags)) { @@ -195,8 +232,9 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages if (*flags & BASE_MEM_PROT_CPU_WR) prot |= PROT_WRITE; - cpu_addr = vm_mmap(kctx->filp, 0, va_map, prot, MAP_SHARED, - cookie); + cpu_addr = vm_mmap(kctx->filp, 0, va_map, prot, + MAP_SHARED, cookie); + if (IS_ERR_VALUE(cpu_addr)) { kctx->pending_regions[cookie_nr] = NULL; kctx->cookies |= (1UL << cookie_nr); @@ -343,12 +381,292 @@ out_unlock: return ret; } +/** + * kbase_mem_evictable_reclaim_count_objects - Count number of pages in the + * Ephemeral memory eviction list. + * @s: Shrinker + * @sc: Shrinker control + * + * Return: Number of pages which can be freed. + */ +static +unsigned long kbase_mem_evictable_reclaim_count_objects(struct shrinker *s, + struct shrink_control *sc) +{ + struct kbase_context *kctx; + struct kbase_mem_phy_alloc *alloc; + unsigned long pages = 0; + + kctx = container_of(s, struct kbase_context, reclaim); + + mutex_lock(&kctx->evict_lock); + + list_for_each_entry(alloc, &kctx->evict_list, evict_node) + pages += alloc->nents; + + mutex_unlock(&kctx->evict_lock); + return pages; +} + +/** + * kbase_mem_evictable_reclaim_scan_objects - Scan the Ephemeral memory eviction + * list for pages and try to reclaim them. + * @s: Shrinker + * @sc: Shrinker control + * + * Return: Number of pages freed (can be less then requested) or -1 if the + * shrinker failed to free pages in its pool. + * + * Note: + * This function accesses region structures without taking the region lock, + * this is required as the OOM killer can call the shrinker after the region + * lock has already been held. + * This is safe as we can guarantee that a region on the eviction list will + * not be freed (kbase_mem_free_region removes the allocation from the list + * before destroying it), or modified by other parts of the driver. + * The eviction list itself is guarded by the eviction lock and the MMU updates + * are protected by their own lock. 
+ */ +static +unsigned long kbase_mem_evictable_reclaim_scan_objects(struct shrinker *s, + struct shrink_control *sc) +{ + struct kbase_context *kctx; + struct kbase_mem_phy_alloc *alloc; + struct kbase_mem_phy_alloc *tmp; + unsigned long freed = 0; + + kctx = container_of(s, struct kbase_context, reclaim); + mutex_lock(&kctx->evict_lock); + + list_for_each_entry_safe(alloc, tmp, &kctx->evict_list, evict_node) { + int err; + + err = kbase_mem_shrink_gpu_mapping(kctx, alloc->reg, + 0, alloc->nents); + if (err != 0) { + /* + * Failed to remove GPU mapping, tell the shrinker + * to stop trying to shrink our slab even though we + * have pages in it. + */ + freed = -1; + goto out_unlock; + } + + /* + * Update alloc->evicted before freeing the backing so the + * helper can determine that it needs to bypass the accounting + * and memory pool. + */ + alloc->evicted = alloc->nents; + + kbase_free_phy_pages_helper(alloc, alloc->evicted); + freed += alloc->evicted; + list_del_init(&alloc->evict_node); + + /* + * Inform the JIT allocator this region has lost backing + * as it might need to free the allocation. + */ + kbase_jit_backing_lost(alloc->reg); + + /* Enough pages have been freed so stop now */ + if (freed > sc->nr_to_scan) + break; + } +out_unlock: + mutex_unlock(&kctx->evict_lock); + + return freed; +} + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) +static int kbase_mem_evictable_reclaim_shrink(struct shrinker *s, + struct shrink_control *sc) +{ + if (sc->nr_to_scan == 0) + return kbase_mem_evictable_reclaim_count_objects(s, sc); + + return kbase_mem_evictable_reclaim_scan_objects(s, sc); +} +#endif + +int kbase_mem_evictable_init(struct kbase_context *kctx) +{ + INIT_LIST_HEAD(&kctx->evict_list); + mutex_init(&kctx->evict_lock); + + /* Register shrinker */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) + kctx->reclaim.shrink = kbase_mem_evictable_reclaim_shrink; +#else + kctx->reclaim.count_objects = kbase_mem_evictable_reclaim_count_objects; + kctx->reclaim.scan_objects = kbase_mem_evictable_reclaim_scan_objects; +#endif + kctx->reclaim.seeks = DEFAULT_SEEKS; + /* Kernel versions prior to 3.1 : + * struct shrinker does not define batch */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0) + kctx->reclaim.batch = 0; +#endif + register_shrinker(&kctx->reclaim); + return 0; +} + +void kbase_mem_evictable_deinit(struct kbase_context *kctx) +{ + unregister_shrinker(&kctx->reclaim); +} + +/** + * kbase_mem_evictable_mark_reclaim - Mark the pages as reclaimable. + * @alloc: The physical allocation + */ +static void kbase_mem_evictable_mark_reclaim(struct kbase_mem_phy_alloc *alloc) +{ + struct kbase_context *kctx = alloc->imported.kctx; + int __maybe_unused new_page_count; + int i; + + for (i = 0; i < alloc->nents; i++) { + struct page *p = phys_to_page(alloc->pages[i]); + + zone_page_state_add(1, page_zone(p), NR_SLAB_RECLAIMABLE); + } + + kbase_process_page_usage_dec(kctx, alloc->nents); + new_page_count = kbase_atomic_sub_pages(alloc->nents, + &kctx->used_pages); + kbase_atomic_sub_pages(alloc->nents, &kctx->kbdev->memdev.used_pages); + +#if defined(CONFIG_MALI_MIPE_ENABLED) + kbase_tlstream_aux_pagesalloc( + (u32)kctx->id, + (u64)new_page_count); +#endif +} + +/** + * kbase_mem_evictable_unmark_reclaim - Mark the pages as no longer reclaimable. 
+ * @alloc: The physical allocation + */ +static +void kbase_mem_evictable_unmark_reclaim(struct kbase_mem_phy_alloc *alloc) +{ + struct kbase_context *kctx = alloc->imported.kctx; + int __maybe_unused new_page_count; + int i; + + new_page_count = kbase_atomic_add_pages(alloc->nents, + &kctx->used_pages); + kbase_atomic_add_pages(alloc->nents, &kctx->kbdev->memdev.used_pages); + + /* Increase mm counters so that the allocation is accounted for + * against the process and thus is visible to the OOM killer, + * then remove it from the reclaimable accounting. */ + kbase_process_page_usage_inc(kctx, alloc->nents); + + for (i = 0; i < alloc->nents; i++) { + struct page *p = phys_to_page(alloc->pages[i]); + + zone_page_state_add(-1, page_zone(p), NR_SLAB_RECLAIMABLE); + } + +#if defined(CONFIG_MALI_MIPE_ENABLED) + kbase_tlstream_aux_pagesalloc( + (u32)kctx->id, + (u64)new_page_count); +#endif +} + +int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc) +{ + struct kbase_context *kctx = gpu_alloc->imported.kctx; + int err; + + lockdep_assert_held(&kctx->reg_lock); + + /* This alloction can't already be on a list. */ + WARN_ON(!list_empty(&gpu_alloc->evict_node)); + + /* + * Try to shrink the CPU mappings as required, if we fail then + * fail the process of making this allocation evictable. + */ + err = kbase_mem_shrink_cpu_mapping(kctx, gpu_alloc->reg, + 0, gpu_alloc->nents); + if (err) + return -EINVAL; + + /* + * Add the allocation to the eviction list, after this point the shrink + * can reclaim it. + */ + mutex_lock(&kctx->evict_lock); + list_add(&gpu_alloc->evict_node, &kctx->evict_list); + mutex_unlock(&kctx->evict_lock); + kbase_mem_evictable_mark_reclaim(gpu_alloc); + + gpu_alloc->reg->flags |= KBASE_REG_DONT_NEED; + return 0; +} + +bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *gpu_alloc) +{ + struct kbase_context *kctx = gpu_alloc->imported.kctx; + int err = 0; + + lockdep_assert_held(&kctx->reg_lock); + + /* + * First remove the allocation from the eviction list as it's no + * longer eligible for eviction. + */ + mutex_lock(&kctx->evict_lock); + list_del_init(&gpu_alloc->evict_node); + mutex_unlock(&kctx->evict_lock); + + if (gpu_alloc->evicted == 0) { + /* + * The backing is still present, update the VM stats as it's + * in use again. + */ + kbase_mem_evictable_unmark_reclaim(gpu_alloc); + } else { + /* If the region is still alive ... */ + if (gpu_alloc->reg) { + /* ... allocate replacement backing ... */ + err = kbase_alloc_phy_pages_helper(gpu_alloc, + gpu_alloc->evicted); + + /* + * ... and grow the mapping back to its + * pre-eviction size. + */ + if (!err) + err = kbase_mem_grow_gpu_mapping(kctx, + gpu_alloc->reg, + gpu_alloc->evicted, 0); + + gpu_alloc->evicted = 0; + } + } + + /* If the region is still alive remove the DONT_NEED attribute. */ + if (gpu_alloc->reg) + gpu_alloc->reg->flags &= ~KBASE_REG_DONT_NEED; + + return (err == 0); +} + int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned int flags, unsigned int mask) { struct kbase_va_region *reg; int ret = -EINVAL; unsigned int real_flags = 0; unsigned int prev_flags = 0; + bool prev_needed, new_needed; KBASE_DEBUG_ASSERT(kctx); @@ -359,11 +677,11 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in flags &= mask; /* check for only supported flags */ - if (flags & ~(BASE_MEM_COHERENT_SYSTEM | BASE_MEM_COHERENT_LOCAL)) + if (flags & ~(BASE_MEM_FLAGS_MODIFIABLE)) goto out; /* mask covers bits we don't support? 
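The DONT_NEED transition handled below is reached through kbase_mem_flags_change(); a sketch of the calling side is given here. example_set_ephemeral() is illustrative, and it assumes BASE_MEM_DONT_NEED is included in BASE_MEM_FLAGS_MODIFIABLE, which the reworked flag checks imply. Only native, non-aliased allocations can be made ephemeral, as the checks that follow enforce.

static int example_set_ephemeral(struct kbase_context *kctx, u64 gpu_addr,
		bool dont_need)
{
	unsigned int flags = dont_need ? BASE_MEM_DONT_NEED : 0;

	/* Only the DONT_NEED bit is changed; bits outside the mask are
	 * left untouched. */
	return kbase_mem_flags_change(kctx, gpu_addr, flags,
			BASE_MEM_DONT_NEED);
}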
*/ - if (mask & ~(BASE_MEM_COHERENT_SYSTEM | BASE_MEM_COHERENT_LOCAL)) + if (mask & ~(BASE_MEM_FLAGS_MODIFIABLE)) goto out; /* convert flags */ @@ -373,6 +691,7 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in real_flags |= KBASE_REG_SHARE_IN; /* now we can lock down the context, and find the region */ + down_write(¤t->mm->mmap_sem); kbase_gpu_vm_lock(kctx); /* Validate the region */ @@ -380,6 +699,28 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in if (!reg || (reg->flags & KBASE_REG_FREE)) goto out_unlock; + /* Is the region being transitioning between not needed and needed? */ + prev_needed = (KBASE_REG_DONT_NEED & reg->flags) == KBASE_REG_DONT_NEED; + new_needed = (BASE_MEM_DONT_NEED & flags) == BASE_MEM_DONT_NEED; + if (prev_needed != new_needed) { + /* Aliased allocations can't be made ephemeral */ + if (atomic_read(®->cpu_alloc->gpu_mappings) > 1) + goto out_unlock; + + if (new_needed) { + /* Only native allocations can be marked not needed */ + if (reg->cpu_alloc->type != KBASE_MEM_TYPE_NATIVE) { + ret = -EINVAL; + goto out_unlock; + } + ret = kbase_mem_evictable_make(reg->gpu_alloc); + if (ret) + goto out_unlock; + } else { + kbase_mem_evictable_unmake(reg->gpu_alloc); + } + } + /* limit to imported memory */ if ((reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMP) && (reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM)) @@ -422,6 +763,7 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in out_unlock: kbase_gpu_vm_unlock(kctx); + up_write(¤t->mm->mmap_sem); out: return ret; } @@ -817,8 +1159,9 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, /* validate and add src handles */ for (i = 0; i < nents; i++) { - if (ai[i].handle < BASE_MEM_FIRST_FREE_ADDRESS) { - if (ai[i].handle != BASE_MEM_WRITE_ALLOC_PAGES_HANDLE) + if (ai[i].handle.basep.handle < BASE_MEM_FIRST_FREE_ADDRESS) { + if (ai[i].handle.basep.handle != + BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE) goto bad_handle; /* unsupported magic handle */ if (!ai[i].length) goto bad_handle; /* must be > 0 */ @@ -830,13 +1173,17 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, struct kbase_va_region *aliasing_reg; struct kbase_mem_phy_alloc *alloc; - aliasing_reg = kbase_region_tracker_find_region_base_address(kctx, (ai[i].handle >> PAGE_SHIFT) << PAGE_SHIFT); + aliasing_reg = kbase_region_tracker_find_region_base_address( + kctx, + (ai[i].handle.basep.handle >> PAGE_SHIFT) << PAGE_SHIFT); /* validate found region */ if (!aliasing_reg) goto bad_handle; /* Not found */ if (aliasing_reg->flags & KBASE_REG_FREE) goto bad_handle; /* Free region */ + if (aliasing_reg->flags & KBASE_REG_DONT_NEED) + goto bad_handle; /* Ephemeral region */ if (!aliasing_reg->gpu_alloc) goto bad_handle; /* No alloc */ if (aliasing_reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) @@ -1058,6 +1405,7 @@ static int zap_range_nolock(struct mm_struct *mm, int err = -EINVAL; /* in case end < start */ while (start < end) { + unsigned long local_start; unsigned long local_end; vma = find_vma_intersection(mm, start, end); @@ -1068,12 +1416,17 @@ static int zap_range_nolock(struct mm_struct *mm, if (vma->vm_ops != vm_ops) goto try_next; + local_start = vma->vm_start; + + if (start > local_start) + local_start = start; + local_end = vma->vm_end; if (end < local_end) local_end = end; - err = zap_vma_ptes(vma, start, local_end - start); + err = zap_vma_ptes(vma, local_start, local_end - local_start); if (unlikely(err)) break; @@ 
-1085,19 +1438,110 @@ try_next: return err; } +int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx, + struct kbase_va_region *reg, + u64 new_pages, u64 old_pages) +{ + phys_addr_t *phy_pages; + u64 delta = new_pages - old_pages; + int ret = 0; + + lockdep_assert_held(&kctx->reg_lock); + + /* Map the new pages into the GPU */ + phy_pages = kbase_get_gpu_phy_pages(reg); + ret = kbase_mmu_insert_pages(kctx, reg->start_pfn + old_pages, + phy_pages + old_pages, delta, reg->flags); + + return ret; +} + +static int kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx, + struct kbase_va_region *reg, + u64 new_pages, u64 old_pages) +{ + struct kbase_mem_phy_alloc *cpu_alloc = reg->cpu_alloc; + struct kbase_cpu_mapping *mapping; + int err; + + lockdep_assert_held(&kctx->process_mm->mmap_sem); + + list_for_each_entry(mapping, &cpu_alloc->mappings, mappings_list) { + unsigned long mapping_size; + + mapping_size = (mapping->vm_end - mapping->vm_start) + >> PAGE_SHIFT; + + /* is this mapping affected ?*/ + if ((mapping->page_off + mapping_size) > new_pages) { + unsigned long first_bad = 0; + + if (new_pages > mapping->page_off) + first_bad = new_pages - mapping->page_off; + + err = zap_range_nolock(current->mm, + &kbase_vm_ops, + mapping->vm_start + + (first_bad << PAGE_SHIFT), + mapping->vm_end); + + WARN(err, + "Failed to zap VA range (0x%lx - 0x%lx);\n", + mapping->vm_start + + (first_bad << PAGE_SHIFT), + mapping->vm_end + ); + + /* The zap failed, give up and exit */ + if (err) + goto failed; + } + } + + return 0; + +failed: + return err; +} + +static int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx, + struct kbase_va_region *reg, + u64 new_pages, u64 old_pages) +{ + u64 delta = old_pages - new_pages; + int ret = 0; + + ret = kbase_mmu_teardown_pages(kctx, + reg->start_pfn + new_pages, delta); + if (ret) + return ret; + +#ifndef CONFIG_MALI_NO_MALI + if (kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_6367)) { + /* + * Wait for GPU to flush write buffer before freeing + * physical pages. + */ + kbase_wait_write_flush(kctx); + } +#endif + + return ret; +} + int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages, enum base_backing_threshold_status *failure_reason) { u64 old_pages; u64 delta; int res = -EINVAL; struct kbase_va_region *reg; - phys_addr_t *phy_pages; + bool read_locked = false; KBASE_DEBUG_ASSERT(kctx); KBASE_DEBUG_ASSERT(failure_reason); KBASE_DEBUG_ASSERT(gpu_addr != 0); - down_read(¤t->mm->mmap_sem); + down_write(¤t->mm->mmap_sem); kbase_gpu_vm_lock(kctx); /* Validate the region */ @@ -1131,6 +1575,11 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages, en *failure_reason = BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE; goto out_unlock; } + /* can't grow regions which are ephemeral */ + if (reg->flags & BASE_MEM_DONT_NEED) { + *failure_reason = BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE; + goto out_unlock; + } if (new_pages == reg->gpu_alloc->nents) { /* no change */ @@ -1138,14 +1587,17 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages, en goto out_unlock; } - phy_pages = kbase_get_gpu_phy_pages(reg); old_pages = kbase_reg_current_backed_size(reg); - if (new_pages > old_pages) { - /* growing */ - int err; - delta = new_pages - old_pages; + + /* + * No update to the mm so downgrade the writer lock to a read + * lock so other readers aren't blocked after this point. 
+ */ + downgrade_write(¤t->mm->mmap_sem); + read_locked = true; + /* Allocate some more pages */ if (kbase_alloc_phy_pages_helper(reg->cpu_alloc, delta) != 0) { *failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM; @@ -1160,9 +1612,15 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages, en goto out_unlock; } } - err = kbase_mmu_insert_pages(kctx, reg->start_pfn + old_pages, - phy_pages + old_pages, delta, reg->flags); - if (err) { + + /* No update required for CPU mappings, that's done on fault. */ + + /* Update GPU mapping. */ + res = kbase_mem_grow_gpu_mapping(kctx, reg, + new_pages, old_pages); + + /* On error free the new pages */ + if (res) { kbase_free_phy_pages_helper(reg->cpu_alloc, delta); if (reg->cpu_alloc != reg->gpu_alloc) kbase_free_phy_pages_helper(reg->gpu_alloc, @@ -1171,60 +1629,35 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages, en goto out_unlock; } } else { - /* shrinking */ - struct kbase_cpu_mapping *mapping; - int err; + delta = old_pages - new_pages; - /* first, unmap from any mappings affected */ - list_for_each_entry(mapping, ®->cpu_alloc->mappings, mappings_list) { - unsigned long mapping_size = (mapping->vm_end - mapping->vm_start) >> PAGE_SHIFT; - - /* is this mapping affected ?*/ - if ((mapping->page_off + mapping_size) > new_pages) { - unsigned long first_bad = 0; - int zap_res; - - if (new_pages > mapping->page_off) - first_bad = new_pages - mapping->page_off; - - zap_res = zap_range_nolock(current->mm, - &kbase_vm_ops, - mapping->vm_start + - (first_bad << PAGE_SHIFT), - mapping->vm_end); - WARN(zap_res, - "Failed to zap VA range (0x%lx - 0x%lx);\n", - mapping->vm_start + - (first_bad << PAGE_SHIFT), - mapping->vm_end - ); - } + /* Update all CPU mapping(s) */ + res = kbase_mem_shrink_cpu_mapping(kctx, reg, + new_pages, old_pages); + if (res) { + *failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM; + goto out_unlock; } - /* Free some pages */ - delta = old_pages - new_pages; - err = kbase_mmu_teardown_pages(kctx, reg->start_pfn + new_pages, - delta); - if (err) { + /* Update the GPU mapping */ + res = kbase_mem_shrink_gpu_mapping(kctx, reg, + new_pages, old_pages); + if (res) { *failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM; goto out_unlock; } -#ifndef CONFIG_MALI_NO_MALI - if (kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_6367)) { - /* Wait for GPU to flush write buffer before freeing physical pages */ - kbase_wait_write_flush(kctx); - } -#endif + kbase_free_phy_pages_helper(reg->cpu_alloc, delta); if (reg->cpu_alloc != reg->gpu_alloc) kbase_free_phy_pages_helper(reg->gpu_alloc, delta); } - res = 0; - out_unlock: kbase_gpu_vm_unlock(kctx); - up_read(¤t->mm->mmap_sem); + if (read_locked) + up_read(¤t->mm->mmap_sem); + else + up_write(¤t->mm->mmap_sem); return res; } @@ -1296,6 +1729,10 @@ static int kbase_cpu_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) if (map->page_off + rel_pgoff >= map->alloc->nents) goto locked_bad_fault; + /* Fault on access to DONT_NEED regions */ + if (map->alloc->reg && (map->alloc->reg->flags & KBASE_REG_DONT_NEED)) + goto locked_bad_fault; + /* insert all valid pages from the fault location */ for (i = rel_pgoff; i < MIN((vma->vm_end - vma->vm_start) >> PAGE_SHIFT, @@ -1663,8 +2100,8 @@ int kbase_mmap(struct file *file, struct vm_area_struct *vma) rcu_read_unlock(); switch (vma->vm_pgoff) { - case PFN_DOWN(BASE_MEM_INVALID_HANDLE): - case PFN_DOWN(BASE_MEM_WRITE_ALLOC_PAGES_HANDLE): + case PFN_DOWN(BASEP_MEM_INVALID_HANDLE): + case 
PFN_DOWN(BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE): /* Illegal handle for direct map */ err = -EINVAL; goto out_unlock; @@ -1911,6 +2348,9 @@ void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size, if (page_index + page_count > kbase_reg_current_backed_size(reg)) goto out_unlock; + if (reg->flags & KBASE_REG_DONT_NEED) + goto out_unlock; + page_array = kbase_get_cpu_phy_pages(reg); if (!page_array) goto out_unlock; @@ -1945,6 +2385,12 @@ void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size, map->is_cached = (reg->flags & KBASE_REG_CPU_CACHED) != 0; sync_needed = map->is_cached; +#ifdef CONFIG_MALI_COH_KERN + /* kernel can use coherent memory if supported */ + if (kctx->kbdev->system_coherency == COHERENCY_ACE) + sync_needed = false; +#endif + if (sync_needed) { /* Sync first page */ size_t sz = MIN(((size_t) PAGE_SIZE - offset), size); @@ -1986,6 +2432,11 @@ void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map) void *addr = (void *)((uintptr_t)map->addr & PAGE_MASK); bool sync_needed = map->is_cached; vunmap(addr); +#ifdef CONFIG_MALI_COH_KERN + /* kernel can use coherent memory if supported */ + if (kctx->kbdev->system_coherency == COHERENCY_ACE) + sync_needed = false; +#endif if (sync_needed) { off_t offset = (uintptr_t)map->addr & ~PAGE_MASK; size_t size = map->size; diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h index 6a139fd70234..6c0fb5648514 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h +++ b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010, 2012-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010, 2012-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -42,6 +42,72 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages, enum base_backing_threshold_status *failure_reason); int kbase_mmap(struct file *file, struct vm_area_struct *vma); +/** + * kbase_mem_evictable_init - Initialize the Ephemeral memory the eviction + * mechanism. + * @kctx: The kbase context to initialize. + * + * Return: Zero on success or -errno on failure. + */ +int kbase_mem_evictable_init(struct kbase_context *kctx); + +/** + * kbase_mem_evictable_deinit - De-initialize the Ephemeral memory eviction + * mechanism. + * @kctx: The kbase context to de-initialize. + */ +void kbase_mem_evictable_deinit(struct kbase_context *kctx); + +/** + * kbase_mem_grow_gpu_mapping - Grow the GPU mapping of an allocation + * @kctx: Context the region belongs to + * @reg: The GPU region + * @new_pages: The number of pages after the grow + * @old_pages: The number of pages before the grow + * + * Return: 0 on success, -errno on error. + * + * Expand the GPU mapping to encompass the new psychical pages which have + * been added to the allocation. + * + * Note: Caller must be holding the region lock. + */ +int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx, + struct kbase_va_region *reg, + u64 new_pages, u64 old_pages); + +/** + * kbase_mem_evictable_make - Make a physical allocation eligible for eviction + * @gpu_alloc: The physical allocation to make evictable + * + * Return: 0 on success, -errno on error. 
+ * + * Take the provided region and make all the physical pages within it + * reclaimable by the kernel, updating the per-process VM stats as well. + * Remove any CPU mappings (as these can't be removed in the shrinker callback + * as mmap_sem might already be taken) but leave the GPU mapping intact as + * and until the shrinker reclaims the allocation. + * + * Note: Must be called with the region lock of the containing context. + */ +int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc); + +/** + * kbase_mem_evictable_unmake - Remove a physical allocations eligibility for + * eviction. + * @alloc: The physical allocation to remove eviction eligibility from. + * + * Return: True if the allocation had its backing restored and false if + * it hasn't. + * + * Make the physical pages in the region no longer reclaimable and update the + * per-process stats, if the shrinker has already evicted the memory then + * re-allocate it if the region is still alive. + * + * Note: Must be called with the region lock of the containing context. + */ +bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *alloc); + struct kbase_vmap_struct { u64 gpu_addr; struct kbase_mem_phy_alloc *cpu_alloc; diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c b/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c index 153cd4efac49..c0f47be362f3 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c +++ b/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -43,6 +43,9 @@ int __init kbase_carveout_mem_reserve(phys_addr_t size) kbase_mem_pool_max_size(pool), \ ##__VA_ARGS__) +#define NOT_DIRTY false +#define NOT_RECLAIMED false + static inline void kbase_mem_pool_lock(struct kbase_mem_pool *pool) { spin_lock(&pool->pool_lock); @@ -505,7 +508,7 @@ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_pages, return 0; err_rollback: - kbase_mem_pool_free_pages(pool, i, pages, false); + kbase_mem_pool_free_pages(pool, i, pages, NOT_DIRTY, NOT_RECLAIMED); return err; } @@ -548,7 +551,7 @@ static void kbase_mem_pool_add_array(struct kbase_mem_pool *pool, } void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages, - phys_addr_t *pages, bool dirty) + phys_addr_t *pages, bool dirty, bool reclaimed) { struct kbase_mem_pool *next_pool = pool->next_pool; struct page *p; @@ -558,22 +561,24 @@ void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages, pool_dbg(pool, "free_pages(%zu):\n", nr_pages); - /* Add to this pool */ - nr_to_pool = kbase_mem_pool_capacity(pool); - nr_to_pool = min(nr_pages, nr_to_pool); + if (!reclaimed) { + /* Add to this pool */ + nr_to_pool = kbase_mem_pool_capacity(pool); + nr_to_pool = min(nr_pages, nr_to_pool); - kbase_mem_pool_add_array(pool, nr_to_pool, pages, false, dirty); + kbase_mem_pool_add_array(pool, nr_to_pool, pages, false, dirty); - i += nr_to_pool; + i += nr_to_pool; - if (i != nr_pages && next_pool) { - /* Spill to next pool (may overspill) */ - nr_to_pool = kbase_mem_pool_capacity(next_pool); - nr_to_pool = min(nr_pages - i, nr_to_pool); + if (i != nr_pages && next_pool) { + /* Spill to next pool (may overspill) */ + nr_to_pool = kbase_mem_pool_capacity(next_pool); + nr_to_pool = min(nr_pages - i, nr_to_pool); - 
kbase_mem_pool_add_array(next_pool, nr_to_pool, pages + i, - true, dirty); - i += nr_to_pool; + kbase_mem_pool_add_array(next_pool, nr_to_pool, + pages + i, true, dirty); + i += nr_to_pool; + } } /* Free any remaining pages to kernel */ @@ -582,6 +587,10 @@ void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages, continue; p = phys_to_page(pages[i]); + if (reclaimed) + zone_page_state_add(-1, page_zone(p), + NR_SLAB_RECLAIMABLE); + kbase_mem_pool_free_page(pool, p); pages[i] = 0; } diff --git a/drivers/gpu/arm/midgard/mali_kbase_mmu.c b/drivers/gpu/arm/midgard/mali_kbase_mmu.c index d81ef593e928..ad4e572771fa 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_mmu.c +++ b/drivers/gpu/arm/midgard/mali_kbase_mmu.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -30,9 +30,7 @@ #if defined(CONFIG_MALI_GATOR_SUPPORT) #include #endif -#if defined(CONFIG_MALI_MIPE_ENABLED) #include -#endif #include #define beenthere(kctx, f, a...) dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a) @@ -56,8 +54,12 @@ static void kbase_mmu_sync_pgd(struct kbase_device *kbdev, dma_addr_t handle, size_t size) { - - dma_sync_single_for_device(kbdev->dev, handle, size, DMA_TO_DEVICE); + /* If page table is not coherent then ensure the gpu can read + * the pages from memory + */ + if (kbdev->system_coherency != COHERENCY_ACE) + dma_sync_single_for_device(kbdev->dev, handle, size, + DMA_TO_DEVICE); } /* @@ -136,6 +138,18 @@ void page_fault_worker(struct work_struct *data) dev_warn(kbdev->dev, "Access flag unexpectedly set"); goto fault_done; +#ifdef CONFIG_MALI_GPU_MMU_AARCH64 + case AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT: + + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Address size fault"); + goto fault_done; + + case AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT: + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Memory attributes fault"); + goto fault_done; +#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */ default: kbase_mmu_report_fault_and_kill(kctx, faulting_as, @@ -164,6 +178,13 @@ void page_fault_worker(struct work_struct *data) goto fault_done; } + if ((region->flags & KBASE_REG_DONT_NEED)) { + kbase_gpu_vm_unlock(kctx); + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Don't need memory can't be grown"); + goto fault_done; + } + /* find the size we need to grow it by */ /* we know the result fit in a size_t due to kbase_region_tracker_find_region_enclosing_address * validating the fault_adress to be within a size_t from the start_pfn */ @@ -238,14 +259,9 @@ void page_fault_worker(struct work_struct *data) /* alloc success */ KBASE_DEBUG_ASSERT(kbase_reg_current_backed_size(region) <= region->nr_pages); - /* AS transaction begin */ - mutex_lock(&faulting_as->transaction_mutex); - /* set up the new pages */ err = kbase_mmu_insert_pages(kctx, region->start_pfn + kbase_reg_current_backed_size(region) - new_pages, &kbase_get_gpu_phy_pages(region)[kbase_reg_current_backed_size(region) - new_pages], new_pages, region->flags); if (err) { - /* failed to insert pages, handle as a normal PF */ - mutex_unlock(&faulting_as->transaction_mutex); kbase_free_phy_pages_helper(region->gpu_alloc, new_pages); if (region->gpu_alloc != region->cpu_alloc) kbase_free_phy_pages_helper(region->cpu_alloc, @@ -259,9 +275,10 @@ void 
page_fault_worker(struct work_struct *data) #if defined(CONFIG_MALI_GATOR_SUPPORT) kbase_trace_mali_page_fault_insert_pages(as_no, new_pages); #endif -#if defined(CONFIG_MALI_MIPE_ENABLED) kbase_tlstream_aux_pagefault(kctx->id, (u64)new_pages); -#endif + + /* AS transaction begin */ + mutex_lock(&faulting_as->transaction_mutex); /* flush L2 and unlock the VA (resumes the MMU) */ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6367)) @@ -324,11 +341,9 @@ phys_addr_t kbase_mmu_alloc_pgd(struct kbase_context *kctx) if (!p) goto sub_pages; -#if defined(CONFIG_MALI_MIPE_ENABLED) kbase_tlstream_aux_pagesalloc( (u32)kctx->id, (u64)new_page_count); -#endif page = kmap(p); if (NULL == page) @@ -365,7 +380,7 @@ static phys_addr_t mmu_get_next_pgd(struct kbase_context *kctx, phys_addr_t pgd, KBASE_DEBUG_ASSERT(pgd); KBASE_DEBUG_ASSERT(NULL != kctx); - lockdep_assert_held(&kctx->reg_lock); + lockdep_assert_held(&kctx->mmu_lock); /* * Architecture spec defines level-0 as being the top-most. @@ -406,8 +421,9 @@ static phys_addr_t mmu_get_bottom_pgd(struct kbase_context *kctx, u64 vpfn) phys_addr_t pgd; int l; - pgd = kctx->pgd; + lockdep_assert_held(&kctx->mmu_lock); + pgd = kctx->pgd; for (l = MIDGARD_MMU_TOPLEVEL; l < MIDGARD_MMU_BOTTOMLEVEL; l++) { pgd = mmu_get_next_pgd(kctx, pgd, vpfn, l); /* Handle failure condition */ @@ -428,7 +444,7 @@ static phys_addr_t mmu_insert_pages_recover_get_next_pgd(struct kbase_context *k KBASE_DEBUG_ASSERT(pgd); KBASE_DEBUG_ASSERT(NULL != kctx); - lockdep_assert_held(&kctx->reg_lock); + lockdep_assert_held(&kctx->mmu_lock); /* * Architecture spec defines level-0 as being the top-most. @@ -453,6 +469,8 @@ static phys_addr_t mmu_insert_pages_recover_get_bottom_pgd(struct kbase_context phys_addr_t pgd; int l; + lockdep_assert_held(&kctx->mmu_lock); + pgd = kctx->pgd; for (l = MIDGARD_MMU_TOPLEVEL; l < MIDGARD_MMU_BOTTOMLEVEL; l++) { @@ -476,7 +494,7 @@ static void mmu_insert_pages_failure_recovery(struct kbase_context *kctx, u64 vp /* 64-bit address range is the max */ KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE)); - lockdep_assert_held(&kctx->reg_lock); + lockdep_assert_held(&kctx->mmu_lock); mmu_mode = kctx->kbdev->mmu_mode; @@ -524,13 +542,14 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, bool recover_required = false; u64 recover_vpfn = vpfn; size_t recover_count = 0; + int err; KBASE_DEBUG_ASSERT(NULL != kctx); KBASE_DEBUG_ASSERT(0 != vpfn); /* 64-bit address range is the max */ KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE)); - lockdep_assert_held(&kctx->reg_lock); + mutex_lock(&kctx->mmu_lock); while (nr) { unsigned int i; @@ -558,7 +577,8 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, recover_vpfn, recover_count); } - return -EINVAL; + err = -EINVAL; + goto fail_unlock; } p = pfn_to_page(PFN_DOWN(pgd)); @@ -572,7 +592,8 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, recover_vpfn, recover_count); } - return -ENOMEM; + err = -ENOMEM; + goto fail_unlock; } for (i = 0; i < count; i++) { @@ -597,7 +618,12 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, recover_required = true; recover_count += count; } + mutex_unlock(&kctx->mmu_lock); return 0; + +fail_unlock: + mutex_unlock(&kctx->mmu_lock); + return err; } /* @@ -614,13 +640,14 @@ int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn, bool recover_required = false; u64 recover_vpfn = vpfn; size_t recover_count = 0; + int err; KBASE_DEBUG_ASSERT(NULL != kctx); KBASE_DEBUG_ASSERT(0 != vpfn); /* 
64-bit address range is the max */ KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE)); - lockdep_assert_held(&kctx->reg_lock); + mutex_lock(&kctx->mmu_lock); while (nr) { unsigned int i; @@ -648,7 +675,8 @@ int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn, recover_vpfn, recover_count); } - return -EINVAL; + err = -EINVAL; + goto fail_unlock; } p = pfn_to_page(PFN_DOWN(pgd)); @@ -662,7 +690,8 @@ int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn, recover_vpfn, recover_count); } - return -ENOMEM; + err = -ENOMEM; + goto fail_unlock; } for (i = 0; i < count; i++) { @@ -688,7 +717,13 @@ int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn, recover_required = true; recover_count += count; } + + mutex_unlock(&kctx->mmu_lock); return 0; + +fail_unlock: + mutex_unlock(&kctx->mmu_lock); + return err; } KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages); @@ -782,17 +817,18 @@ int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr) struct kbase_device *kbdev; size_t requested_nr = nr; struct kbase_mmu_mode const *mmu_mode; + int err; KBASE_DEBUG_ASSERT(NULL != kctx); beenthere(kctx, "kctx %p vpfn %lx nr %zd", (void *)kctx, (unsigned long)vpfn, nr); - lockdep_assert_held(&kctx->reg_lock); - if (0 == nr) { /* early out if nothing to do */ return 0; } + mutex_lock(&kctx->mmu_lock); + kbdev = kctx->kbdev; mmu_mode = kbdev->mmu_mode; @@ -808,14 +844,16 @@ int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr) pgd = mmu_get_bottom_pgd(kctx, vpfn); if (!pgd) { dev_warn(kbdev->dev, "kbase_mmu_teardown_pages: mmu_get_bottom_pgd failure\n"); - return -EINVAL; + err = -EINVAL; + goto fail_unlock; } p = pfn_to_page(PFN_DOWN(pgd)); pgd_page = kmap(p); if (!pgd_page) { dev_warn(kbdev->dev, "kbase_mmu_teardown_pages: kmap failure\n"); - return -ENOMEM; + err = -ENOMEM; + goto fail_unlock; } for (i = 0; i < count; i++) @@ -831,8 +869,13 @@ int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr) kunmap(p); } + mutex_unlock(&kctx->mmu_lock); kbase_mmu_flush(kctx, vpfn, requested_nr); return 0; + +fail_unlock: + mutex_unlock(&kctx->mmu_lock); + return err; } KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages); @@ -855,12 +898,13 @@ int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, phys_addr_t *ph u64 *pgd_page; size_t requested_nr = nr; struct kbase_mmu_mode const *mmu_mode; + int err; KBASE_DEBUG_ASSERT(NULL != kctx); KBASE_DEBUG_ASSERT(0 != vpfn); KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE)); - lockdep_assert_held(&kctx->reg_lock); + mutex_lock(&kctx->mmu_lock); mmu_mode = kctx->kbdev->mmu_mode; @@ -879,14 +923,16 @@ int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, phys_addr_t *ph pgd = mmu_get_bottom_pgd(kctx, vpfn); if (!pgd) { dev_warn(kctx->kbdev->dev, "mmu_get_bottom_pgd failure\n"); - return -EINVAL; + err = -EINVAL; + goto fail_unlock; } p = pfn_to_page(PFN_DOWN(pgd)); pgd_page = kmap(p); if (!pgd_page) { dev_warn(kctx->kbdev->dev, "kmap failure\n"); - return -ENOMEM; + err = -ENOMEM; + goto fail_unlock; } for (i = 0; i < count; i++) @@ -904,9 +950,13 @@ int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, phys_addr_t *ph kunmap(pfn_to_page(PFN_DOWN(pgd))); } + mutex_unlock(&kctx->mmu_lock); kbase_mmu_flush(kctx, vpfn, requested_nr); - return 0; + +fail_unlock: + mutex_unlock(&kctx->mmu_lock); + return err; } /* This is a debug feature only */ @@ -934,7 +984,7 @@ static void mmu_teardown_level(struct kbase_context *kctx, phys_addr_t pgd, int struct kbase_mmu_mode const 
*mmu_mode; KBASE_DEBUG_ASSERT(NULL != kctx); - lockdep_assert_held(&kctx->reg_lock); + lockdep_assert_held(&kctx->mmu_lock); pgd_page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd))); /* kmap_atomic should NEVER fail. */ @@ -979,6 +1029,8 @@ int kbase_mmu_init(struct kbase_context *kctx) KBASE_DEBUG_ASSERT(NULL != kctx); KBASE_DEBUG_ASSERT(NULL == kctx->mmu_teardown_pages); + mutex_init(&kctx->mmu_lock); + /* Preallocate MMU depth of four pages for mmu_teardown_level to use */ kctx->mmu_teardown_pages = kmalloc(PAGE_SIZE * 4, GFP_KERNEL); @@ -1004,9 +1056,9 @@ void kbase_mmu_free_pgd(struct kbase_context *kctx) KBASE_DEBUG_ASSERT(NULL != kctx); KBASE_DEBUG_ASSERT(NULL != kctx->mmu_teardown_pages); - lockdep_assert_held(&kctx->reg_lock); - + mutex_lock(&kctx->mmu_lock); mmu_teardown_level(kctx, kctx->pgd, MIDGARD_MMU_TOPLEVEL, 1, kctx->mmu_teardown_pages); + mutex_unlock(&kctx->mmu_lock); beenthere(kctx, "pgd %lx", (unsigned long)kctx->pgd); kbase_mem_pool_free(&kctx->mem_pool, phys_to_page(kctx->pgd), true); @@ -1014,11 +1066,9 @@ void kbase_mmu_free_pgd(struct kbase_context *kctx) new_page_count = kbase_atomic_sub_pages(1, &kctx->used_pages); kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages); -#if defined(CONFIG_MALI_MIPE_ENABLED) kbase_tlstream_aux_pagesalloc( (u32)kctx->id, (u64)new_page_count); -#endif } KBASE_EXPORT_TEST_API(kbase_mmu_free_pgd); @@ -1033,7 +1083,7 @@ static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, struct kbase_mmu_mode const *mmu_mode; KBASE_DEBUG_ASSERT(NULL != kctx); - lockdep_assert_held(&kctx->reg_lock); + lockdep_assert_held(&kctx->mmu_lock); mmu_mode = kctx->kbdev->mmu_mode; @@ -1088,13 +1138,13 @@ void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages) KBASE_DEBUG_ASSERT(kctx); - lockdep_assert_held(&kctx->reg_lock); - if (0 == nr_pages) { /* can't dump in a 0 sized buffer, early out */ return NULL; } + mutex_lock(&kctx->mmu_lock); + size_left = nr_pages * PAGE_SIZE; KBASE_DEBUG_ASSERT(0 != size_left); @@ -1116,7 +1166,7 @@ void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages) kctx->kbdev->mmu_mode->get_as_setup(kctx, &as_setup); config[0] = as_setup.transtab; config[1] = as_setup.memattr; - config[2] = 0; + config[2] = as_setup.transcfg; memcpy(buffer, &config, sizeof(config)); mmu_dump_buffer += sizeof(config); size_left -= sizeof(config); @@ -1130,10 +1180,8 @@ void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages) &mmu_dump_buffer, &size_left); - if (!size) { - vfree(kaddr); - return NULL; - } + if (!size) + goto fail_free; /* Add on the size for the end marker */ size += sizeof(u64); @@ -1144,15 +1192,20 @@ void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages) if (size > nr_pages * PAGE_SIZE || size_left < sizeof(u64)) { /* The buffer isn't big enough - free the memory and return failure */ - vfree(kaddr); - return NULL; + goto fail_free; } /* Add the end marker */ memcpy(mmu_dump_buffer, &end_marker, sizeof(u64)); } + mutex_unlock(&kctx->mmu_lock); return kaddr; + +fail_free: + vfree(kaddr); + mutex_unlock(&kctx->mmu_lock); + return NULL; } KBASE_EXPORT_TEST_API(kbase_mmu_dump); @@ -1317,6 +1370,15 @@ const char *kbase_exception_name(struct kbase_device *kbdev, u32 exception_code) e = "TRANSLATION_FAULT"; break; case 0xC8: +#ifdef CONFIG_MALI_GPU_MMU_AARCH64 + case 0xC9: + case 0xCA: + case 0xCB: + case 0xCC: + case 0xCD: + case 0xCE: + case 0xCF: +#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */ e = "PERMISSION_FAULT"; break; case 0xD0: @@ -1330,8 +1392,38 @@ const char 
*kbase_exception_name(struct kbase_device *kbdev, u32 exception_code) e = "TRANSTAB_BUS_FAULT"; break; case 0xD8: +#ifdef CONFIG_MALI_GPU_MMU_AARCH64 + case 0xD9: + case 0xDA: + case 0xDB: + case 0xDC: + case 0xDD: + case 0xDE: + case 0xDF: +#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */ e = "ACCESS_FLAG"; break; +#ifdef CONFIG_MALI_GPU_MMU_AARCH64 + case 0xE0: + case 0xE1: + case 0xE2: + case 0xE3: + case 0xE4: + case 0xE5: + case 0xE6: + case 0xE7: + e = "ADDRESS_SIZE_FAULT"; + break; + case 0xE8: + case 0xE9: + case 0xEA: + case 0xEB: + case 0xEC: + case 0xED: + case 0xEE: + case 0xEF: + e = "MEMORY_ATTRIBUTES_FAULT"; +#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */ break; default: e = "UNKNOWN"; @@ -1345,7 +1437,12 @@ static const char *access_type_name(struct kbase_device *kbdev, u32 fault_status) { switch (fault_status & AS_FAULTSTATUS_ACCESS_TYPE_MASK) { + case AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC: +#ifdef CONFIG_MALI_GPU_MMU_AARCH64 + return "ATOMIC"; +#else return "UNKNOWN"; +#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */ case AS_FAULTSTATUS_ACCESS_TYPE_READ: return "READ"; case AS_FAULTSTATUS_ACCESS_TYPE_WRITE: @@ -1678,8 +1775,15 @@ void kbase_mmu_interrupt_process(struct kbase_device *kbdev, struct kbase_contex */ kbasep_js_clear_submit_allowed(js_devdata, kctx); +#ifdef CONFIG_MALI_GPU_MMU_AARCH64 + dev_warn(kbdev->dev, + "Bus error in AS%d at VA=0x%016llx, IPA=0x%016llx\n", + as->number, as->fault_addr, + as->fault_extra_addr); +#else dev_warn(kbdev->dev, "Bus error in AS%d at 0x%016llx\n", as->number, as->fault_addr); +#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */ /* * We need to switch to UNMAPPED mode - but we do this in a diff --git a/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c b/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c index 079ef81d06d1..683cabb797db 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c +++ b/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. 
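The mali_kbase_mmu.c changes above move page-table walks from kctx->reg_lock to a dedicated kctx->mmu_lock (so, for instance, the eviction shrinker can tear down GPU mappings without the region lock) and route every early exit through a single unlock label. A condensed sketch of that pattern follows; example_mmu_op() and its fail parameter are stand-ins for the real pgd-lookup and kmap failure cases.

static int example_mmu_op(struct kbase_context *kctx, bool fail)
{
	int err = 0;

	mutex_lock(&kctx->mmu_lock);

	if (fail) {
		err = -EINVAL;
		goto fail_unlock;
	}

	/* ... walk and update the page tables here ... */

	mutex_unlock(&kctx->mmu_lock);
	return 0;

fail_unlock:
	mutex_unlock(&kctx->mmu_lock);
	return err;
}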
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -77,17 +77,25 @@ static void mmu_get_as_setup(struct kbase_context *kctx, (AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY << (AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY * 8)) | (AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL << - (AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8)) | + (AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8)) | (AS_MEMATTR_LPAE_WRITE_ALLOC << - (AS_MEMATTR_INDEX_WRITE_ALLOC * 8)) | + (AS_MEMATTR_INDEX_WRITE_ALLOC * 8)) | + (AS_MEMATTR_LPAE_OUTER_IMPL_DEF << + (AS_MEMATTR_INDEX_OUTER_IMPL_DEF * 8)) | + (AS_MEMATTR_LPAE_OUTER_WA << + (AS_MEMATTR_INDEX_OUTER_WA * 8)) | 0; /* The other indices are unused for now */ - setup->transtab = (u64)kctx->pgd & - ((0xFFFFFFFFULL << 32) | AS_TRANSTAB_LPAE_ADDR_SPACE_MASK); - - setup->transtab |= AS_TRANSTAB_LPAE_ADRMODE_TABLE; - setup->transtab |= AS_TRANSTAB_LPAE_READ_INNER; + setup->transtab = ((u64)kctx->pgd & + ((0xFFFFFFFFULL << 32) | AS_TRANSTAB_LPAE_ADDR_SPACE_MASK)) | + AS_TRANSTAB_LPAE_ADRMODE_TABLE | + AS_TRANSTAB_LPAE_READ_INNER; +#ifdef CONFIG_MALI_GPU_MMU_AARCH64 + setup->transcfg = AS_TRANSCFG_ADRMODE_LEGACY; +#else + setup->transcfg = 0; +#endif } static void mmu_update(struct kbase_context *kctx) @@ -109,6 +117,9 @@ static void mmu_disable_as(struct kbase_device *kbdev, int as_nr) current_setup->transtab = AS_TRANSTAB_LPAE_ADRMODE_UNMAPPED; +#ifdef CONFIG_MALI_GPU_MMU_AARCH64 + current_setup->transcfg = AS_TRANSCFG_ADRMODE_LEGACY; +#endif /* Apply the address space setting */ kbase_mmu_hw_configure(kbdev, as, NULL); diff --git a/drivers/gpu/arm/midgard/mali_kbase_replay.c b/drivers/gpu/arm/midgard/mali_kbase_replay.c index 71f005e32521..6ac49df3f2be 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_replay.c +++ b/drivers/gpu/arm/midgard/mali_kbase_replay.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. 
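In the LPAE setup above each memory-attribute value occupies one byte of the 64-bit MEMATTR register, selected by its index; a one-line helper (illustrative only) makes the shift pattern explicit.

static u64 example_memattr_field(u64 attr, unsigned int index)
{
	/* One 8-bit attribute per index within the 64-bit register. */
	return (attr & 0xffULL) << (index * 8);
}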
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -27,15 +27,11 @@ #include #define JOB_NOT_STARTED 0 -#define JOB_TYPE_MASK 0xfe -#define JOB_TYPE_NULL (1 << 1) -#define JOB_TYPE_VERTEX (5 << 1) -#define JOB_TYPE_TILER (7 << 1) -#define JOB_TYPE_FUSED (8 << 1) -#define JOB_TYPE_FRAGMENT (9 << 1) - -#define JOB_FLAG_DESC_SIZE (1 << 0) -#define JOB_FLAG_PERFORM_JOB_BARRIER (1 << 8) +#define JOB_TYPE_NULL (1) +#define JOB_TYPE_VERTEX (5) +#define JOB_TYPE_TILER (7) +#define JOB_TYPE_FUSED (8) +#define JOB_TYPE_FRAGMENT (9) #define JOB_HEADER_32_FBD_OFFSET (31*4) #define JOB_HEADER_64_FBD_OFFSET (44*4) @@ -58,17 +54,9 @@ #define JOB_SOURCE_ID(status) (((status) >> 16) & 0xFFFF) #define JOB_POLYGON_LIST (0x03) -struct job_head { - u32 status; - u32 not_complete_index; - u64 fault_addr; - u16 flags; - u16 index; - u16 dependencies[2]; - union { - u64 _64; - u32 _32; - } next; +struct fragment_job { + struct job_descriptor_header header; + u32 x[2]; union { u64 _64; @@ -77,28 +65,43 @@ struct job_head { }; static void dump_job_head(struct kbase_context *kctx, char *head_str, - struct job_head *job) + struct job_descriptor_header *job) { #ifdef CONFIG_MALI_DEBUG dev_dbg(kctx->kbdev->dev, "%s\n", head_str); - dev_dbg(kctx->kbdev->dev, "addr = %p\n" - "status = %x\n" - "not_complete_index = %x\n" - "fault_addr = %llx\n" - "flags = %x\n" - "index = %x\n" - "dependencies = %x,%x\n", - job, job->status, job->not_complete_index, - job->fault_addr, job->flags, job->index, - job->dependencies[0], - job->dependencies[1]); - - if (job->flags & JOB_FLAG_DESC_SIZE) + dev_dbg(kctx->kbdev->dev, + "addr = %p\n" + "exception_status = %x (Source ID: 0x%x Access: 0x%x Exception: 0x%x)\n" + "first_incomplete_task = %x\n" + "fault_pointer = %llx\n" + "job_descriptor_size = %x\n" + "job_type = %x\n" + "job_barrier = %x\n" + "_reserved_01 = %x\n" + "_reserved_02 = %x\n" + "_reserved_03 = %x\n" + "_reserved_04/05 = %x,%x\n" + "job_index = %x\n" + "dependencies = %x,%x\n", + job, job->exception_status, + JOB_SOURCE_ID(job->exception_status), + (job->exception_status >> 8) & 0x3, + job->exception_status & 0xFF, + job->first_incomplete_task, + job->fault_pointer, job->job_descriptor_size, + job->job_type, job->job_barrier, job->_reserved_01, + job->_reserved_02, job->_reserved_03, + job->_reserved_04, job->_reserved_05, + job->job_index, + job->job_dependency_index_1, + job->job_dependency_index_2); + + if (job->job_descriptor_size) dev_dbg(kctx->kbdev->dev, "next = %llx\n", - job->next._64); + job->next_job._64); else dev_dbg(kctx->kbdev->dev, "next = %x\n", - job->next._32); + job->next_job._32); #endif } @@ -372,77 +375,81 @@ static int kbasep_replay_reset_job(struct kbase_context *kctx, u32 default_weight, u16 hw_job_id_offset, bool first_in_chain, bool fragment_chain) { - struct job_head *job; + struct fragment_job *frag_job; + struct job_descriptor_header *job; u64 new_job_header; struct kbase_vmap_struct map; - job = kbase_vmap(kctx, *job_header, sizeof(*job), &map); - if (!job) { + frag_job = kbase_vmap(kctx, *job_header, sizeof(*frag_job), &map); + if (!frag_job) { dev_err(kctx->kbdev->dev, "kbasep_replay_parse_jc: failed to map jc\n"); return -EINVAL; } + job = &frag_job->header; dump_job_head(kctx, "Job header:", job); - if (job->status == JOB_NOT_STARTED && !fragment_chain) { + if (job->exception_status == JOB_NOT_STARTED && !fragment_chain) { dev_err(kctx->kbdev->dev, "Job already not started\n"); goto 
out_unmap; } - job->status = JOB_NOT_STARTED; + job->exception_status = JOB_NOT_STARTED; - if ((job->flags & JOB_TYPE_MASK) == JOB_TYPE_VERTEX) - job->flags = (job->flags & ~JOB_TYPE_MASK) | JOB_TYPE_NULL; + if (job->job_type == JOB_TYPE_VERTEX) + job->job_type = JOB_TYPE_NULL; - if ((job->flags & JOB_TYPE_MASK) == JOB_TYPE_FUSED) { + if (job->job_type == JOB_TYPE_FUSED) { dev_err(kctx->kbdev->dev, "Fused jobs can not be replayed\n"); goto out_unmap; } if (first_in_chain) - job->flags |= JOB_FLAG_PERFORM_JOB_BARRIER; + job->job_barrier = 1; - if ((job->dependencies[0] + hw_job_id_offset) > JOB_HEADER_ID_MAX || - (job->dependencies[1] + hw_job_id_offset) > JOB_HEADER_ID_MAX || - (job->index + hw_job_id_offset) > JOB_HEADER_ID_MAX) { + if ((job->job_dependency_index_1 + hw_job_id_offset) > + JOB_HEADER_ID_MAX || + (job->job_dependency_index_2 + hw_job_id_offset) > + JOB_HEADER_ID_MAX || + (job->job_index + hw_job_id_offset) > JOB_HEADER_ID_MAX) { dev_err(kctx->kbdev->dev, "Job indicies/dependencies out of valid range\n"); goto out_unmap; } - if (job->dependencies[0]) - job->dependencies[0] += hw_job_id_offset; - if (job->dependencies[1]) - job->dependencies[1] += hw_job_id_offset; + if (job->job_dependency_index_1) + job->job_dependency_index_1 += hw_job_id_offset; + if (job->job_dependency_index_2) + job->job_dependency_index_2 += hw_job_id_offset; - job->index += hw_job_id_offset; + job->job_index += hw_job_id_offset; - if (job->flags & JOB_FLAG_DESC_SIZE) { - new_job_header = job->next._64; - if (!job->next._64) - job->next._64 = prev_jc; + if (job->job_descriptor_size) { + new_job_header = job->next_job._64; + if (!job->next_job._64) + job->next_job._64 = prev_jc; } else { - new_job_header = job->next._32; - if (!job->next._32) - job->next._32 = prev_jc; + new_job_header = job->next_job._32; + if (!job->next_job._32) + job->next_job._32 = prev_jc; } dump_job_head(kctx, "Updated to:", job); - if ((job->flags & JOB_TYPE_MASK) == JOB_TYPE_TILER) { - bool job_64 = (job->flags & JOB_FLAG_DESC_SIZE) != 0; + if (job->job_type == JOB_TYPE_TILER) { + bool job_64 = job->job_descriptor_size != 0; if (kbasep_replay_reset_tiler_job(kctx, *job_header, tiler_heap_free, hierarchy_mask, default_weight, job_64) != 0) goto out_unmap; - } else if ((job->flags & JOB_TYPE_MASK) == JOB_TYPE_FRAGMENT) { + } else if (job->job_type == JOB_TYPE_FRAGMENT) { u64 fbd_address; - if (job->flags & JOB_FLAG_DESC_SIZE) - fbd_address = job->fragment_fbd._64; + if (job->job_descriptor_size) + fbd_address = frag_job->fragment_fbd._64; else - fbd_address = (u64)job->fragment_fbd._32; + fbd_address = (u64)frag_job->fragment_fbd._32; if (fbd_address & FBD_TYPE) { if (kbasep_replay_reset_mfbd(kctx, @@ -485,7 +492,7 @@ static int kbasep_replay_find_hw_job_id(struct kbase_context *kctx, u64 jc, u16 *hw_job_id) { while (jc) { - struct job_head *job; + struct job_descriptor_header *job; struct kbase_vmap_struct map; dev_dbg(kctx->kbdev->dev, @@ -498,13 +505,13 @@ static int kbasep_replay_find_hw_job_id(struct kbase_context *kctx, return -EINVAL; } - if (job->index > *hw_job_id) - *hw_job_id = job->index; + if (job->job_index > *hw_job_id) + *hw_job_id = job->job_index; - if (job->flags & JOB_FLAG_DESC_SIZE) - jc = job->next._64; + if (job->job_descriptor_size) + jc = job->next_job._64; else - jc = job->next._32; + jc = job->next_job._32; kbase_vunmap(kctx, &map); } @@ -957,7 +964,7 @@ static bool kbase_replay_fault_check(struct kbase_jd_atom *katom) base_jd_replay_payload *payload; u64 job_header; u64 job_loop_detect; - struct 
job_head *job; + struct job_descriptor_header *job; struct kbase_vmap_struct job_map; struct kbase_vmap_struct map; bool err = false; @@ -1012,41 +1019,22 @@ static bool kbase_replay_fault_check(struct kbase_jd_atom *katom) } -#ifdef CONFIG_MALI_DEBUG - dev_dbg(dev, "\njob_head structure:\n" - "Source ID:0x%x Access:0x%x Exception:0x%x\n" - "at job addr = %p\n" - "not_complete_index = 0x%x\n" - "fault_addr = 0x%llx\n" - "flags = 0x%x\n" - "index = 0x%x\n" - "dependencies = 0x%x,0x%x\n", - JOB_SOURCE_ID(job->status), - ((job->status >> 8) & 0x3), - (job->status & 0xFF), - job, - job->not_complete_index, - job->fault_addr, - job->flags, - job->index, - job->dependencies[0], - job->dependencies[1]); -#endif + dump_job_head(kctx, "\njob_head structure:\n", job); /* Replay only when the polygon list reader caused the * DATA_INVALID_FAULT */ if ((BASE_JD_EVENT_DATA_INVALID_FAULT == katom->event_code) && - (JOB_POLYGON_LIST == JOB_SOURCE_ID(job->status))) { + (JOB_POLYGON_LIST == JOB_SOURCE_ID(job->exception_status))) { err = true; kbase_vunmap(kctx, &job_map); break; } /* Move on to next fragment job in the list */ - if (job->flags & JOB_FLAG_DESC_SIZE) - job_header = job->next._64; + if (job->job_descriptor_size) + job_header = job->next_job._64; else - job_header = job->next._32; + job_header = job->next_job._32; kbase_vunmap(kctx, &job_map); diff --git a/drivers/gpu/arm/midgard/mali_kbase_softjobs.c b/drivers/gpu/arm/midgard/mali_kbase_softjobs.c index 108c49d9150a..57904ff223d7 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_softjobs.c +++ b/drivers/gpu/arm/midgard/mali_kbase_softjobs.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -25,8 +25,13 @@ #include #include "mali_kbase_sync.h" #endif +#include #include +#include #include +#include +#include +#include /* Mask to check cache alignment of data structures */ #define KBASE_CACHE_ALIGNMENT_MASK ((1<kctx; + unsigned long lflags; + + spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags); + list_add_tail(&katom->dep_item[0], &kctx->waiting_soft_jobs); + spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags); +} + +static struct page *kbasep_translate_gpu_addr_to_kernel_page( + struct kbase_context *kctx, u64 gpu_addr) +{ + u64 pfn; + struct kbase_va_region *reg; + phys_addr_t addr = 0; + + KBASE_DEBUG_ASSERT(NULL != kctx); + + pfn = gpu_addr >> PAGE_SHIFT; + + kbase_gpu_vm_lock(kctx); + reg = kbase_region_tracker_find_region_enclosing_address( + kctx, gpu_addr); + if (!reg || (reg->flags & KBASE_REG_FREE)) + goto err_vm_unlock; + addr = reg->cpu_alloc->pages[pfn - reg->start_pfn]; + kbase_gpu_vm_unlock(kctx); + + if (!addr) + goto err; + + return pfn_to_page(PFN_DOWN(addr)); + +err_vm_unlock: + kbase_gpu_vm_unlock(kctx); +err: + return NULL; +} + +int kbasep_read_soft_event_status( + struct kbase_context *kctx, u64 evt, unsigned char *status) +{ + struct page *pg = kbasep_translate_gpu_addr_to_kernel_page( + kctx, evt); + unsigned char *mapped_pg; + u32 offset = evt & ~PAGE_MASK; + + KBASE_DEBUG_ASSERT(NULL != status); + + if (!pg) + return -1; + + mapped_pg = (unsigned char *)kmap_atomic(pg); + KBASE_DEBUG_ASSERT(NULL != mapped_pg); /* kmap_atomic() must not fail */ + *status = *(mapped_pg + offset); + kunmap_atomic(mapped_pg); + + return 0; +} + +int 
kbasep_write_soft_event_status( + struct kbase_context *kctx, u64 evt, unsigned char new_status) +{ + struct page *pg = kbasep_translate_gpu_addr_to_kernel_page( + kctx, evt); + unsigned char *mapped_pg; + u32 offset = evt & ~PAGE_MASK; + + KBASE_DEBUG_ASSERT((new_status == BASE_JD_SOFT_EVENT_SET) || + (new_status == BASE_JD_SOFT_EVENT_RESET)); + + if (!pg) + return -1; + + mapped_pg = (unsigned char *)kmap_atomic(pg); + KBASE_DEBUG_ASSERT(NULL != mapped_pg); /* kmap_atomic() must not fail */ + *(mapped_pg + offset) = new_status; + kunmap_atomic(mapped_pg); + + return 0; +} + static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom) { struct kbase_va_region *reg; @@ -68,6 +156,9 @@ static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom) list_add_tail(&katom->dep_item[1], &js_devdata->suspended_soft_jobs_list); mutex_unlock(&js_devdata->runpool_mutex); + /* Also adding this to the list of waiting soft job */ + kbasep_add_waiting_soft_job(katom); + return pm_active_err; } @@ -234,18 +325,19 @@ static int kbase_fence_wait(struct kbase_jd_atom *katom) if (ret == 1) { /* Already signalled */ return 0; - } else if (ret < 0) { - goto cancel_atom; } - return 1; - cancel_atom: - katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; - /* We should cause the dependant jobs in the bag to be failed, - * to do this we schedule the work queue to complete this job */ - KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work)); - INIT_WORK(&katom->work, kbase_fence_wait_worker); - queue_work(katom->kctx->jctx.job_done_wq, &katom->work); + if (ret < 0) { + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + /* We should cause the dependent jobs in the bag to be failed, + * to do this we schedule the work queue to complete this job */ + KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work)); + INIT_WORK(&katom->work, kbase_fence_wait_worker); + queue_work(katom->kctx->jctx.job_done_wq, &katom->work); + } + + kbasep_add_waiting_soft_job(katom); + return 1; } @@ -283,6 +375,600 @@ finish_softjob: } #endif /* CONFIG_SYNC */ +static void kbasep_soft_event_complete_job(struct work_struct *work) +{ + struct kbase_jd_atom *katom = container_of(work, struct kbase_jd_atom, + work); + struct kbase_context *kctx = katom->kctx; + int resched; + + mutex_lock(&kctx->jctx.lock); + resched = jd_done_nolock(katom, NULL); + mutex_unlock(&kctx->jctx.lock); + + if (resched) + kbase_js_sched_all(kctx->kbdev); +} + +void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt) +{ + int cancel_timer = 1; + struct list_head *entry, *tmp; + unsigned long lflags; + + spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags); + list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) { + struct kbase_jd_atom *katom = list_entry( + entry, struct kbase_jd_atom, dep_item[0]); + + if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) == + BASE_JD_REQ_SOFT_EVENT_WAIT) { + if (katom->jc == evt) { + list_del(&katom->dep_item[0]); + + katom->event_code = BASE_JD_EVENT_DONE; + INIT_WORK(&katom->work, + kbasep_soft_event_complete_job); + queue_work(kctx->jctx.job_done_wq, + &katom->work); + } else { + /* There are still other waiting jobs, we cannot + * cancel the timer yet */ + cancel_timer = 0; + } + } + } + + if (cancel_timer) + hrtimer_try_to_cancel(&kctx->soft_event_timeout); + spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags); +} + +enum hrtimer_restart kbasep_soft_event_timeout_worker(struct hrtimer *timer) +{ + struct kbase_context *kctx = container_of(timer, struct kbase_context, + soft_event_timeout); 
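+	/*
+	 * Scan this context's list of waiting soft jobs: any EVENT_WAIT atom
+	 * that has waited longer than soft_event_timeout_ms is taken off the
+	 * list and cancelled, and the timer is re-armed for another timeout
+	 * period while at least one atom is still within its deadline.
+	 */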
+ u32 timeout_ms = (u32)atomic_read( + &kctx->kbdev->js_data.soft_event_timeout_ms); + ktime_t cur_time = ktime_get(); + enum hrtimer_restart restarting = HRTIMER_NORESTART; + unsigned long lflags; + struct list_head *entry, *tmp; + + spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags); + list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) { + struct kbase_jd_atom *katom = list_entry( + entry, struct kbase_jd_atom, dep_item[0]); + + if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) == + BASE_JD_REQ_SOFT_EVENT_WAIT) { + s64 elapsed_time = + ktime_to_ms(ktime_sub(cur_time, + katom->start_timestamp)); + if (elapsed_time > (s64)timeout_ms) { + /* Take it out of the list to ensure that it + * will be cancelled in all cases */ + list_del(&katom->dep_item[0]); + + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + INIT_WORK(&katom->work, + kbasep_soft_event_complete_job); + queue_work(kctx->jctx.job_done_wq, + &katom->work); + } else { + restarting = HRTIMER_RESTART; + } + } + } + + if (restarting) + hrtimer_add_expires(timer, HR_TIMER_DELAY_MSEC(timeout_ms)); + spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags); + + return restarting; +} + +static int kbasep_soft_event_wait(struct kbase_jd_atom *katom) +{ + struct kbase_context *kctx = katom->kctx; + ktime_t remaining; + unsigned char status; + + /* The status of this soft-job is stored in jc */ + if (kbasep_read_soft_event_status(kctx, katom->jc, &status) != 0) { + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + return 0; + } + + if (status == BASE_JD_SOFT_EVENT_SET) + return 0; /* Event already set, nothing to do */ + + /* Record the start time of this atom so we could cancel it at + * the right time */ + katom->start_timestamp = ktime_get(); + + /* Add the atom to the waiting list before the timer is + * (re)started to make sure that it gets processed */ + kbasep_add_waiting_soft_job(katom); + + /* Schedule cancellation of this atom after a period if it is + * not active */ + remaining = hrtimer_get_remaining(&kctx->soft_event_timeout); + if (remaining.tv64 <= 0) { + int timeout_ms = atomic_read( + &kctx->kbdev->js_data.soft_event_timeout_ms); + hrtimer_start(&kctx->soft_event_timeout, + HR_TIMER_DELAY_MSEC((u64)timeout_ms), + HRTIMER_MODE_REL); + } + + return 1; +} + +static void kbasep_soft_event_update(struct kbase_jd_atom *katom, + unsigned char new_status) +{ + /* Complete jobs waiting on the same event */ + struct kbase_context *kctx = katom->kctx; + + if (kbasep_write_soft_event_status(kctx, katom->jc, new_status) != 0) { + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + return; + } + + if (new_status == BASE_JD_SOFT_EVENT_SET) + kbasep_complete_triggered_soft_events(kctx, katom->jc); +} + +static void kbasep_soft_event_cancel_job(struct kbase_jd_atom *katom) +{ + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + if (jd_done_nolock(katom, NULL)) + kbase_js_sched_all(katom->kctx->kbdev); +} + +struct kbase_debug_copy_buffer { + u64 size; + struct page **pages; + int nr_pages; + u64 offset; +}; + +static void kbase_debug_copy_finish(struct kbase_jd_atom *katom) +{ + struct kbase_debug_copy_buffer *buffers = + (struct kbase_debug_copy_buffer *)(uintptr_t)katom->jc; + unsigned int i; + unsigned int nr = katom->nr_extres; + + if (!buffers) + return; + + for (i = 0; i < nr; i++) { + int p; + + if (!buffers[i].pages) + break; + for (p = 0; p < buffers[i].nr_pages; p++) { + struct page *pg = buffers[i].pages[p]; + + if (pg) + put_page(pg); + } + kfree(buffers[i].pages); + } + kfree(buffers); + + katom->jc = 0; +} + 
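/*
 * Editor's sketch, not part of the patch: a minimal user-space model of the
 * soft-event protocol the hunk above introduces. A soft event is a single
 * status byte in GPU-visible memory; an EVENT_WAIT atom completes as soon as
 * the byte reads "set", EVENT_SET/EVENT_RESET atoms write the byte, and
 * setting it releases every atom waiting on the same address
 * (kbasep_complete_triggered_soft_events). The names and values here
 * (EVT_SET, EVT_RESET, struct soft_event) are invented for illustration only;
 * the driver uses BASE_JD_SOFT_EVENT_SET/RESET, kbase_jd_atom and GPU-mapped
 * pages accessed via kbasep_read/write_soft_event_status.
 */
#include <stdbool.h>
#include <stdio.h>

#define EVT_RESET 0	/* stands in for BASE_JD_SOFT_EVENT_RESET */
#define EVT_SET   1	/* stands in for BASE_JD_SOFT_EVENT_SET */

struct soft_event {
	unsigned char status;	/* the byte read/written at the event's GPU address */
	int waiters;		/* atoms parked on the waiting_soft_jobs list */
};

/* EVENT_WAIT: complete immediately if already set, otherwise park the atom. */
static bool soft_event_wait(struct soft_event *ev)
{
	if (ev->status == EVT_SET)
		return true;	/* already signalled, atom completes now */
	ev->waiters++;		/* models kbasep_add_waiting_soft_job() */
	return false;
}

/* EVENT_SET: write the byte and release everything waiting on it. */
static void soft_event_set(struct soft_event *ev)
{
	ev->status = EVT_SET;
	ev->waiters = 0;	/* models kbasep_complete_triggered_soft_events() */
}

int main(void)
{
	struct soft_event ev = { .status = EVT_RESET };

	printf("wait before set -> done=%d waiters=%d\n",
	       soft_event_wait(&ev), ev.waiters);
	soft_event_set(&ev);
	printf("wait after set  -> done=%d waiters=%d\n",
	       soft_event_wait(&ev), ev.waiters);
	return 0;
}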
+static int kbase_debug_copy_prepare(struct kbase_jd_atom *katom) +{ + struct kbase_debug_copy_buffer *buffers; + struct base_jd_debug_copy_buffer *user_buffers = NULL; + unsigned int i; + unsigned int nr = katom->nr_extres; + int ret = 0; + void __user *user_structs = (void __user *)(uintptr_t)katom->jc; + + if (!user_structs) + return -EINVAL; + + buffers = kmalloc_array(nr, sizeof(*buffers), GFP_KERNEL); + if (!buffers) { + ret = -ENOMEM; + katom->jc = 0; + goto out_cleanup; + } + katom->jc = (u64)(uintptr_t)buffers; + + user_buffers = kmalloc_array(nr, sizeof(*user_buffers), GFP_KERNEL); + + if (!user_buffers) { + ret = -ENOMEM; + goto out_cleanup; + } + + if (copy_from_user(user_buffers, user_structs, + sizeof(*user_buffers)*nr)) { + ret = -EINVAL; + goto out_cleanup; + } + + down_read(¤t->mm->mmap_sem); + for (i = 0; i < nr; i++) { + u64 addr = user_buffers[i].address; + u64 page_addr = addr & PAGE_MASK; + u64 end_page_addr = addr + user_buffers[i].size - 1; + u64 last_page_addr = end_page_addr & PAGE_MASK; + int nr_pages = (last_page_addr-page_addr)/PAGE_SIZE+1; + int pinned_pages; + + if (!user_buffers[i].address) { + memset(&buffers[i], 0, + sizeof(struct kbase_debug_copy_buffer)); + continue; + } + + buffers[i].nr_pages = nr_pages; + buffers[i].offset = addr & ~PAGE_MASK; + buffers[i].size = user_buffers[i].size; + + buffers[i].pages = kcalloc(nr_pages, sizeof(struct page *), + GFP_KERNEL); + if (!buffers[i].pages) { + ret = -ENOMEM; + goto out_unlock; + } + + pinned_pages = get_user_pages(current, current->mm, page_addr, + nr_pages, + 1, /* Write */ + 0, /* No force */ + buffers[i].pages, + NULL); + if (pinned_pages < 0) { + ret = pinned_pages; + goto out_unlock; + } + if (pinned_pages != nr_pages) { + ret = -EINVAL; + goto out_unlock; + } + } + up_read(¤t->mm->mmap_sem); + + kfree(user_buffers); + + return ret; + +out_unlock: + up_read(¤t->mm->mmap_sem); + +out_cleanup: + kfree(buffers); + kfree(user_buffers); + + /* Frees allocated memory for kbase_debug_copy_job struct, including + * members, and sets jc to 0 */ + kbase_debug_copy_finish(katom); + + return ret; +} + +static int kbase_debug_copy(struct kbase_jd_atom *katom) +{ + struct kbase_debug_copy_buffer *buffers = + (struct kbase_debug_copy_buffer *)(uintptr_t)katom->jc; + unsigned int i; + + for (i = 0; i < katom->nr_extres; i++) { + u64 offset = buffers[i].offset; + u64 buffer_space = buffers[i].size; + int p; + + for (p = 0; p < buffers[i].nr_pages; p++) { + struct page *pg = buffers[i].pages[p]; + void *kpage = kmap(pg); + u64 page_space = PAGE_SIZE-offset; + u64 space; + + if (page_space <= buffer_space) + space = page_space; + else + space = buffer_space; + + /* Temporary - GPUCORE-1843 covers the implementation + * of the actual copying. 
*/ + memset(kpage+offset, 0x4B, space); + + if (!PageReserved(pg)) + SetPageDirty(pg); + + kunmap(pg); + offset = 0; + buffer_space -= space; + } + } + + return 0; +} + +static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom) +{ + __user void *data = (__user void *)(uintptr_t) katom->jc; + struct base_jit_alloc_info *info; + struct kbase_context *kctx = katom->kctx; + int ret; + + /* Fail the job if there is no info structure */ + if (!data) { + ret = -EINVAL; + goto fail; + } + + /* Copy the information for safe access and future storage */ + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) { + ret = -ENOMEM; + goto fail; + } + + if (copy_from_user(info, data, sizeof(*info)) != 0) { + ret = -EINVAL; + goto free_info; + } + + /* If the ID is zero or is in use then fail the job */ + if ((info->id == 0) || (kctx->jit_alloc[info->id])) { + ret = -EINVAL; + goto free_info; + } + + /* Set the jit_alloc to a non-zero value so we know the ID is in use */ + kctx->jit_alloc[info->id] = (struct kbase_va_region *) -1; + + /* Sanity check that the PA fits within the VA */ + if (info->va_pages < info->commit_pages) { + ret = -EINVAL; + goto free_info; + } + + /* Ensure the GPU address is correctly aligned */ + if ((info->gpu_alloc_addr & 0x7) != 0) { + ret = -EINVAL; + goto free_info; + } + + /* Replace the user pointer with our kernel allocated info structure */ + katom->jc = (u64)(uintptr_t) info; + + /* + * Note: + * The provided info->gpu_alloc_addr isn't validated here as + * userland can cache allocations which means that even + * though the region is valid it doesn't represent the + * same thing it used to. + * + * Complete validation of va_pages, commit_pages and extent + * isn't done here as it will be done during the call to + * kbase_mem_alloc. + */ + return 0; + +free_info: + kfree(info); +fail: + katom->jc = 0; + return ret; +} + +static void kbase_jit_allocate_process(struct kbase_jd_atom *katom) +{ + struct kbase_context *kctx = katom->kctx; + struct base_jit_alloc_info *info; + struct kbase_va_region *reg; + struct kbase_vmap_struct mapping; + u64 *ptr; + + info = (struct base_jit_alloc_info *) (uintptr_t) katom->jc; + + /* Create a JIT allocation */ + reg = kbase_jit_allocate(kctx, info); + if (!reg) { + katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED; + return; + } + + /* + * Write the address of the JIT allocation to the user provided + * GPU allocation. + */ + ptr = kbase_vmap(kctx, info->gpu_alloc_addr, sizeof(*ptr), + &mapping); + if (!ptr) { + /* + * Leave the allocation "live" as the JIT free jit will be + * submitted anyway. + */ + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + return; + } + + *ptr = reg->start_pfn << PAGE_SHIFT; + kbase_vunmap(kctx, &mapping); + + katom->event_code = BASE_JD_EVENT_DONE; + + /* + * Bind it to the user provided ID. Do this last so we can check for + * the JIT free racing this JIT alloc job. + */ + kctx->jit_alloc[info->id] = reg; +} + +static void kbase_jit_allocate_finish(struct kbase_jd_atom *katom) +{ + struct base_jit_alloc_info *info; + + info = (struct base_jit_alloc_info *) (uintptr_t) katom->jc; + /* Free the info structure */ + kfree(info); +} + +static void kbase_jit_free_process(struct kbase_jd_atom *katom) +{ + struct kbase_context *kctx = katom->kctx; + u8 id = (u8) katom->jc; + + /* + * If the ID is zero or it is not in use yet then fail the job. 
+ */ + if ((id == 0) || (kctx->jit_alloc[id] == NULL)) { + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + return; + } + + /* + * If the ID is valid but the allocation request failed still succeed + * this soft job but don't try and free the allocation. + */ + if (kctx->jit_alloc[id] != (struct kbase_va_region *) -1) + kbase_jit_free(kctx, kctx->jit_alloc[id]); + + kctx->jit_alloc[id] = NULL; +} + +static int kbase_ext_res_prepare(struct kbase_jd_atom *katom) +{ + __user struct base_external_resource_list *user_ext_res; + struct base_external_resource_list *ext_res; + u64 count = 0; + size_t copy_size; + int ret; + + user_ext_res = (__user struct base_external_resource_list *) + (uintptr_t) katom->jc; + + /* Fail the job if there is no info structure */ + if (!user_ext_res) { + ret = -EINVAL; + goto fail; + } + + if (copy_from_user(&count, &user_ext_res->count, sizeof(u64)) != 0) { + ret = -EINVAL; + goto fail; + } + + /* Is the number of external resources in range? */ + if (!count || count > BASE_EXT_RES_COUNT_MAX) { + ret = -EINVAL; + goto fail; + } + + /* Copy the information for safe access and future storage */ + copy_size = sizeof(*ext_res); + copy_size += sizeof(struct base_external_resource) * (count - 1); + ext_res = kzalloc(copy_size, GFP_KERNEL); + if (!ext_res) { + ret = -ENOMEM; + goto fail; + } + + if (copy_from_user(ext_res, user_ext_res, copy_size) != 0) { + ret = -EINVAL; + goto free_info; + } + + /* + * Overwrite the count with the first value incase it was changed + * after the fact. + */ + ext_res->count = count; + + /* + * Replace the user pointer with our kernel allocated + * ext_res structure. + */ + katom->jc = (u64)(uintptr_t) ext_res; + + return 0; + +free_info: + kfree(ext_res); +fail: + return ret; +} + +static void kbase_ext_res_process(struct kbase_jd_atom *katom, bool map) +{ + struct base_external_resource_list *ext_res; + int i; + bool failed = false; + + ext_res = (struct base_external_resource_list *) (uintptr_t) katom->jc; + if (!ext_res) + goto failed_jc; + + kbase_gpu_vm_lock(katom->kctx); + + for (i = 0; i < ext_res->count; i++) { + u64 gpu_addr; + + gpu_addr = ext_res->ext_res[i].ext_resource & + ~BASE_EXT_RES_ACCESS_EXCLUSIVE; + if (map) { + if (!kbase_sticky_resource_acquire(katom->kctx, + gpu_addr)) + goto failed_loop; + } else + if (!kbase_sticky_resource_release(katom->kctx, NULL, + gpu_addr, false)) + failed = true; + } + + /* + * In the case of unmap we continue unmapping other resources in the + * case of failure but will always report failure if _any_ unmap + * request fails. 
+ */ + if (failed) + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + else + katom->event_code = BASE_JD_EVENT_DONE; + + kbase_gpu_vm_unlock(katom->kctx); + + return; + +failed_loop: + while (--i > 0) { + u64 gpu_addr; + + gpu_addr = ext_res->ext_res[i].ext_resource & + ~BASE_EXT_RES_ACCESS_EXCLUSIVE; + + kbase_sticky_resource_release(katom->kctx, NULL, gpu_addr, + false); + } + + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + kbase_gpu_vm_unlock(katom->kctx); + +failed_jc: + return; +} + +static void kbase_ext_res_finish(struct kbase_jd_atom *katom) +{ + struct base_external_resource_list *ext_res; + + ext_res = (struct base_external_resource_list *) (uintptr_t) katom->jc; + /* Free the info structure */ + kfree(ext_res); +} + int kbase_process_soft_job(struct kbase_jd_atom *katom) { switch (katom->core_req & BASEP_JD_REQ_ATOM_TYPE) { @@ -301,6 +987,28 @@ int kbase_process_soft_job(struct kbase_jd_atom *katom) #endif /* CONFIG_SYNC */ case BASE_JD_REQ_SOFT_REPLAY: return kbase_replay_process(katom); + case BASE_JD_REQ_SOFT_EVENT_WAIT: + return kbasep_soft_event_wait(katom); + case BASE_JD_REQ_SOFT_EVENT_SET: + kbasep_soft_event_update(katom, BASE_JD_SOFT_EVENT_SET); + break; + case BASE_JD_REQ_SOFT_EVENT_RESET: + kbasep_soft_event_update(katom, BASE_JD_SOFT_EVENT_RESET); + break; + case BASE_JD_REQ_SOFT_DEBUG_COPY: + return kbase_debug_copy(katom); + case BASE_JD_REQ_SOFT_JIT_ALLOC: + kbase_jit_allocate_process(katom); + break; + case BASE_JD_REQ_SOFT_JIT_FREE: + kbase_jit_free_process(katom); + break; + case BASE_JD_REQ_SOFT_EXT_RES_MAP: + kbase_ext_res_process(katom, true); + break; + case BASE_JD_REQ_SOFT_EXT_RES_UNMAP: + kbase_ext_res_process(katom, false); + break; } /* Atom is complete */ @@ -315,6 +1023,9 @@ void kbase_cancel_soft_job(struct kbase_jd_atom *katom) kbase_fence_cancel_wait(katom); break; #endif + case BASE_JD_REQ_SOFT_EVENT_WAIT: + kbasep_soft_event_cancel_job(katom); + break; default: /* This soft-job doesn't support cancellation! 
*/ KBASE_DEBUG_ASSERT(0); @@ -372,8 +1083,23 @@ int kbase_prepare_soft_job(struct kbase_jd_atom *katom) } break; #endif /* CONFIG_SYNC */ + case BASE_JD_REQ_SOFT_JIT_ALLOC: + return kbase_jit_allocate_prepare(katom); case BASE_JD_REQ_SOFT_REPLAY: + case BASE_JD_REQ_SOFT_JIT_FREE: break; + case BASE_JD_REQ_SOFT_EVENT_WAIT: + case BASE_JD_REQ_SOFT_EVENT_SET: + case BASE_JD_REQ_SOFT_EVENT_RESET: + if (katom->jc == 0) + return -EINVAL; + break; + case BASE_JD_REQ_SOFT_DEBUG_COPY: + return kbase_debug_copy_prepare(katom); + case BASE_JD_REQ_SOFT_EXT_RES_MAP: + return kbase_ext_res_prepare(katom); + case BASE_JD_REQ_SOFT_EXT_RES_UNMAP: + return kbase_ext_res_prepare(katom); default: /* Unsupported soft-job */ return -EINVAL; @@ -405,6 +1131,19 @@ void kbase_finish_soft_job(struct kbase_jd_atom *katom) } break; #endif /* CONFIG_SYNC */ + + case BASE_JD_REQ_SOFT_DEBUG_COPY: + kbase_debug_copy_finish(katom); + break; + case BASE_JD_REQ_SOFT_JIT_ALLOC: + kbase_jit_allocate_finish(katom); + break; + case BASE_JD_REQ_SOFT_EXT_RES_MAP: + kbase_ext_res_finish(katom); + break; + case BASE_JD_REQ_SOFT_EXT_RES_UNMAP: + kbase_ext_res_finish(katom); + break; } } @@ -445,12 +1184,9 @@ void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev) kbase_finish_soft_job(katom_iter); resched |= jd_done_nolock(katom_iter, NULL); } else { - /* The job has not completed */ KBASE_DEBUG_ASSERT((katom_iter->core_req & BASEP_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_SOFT_REPLAY); - list_add_tail(&katom_iter->dep_item[0], - &kctx->waiting_soft_jobs); } mutex_unlock(&kctx->jctx.lock); diff --git a/drivers/gpu/arm/midgard/mali_kbase_tlstream.c b/drivers/gpu/arm/midgard/mali_kbase_tlstream.c index 0994ddd7c95f..e41efb81b217 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_tlstream.c +++ b/drivers/gpu/arm/midgard/mali_kbase_tlstream.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -29,6 +29,7 @@ #include #include #include +#include /*****************************************************************************/ @@ -41,9 +42,6 @@ /* The number of nanoseconds in a second. */ #define NSECS_IN_SEC 1000000000ull /* ns */ -/* The number of nanoseconds to wait before autoflushing the stream. */ -#define AUTOFLUSH_TIMEOUT (2ull * NSECS_IN_SEC) /* ns */ - /* The period of autoflush checker execution in milliseconds. */ #define AUTOFLUSH_INTERVAL 1000 /* ms */ @@ -152,7 +150,9 @@ enum tl_msg_id_obj { /* Message ids of trace events that are recorded in the auxiliary stream. */ enum tl_msg_id_aux { KBASE_AUX_PM_STATE, + KBASE_AUX_ISSUE_JOB_SOFTSTOP, KBASE_AUX_JOB_SOFTSTOP, + KBASE_AUX_JOB_SOFTSTOP_EX, KBASE_AUX_PAGEFAULT, KBASE_AUX_PAGESALLOC }; @@ -166,7 +166,7 @@ enum tl_msg_id_aux { * @wbi: write buffer index * @rbi: read buffer index * @numbered: if non-zero stream's packets are sequentially numbered - * @last_write_time: timestamp indicating last write + * @autoflush_counter: counter tracking stream's autoflush state * * This structure holds information needed to construct proper packets in the * timeline stream. Each message in sequence must bear timestamp that is greater @@ -177,6 +177,11 @@ enum tl_msg_id_aux { * Each packet in timeline body stream has sequence number embedded (this value * must increment monotonically and is used by packets receiver to discover * buffer overflows. 
+ * Autoflush counter is set to negative number when there is no data pending + * for flush and it is set to zero on every update of the buffer. Autoflush + * timer will increment the counter by one on every expiry. In case there will + * be no activity on the buffer during two consecutive timer expiries, stream + * buffer will be flushed. */ struct tl_stream { spinlock_t lock; @@ -190,7 +195,7 @@ struct tl_stream { atomic_t rbi; int numbered; - u64 last_write_time; + atomic_t autoflush_counter; }; /** @@ -241,9 +246,6 @@ static atomic_t autoflush_timer_active; * streams at any given time. */ static DEFINE_MUTEX(tl_reader_lock); -/* Indicator of whether the timeline stream file descriptor is already used. */ -static atomic_t tlstream_busy = {0}; - /* Timeline stream event queue. */ static DECLARE_WAIT_QUEUE_HEAD(tl_event_queue); @@ -269,8 +271,8 @@ static const struct tp_desc tp_desc_obj[] = { KBASE_TL_NEW_CTX, __stringify(KBASE_TL_NEW_CTX), "object ctx is created", - "@pI", - "ctx,ctx_nr" + "@pII", + "ctx,ctx_nr,tgid" }, { KBASE_TL_NEW_GPU, @@ -437,6 +439,13 @@ static const struct tp_desc tp_desc_aux[] = { "@IL", "core_type,core_state_bitset" }, + { + KBASE_AUX_ISSUE_JOB_SOFTSTOP, + __stringify(KBASE_AUX_ISSUE_JOB_SOFTSTOP), + "Issuing job soft stop", + "@p", + "atom" + }, { KBASE_AUX_JOB_SOFTSTOP, __stringify(KBASE_AUX_JOB_SOFTSTOP), @@ -444,6 +453,13 @@ static const struct tp_desc tp_desc_aux[] = { "@I", "tag_id" }, + { + KBASE_AUX_JOB_SOFTSTOP_EX, + __stringify(KBASE_AUX_JOB_SOFTSTOP_EX), + "Job soft stop, more details", + "@pI", + "atom,job_type" + }, { KBASE_AUX_PAGEFAULT, __stringify(KBASE_AUX_PAGEFAULT), @@ -470,6 +486,11 @@ static atomic_t tlstream_bytes_generated = {0}; /*****************************************************************************/ +/* Indicator of whether the timeline stream file descriptor is used. */ +atomic_t kbase_tlstream_enabled = {0}; + +/*****************************************************************************/ + /** * kbasep_tlstream_get_timestamp - return timestamp * @@ -773,6 +794,9 @@ static size_t kbasep_tlstream_msgbuf_submit( unsigned int rb_idx_raw = atomic_read(&stream->rbi); unsigned int wb_idx = wb_idx_raw % PACKET_COUNT; + /* Set stream as flushed. */ + atomic_set(&stream->autoflush_counter, -1); + kbasep_tlstream_packet_header_update( stream->buffer[wb_idx].data, wb_size - PACKET_HEADER_SIZE); @@ -828,7 +852,7 @@ static size_t kbasep_tlstream_msgbuf_submit( static char *kbasep_tlstream_msgbuf_acquire( enum tl_stream_type stream_type, size_t msg_size, - unsigned long *flags) + unsigned long *flags) __acquires(&stream->lock) { struct tl_stream *stream; unsigned int wb_idx_raw; @@ -875,14 +899,16 @@ static char *kbasep_tlstream_msgbuf_acquire( */ static void kbasep_tlstream_msgbuf_release( enum tl_stream_type stream_type, - unsigned long flags) + unsigned long flags) __releases(&stream->lock) { struct tl_stream *stream; KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type); stream = tl_stream[stream_type]; - stream->last_write_time = kbasep_tlstream_get_timestamp(); + + /* Mark stream as containing unflushed data. */ + atomic_set(&stream->autoflush_counter, 0); spin_unlock_irqrestore(&stream->lock, flags); } @@ -944,6 +970,22 @@ static void kbasep_tlstream_autoflush_timer_callback(unsigned long data) size_t wb_size; size_t min_size = PACKET_HEADER_SIZE; + int af_cnt = atomic_read(&stream->autoflush_counter); + + /* Check if stream contain unflushed data. */ + if (0 > af_cnt) + continue; + + /* Check if stream should be flushed now. 
*/ + if (af_cnt != atomic_cmpxchg( + &stream->autoflush_counter, + af_cnt, + af_cnt + 1)) + continue; + if (!af_cnt) + continue; + + /* Autoflush this stream. */ if (stream->numbered) min_size += PACKET_NUMBER_SIZE; @@ -954,16 +996,11 @@ static void kbasep_tlstream_autoflush_timer_callback(unsigned long data) wb_size = atomic_read(&stream->buffer[wb_idx].size); if (wb_size > min_size) { - u64 timestamp = kbasep_tlstream_get_timestamp(); - - if (timestamp - stream->last_write_time - > AUTOFLUSH_TIMEOUT) { - wb_size = kbasep_tlstream_msgbuf_submit( - stream, wb_idx_raw, wb_size); - wb_idx = (wb_idx_raw + 1) % PACKET_COUNT; - atomic_set(&stream->buffer[wb_idx].size, - wb_size); - } + wb_size = kbasep_tlstream_msgbuf_submit( + stream, wb_idx_raw, wb_size); + wb_idx = (wb_idx_raw + 1) % PACKET_COUNT; + atomic_set(&stream->buffer[wb_idx].size, + wb_size); } spin_unlock_irqrestore(&stream->lock, flags); } @@ -1134,7 +1171,12 @@ static int kbasep_tlstream_release(struct inode *inode, struct file *filp) KBASE_DEBUG_ASSERT(filp); CSTD_UNUSED(inode); CSTD_UNUSED(filp); - atomic_set(&tlstream_busy, 0); + + /* Stop autoflush timer before releasing access to streams. */ + atomic_set(&autoflush_timer_active, 0); + del_timer_sync(&autoflush_timer); + + atomic_set(&kbase_tlstream_enabled, 0); return 0; } @@ -1223,7 +1265,6 @@ static void kbasep_tlstream_timeline_header( int kbase_tlstream_init(void) { enum tl_stream_type i; - int rcode; /* Prepare stream structures. */ for (i = 0; i < TL_STREAM_TYPE_COUNT; i++) { @@ -1241,14 +1282,10 @@ int kbase_tlstream_init(void) } /* Initialize autoflush timer. */ - atomic_set(&autoflush_timer_active, 1); + atomic_set(&autoflush_timer_active, 0); setup_timer(&autoflush_timer, kbasep_tlstream_autoflush_timer_callback, 0); - rcode = mod_timer( - &autoflush_timer, - jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL)); - CSTD_UNUSED(rcode); return 0; } @@ -1257,9 +1294,6 @@ void kbase_tlstream_term(void) { enum tl_stream_type i; - atomic_set(&autoflush_timer_active, 0); - del_timer_sync(&autoflush_timer); - for (i = 0; i < TL_STREAM_TYPE_COUNT; i++) { kbasep_timeline_stream_term(tl_stream[i]); kfree(tl_stream[i]); @@ -1268,14 +1302,16 @@ void kbase_tlstream_term(void) int kbase_tlstream_acquire(struct kbase_context *kctx, int *fd) { - if (0 == atomic_cmpxchg(&tlstream_busy, 0, 1)) { + if (0 == atomic_cmpxchg(&kbase_tlstream_enabled, 0, 1)) { + int rcode; + *fd = anon_inode_getfd( "[mali_tlstream]", &kbasep_tlstream_fops, kctx, O_RDONLY | O_CLOEXEC); if (0 > *fd) { - atomic_set(&tlstream_busy, 0); + atomic_set(&kbase_tlstream_enabled, 0); return *fd; } @@ -1294,6 +1330,14 @@ int kbase_tlstream_acquire(struct kbase_context *kctx, int *fd) TL_STREAM_TYPE_AUX_HEADER, tp_desc_aux, ARRAY_SIZE(tp_desc_aux)); + + /* Start autoflush timer. 
*/ + atomic_set(&autoflush_timer_active, 1); + rcode = mod_timer( + &autoflush_timer, + jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL)); + CSTD_UNUSED(rcode); + } else { *fd = -EBUSY; } @@ -1329,11 +1373,12 @@ void kbase_tlstream_stats(u32 *bytes_collected, u32 *bytes_generated) /*****************************************************************************/ -void kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr) +void __kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr, u32 tgid) { const u32 msg_id = KBASE_TL_NEW_CTX; const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(nr); + sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(nr) + + sizeof(tgid); unsigned long flags; char *buffer; size_t pos = 0; @@ -1349,12 +1394,15 @@ void kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr) buffer, pos, &context, sizeof(context)); pos = kbasep_tlstream_write_bytes( buffer, pos, &nr, sizeof(nr)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &tgid, sizeof(tgid)); + KBASE_DEBUG_ASSERT(msg_size == pos); kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags); } -void kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count) +void __kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count) { const u32 msg_id = KBASE_TL_NEW_GPU; const size_t msg_size = @@ -1382,7 +1430,7 @@ void kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags); } -void kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn) +void __kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn) { const u32 msg_id = KBASE_TL_NEW_LPU; const size_t msg_size = @@ -1410,7 +1458,7 @@ void kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags); } -void kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu) +void __kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu) { const u32 msg_id = KBASE_TL_LIFELINK_LPU_GPU; const size_t msg_size = @@ -1435,7 +1483,7 @@ void kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags); } -void kbase_tlstream_tl_summary_new_as(void *as, u32 nr) +void __kbase_tlstream_tl_summary_new_as(void *as, u32 nr) { const u32 msg_id = KBASE_TL_NEW_AS; const size_t msg_size = @@ -1460,7 +1508,7 @@ void kbase_tlstream_tl_summary_new_as(void *as, u32 nr) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags); } -void kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu) +void __kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu) { const u32 msg_id = KBASE_TL_LIFELINK_AS_GPU; const size_t msg_size = @@ -1487,11 +1535,12 @@ void kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu) /*****************************************************************************/ -void kbase_tlstream_tl_new_ctx(void *context, u32 nr) +void __kbase_tlstream_tl_new_ctx(void *context, u32 nr, u32 tgid) { const u32 msg_id = KBASE_TL_NEW_CTX; const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(nr); + sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(nr) + + sizeof(tgid); unsigned long flags; char *buffer; size_t pos = 0; @@ -1507,12 +1556,14 @@ void kbase_tlstream_tl_new_ctx(void *context, u32 nr) buffer, pos, &context, sizeof(context)); pos = kbasep_tlstream_write_bytes( buffer, pos, &nr, sizeof(nr)); + pos = kbasep_tlstream_write_bytes( + 
buffer, pos, &tgid, sizeof(tgid)); KBASE_DEBUG_ASSERT(msg_size == pos); kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void kbase_tlstream_tl_new_atom(void *atom, u32 nr) +void __kbase_tlstream_tl_new_atom(void *atom, u32 nr) { const u32 msg_id = KBASE_TL_NEW_ATOM; const size_t msg_size = @@ -1537,7 +1588,7 @@ void kbase_tlstream_tl_new_atom(void *atom, u32 nr) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void kbase_tlstream_tl_del_ctx(void *context) +void __kbase_tlstream_tl_del_ctx(void *context) { const u32 msg_id = KBASE_TL_DEL_CTX; const size_t msg_size = @@ -1560,7 +1611,7 @@ void kbase_tlstream_tl_del_ctx(void *context) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void kbase_tlstream_tl_del_atom(void *atom) +void __kbase_tlstream_tl_del_atom(void *atom) { const u32 msg_id = KBASE_TL_DEL_ATOM; const size_t msg_size = @@ -1583,7 +1634,7 @@ void kbase_tlstream_tl_del_atom(void *atom) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void kbase_tlstream_tl_ret_ctx_lpu(void *context, void *lpu) +void __kbase_tlstream_tl_ret_ctx_lpu(void *context, void *lpu) { const u32 msg_id = KBASE_TL_RET_CTX_LPU; const size_t msg_size = @@ -1608,7 +1659,7 @@ void kbase_tlstream_tl_ret_ctx_lpu(void *context, void *lpu) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context) +void __kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context) { const u32 msg_id = KBASE_TL_RET_ATOM_CTX; const size_t msg_size = @@ -1633,7 +1684,7 @@ void kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void kbase_tlstream_tl_ret_atom_lpu( +void __kbase_tlstream_tl_ret_atom_lpu( void *atom, void *lpu, const char *attrib_match_list) { const u32 msg_id = KBASE_TL_RET_ATOM_LPU; @@ -1664,7 +1715,7 @@ void kbase_tlstream_tl_ret_atom_lpu( kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void kbase_tlstream_tl_nret_ctx_lpu(void *context, void *lpu) +void __kbase_tlstream_tl_nret_ctx_lpu(void *context, void *lpu) { const u32 msg_id = KBASE_TL_NRET_CTX_LPU; const size_t msg_size = @@ -1689,7 +1740,7 @@ void kbase_tlstream_tl_nret_ctx_lpu(void *context, void *lpu) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context) +void __kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context) { const u32 msg_id = KBASE_TL_NRET_ATOM_CTX; const size_t msg_size = @@ -1714,7 +1765,7 @@ void kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void kbase_tlstream_tl_dep_atom_atom(void *atom1, void *atom2) +void __kbase_tlstream_tl_dep_atom_atom(void *atom1, void *atom2) { const u32 msg_id = KBASE_TL_DEP_ATOM_ATOM; const size_t msg_size = @@ -1739,7 +1790,7 @@ void kbase_tlstream_tl_dep_atom_atom(void *atom1, void *atom2) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu) +void __kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu) { const u32 msg_id = KBASE_TL_NRET_ATOM_LPU; const size_t msg_size = @@ -1764,7 +1815,7 @@ void kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx) +void __kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx) { const u32 msg_id = KBASE_TL_RET_AS_CTX; const size_t msg_size = @@ -1789,7 
+1840,7 @@ void kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx) +void __kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx) { const u32 msg_id = KBASE_TL_NRET_AS_CTX; const size_t msg_size = @@ -1814,7 +1865,7 @@ void kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void kbase_tlstream_tl_ret_atom_as(void *atom, void *as) +void __kbase_tlstream_tl_ret_atom_as(void *atom, void *as) { const u32 msg_id = KBASE_TL_RET_ATOM_AS; const size_t msg_size = @@ -1839,7 +1890,7 @@ void kbase_tlstream_tl_ret_atom_as(void *atom, void *as) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void kbase_tlstream_tl_nret_atom_as(void *atom, void *as) +void __kbase_tlstream_tl_nret_atom_as(void *atom, void *as) { const u32 msg_id = KBASE_TL_NRET_ATOM_AS; const size_t msg_size = @@ -1864,7 +1915,7 @@ void kbase_tlstream_tl_nret_atom_as(void *atom, void *as) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void kbase_tlstream_tl_attrib_atom_config( +void __kbase_tlstream_tl_attrib_atom_config( void *atom, u64 jd, u64 affinity, u32 config) { const u32 msg_id = KBASE_TL_ATTRIB_ATOM_CONFIG; @@ -1895,7 +1946,7 @@ void kbase_tlstream_tl_attrib_atom_config( kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void kbase_tlstream_tl_attrib_as_config( +void __kbase_tlstream_tl_attrib_as_config( void *as, u64 transtab, u64 memattr, u64 transcfg) { const u32 msg_id = KBASE_TL_ATTRIB_AS_CONFIG; @@ -1926,7 +1977,7 @@ void kbase_tlstream_tl_attrib_as_config( kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void kbase_tlstream_jd_gpu_soft_reset(void *gpu) +void __kbase_tlstream_jd_gpu_soft_reset(void *gpu) { const u32 msg_id = KBASE_JD_GPU_SOFT_RESET; const size_t msg_size = @@ -1951,7 +2002,7 @@ void kbase_tlstream_jd_gpu_soft_reset(void *gpu) /*****************************************************************************/ -void kbase_tlstream_aux_pm_state(u32 core_type, u64 state) +void __kbase_tlstream_aux_pm_state(u32 core_type, u64 state) { const u32 msg_id = KBASE_AUX_PM_STATE; const size_t msg_size = @@ -1976,7 +2027,28 @@ void kbase_tlstream_aux_pm_state(u32 core_type, u64 state) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); } -void kbase_tlstream_aux_job_softstop(u32 js_id) +void __kbase_tlstream_aux_issue_job_softstop(void *katom) +{ + const u32 msg_id = KBASE_AUX_ISSUE_JOB_SOFTSTOP; + const size_t msg_size = + sizeof(msg_id) + sizeof(u64) + sizeof(katom); + unsigned long flags; + char *buffer; + size_t pos = 0; + + buffer = kbasep_tlstream_msgbuf_acquire( + TL_STREAM_TYPE_AUX, msg_size, &flags); + KBASE_DEBUG_ASSERT(buffer); + + pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_tlstream_write_timestamp(buffer, pos); + pos = kbasep_tlstream_write_bytes(buffer, pos, &katom, sizeof(katom)); + KBASE_DEBUG_ASSERT(msg_size == pos); + + kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); +} + +void __kbase_tlstream_aux_job_softstop(u32 js_id) { const u32 msg_id = KBASE_AUX_JOB_SOFTSTOP; const size_t msg_size = @@ -1998,7 +2070,66 @@ void kbase_tlstream_aux_job_softstop(u32 js_id) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); } -void kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change) +/** + * __kbase_tlstream_aux_job_softstop_ex_record - record the trace point + * @katom: the atom that has been soft-stopped + * 
@job_type: the job type + */ +static void __kbase_tlstream_aux_job_softstop_ex_record( + void *katom, u32 job_type) +{ + const u32 msg_id = KBASE_AUX_JOB_SOFTSTOP_EX; + const size_t msg_size = + sizeof(msg_id) + sizeof(u64) + sizeof(katom) + sizeof(job_type); + unsigned long flags; + char *buffer; + size_t pos = 0; + + buffer = kbasep_tlstream_msgbuf_acquire( + TL_STREAM_TYPE_AUX, msg_size, &flags); + KBASE_DEBUG_ASSERT(buffer); + + pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_tlstream_write_timestamp(buffer, pos); + pos = kbasep_tlstream_write_bytes(buffer, pos, &katom, sizeof(katom)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &job_type, sizeof(job_type)); + KBASE_DEBUG_ASSERT(msg_size == pos); + + kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); +} + +void __kbase_tlstream_aux_job_softstop_ex(struct kbase_jd_atom *katom) +{ + struct kbase_context *kctx = katom->kctx; + u64 jd = katom->jc; + + while (jd != 0) { + struct job_descriptor_header *job; + struct kbase_vmap_struct map; + + job = kbase_vmap(kctx, jd, sizeof(*job), &map); + if (!job) { + dev_err(kctx->kbdev->dev, + "__kbase_tlstream_aux_job_softstop_ex: failed to map job descriptor 0x%llx for atom 0x%p\n", + jd, (void *)katom); + break; + } + if (job->exception_status != BASE_JD_EVENT_STOPPED) { + kbase_vunmap(kctx, &map); + break; + } + + __kbase_tlstream_aux_job_softstop_ex_record( + katom, job->job_type); + + jd = job->job_descriptor_size ? + job->next_job._64 : job->next_job._32; + kbase_vunmap(kctx, &map); + } +} + +void __kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change) { const u32 msg_id = KBASE_AUX_PAGEFAULT; const size_t msg_size = @@ -2023,7 +2154,7 @@ void kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); } -void kbase_tlstream_aux_pagesalloc(u32 ctx_nr, u64 page_count) +void __kbase_tlstream_aux_pagesalloc(u32 ctx_nr, u64 page_count) { const u32 msg_id = KBASE_AUX_PAGESALLOC; const size_t msg_size = diff --git a/drivers/gpu/arm/midgard/mali_kbase_tlstream.h b/drivers/gpu/arm/midgard/mali_kbase_tlstream.h index eccd469a86e8..6c5c59616b78 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_tlstream.h +++ b/drivers/gpu/arm/midgard/mali_kbase_tlstream.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -102,18 +102,65 @@ void kbase_tlstream_stats(u32 *bytes_collected, u32 *bytes_generated); /*****************************************************************************/ +void __kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr, u32 tgid); +void __kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count); +void __kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn); +void __kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu); +void __kbase_tlstream_tl_summary_new_as(void *as, u32 nr); +void __kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu); +void __kbase_tlstream_tl_new_ctx(void *context, u32 nr, u32 tgid); +void __kbase_tlstream_tl_new_atom(void *atom, u32 nr); +void __kbase_tlstream_tl_del_ctx(void *context); +void __kbase_tlstream_tl_del_atom(void *atom); +void __kbase_tlstream_tl_ret_ctx_lpu(void *context, void *lpu); +void __kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context); +void __kbase_tlstream_tl_ret_atom_lpu( + void *atom, void *lpu, const char *attrib_match_list); +void __kbase_tlstream_tl_nret_ctx_lpu(void *context, void *lpu); +void __kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context); +void __kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu); +void __kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx); +void __kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx); +void __kbase_tlstream_tl_ret_atom_as(void *atom, void *as); +void __kbase_tlstream_tl_nret_atom_as(void *atom, void *as); +void __kbase_tlstream_tl_dep_atom_atom(void *atom1, void *atom2); +void __kbase_tlstream_tl_attrib_atom_config( + void *atom, u64 jd, u64 affinity, u32 config); +void __kbase_tlstream_tl_attrib_as_config( + void *as, u64 transtab, u64 memattr, u64 transcfg); +void __kbase_tlstream_jd_gpu_soft_reset(void *gpu); +void __kbase_tlstream_aux_pm_state(u32 core_type, u64 state); +void __kbase_tlstream_aux_issue_job_softstop(void *katom); +void __kbase_tlstream_aux_job_softstop(u32 js_id); +void __kbase_tlstream_aux_job_softstop_ex(struct kbase_jd_atom *katom); +void __kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change); +void __kbase_tlstream_aux_pagesalloc(u32 ctx_nr, u64 page_count); + +extern atomic_t kbase_tlstream_enabled; + +#define __TRACE_IF_ENABLED(trace_name, ...) \ + do { \ + int enabled = atomic_read(&kbase_tlstream_enabled); \ + if (enabled) \ + __kbase_tlstream_##trace_name(__VA_ARGS__); \ + } while (0) + +/*****************************************************************************/ + /** * kbase_tlstream_tl_summary_new_ctx - create context object in timeline * summary * @context: name of the context object * @nr: context number + * @tgid: thread Group Id * * Function emits a timeline message informing about context creation. Context * is created with context number (its attribute), that can be used to link * kbase context with userspace context. * This message is directed to timeline summary stream. */ -void kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr); +#define kbase_tlstream_tl_summary_new_ctx(context, nr, tgid) \ + __TRACE_IF_ENABLED(tl_summary_new_ctx, context, nr, tgid) /** * kbase_tlstream_tl_summary_new_gpu - create GPU object in timeline summary @@ -125,7 +172,8 @@ void kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr); * created with two attributes: id and core count. * This message is directed to timeline summary stream. 
*/ -void kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count); +#define kbase_tlstream_tl_summary_new_gpu(gpu, id, core_count) \ + __TRACE_IF_ENABLED(tl_summary_new_gpu, gpu, id, core_count) /** * kbase_tlstream_tl_summary_new_lpu - create LPU object in timeline summary @@ -138,7 +186,8 @@ void kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count); * and function bearing information about this LPU abilities. * This message is directed to timeline summary stream. */ -void kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn); +#define kbase_tlstream_tl_summary_new_lpu(lpu, nr, fn) \ + __TRACE_IF_ENABLED(tl_summary_new_lpu, lpu, nr, fn) /** * kbase_tlstream_tl_summary_lifelink_lpu_gpu - lifelink LPU object to GPU @@ -149,7 +198,8 @@ void kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn); * along with GPU object. * This message is directed to timeline summary stream. */ -void kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu); +#define kbase_tlstream_tl_summary_lifelink_lpu_gpu(lpu, gpu) \ + __TRACE_IF_ENABLED(tl_summary_lifelink_lpu_gpu, lpu, gpu) /** * kbase_tlstream_tl_summary_new_as - create address space object in timeline summary @@ -161,7 +211,8 @@ void kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu); * address space. * This message is directed to timeline summary stream. */ -void kbase_tlstream_tl_summary_new_as(void *as, u32 nr); +#define kbase_tlstream_tl_summary_new_as(as, nr) \ + __TRACE_IF_ENABLED(tl_summary_new_as, as, nr) /** * kbase_tlstream_tl_summary_lifelink_as_gpu - lifelink address space object to GPU @@ -172,18 +223,21 @@ void kbase_tlstream_tl_summary_new_as(void *as, u32 nr); * shall be deleted along with GPU object. * This message is directed to timeline summary stream. */ -void kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu); +#define kbase_tlstream_tl_summary_lifelink_as_gpu(as, gpu) \ + __TRACE_IF_ENABLED(tl_summary_lifelink_as_gpu, as, gpu) /** * kbase_tlstream_tl_new_ctx - create context object in timeline * @context: name of the context object * @nr: context number + * @tgid: thread Group Id * * Function emits a timeline message informing about context creation. Context * is created with context number (its attribute), that can be used to link * kbase context with userspace context. */ -void kbase_tlstream_tl_new_ctx(void *context, u32 nr); +#define kbase_tlstream_tl_new_ctx(context, nr, tgid) \ + __TRACE_IF_ENABLED(tl_new_ctx, context, nr, tgid) /** * kbase_tlstream_tl_new_atom - create atom object in timeline @@ -194,7 +248,8 @@ void kbase_tlstream_tl_new_ctx(void *context, u32 nr); * created with atom number (its attribute) that links it with actual work * bucket id understood by hardware. */ -void kbase_tlstream_tl_new_atom(void *atom, u32 nr); +#define kbase_tlstream_tl_new_atom(atom, nr) \ + __TRACE_IF_ENABLED(tl_new_atom, atom, nr) /** * kbase_tlstream_tl_del_ctx - destroy context object in timeline @@ -203,7 +258,8 @@ void kbase_tlstream_tl_new_atom(void *atom, u32 nr); * Function emits a timeline message informing that context object ceased to * exist. */ -void kbase_tlstream_tl_del_ctx(void *context); +#define kbase_tlstream_tl_del_ctx(context) \ + __TRACE_IF_ENABLED(tl_del_ctx, context) /** * kbase_tlstream_tl_del_atom - destroy atom object in timeline @@ -212,7 +268,8 @@ void kbase_tlstream_tl_del_ctx(void *context); * Function emits a timeline message informing that atom object ceased to * exist. 
*/ -void kbase_tlstream_tl_del_atom(void *atom); +#define kbase_tlstream_tl_del_atom(atom) \ + __TRACE_IF_ENABLED(tl_del_atom, atom) /** * kbase_tlstream_tl_ret_ctx_lpu - retain context by LPU @@ -222,7 +279,8 @@ void kbase_tlstream_tl_del_atom(void *atom); * Function emits a timeline message informing that context is being held * by LPU and must not be deleted unless it is released. */ -void kbase_tlstream_tl_ret_ctx_lpu(void *context, void *lpu); +#define kbase_tlstream_tl_ret_ctx_lpu(context, lpu) \ + __TRACE_IF_ENABLED(tl_ret_ctx_lpu, context, lpu) /** * kbase_tlstream_tl_ret_atom_ctx - retain atom by context @@ -232,7 +290,8 @@ void kbase_tlstream_tl_ret_ctx_lpu(void *context, void *lpu); * Function emits a timeline message informing that atom object is being held * by context and must not be deleted unless it is released. */ -void kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context); +#define kbase_tlstream_tl_ret_atom_ctx(atom, context) \ + __TRACE_IF_ENABLED(tl_ret_atom_ctx, atom, context) /** * kbase_tlstream_tl_ret_atom_lpu - retain atom by LPU @@ -243,8 +302,8 @@ void kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context); * Function emits a timeline message informing that atom object is being held * by LPU and must not be deleted unless it is released. */ -void kbase_tlstream_tl_ret_atom_lpu( - void *atom, void *lpu, const char *attrib_match_list); +#define kbase_tlstream_tl_ret_atom_lpu(atom, lpu, attrib_match_list) \ + __TRACE_IF_ENABLED(tl_ret_atom_lpu, atom, lpu, attrib_match_list) /** * kbase_tlstream_tl_nret_ctx_lpu - release context by LPU @@ -254,7 +313,8 @@ void kbase_tlstream_tl_ret_atom_lpu( * Function emits a timeline message informing that context is being released * by LPU object. */ -void kbase_tlstream_tl_nret_ctx_lpu(void *context, void *lpu); +#define kbase_tlstream_tl_nret_ctx_lpu(context, lpu) \ + __TRACE_IF_ENABLED(tl_nret_ctx_lpu, context, lpu) /** * kbase_tlstream_tl_nret_atom_ctx - release atom by context @@ -264,7 +324,8 @@ void kbase_tlstream_tl_nret_ctx_lpu(void *context, void *lpu); * Function emits a timeline message informing that atom object is being * released by context. */ -void kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context); +#define kbase_tlstream_tl_nret_atom_ctx(atom, context) \ + __TRACE_IF_ENABLED(tl_nret_atom_ctx, atom, context) /** * kbase_tlstream_tl_nret_atom_lpu - release atom by LPU @@ -274,7 +335,8 @@ void kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context); * Function emits a timeline message informing that atom object is being * released by LPU. */ -void kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu); +#define kbase_tlstream_tl_nret_atom_lpu(atom, lpu) \ + __TRACE_IF_ENABLED(tl_nret_atom_lpu, atom, lpu) /** * kbase_tlstream_tl_ret_as_ctx - lifelink address space object to context @@ -284,7 +346,8 @@ void kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu); * Function emits a timeline message informing that address space object * is being held by the context object. */ -void kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx); +#define kbase_tlstream_tl_ret_as_ctx(as, ctx) \ + __TRACE_IF_ENABLED(tl_ret_as_ctx, as, ctx) /** * kbase_tlstream_tl_nret_as_ctx - release address space by context @@ -294,7 +357,8 @@ void kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx); * Function emits a timeline message informing that address space object * is being released by atom. 
*/ -void kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx); +#define kbase_tlstream_tl_nret_as_ctx(as, ctx) \ + __TRACE_IF_ENABLED(tl_nret_as_ctx, as, ctx) /** * kbase_tlstream_tl_ret_atom_as - retain atom by address space @@ -304,7 +368,8 @@ void kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx); * Function emits a timeline message informing that atom object is being held * by address space and must not be deleted unless it is released. */ -void kbase_tlstream_tl_ret_atom_as(void *atom, void *as); +#define kbase_tlstream_tl_ret_atom_as(atom, as) \ + __TRACE_IF_ENABLED(tl_ret_atom_as, atom, as) /** * kbase_tlstream_tl_nret_atom_as - release atom by address space @@ -314,7 +379,8 @@ void kbase_tlstream_tl_ret_atom_as(void *atom, void *as); * Function emits a timeline message informing that atom object is being * released by address space. */ -void kbase_tlstream_tl_nret_atom_as(void *atom, void *as); +#define kbase_tlstream_tl_nret_atom_as(atom, as) \ + __TRACE_IF_ENABLED(tl_nret_atom_as, atom, as) /** * kbase_tlstream_tl_dep_atom_atom - parent atom depends on child atom @@ -324,7 +390,8 @@ void kbase_tlstream_tl_nret_atom_as(void *atom, void *as); * Function emits a timeline message informing that parent atom waits for * child atom object to be completed before start its execution. */ -void kbase_tlstream_tl_dep_atom_atom(void *atom1, void *atom2); +#define kbase_tlstream_tl_dep_atom_atom(atom1, atom2) \ + __TRACE_IF_ENABLED(tl_dep_atom_atom, atom1, atom2) /** * kbase_tlstream_tl_attrib_atom_config - atom job slot attributes @@ -335,8 +402,8 @@ void kbase_tlstream_tl_dep_atom_atom(void *atom1, void *atom2); * * Function emits a timeline message containing atom attributes. */ -void kbase_tlstream_tl_attrib_atom_config( - void *atom, u64 jd, u64 affinity, u32 config); +#define kbase_tlstream_tl_attrib_atom_config(atom, jd, affinity, config) \ + __TRACE_IF_ENABLED(tl_attrib_atom_config, atom, jd, affinity, config) /** * kbase_tlstream_tl_attrib_as_config - address space attributes @@ -347,8 +414,8 @@ void kbase_tlstream_tl_attrib_atom_config( * * Function emits a timeline message containing address space attributes. */ -void kbase_tlstream_tl_attrib_as_config( - void *as, u64 transtab, u64 memattr, u64 transcfg); +#define kbase_tlstream_tl_attrib_as_config(as, transtab, memattr, transcfg) \ + __TRACE_IF_ENABLED(tl_attrib_as_config, as, transtab, memattr, transcfg) /** * kbase_tlstream_jd_gpu_soft_reset - The GPU is being soft reset @@ -357,20 +424,44 @@ void kbase_tlstream_tl_attrib_as_config( * This imperative tracepoint is specific to job dumping. * Function emits a timeline message indicating GPU soft reset. 
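Note: in the same way, an atom's time on an address space can be bracketed with the ret/nret pair above, and its slot configuration reported once known. A sketch with assumed identifiers (the jd/affinity/config values come from wherever the job is set up):

    /* Illustrative only: pin the atom to its address space, report the job
     * descriptor address, core affinity and JS_CONFIG value, then release
     * the address space when the atom retires. */
    static void trace_atom_on_as(void *katom, void *as,
            u64 jd_gpu_va, u64 affinity, u32 js_config)
    {
        kbase_tlstream_tl_ret_atom_as(katom, as);
        kbase_tlstream_tl_attrib_atom_config(katom, jd_gpu_va, affinity,
                js_config);
        /* ... atom executes ... */
        kbase_tlstream_tl_nret_atom_as(katom, as);
    }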
*/ -void kbase_tlstream_jd_gpu_soft_reset(void *gpu); +#define kbase_tlstream_jd_gpu_soft_reset(gpu) \ + __TRACE_IF_ENABLED(jd_gpu_soft_reset, gpu) /** * kbase_tlstream_aux_pm_state - timeline message: power management state * @core_type: core type (shader, tiler, l2 cache, l3 cache) * @state: 64bits bitmask reporting power state of the cores (1-ON, 0-OFF) */ -void kbase_tlstream_aux_pm_state(u32 core_type, u64 state); +#define kbase_tlstream_aux_pm_state(core_type, state) \ + __TRACE_IF_ENABLED(aux_pm_state, core_type, state) + +/** + * kbase_tlstream_aux_issue_job_softstop - a soft-stop command is being issued + * @katom: the atom that is being soft-stopped + */ +#define kbase_tlstream_aux_issue_job_softstop(katom) \ + __TRACE_IF_ENABLED(aux_issue_job_softstop, katom) /** * kbase_tlstream_aux_job_softstop - soft job stop occurred * @js_id: job slot id */ -void kbase_tlstream_aux_job_softstop(u32 js_id); +#define kbase_tlstream_aux_job_softstop(js_id) \ + __TRACE_IF_ENABLED(aux_job_softstop, js_id) + +/** + * kbase_tlstream_aux_job_softstop_ex - extra info about soft-stopped atom + * @katom: the atom that has been soft-stopped + * + * This trace point adds more details about the soft-stopped atom. These details + * can't be safety collected inside the interrupt handler so we're doing it + * inside a worker. + * + * Note: this is not the same information that is recorded in the trace point, + * refer to __kbase_tlstream_aux_job_softstop_ex() for more details. + */ +#define kbase_tlstream_aux_job_softstop_ex(katom) \ + __TRACE_IF_ENABLED(aux_job_softstop_ex, katom) /** * kbase_tlstream_aux_pagefault - timeline message: MMU page fault event @@ -378,7 +469,8 @@ void kbase_tlstream_aux_job_softstop(u32 js_id); * @ctx_nr: kernel context number * @page_count_change: number of pages to be added */ -void kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change); +#define kbase_tlstream_aux_pagefault(ctx_nr, page_count_change) \ + __TRACE_IF_ENABLED(aux_pagefault, ctx_nr, page_count_change) /** * kbase_tlstream_aux_pagesalloc - timeline message: total number of allocated @@ -386,7 +478,8 @@ void kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change); * @ctx_nr: kernel context number * @page_count: number of pages used by the context */ -void kbase_tlstream_aux_pagesalloc(u32 ctx_nr, u64 page_count); +#define kbase_tlstream_aux_pagesalloc(ctx_nr, page_count) \ + __TRACE_IF_ENABLED(aux_pagesalloc, ctx_nr, page_count) #endif /* _KBASE_TLSTREAM_H */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c index aac9858875ad..a606ae810656 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c +++ b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. 
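Note: the kbase_tlstream_aux_job_softstop_ex() comment above says the extra details cannot be collected safely in the interrupt handler and are gathered in a worker instead. A sketch of that deferral pattern (the work item, helper names and GFP flag are illustrative, not part of this patch; needs linux/workqueue.h and linux/slab.h):

    /* IRQ path only queues the work; the worker emits the detailed point. */
    struct softstop_ex_work {
        struct work_struct work;
        struct kbase_jd_atom *katom;
    };

    static void softstop_ex_worker(struct work_struct *work)
    {
        struct softstop_ex_work *w =
            container_of(work, struct softstop_ex_work, work);

        kbase_tlstream_aux_job_softstop_ex(w->katom);
        kfree(w);
    }

    static void softstop_ex_schedule(struct kbase_jd_atom *katom)
    {
        struct softstop_ex_work *w = kzalloc(sizeof(*w), GFP_ATOMIC);

        if (!w)
            return;
        w->katom = katom;
        INIT_WORK(&w->work, softstop_ex_worker);
        queue_work(system_wq, &w->work);
    }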
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -102,7 +102,7 @@ static const struct file_operations kbasep_trace_timeline_debugfs_fops = { .open = kbasep_trace_timeline_debugfs_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = seq_release, }; void kbasep_trace_timeline_debugfs_init(struct kbase_device *kbdev) diff --git a/drivers/gpu/arm/midgard/mali_kbase_uku.h b/drivers/gpu/arm/midgard/mali_kbase_uku.h index 39514685b752..38f8e1f2f390 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_uku.h +++ b/drivers/gpu/arm/midgard/mali_kbase_uku.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2008-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2008-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -45,9 +45,13 @@ * 10.1: * - Do mmap in kernel for SAME_VA memory allocations rather then * calling back into the kernel as a 2nd stage of the allocation request. + * + * 10.2: + * - Add KBASE_FUNC_MEM_JIT_INIT which allows clients to request a custom VA + * region for use with JIT (ignored on 32-bit platforms) */ #define BASE_UK_VERSION_MAJOR 10 -#define BASE_UK_VERSION_MINOR 1 +#define BASE_UK_VERSION_MINOR 2 struct kbase_uk_mem_alloc { union uk_header header; @@ -327,8 +331,8 @@ struct kbase_uk_context_id { int id; }; -#if (defined(MALI_KTLSTREAM_ENABLED) && MALI_KTLSTREAM_ENABLED) || \ - defined(CONFIG_MALI_MIPE_ENABLED) +#if (defined(MALI_MIPE_ENABLED) && MALI_MIPE_ENABLED) || \ + !defined(MALI_MIPE_ENABLED) /** * struct kbase_uk_tlstream_acquire - User/Kernel space data exchange structure * @header: UK structure header @@ -396,7 +400,7 @@ struct kbase_uk_tlstream_stats { u32 bytes_generated; }; #endif /* MALI_UNIT_TEST */ -#endif /* MALI_KTLSTREAM_ENABLED */ +#endif /* MALI_MIPE_ENABLED */ /** * struct struct kbase_uk_prfcnt_value for the KBASE_FUNC_SET_PRFCNT_VALUES ioctl @@ -411,6 +415,38 @@ struct kbase_uk_prfcnt_values { u32 size; }; +/** + * struct kbase_uk_soft_event_update - User/Kernel space data exchange structure + * @header: UK structure header + * @evt: the GPU address containing the event + * @new_status: the new event status, must be either BASE_JD_SOFT_EVENT_SET or + * BASE_JD_SOFT_EVENT_RESET + * @flags: reserved for future uses, must be set to 0 + * + * This structure is used to update the status of a software event. If the + * event's status is set to BASE_JD_SOFT_EVENT_SET, any job currently waiting + * on this event will complete. + */ +struct kbase_uk_soft_event_update { + union uk_header header; + /* IN */ + u64 evt; + u32 new_status; + u32 flags; +}; + +/** + * struct kbase_uk_mem_jit_init - User/Kernel space data exchange structure + * @header: UK structure header + * @va_pages: Number of virtual pages required for JIT + * + * This structure is used when requesting initialization of JIT. 
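Note: for the new KBASE_FUNC_SOFT_EVENT_UPDATE request described above, a client-side sketch of filling the structure follows; the uk_header field name is an assumption and actually issuing the request through the UK interface is left out:

    /* Hypothetical helper: prepare a soft-event SET request so that any job
     * waiting on the event at evt_gpu_addr will complete. */
    static void fill_soft_event_set(struct kbase_uk_soft_event_update *update,
            u64 evt_gpu_addr)
    {
        memset(update, 0, sizeof(*update));
        update->header.id = KBASE_FUNC_SOFT_EVENT_UPDATE; /* field name assumed */
        update->evt = evt_gpu_addr;
        update->new_status = BASE_JD_SOFT_EVENT_SET;
        update->flags = 0;  /* reserved, must be zero */
    }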
+ */ +struct kbase_uk_mem_jit_init { + union uk_header header; + /* IN */ + u64 va_pages; +}; enum kbase_uk_function_id { KBASE_FUNC_MEM_ALLOC = (UK_FUNC_ID + 0), @@ -463,15 +499,15 @@ enum kbase_uk_function_id { KBASE_FUNC_GET_CONTEXT_ID = (UK_FUNC_ID + 31), -#if (defined(MALI_KTLSTREAM_ENABLED) && MALI_KTLSTREAM_ENABLED) || \ - defined(CONFIG_MALI_MIPE_ENABLED) +#if (defined(MALI_MIPE_ENABLED) && MALI_MIPE_ENABLED) || \ + !defined(MALI_MIPE_ENABLED) KBASE_FUNC_TLSTREAM_ACQUIRE = (UK_FUNC_ID + 32), #if MALI_UNIT_TEST KBASE_FUNC_TLSTREAM_TEST = (UK_FUNC_ID + 33), KBASE_FUNC_TLSTREAM_STATS = (UK_FUNC_ID + 34), #endif /* MALI_UNIT_TEST */ KBASE_FUNC_TLSTREAM_FLUSH = (UK_FUNC_ID + 35), -#endif /* MALI_KTLSTREAM_ENABLED */ +#endif /* MALI_MIPE_ENABLED */ KBASE_FUNC_HWCNT_READER_SETUP = (UK_FUNC_ID + 36), @@ -479,6 +515,10 @@ enum kbase_uk_function_id { KBASE_FUNC_SET_PRFCNT_VALUES = (UK_FUNC_ID + 37), #endif + KBASE_FUNC_SOFT_EVENT_UPDATE = (UK_FUNC_ID + 38), + + KBASE_FUNC_MEM_JIT_INIT = (UK_FUNC_ID + 39), + KBASE_FUNC_MAX }; diff --git a/drivers/gpu/arm/midgard/mali_kbase_vinstr.c b/drivers/gpu/arm/midgard/mali_kbase_vinstr.c index d3d27e2958d7..371122f188b2 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_vinstr.c +++ b/drivers/gpu/arm/midgard/mali_kbase_vinstr.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -30,6 +30,7 @@ #include #include #include +#include /*****************************************************************************/ @@ -309,6 +310,8 @@ static void kbasep_vinstr_unmap_kernel_dump_buffer( */ static int kbasep_vinstr_create_kctx(struct kbase_vinstr_context *vinstr_ctx) { + struct kbase_device *kbdev = vinstr_ctx->kbdev; + struct kbasep_kctx_list_element *element; int err; vinstr_ctx->kctx = kbase_create_context(vinstr_ctx->kbdev, true); @@ -324,10 +327,39 @@ static int kbasep_vinstr_create_kctx(struct kbase_vinstr_context *vinstr_ctx) return err; } + /* Add kernel context to list of contexts associated with device. */ + element = kzalloc(sizeof(*element), GFP_KERNEL); + if (element) { + element->kctx = vinstr_ctx->kctx; + mutex_lock(&kbdev->kctx_list_lock); + list_add(&element->link, &kbdev->kctx_list); + + /* Inform timeline client about new context. + * Do this while holding the lock to avoid tracepoint + * being created in both body and summary stream. */ + kbase_tlstream_tl_new_ctx( + vinstr_ctx->kctx, + (u32)(vinstr_ctx->kctx->id), + (u32)(vinstr_ctx->kctx->tgid)); + + mutex_unlock(&kbdev->kctx_list_lock); + } else { + /* Don't treat this as a fail - just warn about it. 
*/ + dev_warn(kbdev->dev, + "couldn't add kctx to kctx_list\n"); + } + err = enable_hwcnt(vinstr_ctx); if (err) { kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx); kbase_destroy_context(vinstr_ctx->kctx); + if (element) { + mutex_lock(&kbdev->kctx_list_lock); + list_del(&element->link); + kfree(element); + mutex_unlock(&kbdev->kctx_list_lock); + } + kbase_tlstream_tl_del_ctx(vinstr_ctx->kctx); vinstr_ctx->kctx = NULL; return err; } @@ -340,6 +372,13 @@ static int kbasep_vinstr_create_kctx(struct kbase_vinstr_context *vinstr_ctx) disable_hwcnt(vinstr_ctx); kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx); kbase_destroy_context(vinstr_ctx->kctx); + if (element) { + mutex_lock(&kbdev->kctx_list_lock); + list_del(&element->link); + kfree(element); + mutex_unlock(&kbdev->kctx_list_lock); + } + kbase_tlstream_tl_del_ctx(vinstr_ctx->kctx); vinstr_ctx->kctx = NULL; return -EFAULT; } @@ -353,11 +392,34 @@ static int kbasep_vinstr_create_kctx(struct kbase_vinstr_context *vinstr_ctx) */ static void kbasep_vinstr_destroy_kctx(struct kbase_vinstr_context *vinstr_ctx) { + struct kbase_device *kbdev = vinstr_ctx->kbdev; + struct kbasep_kctx_list_element *element; + struct kbasep_kctx_list_element *tmp; + bool found = false; + /* Release hw counters dumping resources. */ vinstr_ctx->thread = NULL; disable_hwcnt(vinstr_ctx); kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx); kbase_destroy_context(vinstr_ctx->kctx); + + /* Remove kernel context from the device's contexts list. */ + mutex_lock(&kbdev->kctx_list_lock); + list_for_each_entry_safe(element, tmp, &kbdev->kctx_list, link) { + if (element->kctx == vinstr_ctx->kctx) { + list_del(&element->link); + kfree(element); + found = true; + } + } + mutex_unlock(&kbdev->kctx_list_lock); + + if (!found) + dev_warn(kbdev->dev, "kctx not in kctx_list\n"); + + /* Inform timeline client about context destruction. */ + kbase_tlstream_tl_del_ctx(vinstr_ctx->kctx); + vinstr_ctx->kctx = NULL; } diff --git a/drivers/gpu/arm/midgard/mali_linux_trace.h b/drivers/gpu/arm/midgard/mali_linux_trace.h index fc3cf32ba4d2..93fc5eaf273b 100644 --- a/drivers/gpu/arm/midgard/mali_linux_trace.h +++ b/drivers/gpu/arm/midgard/mali_linux_trace.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -15,19 +15,15 @@ - - #if !defined(_TRACE_MALI_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_MALI_H -#include -#include - #undef TRACE_SYSTEM #define TRACE_SYSTEM mali -#define TRACE_SYSTEM_STRING __stringify(TRACE_SYSTEM) #define TRACE_INCLUDE_FILE mali_linux_trace +#include + #define MALI_JOB_SLOTS_EVENT_CHANGED /** diff --git a/drivers/gpu/arm/midgard/mali_midg_regmap.h b/drivers/gpu/arm/midgard/mali_midg_regmap.h index 778104b57fa9..6b1d67d63720 100644 --- a/drivers/gpu/arm/midgard/mali_midg_regmap.h +++ b/drivers/gpu/arm/midgard/mali_midg_regmap.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. 
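Note: the unlink-from-kctx_list plus kbase_tlstream_tl_del_ctx() cleanup above now appears in three places (both error paths of kbasep_vinstr_create_kctx() and in kbasep_vinstr_destroy_kctx()). A hypothetical helper of this shape could factor it out; it is not part of the patch:

    static void kbasep_vinstr_unlink_kctx(struct kbase_device *kbdev,
            struct kbase_context *kctx)
    {
        struct kbasep_kctx_list_element *element;
        struct kbasep_kctx_list_element *tmp;

        mutex_lock(&kbdev->kctx_list_lock);
        list_for_each_entry_safe(element, tmp, &kbdev->kctx_list, link) {
            if (element->kctx == kctx) {
                list_del(&element->link);
                kfree(element);
            }
        }
        mutex_unlock(&kbdev->kctx_list_lock);

        /* Tell timeline clients the kernel context is gone. */
        kbase_tlstream_tl_del_ctx(kctx);
    }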
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -58,7 +58,7 @@ #define GPU_COMMAND 0x030 /* (WO) */ #define GPU_STATUS 0x034 /* (RO) */ - +#define LATEST_FLUSH 0x038 /* (RO) */ #define GROUPS_L2_COHERENT (1 << 0) /* Cores groups are l2 coherent */ @@ -169,6 +169,8 @@ #define L2_PWRACTIVE_LO 0x260 /* (RO) Level 2 cache active bitmap, low word */ #define L2_PWRACTIVE_HI 0x264 /* (RO) Level 2 cache active bitmap, high word */ +#define COHERENCY_FEATURES 0x300 /* (RO) Coherency features present */ +#define COHERENCY_ENABLE 0x304 /* (RW) Coherency enable */ #define JM_CONFIG 0xF00 /* (RW) Job Manager configuration register (Implementation specific register) */ #define SHADER_CONFIG 0xF04 /* (RW) Shader core configuration settings (Implementation specific register) */ @@ -225,6 +227,7 @@ #define JS_COMMAND_NEXT 0x60 /* (RW) Next command register for job slot n */ +#define JS_FLUSH_ID_NEXT 0x70 /* (RW) Next job slot n cache flush ID */ #define MEMORY_MANAGEMENT_BASE 0x2000 #define MMU_REG(r) (MEMORY_MANAGEMENT_BASE + (r)) @@ -266,6 +269,14 @@ #define AS_STATUS 0x28 /* (RO) Status flags for address space n */ +/* (RW) Translation table configuration for address space n, low word */ +#define AS_TRANSCFG_LO 0x30 +/* (RW) Translation table configuration for address space n, high word */ +#define AS_TRANSCFG_HI 0x34 +/* (RO) Secondary fault address for address space n, low word */ +#define AS_FAULTEXTRA_LO 0x38 +/* (RO) Secondary fault address for address space n, high word */ +#define AS_FAULTEXTRA_HI 0x3C /* End Register Offsets */ @@ -293,6 +304,11 @@ #define AS_TRANSTAB_LPAE_ADRMODE_MASK 0x00000003 +/* + * Begin AARCH64 MMU TRANSTAB register values + */ +#define MMU_HW_OUTA_BITS 40 +#define AS_TRANSTAB_BASE_MASK ((1ULL << MMU_HW_OUTA_BITS) - (1ULL << 4)) /* * Begin MMU STATUS register values @@ -305,12 +321,38 @@ #define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT (0x2<<3) #define AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG (0x3<<3) +#define AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT (0x4<<3) +#define AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT (0x5<<3) #define AS_FAULTSTATUS_ACCESS_TYPE_MASK (0x3<<8) +#define AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC (0x0<<8) #define AS_FAULTSTATUS_ACCESS_TYPE_EX (0x1<<8) #define AS_FAULTSTATUS_ACCESS_TYPE_READ (0x2<<8) #define AS_FAULTSTATUS_ACCESS_TYPE_WRITE (0x3<<8) +/* + * Begin MMU TRANSCFG register values + */ + +#define AS_TRANSCFG_ADRMODE_LEGACY 0 +#define AS_TRANSCFG_ADRMODE_UNMAPPED 1 +#define AS_TRANSCFG_ADRMODE_IDENTITY 2 +#define AS_TRANSCFG_ADRMODE_AARCH64_4K 6 +#define AS_TRANSCFG_ADRMODE_AARCH64_64K 8 + +#define AS_TRANSCFG_ADRMODE_MASK 0xF + + +/* + * Begin TRANSCFG register values + */ +#define AS_TRANSCFG_PTW_MEMATTR_MASK (3 << 24) +#define AS_TRANSCFG_PTW_MEMATTR_NON_CACHEABLE (1 << 24) +#define AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK (2 << 24) + +#define AS_TRANSCFG_PTW_SH_MASK ((3 << 28)) +#define AS_TRANSCFG_PTW_SH_OS (2 << 28) +#define AS_TRANSCFG_PTW_SH_IS (3 << 28) /* * Begin Command Values @@ -348,6 +390,8 @@ #define JS_CONFIG_END_FLUSH_NO_ACTION JS_CONFIG_START_FLUSH_NO_ACTION #define JS_CONFIG_END_FLUSH_CLEAN (1u << 12) #define JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE (3u << 12) +#define JS_CONFIG_ENABLE_FLUSH_REDUCTION (1u << 14) +#define JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK (1u << 15) #define JS_CONFIG_THREAD_PRI(n) ((n) << 16) /* JS_STATUS register values */ @@ -400,19 +444,35 @@ #define GPU_COMMAND_CYCLE_COUNT_STOP 0x06 /* 
Stops the cycle counter, and system timestamp propagation */ #define GPU_COMMAND_CLEAN_CACHES 0x07 /* Clean all caches */ #define GPU_COMMAND_CLEAN_INV_CACHES 0x08 /* Clean and invalidate all caches */ +#define GPU_COMMAND_SET_PROTECTED_MODE 0x09 /* Places the GPU in protected mode */ /* End Command Values */ /* GPU_STATUS values */ #define GPU_STATUS_PRFCNT_ACTIVE (1 << 2) /* Set if the performance counters are active. */ +#define GPU_STATUS_PROTECTED_MODE_ACTIVE (1 << 7) /* Set if protected mode is active */ /* PRFCNT_CONFIG register values */ -#define PRFCNT_CONFIG_AS_SHIFT 4 /* address space bitmap starts from bit 4 of the register */ +#define PRFCNT_CONFIG_MODE_SHIFT 0 /* Counter mode position. */ +#define PRFCNT_CONFIG_AS_SHIFT 4 /* Address space bitmap position. */ +#define PRFCNT_CONFIG_SETSELECT_SHIFT 8 /* Set select position. */ + #define PRFCNT_CONFIG_MODE_OFF 0 /* The performance counters are disabled. */ #define PRFCNT_CONFIG_MODE_MANUAL 1 /* The performance counters are enabled, but are only written out when a PRFCNT_SAMPLE command is issued using the GPU_COMMAND register. */ #define PRFCNT_CONFIG_MODE_TILE 2 /* The performance counters are enabled, and are written out each time a tile finishes rendering. */ /* AS_MEMATTR values: */ +/* Use GPU implementation-defined caching policy. */ +#define AS_MEMATTR_IMPL_DEF_CACHE_POLICY 0x88ull +/* The attribute set to force all resources to be cached. */ +#define AS_MEMATTR_FORCE_TO_CACHE_ALL 0x8Full +/* Inner write-alloc cache setup, no outer caching */ +#define AS_MEMATTR_WRITE_ALLOC 0x8Dull + +/* Set to implementation defined, outer caching */ +#define AS_MEMATTR_AARCH64_OUTER_IMPL_DEF 0x88ull +/* Set to write back memory, outer caching */ +#define AS_MEMATTR_AARCH64_OUTER_WA 0x8Dull /* Use GPU implementation-defined caching policy. */ #define AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY 0x48ull @@ -457,6 +517,8 @@ /* End JS_FEATURES register */ /* L2_MMU_CONFIG register */ +#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT (23) +#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY (0x1 << L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT) #define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT (24) #define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT) #define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_OCTANT (0x1 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT) diff --git a/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h index 6384586371d0..eb957d3b5731 100644 --- a/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h +++ b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -24,7 +24,7 @@ * Attached value: number in kHz * Default value: NA */ -#define GPU_FREQ_KHZ_MAX (5000) +#define GPU_FREQ_KHZ_MAX kbase_get_platform_max_freq() /** * Minimum frequency GPU will be clocked at. Given in kHz. * This must be specified as there is no default value. 
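Note: as a worked example of the new AArch64 MMU definitions above, a page-table-walk configuration can be composed from the AS_TRANSCFG_* values; how the 64-bit result is split across AS_TRANSCFG_LO/HI for a particular address space is left out, since the per-AS register helpers are not shown in this patch:

    /* Sketch: AArch64 walks with a 4 KiB granule, write-back cacheable
     * page-table walks, inner-shareable. */
    static u64 make_aarch64_transcfg(void)
    {
        u64 transcfg = 0;

        transcfg |= AS_TRANSCFG_ADRMODE_AARCH64_4K;
        transcfg |= AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK;
        transcfg |= AS_TRANSCFG_PTW_SH_IS;

        return transcfg;
    }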
@@ -32,16 +32,7 @@ * Attached value: number in kHz * Default value: NA */ -#define GPU_FREQ_KHZ_MIN (5000) - -/** - * Values used for determining the GPU frequency based on the LogicTile type - * Used by the function kbase_get_platform_logic_tile_type - */ -#define VE_VIRTEX6_GPU_FREQ_MIN 5000 -#define VE_VIRTEX6_GPU_FREQ_MAX 5000 -#define VE_VIRTEX7_GPU_FREQ_MIN 40000 -#define VE_VIRTEX7_GPU_FREQ_MAX 40000 +#define GPU_FREQ_KHZ_MIN kbase_get_platform_min_freq() /** * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock diff --git a/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.c b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.c index 9bc51f1e2da8..4665f98cbbe4 100644 --- a/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.c +++ b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -52,6 +52,18 @@ #define IS_SINGLE_BIT_SET(val, pos) (val&(1<
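Note: with GPU_FREQ_KHZ_MIN/MAX now expanding to the runtime helpers above, platform code reads the frequency range at probe time instead of baking it in; the removed VE_VIRTEX* constants indicate the helpers derive it from the detected LogicTile type. A sketch (the helper return types and the logging call are assumptions, not part of the patch):

    static void report_gpu_freq_range(struct kbase_device *kbdev)
    {
        u32 min_khz = GPU_FREQ_KHZ_MIN; /* kbase_get_platform_min_freq() */
        u32 max_khz = GPU_FREQ_KHZ_MAX; /* kbase_get_platform_max_freq() */

        dev_info(kbdev->dev, "GPU clock range: %u-%u kHz\n",
                min_khz, max_khz);
    }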