From: chenzhen Date: Mon, 17 Oct 2016 11:38:36 +0000 (+0800) Subject: MALI: rockchip: upgrade midgard DDK to r14p0-01rel0 X-Git-Tag: firefly_0821_release~597 X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=3054c50dd886faa349ca38c9ad8c2162f9706d5a;p=firefly-linux-kernel-4.4.55.git MALI: rockchip: upgrade midgard DDK to r14p0-01rel0 Along with a slight modification in mali_kbase_core_linux.c, for building in rk Linux 4.4: -#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE +#if KERNEL_VERSION(4, 4, 0) > LINUX_VERSION_CODE Change-Id: I34565cb975866b46c5e3a4d8e2ac5e350dcceb80 Signed-off-by: chenzhen --- diff --git a/drivers/gpu/arm/midgard/Kbuild b/drivers/gpu/arm/midgard/Kbuild index 8a47cddf7936..4348704e7640 100644 --- a/drivers/gpu/arm/midgard/Kbuild +++ b/drivers/gpu/arm/midgard/Kbuild @@ -15,7 +15,7 @@ # Driver version string which is returned to userspace via an ioctl -MALI_RELEASE_NAME ?= "r13p0-00rel0" +MALI_RELEASE_NAME ?= "r14p0-01rel0" # Paths required for build KBASE_PATH = $(src) @@ -113,7 +113,8 @@ SRC := \ mali_kbase_mem_pool_debugfs.c \ mali_kbase_tlstream.c \ mali_kbase_strings.c \ - mali_kbase_as_fault_debugfs.c + mali_kbase_as_fault_debugfs.c \ + mali_kbase_regs_history_debugfs.c ifeq ($(MALI_UNIT_TEST),1) SRC += mali_kbase_tlstream_test.c diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c index c6862539c8dd..fef9a2cb743e 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c @@ -21,6 +21,8 @@ void kbase_cache_set_coherency_mode(struct kbase_device *kbdev, u32 mode) { + kbdev->current_gpu_coherency_mode = mode; + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG)) kbase_reg_write(kbdev, COHERENCY_ENABLE, mode, NULL); } diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c 
b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c index ad05fe5bea8d..2306c757735c 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c @@ -19,6 +19,7 @@ #include +#include #include #include #ifdef CONFIG_DEVFREQ_THERMAL @@ -108,6 +109,8 @@ kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags) kbdev->current_voltage = voltage; kbdev->current_freq = freq; + kbase_tlstream_aux_devfreq_target((u64)freq); + kbase_pm_reset_dvfs_utilisation(kbdev); return err; diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c index b9238a305177..dcdf15cdc3e8 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c @@ -26,16 +26,145 @@ #include #if !defined(CONFIG_MALI_NO_MALI) + + +#ifdef CONFIG_DEBUG_FS + + +int kbase_io_history_resize(struct kbase_io_history *h, u16 new_size) +{ + struct kbase_io_access *old_buf; + struct kbase_io_access *new_buf; + unsigned long flags; + + if (!new_size) + goto out_err; /* The new size must not be 0 */ + + new_buf = vmalloc(new_size * sizeof(*h->buf)); + if (!new_buf) + goto out_err; + + spin_lock_irqsave(&h->lock, flags); + + old_buf = h->buf; + + /* Note: we won't bother with copying the old data over. The dumping + * logic wouldn't work properly as it relies on 'count' both as a + * counter and as an index to the buffer which would have changed with + * the new array. This is a corner case that we don't need to support. 
+ */ + h->count = 0; + h->size = new_size; + h->buf = new_buf; + + spin_unlock_irqrestore(&h->lock, flags); + + vfree(old_buf); + + return 0; + +out_err: + return -1; +} + + +int kbase_io_history_init(struct kbase_io_history *h, u16 n) +{ + h->enabled = false; + spin_lock_init(&h->lock); + h->count = 0; + h->size = 0; + h->buf = NULL; + if (kbase_io_history_resize(h, n)) + return -1; + + return 0; +} + + +void kbase_io_history_term(struct kbase_io_history *h) +{ + vfree(h->buf); + h->buf = NULL; +} + + +/* kbase_io_history_add - add new entry to the register access history + * + * @h: Pointer to the history data structure + * @addr: Register address + * @value: The value that is either read from or written to the register + * @write: 1 if it's a register write, 0 if it's a read + */ +static void kbase_io_history_add(struct kbase_io_history *h, + void __iomem const *addr, u32 value, u8 write) +{ + struct kbase_io_access *io; + unsigned long flags; + + spin_lock_irqsave(&h->lock, flags); + + io = &h->buf[h->count % h->size]; + io->addr = (uintptr_t)addr | write; + io->value = value; + ++h->count; + /* If count overflows, move the index by the buffer size so the entire + * buffer will still be dumped later */ + if (unlikely(!h->count)) + h->count = h->size; + + spin_unlock_irqrestore(&h->lock, flags); +} + + +void kbase_io_history_dump(struct kbase_device *kbdev) +{ + struct kbase_io_history *const h = &kbdev->io_history; + u16 i; + size_t iters; + unsigned long flags; + + if (!unlikely(h->enabled)) + return; + + spin_lock_irqsave(&h->lock, flags); + + dev_err(kbdev->dev, "Register IO History:"); + iters = (h->size > h->count) ? h->count : h->size; + dev_err(kbdev->dev, "Last %zu register accesses of %zu total:\n", iters, + h->count); + for (i = 0; i < iters; ++i) { + struct kbase_io_access *io = + &h->buf[(h->count - iters + i) % h->size]; + char const access = (io->addr & 1) ? 
'w' : 'r'; + + dev_err(kbdev->dev, "%6i: %c: reg 0x%p val %08x\n", i, access, + (void *)(io->addr & ~0x1), io->value); + } + + spin_unlock_irqrestore(&h->lock, flags); +} + + +#endif /* CONFIG_DEBUG_FS */ + + void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value, struct kbase_context *kctx) { KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered); KBASE_DEBUG_ASSERT(kctx == NULL || kctx->as_nr != KBASEP_AS_NR_INVALID); KBASE_DEBUG_ASSERT(kbdev->dev != NULL); - dev_dbg(kbdev->dev, "w: reg %04x val %08x", offset, value); writel(value, kbdev->reg + offset); +#ifdef CONFIG_DEBUG_FS + if (unlikely(kbdev->io_history.enabled)) + kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset, + value, 1); +#endif /* CONFIG_DEBUG_FS */ + dev_dbg(kbdev->dev, "w: reg %04x val %08x", offset, value); + if (kctx && kctx->jctx.tb) kbase_device_trace_register_access(kctx, REG_WRITE, offset, value); @@ -53,7 +182,13 @@ u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset, val = readl(kbdev->reg + offset); +#ifdef CONFIG_DEBUG_FS + if (unlikely(kbdev->io_history.enabled)) + kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset, + val, 0); +#endif /* CONFIG_DEBUG_FS */ dev_dbg(kbdev->dev, "r: reg %04x val %08x", offset, val); + if (kctx && kctx->jctx.tb) kbase_device_trace_register_access(kctx, REG_READ, offset, val); return val; diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c index 3f06a10f7fed..7ad309e8d7f4 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c @@ -45,11 +45,11 @@ static void kbasep_instr_hwcnt_cacheclean(struct kbase_device *kbdev) KBASE_INSTR_STATE_REQUEST_CLEAN); /* Enable interrupt */ - spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags); irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), 
NULL); kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask | CLEAN_CACHES_COMPLETED, NULL); - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); /* clean&invalidate the caches so we're sure the mmu tables for the dump * buffer is valid */ @@ -96,11 +96,11 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, } /* Enable interrupt */ - spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags); irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL); kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask | PRFCNT_SAMPLE_COMPLETED, NULL); - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); /* In use, this context is the owner */ kbdev->hwcnt.kctx = kctx; @@ -185,7 +185,9 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, dev_dbg(kbdev->dev, "HW counters dumping set-up for context %p", kctx); return err; out_unrequest_cores: + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbase_pm_unrequest_cores(kbdev, true, shader_cores_needed); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); out_err: return err; } @@ -226,11 +228,10 @@ int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx) kbdev->hwcnt.backend.triggered = 0; /* Disable interrupt */ - spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags); irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL); kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask & ~PRFCNT_SAMPLE_COMPLETED, NULL); - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags); /* Disable the counters */ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0, kctx); @@ -243,10 +244,11 @@ int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx) kbase_pm_unrequest_cores(kbdev, 
true, kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER)); - spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); - kbase_pm_release_l2_caches(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p", kctx); @@ -391,12 +393,12 @@ void kbase_clean_caches_done(struct kbase_device *kbdev) spin_lock_irqsave(&kbdev->hwcnt.lock, flags); /* Disable interrupt */ - spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags); irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL); kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask & ~CLEAN_CACHES_COMPLETED, NULL); - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); /* Wakeup... */ if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) { diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c index b891b12a3299..8416b80e8b77 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c @@ -148,6 +148,8 @@ static irqreturn_t kbase_gpu_irq_handler(int irq, void *data) return IRQ_HANDLED; } +KBASE_EXPORT_TEST_API(kbase_gpu_irq_handler); + static irq_handler_t kbase_handler_table[] = { [JOB_IRQ_TAG] = kbase_job_irq_handler, [MMU_IRQ_TAG] = kbase_mmu_irq_handler, diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c index f2167887229b..202dcfa384a2 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -52,8 +52,7 @@ static void assign_and_activate_kctx_addr_space(struct kbase_device *kbdev, lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex); lockdep_assert_held(&js_devdata->runpool_mutex); - lockdep_assert_held(&current_as->transaction_mutex); - lockdep_assert_held(&js_devdata->runpool_irq.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr]; @@ -142,8 +141,7 @@ void kbase_backend_release_ctx_irq(struct kbase_device *kbdev, return; } - lockdep_assert_held(&kbdev->as[as_nr].transaction_mutex); - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); js_per_as_data = &kbdev->js_data.runpool_irq.per_as_data[kctx->as_nr]; if (js_per_as_data->as_busy_refcount != 0) { @@ -219,8 +217,7 @@ static bool check_is_runpool_full(struct kbase_device *kbdev, is_runpool_full = (bool) (js_devdata->nr_all_contexts_running >= kbdev->nr_hw_address_spaces); - if (kctx != NULL && (kctx->jctx.sched_info.ctx.flags & - KBASE_CTX_FLAG_SUBMIT_DISABLED) == 0) { + if (kctx && !kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) { lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex); /* Contexts that submit might use less of the address spaces * available, due to HW workarounds. 
In which case, the runpool @@ -267,7 +264,7 @@ int kbase_backend_find_free_address_space(struct kbase_device *kbdev, return i; } - spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); /* No address space currently free, see if we can release one */ for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { @@ -281,16 +278,14 @@ int kbase_backend_find_free_address_space(struct kbase_device *kbdev, /* Don't release privileged or active contexts, or contexts with * jobs running */ - if (as_kctx && !(as_kctx->jctx.sched_info.ctx.flags & - KBASE_CTX_FLAG_PRIVILEGED) && + if (as_kctx && !kbase_ctx_flag(as_kctx, KCTX_PRIVILEGED) && js_per_as_data->as_busy_refcount == 0) { if (!kbasep_js_runpool_retain_ctx_nolock(kbdev, as_kctx)) { WARN(1, "Failed to retain active context\n"); - spin_unlock_irqrestore( - &js_devdata->runpool_irq.lock, - flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, + flags); mutex_unlock(&js_devdata->runpool_mutex); mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); @@ -303,8 +298,7 @@ int kbase_backend_find_free_address_space(struct kbase_device *kbdev, * context we're about to release without violating lock * ordering */ - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, - flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); mutex_unlock(&js_devdata->runpool_mutex); mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); @@ -315,7 +309,7 @@ int kbase_backend_find_free_address_space(struct kbase_device *kbdev, kbasep_js_runpool_release_ctx_nolock(kbdev, as_kctx); - if (!as_js_kctx_info->ctx.is_scheduled) { + if (!kbase_ctx_flag(as_kctx, KCTX_SCHEDULED)) { kbasep_js_runpool_requeue_or_kill_ctx(kbdev, as_kctx, true); @@ -336,11 +330,11 @@ int kbase_backend_find_free_address_space(struct kbase_device *kbdev, mutex_lock(&js_kctx_info->ctx.jsctx_mutex); mutex_lock(&js_devdata->runpool_mutex); - spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); } 
} - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); mutex_unlock(&js_devdata->runpool_mutex); mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); @@ -369,12 +363,11 @@ bool kbase_backend_use_ctx(struct kbase_device *kbdev, new_address_space = &kbdev->as[as_nr]; lockdep_assert_held(&js_devdata->runpool_mutex); - lockdep_assert_held(&new_address_space->transaction_mutex); - lockdep_assert_held(&js_devdata->runpool_irq.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); assign_and_activate_kctx_addr_space(kbdev, kctx, new_address_space); - if ((js_kctx_info->ctx.flags & KBASE_CTX_FLAG_PRIVILEGED) != 0) { + if (kbase_ctx_flag(kctx, KCTX_PRIVILEGED)) { /* We need to retain it to keep the corresponding address space */ kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx); diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h index 83d477898c5e..08a7400e66d5 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h @@ -73,8 +73,7 @@ struct slot_rb { * @reset_timer: Timeout for soft-stops before the reset * @timeouts_updated: Have timeout values just been updated? 
* - * The kbasep_js_device_data::runpool_irq::lock (a spinlock) must be held when - * accessing this structure + * The hwaccess_lock (a spinlock) must be held when accessing this structure */ struct kbase_backend_data { struct slot_rb slot_rb[BASE_JM_MAX_NR_SLOTS]; diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c index 00900a99a898..668258b4f303 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c @@ -221,19 +221,40 @@ static void kbasep_job_slot_update_head_start_timestamp( /** * kbasep_trace_tl_nret_atom_lpu - Call nret_atom_lpu timeline tracepoint * @kbdev: kbase device - * @i: job slot + * @js: job slot * * Get kbase atom by calling kbase_gpu_inspect for given job slot. * Then use obtained katom and name of slot associated with the given * job slot number in tracepoint call to the instrumentation module * informing that given atom is no longer executed on given lpu (job slot). */ -static void kbasep_trace_tl_nret_atom_lpu(struct kbase_device *kbdev, int i) +static void kbasep_trace_tl_nret_atom_lpu(struct kbase_device *kbdev, int js) { - struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, i, 0); + int i; + for (i = 0; + i < kbase_backend_nr_atoms_submitted(kbdev, js); + i++) { + struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i); + + kbase_tlstream_tl_nret_atom_lpu(katom, + &kbdev->gpu_props.props.raw_props.js_features[js]); + } +} - kbase_tlstream_tl_nret_atom_lpu(katom, - &kbdev->gpu_props.props.raw_props.js_features[i]); +/** + * kbasep_trace_tl_event_lpu_softstop - Call event_lpu_softstop timeline + * tracepoint + * @kbdev: kbase device + * @js: job slot + * + * Make a tracepoint call to the instrumentation module informing that + * softstop happened on given lpu (job slot). 
+ */ +static void kbasep_trace_tl_event_lpu_softstop(struct kbase_device *kbdev, + int js) +{ + kbase_tlstream_tl_event_lpu_softstop( + &kbdev->gpu_props.props.raw_props.js_features[js]); } void kbase_job_done(struct kbase_device *kbdev, u32 done) @@ -262,7 +283,7 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) irq_throttle_cycles, NULL); } - spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); while (done) { u32 failed = done >> 16; @@ -297,7 +318,8 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) NULL, 0); #endif - kbase_tlstream_aux_job_softstop(i); + kbasep_trace_tl_event_lpu_softstop( + kbdev, i); kbasep_trace_tl_nret_atom_lpu( kbdev, i); @@ -456,7 +478,7 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) end_timestamp); } - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); #if KBASE_GPU_RESET_EN if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) == KBASE_RESET_GPU_COMMITTED) { @@ -539,7 +561,7 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED; /* Mark the point where we issue the soft-stop command */ - kbase_tlstream_aux_issue_job_softstop(target_katom); + kbase_tlstream_tl_event_atom_softstop_issue(target_katom); if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) { int i; @@ -725,7 +747,7 @@ void kbase_backend_jm_kill_jobs_from_kctx(struct kbase_context *kctx) /* Cancel any remaining running jobs for this kctx */ mutex_lock(&kctx->jctx.lock); - spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); /* Invalidate all jobs in context, to prevent re-submitting */ for (i = 0; i < BASE_JD_ATOM_COUNT; i++) { @@ -737,7 +759,7 @@ void kbase_backend_jm_kill_jobs_from_kctx(struct kbase_context *kctx) for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) 
kbase_job_slot_hardstop(kctx, i, NULL); - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); mutex_unlock(&kctx->jctx.lock); } @@ -748,12 +770,13 @@ void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx, int js = target_katom->slot_nr; int priority = target_katom->sched_priority; int i; + bool stop_sent = false; KBASE_DEBUG_ASSERT(kctx != NULL); kbdev = kctx->kbdev; KBASE_DEBUG_ASSERT(kbdev != NULL); - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); for (i = 0; i < kbase_backend_nr_atoms_on_slot(kbdev, js); i++) { struct kbase_jd_atom *katom; @@ -765,8 +788,14 @@ void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx, if (katom->kctx != kctx) continue; - if (katom->sched_priority > priority) + if (katom->sched_priority > priority) { + if (!stop_sent) + kbase_tlstream_tl_attrib_atom_priority_change( + target_katom); + kbase_job_slot_softstop(kbdev, js, katom); + stop_sent = true; + } } } @@ -835,7 +864,7 @@ void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx) * policy queue either */ wait_event(kctx->jctx.zero_jobs_wait, kctx->jctx.job_nr == 0); wait_event(kctx->jctx.sched_info.ctx.is_scheduled_wait, - kctx->jctx.sched_info.ctx.is_scheduled == false); + !kbase_ctx_flag(kctx, KCTX_SCHEDULED)); spin_lock_irqsave(&reset_data.lock, flags); if (reset_data.stage == 1) { @@ -945,7 +974,7 @@ static bool kbasep_check_for_afbc_on_slot(struct kbase_device *kbdev, bool ret = false; int i; - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); /* When we have an atom the decision can be made straight away. */ if (target_katom) @@ -1034,7 +1063,7 @@ void kbase_job_slot_hardstop(struct kbase_context *kctx, int js, #if KBASE_GPU_RESET_EN /* We make the check for AFBC before evicting/stopping atoms. 
Note * that no other thread can modify the slots whilst we have the - * runpool_irq lock. */ + * hwaccess_lock. */ int needs_workaround_for_afbc = kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3542) && kbasep_check_for_afbc_on_slot(kbdev, kctx, js, @@ -1130,6 +1159,8 @@ static void kbase_debug_dump_registers(struct kbase_device *kbdev) { int i; + kbase_io_history_dump(kbdev); + dev_err(kbdev->dev, "Register state:"); dev_err(kbdev->dev, " GPU_IRQ_RAWSTAT=0x%08x GPU_STATUS=0x%08x", kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL), @@ -1162,13 +1193,14 @@ static void kbase_debug_dump_registers(struct kbase_device *kbdev) static void kbasep_reset_timeout_worker(struct work_struct *data) { - unsigned long flags, mmu_flags; + unsigned long flags; struct kbase_device *kbdev; int i; ktime_t end_timestamp = ktime_get(); struct kbasep_js_device_data *js_devdata; bool try_schedule = false; bool silent = false; + u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS; KBASE_DEBUG_ASSERT(data); @@ -1206,17 +1238,19 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) KBASE_DEBUG_ASSERT(kbdev->irq_reset_flush == false); - spin_lock_irqsave(&kbdev->mmu_mask_change, mmu_flags); + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + spin_lock(&kbdev->hwaccess_lock); + spin_lock(&kbdev->mmu_mask_change); /* We're about to flush out the IRQs and their bottom half's */ kbdev->irq_reset_flush = true; /* Disable IRQ to avoid IRQ handlers to kick in after releasing the * spinlock; this also clears any outstanding interrupts */ - spin_lock_irqsave(&kbdev->hwcnt.lock, flags); - kbase_pm_disable_interrupts(kbdev); - spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + kbase_pm_disable_interrupts_nolock(kbdev); - spin_unlock_irqrestore(&kbdev->mmu_mask_change, mmu_flags); + spin_unlock(&kbdev->mmu_mask_change); + spin_unlock(&kbdev->hwaccess_lock); + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); /* Ensure that any IRQ handlers have finished * Must be done without any locks 
IRQ handlers will take */ @@ -1228,6 +1262,16 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) /* The flush has completed so reset the active indicator */ kbdev->irq_reset_flush = false; + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TMIX_8463)) { + /* Ensure that L2 is not transitioning when we send the reset + * command */ + while (--max_loops && kbase_pm_get_trans_cores(kbdev, + KBASE_PM_CORE_L2)) + ; + + WARN(!max_loops, "L2 power transition timed out while trying to reset\n"); + } + mutex_lock(&kbdev->pm.lock); /* We hold the pm lock, so there ought to be a current policy */ KBASE_DEBUG_ASSERT(kbdev->pm.backend.pm_current_policy); @@ -1250,21 +1294,19 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) kbase_pm_init_hw(kbdev, 0); /* Complete any jobs that were still on the GPU */ - spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbase_backend_reset(kbdev, &end_timestamp); kbase_pm_metrics_update(kbdev, NULL); - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); mutex_unlock(&kbdev->pm.lock); mutex_lock(&js_devdata->runpool_mutex); + mutex_lock(&kbdev->mmu_hw_mutex); /* Reprogram the GPU's MMU */ for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { - struct kbase_as *as = &kbdev->as[i]; - - mutex_lock(&as->transaction_mutex); - spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); if (js_devdata->runpool_irq.per_as_data[i].kctx) kbase_mmu_update( @@ -1272,9 +1314,9 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) else kbase_mmu_disable_as(kbdev, i); - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); - mutex_unlock(&as->transaction_mutex); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } + mutex_unlock(&kbdev->mmu_hw_mutex); kbase_pm_enable_interrupts(kbdev); @@ -1382,9 +1424,9 @@ static void 
kbasep_try_reset_gpu_early(struct kbase_device *kbdev) struct kbasep_js_device_data *js_devdata; js_devdata = &kbdev->js_data; - spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbasep_try_reset_gpu_early_locked(kbdev); - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } /** @@ -1429,9 +1471,9 @@ bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev) struct kbasep_js_device_data *js_devdata; js_devdata = &kbdev->js_data; - spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ret = kbase_prepare_to_reset_gpu_locked(kbdev); - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); return ret; } diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h index 8f1e5615ea43..89b1288a1f88 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h @@ -40,7 +40,7 @@ * calling this. * * The following locking conditions are made on the caller: - * - it must hold the kbasep_js_device_data::runpoool_irq::lock + * - it must hold the hwaccess_lock */ void kbase_job_submit_nolock(struct kbase_device *kbdev, struct kbase_jd_atom *katom, int js); @@ -74,7 +74,7 @@ static inline char *kbasep_make_job_slot_string(int js, char *js_string) * calling this. 
* * The following locking conditions are made on the caller: - * - it must hold the kbasep_js_device_data::runpoool_irq::lock + * - it must hold the hwaccess_lock */ void kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, @@ -91,7 +91,7 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, * @target_katom: Atom to stop * * The following locking conditions are made on the caller: - * - it must hold the kbasep_js_device_data::runpool_irq::lock + * - it must hold the hwaccess_lock */ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, int js, diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c index da7c4df7d277..d7b4d3f10592 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -56,7 +57,7 @@ static void kbase_gpu_enqueue_atom(struct kbase_device *kbdev, WARN_ON(SLOT_RB_ENTRIES(rb) >= SLOT_RB_SIZE); - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); rb->entries[rb->write_idx & SLOT_RB_MASK].katom = katom; rb->write_idx++; @@ -88,7 +89,7 @@ static struct kbase_jd_atom *kbase_gpu_dequeue_atom(struct kbase_device *kbdev, return NULL; } - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); katom = rb->entries[rb->read_idx & SLOT_RB_MASK].katom; @@ -108,7 +109,7 @@ struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js, { struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js]; - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); if ((SLOT_RB_ENTRIES(rb) - 1) < idx) return NULL; /* idx out of range */ @@ -146,7 +147,7 @@ static bool kbase_gpu_atoms_submitted(struct kbase_device *kbdev, int js) { int i; - 
lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); for (i = 0; i < SLOT_RB_SIZE; i++) { struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i); @@ -173,7 +174,7 @@ static bool kbase_gpu_atoms_submitted_any(struct kbase_device *kbdev) int js; int i; - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { for (i = 0; i < SLOT_RB_SIZE; i++) { @@ -191,7 +192,7 @@ int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js) int nr = 0; int i; - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); for (i = 0; i < SLOT_RB_SIZE; i++) { struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i); @@ -209,7 +210,7 @@ int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js) int nr = 0; int i; - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); for (i = 0; i < SLOT_RB_SIZE; i++) { if (kbase_gpu_inspect(kbdev, js, i)) @@ -225,7 +226,7 @@ static int kbase_gpu_nr_atoms_on_slot_min(struct kbase_device *kbdev, int js, int nr = 0; int i; - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); for (i = 0; i < SLOT_RB_SIZE; i++) { struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i); @@ -237,6 +238,56 @@ static int kbase_gpu_nr_atoms_on_slot_min(struct kbase_device *kbdev, int js, return nr; } +/** + * check_secure_atom - Check if the given atom is in the given secure state and + * has a ringbuffer state of at least + * KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION + * @katom: Atom pointer + * @secure: Desired secure state + * + * Return: true if atom is in the given state, false otherwise + */ +static bool check_secure_atom(struct kbase_jd_atom *katom, bool secure) +{ + if (katom->gpu_rb_state >= + 
KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION && + ((kbase_jd_katom_is_protected(katom) && secure) || + (!kbase_jd_katom_is_protected(katom) && !secure))) + return true; + + return false; +} + +/** + * kbase_gpu_check_secure_atoms - Check if there are any atoms in the given + * secure state in the ringbuffers of at least + * state + * KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION + * @kbdev: Device pointer + * @secure: Desired secure state + * + * Return: true if any atoms are in the given state, false otherwise + */ +static bool kbase_gpu_check_secure_atoms(struct kbase_device *kbdev, + bool secure) +{ + int js, i; + + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { + for (i = 0; i < SLOT_RB_SIZE; i++) { + struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, + js, i); + + if (katom) { + if (check_secure_atom(katom, secure)) + return true; + } + } + } + + return false; +} + int kbase_backend_slot_free(struct kbase_device *kbdev, int js) { if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) != @@ -439,7 +490,7 @@ static bool kbasep_js_job_check_ref_cores(struct kbase_device *kbdev, recently_chosen_affinity); /* Note: this is where the caller must've taken the - * runpool_irq.lock */ + * hwaccess_lock */ /* Check for affinity violations - if there are any, * then we just ask the caller to requeue and try again @@ -586,15 +637,12 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev, kbase_pm_metrics_update(kbdev, end_timestamp); if (katom->core_req & BASE_JD_REQ_PERMON) - kbase_pm_release_gpu_cycle_counter(kbdev); + kbase_pm_release_gpu_cycle_counter_nolock(kbdev); /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ case KBASE_ATOM_GPU_RB_READY: /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ - case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_ENTRY: - /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ - case KBASE_ATOM_GPU_RB_WAITING_AFFINITY: kbase_js_affinity_release_slot_cores(kbdev, katom->slot_nr, katom->affinity); @@ -603,7 +651,21 @@ 
static void kbase_gpu_release_atom(struct kbase_device *kbdev, case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE: break; - case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_EXIT: + case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION: + if (katom->protected_state.enter != + KBASE_ATOM_ENTER_PROTECTED_CHECK || + katom->protected_state.exit != + KBASE_ATOM_EXIT_PROTECTED_CHECK) + kbdev->protected_mode_transition = false; + + if (kbase_jd_katom_is_protected(katom) && + (katom->protected_state.enter == + KBASE_ATOM_ENTER_PROTECTED_IDLE_L2)) + kbase_vinstr_resume(kbdev->vinstr_ctx); + + /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ + + case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV: /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ case KBASE_ATOM_GPU_RB_WAITING_BLOCKED: @@ -666,11 +728,19 @@ static int kbase_gpu_protected_mode_enter(struct kbase_device *kbdev) { int err = -EINVAL; - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); WARN_ONCE(!kbdev->protected_ops, "Cannot enter protected mode: protected callbacks not specified.\n"); + /* + * When entering into protected mode, we must ensure that the + * GPU is not operating in coherent mode as well. This is to + * ensure that no protected memory can be leaked. 
+ */ + if (kbdev->system_coherency == COHERENCY_ACE) + kbase_cache_set_coherency_mode(kbdev, COHERENCY_ACE_LITE); + if (kbdev->protected_ops) { /* Switch GPU to protected mode */ err = kbdev->protected_ops->protected_mode_enter(kbdev); @@ -687,7 +757,7 @@ static int kbase_gpu_protected_mode_enter(struct kbase_device *kbdev) static int kbase_gpu_protected_mode_reset(struct kbase_device *kbdev) { - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); WARN_ONCE(!kbdev->protected_ops, "Cannot exit protected mode: protected callbacks not specified.\n"); @@ -695,53 +765,146 @@ static int kbase_gpu_protected_mode_reset(struct kbase_device *kbdev) if (!kbdev->protected_ops) return -EINVAL; - kbdev->protected_mode_transition = true; kbase_reset_gpu_silent(kbdev); return 0; } -static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, +static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev, struct kbase_jd_atom **katom, int idx, int js) { int err = 0; - switch (katom[idx]->exit_protected_state) { - case KBASE_ATOM_EXIT_PROTECTED_CHECK: + switch (katom[idx]->protected_state.enter) { + case KBASE_ATOM_ENTER_PROTECTED_CHECK: + /* The checks in KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV + * should ensure that we are not already transitiong, and that + * there are no atoms currently on the GPU. */ + WARN_ON(kbdev->protected_mode_transition); + WARN_ON(kbase_gpu_atoms_submitted_any(kbdev)); + + kbdev->protected_mode_transition = true; + katom[idx]->protected_state.enter = + KBASE_ATOM_ENTER_PROTECTED_VINSTR; + + /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ + + case KBASE_ATOM_ENTER_PROTECTED_VINSTR: + if (kbase_vinstr_try_suspend(kbdev->vinstr_ctx) < 0) { + /* + * We can't switch now because + * the vinstr core state switch + * is not done yet. + */ + return -EAGAIN; + } + + /* Once reaching this point GPU must be + * switched to protected mode or vinstr + * re-enabled. 
*/ + /* - * If the atom ahead of this one hasn't got to being - * submitted yet then bail. + * Not in correct mode, begin protected mode switch. + * Entering protected mode requires us to power down the L2, + * and drop out of fully coherent mode. */ - if (idx == 1 && - (katom[0]->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED && - katom[0]->gpu_rb_state != KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB)) - return -EAGAIN; + katom[idx]->protected_state.enter = + KBASE_ATOM_ENTER_PROTECTED_IDLE_L2; + + kbase_pm_update_cores_state_nolock(kbdev); + + /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ + + case KBASE_ATOM_ENTER_PROTECTED_IDLE_L2: + /* Avoid unnecessary waiting on non-ACE platforms. */ + if (kbdev->current_gpu_coherency_mode == COHERENCY_ACE) { + if (kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2) || + kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_L2)) { + /* + * The L2 is still powered, wait for all the users to + * finish with it before doing the actual reset. + */ + return -EAGAIN; + } + } - /* If we're not exiting protected mode then we're done here. */ - if (!(kbase_gpu_in_protected_mode(kbdev) && - !kbase_jd_katom_is_protected(katom[idx]))) - return 0; + katom[idx]->protected_state.enter = + KBASE_ATOM_ENTER_PROTECTED_FINISHED; + + /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ + + case KBASE_ATOM_ENTER_PROTECTED_FINISHED: + + /* No jobs running, so we can switch GPU mode right now. */ + err = kbase_gpu_protected_mode_enter(kbdev); /* - * If there is a transition in progress, or work still - * on the GPU try again later. + * Regardless of result, we are no longer transitioning + * the GPU. */ - if (kbdev->protected_mode_transition || - kbase_gpu_atoms_submitted_any(kbdev)) - return -EAGAIN; + kbdev->protected_mode_transition = false; + + if (err) { + /* + * Failed to switch into protected mode, resume + * vinstr core and fail atom. 
+ */ + kbase_vinstr_resume(kbdev->vinstr_ctx); + katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID; + kbase_gpu_mark_atom_for_return(kbdev, katom[idx]); + /* Only return if head atom or previous atom + * already removed - as atoms must be returned + * in order. */ + if (idx == 0 || katom[0]->gpu_rb_state == + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { + kbase_gpu_dequeue_atom(kbdev, js, NULL); + kbase_jm_return_atom_to_js(kbdev, katom[idx]); + } + return -EINVAL; + } + + /* Protected mode sanity checks. */ + KBASE_DEBUG_ASSERT_MSG( + kbase_jd_katom_is_protected(katom[idx]) == + kbase_gpu_in_protected_mode(kbdev), + "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)", + kbase_jd_katom_is_protected(katom[idx]), + kbase_gpu_in_protected_mode(kbdev)); + katom[idx]->gpu_rb_state = + KBASE_ATOM_GPU_RB_READY; + } + + return 0; +} + +static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, + struct kbase_jd_atom **katom, int idx, int js) +{ + int err = 0; + + + switch (katom[idx]->protected_state.exit) { + case KBASE_ATOM_EXIT_PROTECTED_CHECK: + /* The checks in KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV + * should ensure that we are not already transitiong, and that + * there are no atoms currently on the GPU. */ + WARN_ON(kbdev->protected_mode_transition); + WARN_ON(kbase_gpu_atoms_submitted_any(kbdev)); /* * Exiting protected mode requires a reset, but first the L2 * needs to be powered down to ensure it's not active when the * reset is issued. 
*/ - katom[idx]->exit_protected_state = + katom[idx]->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_IDLE_L2; - /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ + kbdev->protected_mode_transition = true; + kbase_pm_update_cores_state_nolock(kbdev); + /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ case KBASE_ATOM_EXIT_PROTECTED_IDLE_L2: - if (kbase_pm_get_active_cores(kbdev, KBASE_PM_CORE_L2) || + if (kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2) || kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_L2)) { /* * The L2 is still powered, wait for all the users to @@ -749,7 +912,7 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, */ return -EAGAIN; } - katom[idx]->exit_protected_state = + katom[idx]->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_RESET; /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ @@ -757,7 +920,10 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, case KBASE_ATOM_EXIT_PROTECTED_RESET: /* Issue the reset to the GPU */ err = kbase_gpu_protected_mode_reset(kbdev); + if (err) { + kbdev->protected_mode_transition = false; + /* Failed to exit protected mode, fail atom */ katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID; kbase_gpu_mark_atom_for_return(kbdev, katom[idx]); @@ -775,7 +941,7 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, return -EINVAL; } - katom[idx]->exit_protected_state = + katom[idx]->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT; /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ @@ -784,6 +950,9 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, if (kbase_reset_gpu_active(kbdev)) return -EAGAIN; + kbdev->protected_mode_transition = false; + kbdev->protected_mode = false; + /* protected mode sanity checks */ KBASE_DEBUG_ASSERT_MSG( kbase_jd_katom_is_protected(katom[idx]) == kbase_gpu_in_protected_mode(kbdev), @@ -798,11 +967,11 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, return 0; } -void 
kbase_gpu_slot_update(struct kbase_device *kbdev) +void kbase_backend_slot_update(struct kbase_device *kbdev) { int js; - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { struct kbase_jd_atom *katom[2]; @@ -831,11 +1000,26 @@ void kbase_gpu_slot_update(struct kbase_device *kbdev) break; katom[idx]->gpu_rb_state = - KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_EXIT; + KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV; /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ - case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_EXIT: + case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV: + if (kbase_gpu_check_secure_atoms(kbdev, + !kbase_jd_katom_is_protected( + katom[idx]))) + break; + + if (kbdev->protected_mode_transition) + break; + + katom[idx]->gpu_rb_state = + KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION; + + /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ + + case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION: + /* * Exiting protected mode must be done before * the references on the cores are taken as @@ -843,10 +1027,26 @@ void kbase_gpu_slot_update(struct kbase_device *kbdev) * can't happen after the references for this * atom are taken. */ - ret = kbase_jm_exit_protected_mode(kbdev, - katom, idx, js); - if (ret) - break; + + if (!kbase_gpu_in_protected_mode(kbdev) && + kbase_jd_katom_is_protected(katom[idx])) { + /* Atom needs to transition into protected mode. */ + ret = kbase_jm_enter_protected_mode(kbdev, + katom, idx, js); + if (ret) + break; + } else if (kbase_gpu_in_protected_mode(kbdev) && + !kbase_jd_katom_is_protected(katom[idx])) { + /* Atom needs to transition out of protected mode. */ + ret = kbase_jm_exit_protected_mode(kbdev, + katom, idx, js); + if (ret) + break; + } + katom[idx]->protected_state.exit = + KBASE_ATOM_EXIT_PROTECTED_CHECK; + + /* Atom needs no protected mode transition. 
*/ katom[idx]->gpu_rb_state = KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE; @@ -872,7 +1072,6 @@ void kbase_gpu_slot_update(struct kbase_device *kbdev) break; } - cores_ready = kbasep_js_job_check_ref_cores(kbdev, js, katom[idx]); @@ -898,81 +1097,6 @@ void kbase_gpu_slot_update(struct kbase_device *kbdev) if (!kbase_gpu_rmu_workaround(kbdev, js)) break; - katom[idx]->gpu_rb_state = - KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_ENTRY; - - /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ - - case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_ENTRY: - - /* Only submit if head atom or previous atom - * already submitted */ - if (idx == 1 && - (katom[0]->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED && - katom[0]->gpu_rb_state != KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB)) - break; - - /* - * If the GPU is transitioning protected mode - * then bail now and we'll be called when the - * new state has settled. - */ - if (kbdev->protected_mode_transition) - break; - - if (!kbase_gpu_in_protected_mode(kbdev) && kbase_jd_katom_is_protected(katom[idx])) { - int err = 0; - - /* Not in correct mode, take action */ - if (kbase_gpu_atoms_submitted_any(kbdev)) { - /* - * We are not in the correct - * GPU mode for this job, and - * we can't switch now because - * there are jobs already - * running. - */ - break; - } - if (kbase_vinstr_try_suspend(kbdev->vinstr_ctx) < 0) { - /* - * We can't switch now because - * the vinstr core state switch - * is not done yet. - */ - break; - } - /* Once reaching this point GPU must be - * switched to protected mode or vinstr - * re-enabled. */ - - /* No jobs running, so we can switch GPU mode right now */ - err = kbase_gpu_protected_mode_enter(kbdev); - if (err) { - /* - * Failed to switch into protected mode, resume - * vinstr core and fail atom. 
- */ - kbase_vinstr_resume(kbdev->vinstr_ctx); - katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID; - kbase_gpu_mark_atom_for_return(kbdev, katom[idx]); - /* Only return if head atom or previous atom - * already removed - as atoms must be returned - * in order */ - if (idx == 0 || katom[0]->gpu_rb_state == - KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { - kbase_gpu_dequeue_atom(kbdev, js, NULL); - kbase_jm_return_atom_to_js(kbdev, katom[idx]); - } - break; - } - } - - /* Protected mode sanity checks */ - KBASE_DEBUG_ASSERT_MSG( - kbase_jd_katom_is_protected(katom[idx]) == kbase_gpu_in_protected_mode(kbdev), - "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)", - kbase_jd_katom_is_protected(katom[idx]), kbase_gpu_in_protected_mode(kbdev)); katom[idx]->gpu_rb_state = KBASE_ATOM_GPU_RB_READY; @@ -1037,10 +1161,9 @@ void kbase_gpu_slot_update(struct kbase_device *kbdev) void kbase_backend_run_atom(struct kbase_device *kbdev, struct kbase_jd_atom *katom) { - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); - + lockdep_assert_held(&kbdev->hwaccess_lock); kbase_gpu_enqueue_atom(kbdev, katom); - kbase_gpu_slot_update(kbdev); + kbase_backend_slot_update(kbdev); } bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js) @@ -1048,7 +1171,7 @@ bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js) struct kbase_jd_atom *katom; struct kbase_jd_atom *next_katom; - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); katom = kbase_gpu_inspect(kbdev, js, 0); next_katom = kbase_gpu_inspect(kbdev, js, 1); @@ -1076,7 +1199,7 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0); struct kbase_context *kctx = katom->kctx; - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6787) && completion_code != BASE_JD_EVENT_DONE && @@ -1241,31 +1364,42 @@ 
void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, #endif if (completion_code == BASE_JD_EVENT_STOPPED) - kbase_jm_return_atom_to_js(kbdev, katom); + katom = kbase_jm_return_atom_to_js(kbdev, katom); else - kbase_jm_complete(kbdev, katom, end_timestamp); + katom = kbase_jm_complete(kbdev, katom, end_timestamp); + + if (katom) { + /* Cross-slot dependency has now become runnable. Try to submit + * it. */ + + /* Check if there are lower priority jobs to soft stop */ + kbase_job_slot_ctx_priority_check_locked(kctx, katom); + + kbase_jm_try_kick(kbdev, 1 << katom->slot_nr); + } /* Job completion may have unblocked other atoms. Try to update all job * slots */ - kbase_gpu_slot_update(kbdev); + kbase_backend_slot_update(kbdev); } void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp) { int js; - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { + int atom_idx = 0; int idx; - for (idx = 0; idx < 2; idx++) { + for (idx = 0; idx < SLOT_RB_SIZE; idx++) { struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, - js, 0); + js, atom_idx); bool keep_in_jm_rb = false; if (!katom) - continue; + break; if (katom->gpu_rb_state < KBASE_ATOM_GPU_RB_SUBMITTED) keep_in_jm_rb = true; @@ -1279,7 +1413,12 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp) */ if (keep_in_jm_rb) { katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED; - katom->exit_protected_state = KBASE_ATOM_EXIT_PROTECTED_CHECK; + katom->affinity = 0; + katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK; + /* As the atom was not removed, increment the + * index so that we read the correct atom in the + * next iteration. 
*/ + atom_idx++; continue; } @@ -1292,6 +1431,9 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp) kbase_jm_complete(kbdev, katom, end_timestamp); } } + + kbdev->protected_mode_transition = false; + kbdev->protected_mode = false; } static inline void kbase_gpu_stop_atom(struct kbase_device *kbdev, @@ -1361,7 +1503,7 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, int stop_x_dep_idx0 = -1, stop_x_dep_idx1 = -1; - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); katom_idx0 = kbase_gpu_inspect(kbdev, js, 0); katom_idx1 = kbase_gpu_inspect(kbdev, js, 1); @@ -1548,6 +1690,7 @@ void kbase_gpu_cacheclean(struct kbase_device *kbdev, /* Limit the number of loops to avoid a hang if the interrupt is missed */ u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS; + unsigned long flags; mutex_lock(&kbdev->cacheclean_lock); @@ -1574,8 +1717,10 @@ void kbase_gpu_cacheclean(struct kbase_device *kbdev, mutex_unlock(&kbdev->cacheclean_lock); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbase_pm_unrequest_cores(kbdev, false, katom->need_cache_flush_cores_retained); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } void kbase_backend_complete_wq(struct kbase_device *kbdev, @@ -1620,8 +1765,12 @@ void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev, base_jd_core_req core_req, u64 affinity, enum kbase_atom_coreref_state coreref_state) { + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbasep_js_job_check_deref_cores_nokatom(kbdev, core_req, affinity, coreref_state); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); if (!kbdev->pm.active_count) { mutex_lock(&kbdev->js_data.runpool_mutex); @@ -1640,7 +1789,7 @@ void kbase_gpu_dump_slots(struct kbase_device *kbdev) js_devdata = &kbdev->js_data; - spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); dev_info(kbdev->dev, 
"kbase_gpu_dump_slots:\n"); @@ -1662,7 +1811,7 @@ void kbase_gpu_dump_slots(struct kbase_device *kbdev) } } - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h index 102d94be93d1..1e0e05ad3ea4 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -66,17 +66,6 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js, int idx); -/** - * kbase_gpu_slot_update - Update state based on slot ringbuffers - * - * @kbdev: Device pointer - * - * Inspect the jobs in the slot ringbuffers and update state. 
- * - * This will cause jobs to be submitted to hardware if they are unblocked - */ -void kbase_gpu_slot_update(struct kbase_device *kbdev); - /** * kbase_gpu_dump_slots - Print the contents of the slot ringbuffers * diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c index d665420ab380..54d8ddd80097 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c @@ -94,9 +94,8 @@ bool kbase_js_choose_affinity(u64 * const affinity, base_jd_core_req core_req = katom->core_req; unsigned int num_core_groups = kbdev->gpu_props.num_core_groups; u64 core_availability_mask; - unsigned long flags; - spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + lockdep_assert_held(&kbdev->hwaccess_lock); core_availability_mask = kbase_pm_ca_get_core_mask(kbdev); @@ -105,7 +104,6 @@ bool kbase_js_choose_affinity(u64 * const affinity, * transitioning) then fail. */ if (0 == core_availability_mask) { - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); *affinity = 0; return false; } @@ -114,7 +112,6 @@ bool kbase_js_choose_affinity(u64 * const affinity, if ((core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) == BASE_JD_REQ_T) { - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); /* If the hardware supports XAFFINITY then we'll only enable * the tiler (which is the default so this is a no-op), * otherwise enable shader core 0. */ @@ -169,8 +166,6 @@ bool kbase_js_choose_affinity(u64 * const affinity, } } - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); - /* * If no cores are currently available in the desired core group(s) * (core availability policy is transitioning) then fail. 
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h index fbffa3b40962..35d9781ae092 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h @@ -38,10 +38,9 @@ * violated. * * The following locking conditions are made on the caller - * - it must hold kbasep_js_device_data.runpool_irq.lock + * - it must hold hwaccess_lock */ -bool kbase_js_can_run_job_on_slot_no_lock(struct kbase_device *kbdev, - int js); +bool kbase_js_can_run_job_on_slot_no_lock(struct kbase_device *kbdev, int js); /** * kbase_js_choose_affinity - Compute affinity for a given job. @@ -71,7 +70,7 @@ bool kbase_js_choose_affinity(u64 * const affinity, * @affinity: The affinity mask to test * * The following locks must be held by the caller - * - kbasep_js_device_data.runpool_irq.lock + * - hwaccess_lock * * Return: true if the affinity would violate the restrictions */ @@ -87,7 +86,7 @@ bool kbase_js_affinity_would_violate(struct kbase_device *kbdev, int js, * @affinity: The cores to retain * * The following locks must be held by the caller - * - kbasep_js_device_data.runpool_irq.lock + * - hwaccess_lock */ void kbase_js_affinity_retain_slot_cores(struct kbase_device *kbdev, int js, u64 affinity); @@ -106,7 +105,7 @@ void kbase_js_affinity_retain_slot_cores(struct kbase_device *kbdev, int js, * %BASE_JM_SUBMIT_SLOTS. 
* * The following locks must be held by the caller - * - kbasep_js_device_data.runpool_irq.lock + * - hwaccess_lock */ void kbase_js_affinity_release_slot_cores(struct kbase_device *kbdev, int js, u64 affinity); diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c index a23deb4ca20c..b09d491c0ec5 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c @@ -104,7 +104,7 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer) js_devdata = &kbdev->js_data; /* Loop through the slots */ - spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); for (s = 0; s < kbdev->gpu_props.num_job_slots; s++) { struct kbase_jd_atom *atom = NULL; @@ -168,8 +168,8 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer) * However, if it's about to be * increased then the new context can't * run any jobs until they take the - * runpool_irq lock, so it's OK to - * observe the older value. + * hwaccess_lock, so it's OK to observe + * the older value. 
* * Similarly, if it's about to be * decreased, the last job from another @@ -270,7 +270,7 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer) backend->timeouts_updated = false; - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); return HRTIMER_NORESTART; } @@ -285,9 +285,9 @@ void kbase_backend_ctx_count_changed(struct kbase_device *kbdev) if (!timer_callback_should_run(kbdev)) { /* Take spinlock to force synchronisation with timer */ - spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); backend->timer_running = false; - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); /* From now on, return value of timer_callback_should_run() will * also cause the timer to not requeue itself. Its return value * cannot change, because it depends on variables updated with @@ -298,9 +298,9 @@ void kbase_backend_ctx_count_changed(struct kbase_device *kbdev) if (timer_callback_should_run(kbdev) && !backend->timer_running) { /* Take spinlock to force synchronisation with timer */ - spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); backend->timer_running = true; - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); hrtimer_start(&backend->scheduling_timer, HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns), HRTIMER_MODE_REL); diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c index 4a3572d971a6..08eea1c104e0 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c @@ -97,6 +97,30 @@ static int write_cmd(struct kbase_device *kbdev, int as_nr, u32 cmd, return status; } +static void 
validate_protected_page_fault(struct kbase_device *kbdev, + struct kbase_context *kctx) +{ + /* GPUs which support (native) protected mode shall not report page + * fault addresses unless it has protected debug mode and protected + * debug mode is turned on */ + u32 protected_debug_mode = 0; + + if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) + return; + + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) { + protected_debug_mode = kbase_reg_read(kbdev, + GPU_CONTROL_REG(GPU_STATUS), + kctx) & GPU_DBGEN; + } + + if (!protected_debug_mode) { + /* fault_addr should never be reported in protected mode. + * However, we just continue by printing an error message */ + dev_err(kbdev->dev, "Fault address reported in protected mode\n"); + } +} + void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) { const int num_as = 16; @@ -141,6 +165,7 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) */ kctx = kbasep_js_runpool_lookup_ctx(kbdev, as_no); + /* find faulting address */ as->fault_addr = kbase_reg_read(kbdev, MMU_AS_REG(as_no, @@ -152,6 +177,15 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) AS_FAULTADDRESS_LO), kctx); + /* Mark the fault protected or not */ + as->protected_mode = kbdev->protected_mode; + + if (kbdev->protected_mode && as->fault_addr) + { + /* check if address reporting is allowed */ + validate_protected_page_fault(kbdev, kctx); + } + /* report the fault to debugfs */ kbase_as_fault_debugfs_new(kbdev, as_no); @@ -195,10 +229,9 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) } /* Process the interrupt for this address space */ - spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbase_mmu_interrupt_process(kbdev, kctx, as); - spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, - flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } /* reenable interrupts */ @@ -268,6 
+301,8 @@ int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as, { int ret; + lockdep_assert_held(&kbdev->mmu_hw_mutex); + if (op == AS_COMMAND_UNLOCK) { /* Unlock doesn't require a lock first */ ret = write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx); diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c index 711e44c7f80a..7690ec56cd81 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c @@ -28,10 +28,12 @@ #endif /* CONFIG_MALI_PLATFORM_DEVICETREE */ #include -#include +#include #include #include +static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data); + void kbase_pm_register_access_enable(struct kbase_device *kbdev) { struct kbase_pm_callback_conf *callbacks; @@ -65,6 +67,14 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev) mutex_init(&kbdev->pm.lock); + kbdev->pm.backend.gpu_poweroff_wait_wq = alloc_workqueue("kbase_pm_poweroff_wait", + WQ_HIGHPRI | WQ_UNBOUND, 1); + if (!kbdev->pm.backend.gpu_poweroff_wait_wq) + return -ENOMEM; + + INIT_WORK(&kbdev->pm.backend.gpu_poweroff_wait_work, + kbase_pm_gpu_poweroff_wait_wq); + kbdev->pm.backend.gpu_powered = false; kbdev->pm.suspending = false; #ifdef CONFIG_MALI_DEBUG @@ -119,10 +129,11 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev) init_waitqueue_head(&kbdev->pm.zero_active_count_wait); kbdev->pm.active_count = 0; - spin_lock_init(&kbdev->pm.power_change_lock); spin_lock_init(&kbdev->pm.backend.gpu_cycle_counter_requests_lock); spin_lock_init(&kbdev->pm.backend.gpu_powered_lock); + init_waitqueue_head(&kbdev->pm.backend.poweroff_wait); + if (kbase_pm_ca_init(kbdev) != 0) goto workq_fail; @@ -157,47 +168,121 @@ void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume) * will wait for that state to be reached anyway */ } -bool kbase_pm_do_poweroff(struct kbase_device *kbdev, bool 
is_suspend) +static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data) { + struct kbase_device *kbdev = container_of(data, struct kbase_device, + pm.backend.gpu_poweroff_wait_work); + struct kbase_pm_device_data *pm = &kbdev->pm; + struct kbase_pm_backend_data *backend = &pm->backend; + struct kbasep_js_device_data *js_devdata = &kbdev->js_data; unsigned long flags; - bool cores_are_available; - - lockdep_assert_held(&kbdev->pm.lock); - - spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); - - /* Force all cores off */ - kbdev->pm.backend.desired_shader_state = 0; - kbdev->pm.backend.desired_tiler_state = 0; - - /* Force all cores to be unavailable, in the situation where - * transitions are in progress for some cores but not others, - * and kbase_pm_check_transitions_nolock can not immediately - * power off the cores */ - kbdev->shader_available_bitmap = 0; - kbdev->tiler_available_bitmap = 0; - kbdev->l2_available_bitmap = 0; + /* Wait for power transitions to complete. We do this with no locks held + * so that we don't deadlock with any pending workqueues */ KBASE_TIMELINE_PM_CHECKTRANS(kbdev, SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_START); - cores_are_available = kbase_pm_check_transitions_nolock(kbdev); + kbase_pm_check_transitions_sync(kbdev); KBASE_TIMELINE_PM_CHECKTRANS(kbdev, SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_END); - /* Don't need 'cores_are_available', because we don't return anything */ - CSTD_UNUSED(cores_are_available); - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + mutex_lock(&js_devdata->runpool_mutex); + mutex_lock(&kbdev->pm.lock); + + if (!backend->poweron_required) { + WARN_ON(kbdev->l2_available_bitmap || + kbdev->shader_available_bitmap || + kbdev->tiler_available_bitmap); + + /* Consume any change-state events */ + kbase_timeline_pm_check_handle_event(kbdev, + KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED); + + /* Disable interrupts and turn the clock off */ + if (!kbase_pm_clock_off(kbdev, 
backend->poweroff_is_suspend)) { + /* + * Page/bus faults are pending, must drop locks to + * process. Interrupts are disabled so no more faults + * should be generated at this point. + */ + mutex_unlock(&kbdev->pm.lock); + mutex_unlock(&js_devdata->runpool_mutex); + kbase_flush_mmu_wqs(kbdev); + mutex_lock(&js_devdata->runpool_mutex); + mutex_lock(&kbdev->pm.lock); + + /* Turn off clock now that fault have been handled. We + * dropped locks so poweron_required may have changed - + * power back on if this is the case.*/ + if (backend->poweron_required) + kbase_pm_clock_on(kbdev, false); + else + WARN_ON(!kbase_pm_clock_off(kbdev, + backend->poweroff_is_suspend)); + } + } - /* NOTE: We won't wait to reach the core's desired state, even if we're - * powering off the GPU itself too. It's safe to cut the power whilst - * they're transitioning to off, because the cores should be idle and - * all cache flushes should already have occurred */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + backend->poweroff_wait_in_progress = false; + if (backend->poweron_required) { + backend->poweron_required = false; + kbase_pm_update_cores_state_nolock(kbdev); + } + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - /* Consume any change-state events */ - kbase_timeline_pm_check_handle_event(kbdev, - KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED); - /* Disable interrupts and turn the clock off */ - return kbase_pm_clock_off(kbdev, is_suspend); + mutex_unlock(&kbdev->pm.lock); + mutex_unlock(&js_devdata->runpool_mutex); + + wake_up(&kbdev->pm.backend.poweroff_wait); +} + +void kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend) +{ + unsigned long flags; + + lockdep_assert_held(&kbdev->pm.lock); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + if (!kbdev->pm.backend.poweroff_wait_in_progress) { + /* Force all cores off */ + kbdev->pm.backend.desired_shader_state = 0; + kbdev->pm.backend.desired_tiler_state = 0; + + /* Force all cores to be unavailable, in the 
situation where + * transitions are in progress for some cores but not others, + * and kbase_pm_check_transitions_nolock can not immediately + * power off the cores */ + kbdev->shader_available_bitmap = 0; + kbdev->tiler_available_bitmap = 0; + kbdev->l2_available_bitmap = 0; + + kbdev->pm.backend.poweroff_wait_in_progress = true; + kbdev->pm.backend.poweroff_is_suspend = is_suspend; + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + /*Kick off wq here. Callers will have to wait*/ + queue_work(kbdev->pm.backend.gpu_poweroff_wait_wq, + &kbdev->pm.backend.gpu_poweroff_wait_work); + } else { + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } +} + +static bool is_poweroff_in_progress(struct kbase_device *kbdev) +{ + bool ret; + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + ret = (kbdev->pm.backend.poweroff_wait_in_progress == false); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return ret; +} + +void kbase_pm_wait_for_poweroff_complete(struct kbase_device *kbdev) +{ + wait_event_killable(kbdev->pm.backend.poweroff_wait, + is_poweroff_in_progress(kbdev)); } int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev, @@ -269,15 +354,7 @@ void kbase_hwaccess_pm_halt(struct kbase_device *kbdev) mutex_lock(&kbdev->pm.lock); kbase_pm_cancel_deferred_poweroff(kbdev); - if (!kbase_pm_do_poweroff(kbdev, false)) { - /* Page/bus faults are pending, must drop pm.lock to process. 
- * Interrupts are disabled so no more faults should be - * generated at this point */ - mutex_unlock(&kbdev->pm.lock); - kbase_flush_mmu_wqs(kbdev); - mutex_lock(&kbdev->pm.lock); - WARN_ON(!kbase_pm_do_poweroff(kbdev, false)); - } + kbase_pm_do_poweroff(kbdev, false); mutex_unlock(&kbdev->pm.lock); } @@ -295,6 +372,8 @@ void kbase_hwaccess_pm_term(struct kbase_device *kbdev) /* Shut down the metrics subsystem */ kbasep_pm_metrics_term(kbdev); + + destroy_workqueue(kbdev->pm.backend.gpu_poweroff_wait_wq); } void kbase_pm_power_changed(struct kbase_device *kbdev) @@ -304,9 +383,8 @@ void kbase_pm_power_changed(struct kbase_device *kbdev) KBASE_TIMELINE_PM_CHECKTRANS(kbdev, SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_START); - spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); cores_are_available = kbase_pm_check_transitions_nolock(kbdev); - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); KBASE_TIMELINE_PM_CHECKTRANS(kbdev, SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_END); @@ -316,10 +394,9 @@ void kbase_pm_power_changed(struct kbase_device *kbdev) kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED); - spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags); - kbase_gpu_slot_update(kbdev); - spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags); + kbase_backend_slot_update(kbdev); } + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, @@ -354,21 +431,16 @@ void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev) * off prematurely) */ mutex_lock(&js_devdata->runpool_mutex); mutex_lock(&kbdev->pm.lock); + kbase_pm_cancel_deferred_poweroff(kbdev); - if (!kbase_pm_do_poweroff(kbdev, true)) { - /* Page/bus faults are pending, must drop pm.lock to process. 
- * Interrupts are disabled so no more faults should be - * generated at this point */ - mutex_unlock(&kbdev->pm.lock); - kbase_flush_mmu_wqs(kbdev); - mutex_lock(&kbdev->pm.lock); - WARN_ON(!kbase_pm_do_poweroff(kbdev, false)); - } + kbase_pm_do_poweroff(kbdev, true); kbase_backend_timer_suspend(kbdev); mutex_unlock(&kbdev->pm.lock); mutex_unlock(&js_devdata->runpool_mutex); + + kbase_pm_wait_for_poweroff_complete(kbdev); } void kbase_hwaccess_pm_resume(struct kbase_device *kbdev) diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c index 4b903cca020b..e8cd8cbd35af 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2013-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -95,10 +95,10 @@ void kbase_pm_ca_set_policy(struct kbase_device *kbdev, mutex_lock(&kbdev->pm.lock); /* Remove the policy to prevent IRQ handlers from working on it */ - spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); old_policy = kbdev->pm.backend.ca_current_policy; kbdev->pm.backend.ca_current_policy = NULL; - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); if (old_policy->term) old_policy->term(kbdev); @@ -106,7 +106,7 @@ void kbase_pm_ca_set_policy(struct kbase_device *kbdev, if (new_policy->init) new_policy->init(kbdev); - spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbdev->pm.backend.ca_current_policy = new_policy; /* If any core power state changes were previously attempted, but @@ -118,7 +118,7 @@ void 
kbase_pm_ca_set_policy(struct kbase_device *kbdev, kbdev->shader_ready_bitmap, kbdev->shader_transitioning_bitmap); - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); mutex_unlock(&kbdev->pm.lock); @@ -131,7 +131,7 @@ KBASE_EXPORT_TEST_API(kbase_pm_ca_set_policy); u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev) { - lockdep_assert_held(&kbdev->pm.power_change_lock); + lockdep_assert_held(&kbdev->hwaccess_lock); /* All cores must be enabled when instrumentation is in use */ if (kbdev->pm.backend.instr_enabled) @@ -151,7 +151,7 @@ KBASE_EXPORT_TEST_API(kbase_pm_ca_get_core_mask); void kbase_pm_ca_update_core_status(struct kbase_device *kbdev, u64 cores_ready, u64 cores_transitioning) { - lockdep_assert_held(&kbdev->pm.power_change_lock); + lockdep_assert_held(&kbdev->hwaccess_lock); if (kbdev->pm.backend.ca_current_policy != NULL) kbdev->pm.backend.ca_current_policy->update_core_status(kbdev, @@ -163,20 +163,17 @@ void kbase_pm_ca_instr_enable(struct kbase_device *kbdev) { unsigned long flags; - spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbdev->pm.backend.instr_enabled = true; kbase_pm_update_cores_state_nolock(kbdev); - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } void kbase_pm_ca_instr_disable(struct kbase_device *kbdev) { - unsigned long flags; - - spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + lockdep_assert_held(&kbdev->hwaccess_lock); kbdev->pm.backend.instr_enabled = false; kbase_pm_update_cores_state_nolock(kbdev); - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); } diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h index e8f96fe6c514..99fb62d006bc 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h +++ 
b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h @@ -207,6 +207,17 @@ union kbase_pm_ca_policy_data { * power_change_lock should be held when accessing, * unless there is no way the timer can be running (eg * hrtimer_cancel() was called immediately before) + * @poweroff_wait_in_progress: true if a wait for GPU power off is in progress. + * hwaccess_lock must be held when accessing + * @poweron_required: true if a GPU power on is required. Should only be set + * when poweroff_wait_in_progress is true, and therefore the + * GPU can not immediately be powered on. pm.lock must be + * held when accessing + * @poweroff_is_suspend: true if the GPU is being powered off due to a suspend + * request. pm.lock must be held when accessing + * @gpu_poweroff_wait_wq: workqueue for waiting for GPU to power off + * @gpu_poweroff_wait_work: work item for use with @gpu_poweroff_wait_wq + * @poweroff_wait: waitqueue for waiting for @gpu_poweroff_wait_work to complete * @callback_power_on: Callback when the GPU needs to be turned on. See * &struct kbase_pm_callback_conf * @callback_power_off: Callback when the GPU may be turned off. 
See @@ -281,6 +292,15 @@ struct kbase_pm_backend_data { bool poweroff_timer_needed; bool poweroff_timer_running; + bool poweroff_wait_in_progress; + bool poweron_required; + bool poweroff_is_suspend; + + struct workqueue_struct *gpu_poweroff_wait_wq; + struct work_struct gpu_poweroff_wait_work; + + wait_queue_head_t poweroff_wait; + int (*callback_power_on)(struct kbase_device *kbdev); void (*callback_power_off)(struct kbase_device *kbdev); void (*callback_power_suspend)(struct kbase_device *kbdev); diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c index 03ba23d54365..9271314e00c2 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c @@ -174,7 +174,7 @@ static void kbase_pm_invoke(struct kbase_device *kbdev, u32 lo = cores & 0xFFFFFFFF; u32 hi = (cores >> 32) & 0xFFFFFFFF; - lockdep_assert_held(&kbdev->pm.power_change_lock); + lockdep_assert_held(&kbdev->hwaccess_lock); /*-------------------------------------------------------*/ @@ -449,7 +449,7 @@ static bool kbase_pm_transition_core_type(struct kbase_device *kbdev, u64 powering_on_trans; u64 desired_state_in_use; - lockdep_assert_held(&kbdev->pm.power_change_lock); + lockdep_assert_held(&kbdev->hwaccess_lock); /* Get current state */ present = kbase_pm_get_present_cores(kbdev, type); @@ -493,7 +493,7 @@ static bool kbase_pm_transition_core_type(struct kbase_device *kbdev, kbdev->pm.backend.l2_powered = 0; } - if (desired_state_in_use == ready && (trans == 0)) + if (desired_state == ready && (trans == 0)) return true; /* Restrict the cores to those that are actually present */ @@ -604,7 +604,7 @@ MOCKABLE(kbase_pm_check_transitions_nolock) (struct kbase_device *kbdev) u64 prev_l2_available_bitmap; KBASE_DEBUG_ASSERT(NULL != kbdev); - lockdep_assert_held(&kbdev->pm.power_change_lock); + lockdep_assert_held(&kbdev->hwaccess_lock); 
spin_lock(&kbdev->pm.backend.gpu_powered_lock); if (kbdev->pm.backend.gpu_powered == false) { @@ -776,6 +776,7 @@ MOCKABLE(kbase_pm_check_transitions_nolock) (struct kbase_device *kbdev) KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED); /* Wake slow-path waiters. Job scheduler does not use this. */ KBASE_TRACE_ADD(kbdev, PM_WAKE_WAITERS, NULL, NULL, 0u, 0); + wake_up(&kbdev->pm.backend.gpu_in_desired_state_wait); } @@ -832,11 +833,13 @@ void kbase_pm_check_transitions_sync(struct kbase_device *kbdev) /* Force the transition to be checked and reported - the cores may be * 'available' (for job submission) but not fully powered up. */ - spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + cores_are_available = kbase_pm_check_transitions_nolock(kbdev); + /* Don't need 'cores_are_available', because we don't return anything */ CSTD_UNUSED(cores_are_available); - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); timeout = jiffies + PM_TIMEOUT; @@ -907,12 +910,12 @@ void kbase_pm_enable_interrupts(struct kbase_device *kbdev) * Clear all interrupts, * and unmask them all. 
*/ - spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL, NULL); kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), GPU_IRQ_REG_ALL, NULL); - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF, NULL); @@ -924,21 +927,18 @@ void kbase_pm_enable_interrupts(struct kbase_device *kbdev) KBASE_EXPORT_TEST_API(kbase_pm_enable_interrupts); -void kbase_pm_disable_interrupts(struct kbase_device *kbdev) +void kbase_pm_disable_interrupts_nolock(struct kbase_device *kbdev) { - unsigned long flags; - KBASE_DEBUG_ASSERT(NULL != kbdev); /* * Mask all interrupts, * and clear them all. */ - spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + lockdep_assert_held(&kbdev->hwaccess_lock); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), 0, NULL); kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL, NULL); - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); - kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0, NULL); kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF, NULL); @@ -947,8 +947,18 @@ void kbase_pm_disable_interrupts(struct kbase_device *kbdev) kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF, NULL); } +void kbase_pm_disable_interrupts(struct kbase_device *kbdev) +{ + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_pm_disable_interrupts_nolock(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + KBASE_EXPORT_TEST_API(kbase_pm_disable_interrupts); + /* * pmu layout: * 0x0000: PMU TAG (RO) (0xCAFECAFE) @@ -1001,12 +1011,10 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume) kbase_pm_init_hw(kbdev, PM_ENABLE_IRQS); } + mutex_lock(&kbdev->mmu_hw_mutex); /* Reprogram the GPU's MMU */ for 
(i = 0; i < kbdev->nr_hw_address_spaces; i++) { - struct kbase_as *as = &kbdev->as[i]; - - mutex_lock(&as->transaction_mutex); - spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); if (js_devdata->runpool_irq.per_as_data[i].kctx) kbase_mmu_update( @@ -1014,9 +1022,9 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume) else kbase_mmu_disable_as(kbdev, i); - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); - mutex_unlock(&as->transaction_mutex); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } + mutex_unlock(&kbdev->mmu_hw_mutex); /* Lastly, enable the interrupts */ kbase_pm_enable_interrupts(kbdev); @@ -1253,7 +1261,7 @@ static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev) void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev) { - if ((kbdev->system_coherency == COHERENCY_ACE) && + if ((kbdev->current_gpu_coherency_mode == COHERENCY_ACE) && !kbdev->cci_snoop_enabled) { #ifdef CONFIG_ARM64 if (kbdev->snoop_enable_smc != 0) @@ -1266,8 +1274,7 @@ void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev) void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev) { - if ((kbdev->system_coherency == COHERENCY_ACE) && - kbdev->cci_snoop_enabled) { + if (kbdev->cci_snoop_enabled) { #ifdef CONFIG_ARM64 if (kbdev->snoop_disable_smc != 0) { mali_cci_flush_l2(kbdev); @@ -1397,7 +1404,7 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) kbdev->pm.backend.reset_done = false; /* The cores should be made unavailable due to the reset */ - spin_lock_irqsave(&kbdev->pm.power_change_lock, irq_flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); if (kbdev->shader_available_bitmap != 0u) KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL, NULL, 0u, (u32)0u); @@ -1407,7 +1414,7 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) kbdev->shader_available_bitmap = 0u; kbdev->tiler_available_bitmap = 0u; 
kbdev->l2_available_bitmap = 0u; - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, irq_flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); /* Soft reset the GPU */ if (kbdev->protected_mode_support && @@ -1416,12 +1423,11 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) else err = kbase_pm_reset_do_normal(kbdev); - spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, irq_flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); if (kbdev->protected_mode) resume_vinstr = true; - kbdev->protected_mode_transition = false; kbdev->protected_mode = false; - spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, irq_flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); if (err) goto exit; @@ -1430,7 +1436,6 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) kbase_pm_hw_issues_detect(kbdev); kbase_pm_hw_issues_apply(kbdev); - kbase_cache_set_coherency_mode(kbdev, kbdev->system_coherency); /* Sanity check protected mode was left after reset */ @@ -1460,7 +1465,10 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) &kbdev->pm.backend.gpu_cycle_counter_requests_lock, irq_flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); kbase_pm_release_l2_caches(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); + kbase_pm_disable_interrupts(kbdev); } @@ -1542,12 +1550,14 @@ void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev) KBASE_EXPORT_TEST_API(kbase_pm_request_gpu_cycle_counter_l2_is_on); -void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev) +void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev) { unsigned long flags; KBASE_DEBUG_ASSERT(kbdev != NULL); + lockdep_assert_held(&kbdev->hwaccess_lock); + spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock, flags); @@ -1566,4 +1576,15 @@ void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev) 
kbase_pm_release_l2_caches(kbdev); } +void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev) +{ + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + kbase_pm_release_gpu_cycle_counter_nolock(kbdev); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + KBASE_EXPORT_TEST_API(kbase_pm_release_gpu_cycle_counter); diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h index aa51b8cdef8f..ad2667ae7c21 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h @@ -167,6 +167,16 @@ void kbase_pm_enable_interrupts(struct kbase_device *kbdev); */ void kbase_pm_disable_interrupts(struct kbase_device *kbdev); +/** + * kbase_pm_disable_interrupts_nolock - Version of kbase_pm_disable_interrupts() + * that does not take the hwaccess_lock + * + * Caller must hold the hwaccess_lock. + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_disable_interrupts_nolock(struct kbase_device *kbdev); + /** * kbase_pm_init_hw - Initialize the hardware. * @kbdev: The kbase device structure for the device (must be a valid pointer) @@ -373,14 +383,35 @@ void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev); * kbase_pm_release_gpu_cycle_counter - Mark that the GPU cycle counter is no * longer in use * - * If the caller is the - * last caller then the GPU cycle counters will be disabled. A request must have - * been made before a call to this. + * If the caller is the last caller then the GPU cycle counters will be + * disabled. A request must have been made before a call to this. + * + * Caller must not hold the hwaccess_lock, as it will be taken in this function. + * If the caller is already holding this lock then + * kbase_pm_release_gpu_cycle_counter_nolock() must be used instead. 
* * @kbdev: The kbase device structure for the device (must be a valid pointer) */ void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev); +/** + * kbase_pm_release_gpu_cycle_counter_nolock - Version of kbase_pm_release_gpu_cycle_counter() + * that does not take hwaccess_lock + * + * Caller must hold the hwaccess_lock. + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev); + +/** + * kbase_pm_wait_for_poweroff_complete - Wait for the poweroff workqueue to + * complete + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_wait_for_poweroff_complete(struct kbase_device *kbdev); + /** * kbase_pm_register_access_enable - Enable access to GPU registers * @@ -454,12 +485,8 @@ void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume); * pointer) * @is_suspend: true if power off due to suspend, * false otherwise - * Return: - * true if power was turned off, else - * false if power can not be turned off due to pending page/bus - * fault workers. 
Caller must flush MMU workqueues and retry */ -bool kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend); +void kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend); #ifdef CONFIG_PM_DEVFREQ void kbase_pm_get_dvfs_utilisation(struct kbase_device *kbdev, @@ -496,7 +523,7 @@ void kbase_pm_power_changed(struct kbase_device *kbdev); * @kbdev: The kbase device structure for the device (must be a valid pointer) * @now: Pointer to the timestamp of the change, or NULL to use current time * - * Caller must hold runpool_irq.lock + * Caller must hold hwaccess_lock */ void kbase_pm_metrics_update(struct kbase_device *kbdev, ktime_t *now); diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c index ae632564b96a..7613e1d39fdf 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -362,14 +362,15 @@ static void kbase_pm_metrics_active_calc(struct kbase_device *kbdev) int device_nr = (katom->core_req & BASE_JD_REQ_SPECIFIC_COHERENT_GROUP) ? katom->device_nr : 0; - WARN_ON(device_nr >= 2); - kbdev->pm.backend.metrics.active_cl_ctx[ - device_nr] = 1; + if (!WARN_ON(device_nr >= 2)) + kbdev->pm.backend.metrics. + active_cl_ctx[device_nr] = 1; } else { /* Slot 2 should not be running non-compute * atoms */ - WARN_ON(js >= 2); - kbdev->pm.backend.metrics.active_gl_ctx[js] = 1; + if (!WARN_ON(js >= 2)) + kbdev->pm.backend.metrics. 
+ active_gl_ctx[js] = 1; } kbdev->pm.backend.metrics.gpu_active = true; } @@ -382,7 +383,7 @@ void kbase_pm_metrics_update(struct kbase_device *kbdev, ktime_t *timestamp) unsigned long flags; ktime_t now; - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c index 4d006028089a..92457e8c0054 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c @@ -156,7 +156,7 @@ static void kbasep_pm_do_poweroff_cores(struct kbase_device *kbdev) u64 prev_shader_state = kbdev->pm.backend.desired_shader_state; u64 prev_tiler_state = kbdev->pm.backend.desired_tiler_state; - lockdep_assert_held(&kbdev->pm.power_change_lock); + lockdep_assert_held(&kbdev->hwaccess_lock); kbdev->pm.backend.desired_shader_state &= ~kbdev->pm.backend.shader_poweroff_pending; @@ -193,7 +193,7 @@ kbasep_pm_do_gpu_poweroff_callback(struct hrtimer *timer) kbdev = container_of(timer, struct kbase_device, pm.backend.gpu_poweroff_timer); - spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); /* It is safe for this call to do nothing if the work item is already * queued. 
The worker function will read the must up-to-date state of @@ -220,7 +220,7 @@ kbasep_pm_do_gpu_poweroff_callback(struct hrtimer *timer) } if (kbdev->pm.backend.poweroff_timer_needed) { - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); hrtimer_add_expires(timer, kbdev->pm.gpu_poweroff_time); @@ -228,7 +228,7 @@ kbasep_pm_do_gpu_poweroff_callback(struct hrtimer *timer) } kbdev->pm.backend.poweroff_timer_running = false; - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); return HRTIMER_NORESTART; } @@ -258,13 +258,13 @@ static void kbasep_pm_do_gpu_poweroff_wq(struct work_struct *data) KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_poweroff_pending == 0); - spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); /* Only power off the GPU if a request is still pending */ if (!kbdev->pm.backend.pm_current_policy->get_core_active(kbdev)) do_poweroff = true; - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); if (do_poweroff) { kbdev->pm.backend.poweroff_timer_needed = false; @@ -272,14 +272,7 @@ static void kbasep_pm_do_gpu_poweroff_wq(struct work_struct *data) kbdev->pm.backend.poweroff_timer_running = false; /* Power off the GPU */ - if (!kbase_pm_do_poweroff(kbdev, false)) { - /* GPU can not be powered off at present */ - kbdev->pm.backend.poweroff_timer_needed = true; - kbdev->pm.backend.poweroff_timer_running = true; - hrtimer_start(&kbdev->pm.backend.gpu_poweroff_timer, - kbdev->pm.gpu_poweroff_time, - HRTIMER_MODE_REL); - } + kbase_pm_do_poweroff(kbdev, false); } mutex_unlock(&kbdev->pm.lock); @@ -325,7 +318,7 @@ void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev) kbdev->pm.backend.poweroff_timer_needed = false; hrtimer_cancel(&kbdev->pm.backend.gpu_poweroff_timer); - 
spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbdev->pm.backend.poweroff_timer_running = false; /* If wq is already running but is held off by pm.lock, make sure it has @@ -336,7 +329,7 @@ void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev) kbdev->pm.backend.tiler_poweroff_pending = 0; kbdev->pm.backend.shader_poweroff_pending_time = 0; - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } void kbase_pm_update_active(struct kbase_device *kbdev) @@ -351,7 +344,7 @@ void kbase_pm_update_active(struct kbase_device *kbdev) /* pm_current_policy will never be NULL while pm.lock is held */ KBASE_DEBUG_ASSERT(backend->pm_current_policy); - spin_lock_irqsave(&pm->power_change_lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); active = backend->pm_current_policy->get_core_active(kbdev); @@ -363,7 +356,7 @@ void kbase_pm_update_active(struct kbase_device *kbdev) /* If a request was pending then the GPU was still * powered, so no need to continue */ if (!kbdev->poweroff_pending) { - spin_unlock_irqrestore(&pm->power_change_lock, + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); return; } @@ -379,10 +372,14 @@ void kbase_pm_update_active(struct kbase_device *kbdev) HRTIMER_MODE_REL); } - spin_unlock_irqrestore(&pm->power_change_lock, flags); - /* Power on the GPU and any cores requested by the policy */ - kbase_pm_do_poweron(kbdev, false); + if (pm->backend.poweroff_wait_in_progress) { + pm->backend.poweron_required = true; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } else { + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + kbase_pm_do_poweron(kbdev, false); + } } else { /* It is an error for the power policy to power off the GPU * when there are contexts active */ @@ -414,35 +411,17 @@ void kbase_pm_update_active(struct kbase_device *kbdev) pm->gpu_poweroff_time, HRTIMER_MODE_REL); } - 
spin_unlock_irqrestore(&pm->power_change_lock, + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } else { - spin_unlock_irqrestore(&pm->power_change_lock, + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); /* Power off the GPU immediately */ - if (!kbase_pm_do_poweroff(kbdev, false)) { - /* GPU can not be powered off at present - */ - spin_lock_irqsave( - &pm->power_change_lock, - flags); - backend->poweroff_timer_needed = true; - if (!backend->poweroff_timer_running) { - backend->poweroff_timer_running - = true; - hrtimer_start( - &backend->gpu_poweroff_timer, - pm->gpu_poweroff_time, - HRTIMER_MODE_REL); - } - spin_unlock_irqrestore( - &pm->power_change_lock, - flags); - } + kbase_pm_do_poweroff(kbdev, false); } } else { - spin_unlock_irqrestore(&pm->power_change_lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } } } @@ -454,25 +433,37 @@ void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev) bool cores_are_available; bool do_poweroff = false; - lockdep_assert_held(&kbdev->pm.power_change_lock); + lockdep_assert_held(&kbdev->hwaccess_lock); if (kbdev->pm.backend.pm_current_policy == NULL) return; + if (kbdev->pm.backend.poweroff_wait_in_progress) + return; - desired_bitmap = - kbdev->pm.backend.pm_current_policy->get_core_mask(kbdev); - desired_bitmap &= kbase_pm_ca_get_core_mask(kbdev); - - if (kbdev->tiler_needed_cnt > 0 || kbdev->tiler_inuse_cnt > 0) - desired_tiler_bitmap = 1; - else + if (kbdev->protected_mode_transition && !kbdev->shader_needed_bitmap && + !kbdev->shader_inuse_bitmap && !kbdev->tiler_needed_cnt + && !kbdev->tiler_inuse_cnt) { + /* We are trying to change in/out of protected mode - force all + * cores off so that the L2 powers down */ + desired_bitmap = 0; desired_tiler_bitmap = 0; + } else { + desired_bitmap = + kbdev->pm.backend.pm_current_policy->get_core_mask(kbdev); + desired_bitmap &= kbase_pm_ca_get_core_mask(kbdev); - if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY)) { - /* 
Unless XAFFINITY is supported, enable core 0 if tiler - * required, regardless of core availability */ if (kbdev->tiler_needed_cnt > 0 || kbdev->tiler_inuse_cnt > 0) - desired_bitmap |= 1; + desired_tiler_bitmap = 1; + else + desired_tiler_bitmap = 0; + + if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY)) { + /* Unless XAFFINITY is supported, enable core 0 if tiler + * required, regardless of core availability */ + if (kbdev->tiler_needed_cnt > 0 || + kbdev->tiler_inuse_cnt > 0) + desired_bitmap |= 1; + } } if (kbdev->pm.backend.desired_shader_state != desired_bitmap) @@ -495,7 +486,8 @@ void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev) (kbdev->pm.backend.desired_tiler_state & ~desired_tiler_bitmap); - if (kbdev->pm.poweroff_shader_ticks) + if (kbdev->pm.poweroff_shader_ticks && + !kbdev->protected_mode_transition) kbdev->pm.backend.shader_poweroff_pending_time = kbdev->pm.poweroff_shader_ticks; else @@ -517,7 +509,8 @@ void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev) kbdev->pm.backend.tiler_poweroff_pending |= (kbdev->pm.backend.desired_tiler_state & ~desired_tiler_bitmap); - if (kbdev->pm.poweroff_shader_ticks) + if (kbdev->pm.poweroff_shader_ticks && + !kbdev->protected_mode_transition) kbdev->pm.backend.shader_poweroff_pending_time = kbdev->pm.poweroff_shader_ticks; else @@ -563,11 +556,11 @@ void kbase_pm_update_cores_state(struct kbase_device *kbdev) { unsigned long flags; - spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbase_pm_update_cores_state_nolock(kbdev); - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } int kbase_pm_list_policies(const struct kbase_pm_policy * const **list) @@ -612,10 +605,10 @@ void kbase_pm_set_policy(struct kbase_device *kbdev, mutex_lock(&kbdev->pm.lock); /* Remove the policy to prevent IRQ handlers from working on it */ - 
spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); old_policy = kbdev->pm.backend.pm_current_policy; kbdev->pm.backend.pm_current_policy = NULL; - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); KBASE_TRACE_ADD(kbdev, PM_CURRENT_POLICY_TERM, NULL, NULL, 0u, old_policy->id); @@ -627,9 +620,9 @@ void kbase_pm_set_policy(struct kbase_device *kbdev, if (new_policy->init) new_policy->init(kbdev); - spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbdev->pm.backend.pm_current_policy = new_policy; - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); /* If any core power state changes were previously attempted, but * couldn't be made because the policy was changing (current_policy was @@ -664,14 +657,13 @@ kbase_pm_trace_check_and_finish_state_change(struct kbase_device *kbdev) void kbase_pm_request_cores(struct kbase_device *kbdev, bool tiler_required, u64 shader_cores) { - unsigned long flags; u64 cores; kbase_pm_change_state change_gpu_state = 0u; KBASE_DEBUG_ASSERT(kbdev != NULL); - spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + lockdep_assert_held(&kbdev->hwaccess_lock); cores = shader_cores; while (cores) { @@ -714,8 +706,6 @@ void kbase_pm_request_cores(struct kbase_device *kbdev, KBASE_PM_FUNC_ID_REQUEST_CORES_END, change_gpu_state); } - - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); } KBASE_EXPORT_TEST_API(kbase_pm_request_cores); @@ -723,13 +713,11 @@ KBASE_EXPORT_TEST_API(kbase_pm_request_cores); void kbase_pm_unrequest_cores(struct kbase_device *kbdev, bool tiler_required, u64 shader_cores) { - unsigned long flags; - kbase_pm_change_state change_gpu_state = 0u; KBASE_DEBUG_ASSERT(kbdev != NULL); - spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + 
lockdep_assert_held(&kbdev->hwaccess_lock); while (shader_cores) { int bitnum = fls64(shader_cores) - 1; @@ -770,8 +758,6 @@ void kbase_pm_unrequest_cores(struct kbase_device *kbdev, * - no-one will wait on the state change */ kbase_pm_trace_check_and_finish_state_change(kbdev); } - - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); } KBASE_EXPORT_TEST_API(kbase_pm_unrequest_cores); @@ -780,11 +766,10 @@ enum kbase_pm_cores_ready kbase_pm_register_inuse_cores(struct kbase_device *kbdev, bool tiler_required, u64 shader_cores) { - unsigned long flags; u64 prev_shader_needed; /* Just for tracing */ u64 prev_shader_inuse; /* Just for tracing */ - spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + lockdep_assert_held(&kbdev->hwaccess_lock); prev_shader_needed = kbdev->shader_needed_bitmap; prev_shader_inuse = kbdev->shader_inuse_bitmap; @@ -795,16 +780,15 @@ kbase_pm_register_inuse_cores(struct kbase_device *kbdev, * be chosen */ if ((kbdev->pm.backend.desired_shader_state & shader_cores) != shader_cores) { - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); - - return KBASE_NEW_AFFINITY; + return (kbdev->pm.backend.poweroff_wait_in_progress || + kbdev->pm.backend.pm_current_policy == NULL) ? 
+ KBASE_CORES_NOT_READY : KBASE_NEW_AFFINITY; } if ((kbdev->shader_available_bitmap & shader_cores) != shader_cores || (tiler_required && !kbdev->tiler_available_bitmap)) { /* Trace ongoing core transition */ kbase_timeline_pm_l2_transition_start(kbdev); - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); return KBASE_CORES_NOT_READY; } @@ -853,8 +837,6 @@ kbase_pm_register_inuse_cores(struct kbase_device *kbdev, KBASE_TRACE_ADD(kbdev, PM_REGISTER_CHANGE_SHADER_INUSE, NULL, NULL, 0u, (u32) kbdev->shader_inuse_bitmap); - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); - return KBASE_CORES_READY; } @@ -863,12 +845,11 @@ KBASE_EXPORT_TEST_API(kbase_pm_register_inuse_cores); void kbase_pm_release_cores(struct kbase_device *kbdev, bool tiler_required, u64 shader_cores) { - unsigned long flags; kbase_pm_change_state change_gpu_state = 0u; KBASE_DEBUG_ASSERT(kbdev != NULL); - spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + lockdep_assert_held(&kbdev->hwaccess_lock); while (shader_cores) { int bitnum = fls64(shader_cores) - 1; @@ -913,8 +894,6 @@ void kbase_pm_release_cores(struct kbase_device *kbdev, /* Trace that any state change completed immediately */ kbase_pm_trace_check_and_finish_state_change(kbdev); } - - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); } KBASE_EXPORT_TEST_API(kbase_pm_release_cores); @@ -923,7 +902,13 @@ void kbase_pm_request_cores_sync(struct kbase_device *kbdev, bool tiler_required, u64 shader_cores) { + unsigned long flags; + + kbase_pm_wait_for_poweroff_complete(kbdev); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbase_pm_request_cores(kbdev, tiler_required, shader_cores); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); kbase_pm_check_transitions_sync(kbdev); } @@ -935,7 +920,7 @@ void kbase_pm_request_l2_caches(struct kbase_device *kbdev) unsigned long flags; u32 prior_l2_users_count; - spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + 
spin_lock_irqsave(&kbdev->hwaccess_lock, flags); prior_l2_users_count = kbdev->l2_users_count++; @@ -947,7 +932,7 @@ void kbase_pm_request_l2_caches(struct kbase_device *kbdev) if (!prior_l2_users_count || !kbdev->l2_available_bitmap) kbase_pm_check_transitions_nolock(kbdev); - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); wait_event(kbdev->pm.backend.l2_powered_wait, kbdev->pm.backend.l2_powered == 1); @@ -959,22 +944,16 @@ KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches); void kbase_pm_request_l2_caches_l2_is_on(struct kbase_device *kbdev) { - unsigned long flags; - - spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + lockdep_assert_held(&kbdev->hwaccess_lock); kbdev->l2_users_count++; - - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); } KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches_l2_is_on); void kbase_pm_release_l2_caches(struct kbase_device *kbdev) { - unsigned long flags; - - spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + lockdep_assert_held(&kbdev->hwaccess_lock); KBASE_DEBUG_ASSERT(kbdev->l2_users_count > 0); @@ -985,8 +964,6 @@ void kbase_pm_release_l2_caches(struct kbase_device *kbdev) /* Trace that any state change completed immediately */ kbase_pm_trace_check_and_finish_state_change(kbdev); } - - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); } KBASE_EXPORT_TEST_API(kbase_pm_release_l2_caches); diff --git a/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h b/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h index f7c0ff674906..8b07cbc5eae8 100644 --- a/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h +++ b/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h @@ -50,6 +50,7 @@ enum base_hw_feature { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_MODE, BASE_HW_FEATURE_COHERENCY_REG, + BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_END }; @@ -192,6 +193,31 @@ static const enum base_hw_feature 
base_hw_features_tMIx[] = { BASE_HW_FEATURE_END }; +static const enum base_hw_feature base_hw_features_tHEx[] = { + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, + BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_XAFFINITY, + BASE_HW_FEATURE_WARPING, + BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, + BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, + BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, + BASE_HW_FEATURE_BRNDOUT_CC, + BASE_HW_FEATURE_BRNDOUT_KILL, + BASE_HW_FEATURE_LD_ST_LEA_TEX, + BASE_HW_FEATURE_LD_ST_TILEBUFFER, + BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, + BASE_HW_FEATURE_MRT, + BASE_HW_FEATURE_MSAA_16X, + BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, + BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, + BASE_HW_FEATURE_T7XX_PAIRING_RULES, + BASE_HW_FEATURE_TEST4_DATUM_MODE, + BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_PROTECTED_MODE, + BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + BASE_HW_FEATURE_COHERENCY_REG, + BASE_HW_FEATURE_END +}; #endif /* _BASE_HWCONFIG_FEATURES_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h b/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h index 149f44cb8674..4d95b4f9f649 100644 --- a/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h +++ b/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h @@ -113,6 +113,8 @@ enum base_hw_issue { BASE_HW_ISSUE_TMIX_8138, BASE_HW_ISSUE_TMIX_8206, BASE_HW_ISSUE_TMIX_8343, + BASE_HW_ISSUE_TMIX_8463, + BASE_HW_ISSUE_TMIX_8456, GPUCORE_1619, BASE_HW_ISSUE_END }; @@ -944,6 +946,8 @@ static const enum base_hw_issue base_hw_issues_tMIx_r0p0_05dev0[] = { BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TMIX_8138, BASE_HW_ISSUE_TMIX_8343, + BASE_HW_ISSUE_TMIX_8463, + BASE_HW_ISSUE_TMIX_8456, BASE_HW_ISSUE_END }; @@ -961,6 +965,8 @@ static const enum base_hw_issue base_hw_issues_tMIx_r0p0[] = { BASE_HW_ISSUE_TMIX_8138, BASE_HW_ISSUE_TMIX_8206, BASE_HW_ISSUE_TMIX_8343, + BASE_HW_ISSUE_TMIX_8463, + BASE_HW_ISSUE_TMIX_8456, BASE_HW_ISSUE_END }; @@ -976,11 +982,32 @@ static const enum base_hw_issue 
base_hw_issues_model_tMIx[] = { BASE_HW_ISSUE_TMIX_8138, BASE_HW_ISSUE_TMIX_8206, BASE_HW_ISSUE_TMIX_8343, + BASE_HW_ISSUE_TMIX_8456, GPUCORE_1619, BASE_HW_ISSUE_END }; +static const enum base_hw_issue base_hw_issues_tHEx_r0p0[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_10682, + BASE_HW_ISSUE_10821, + BASE_HW_ISSUE_T76X_3700, + BASE_HW_ISSUE_TMIX_7891, + BASE_HW_ISSUE_TMIX_8042, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_END +}; +static const enum base_hw_issue base_hw_issues_model_tHEx[] = { + BASE_HW_ISSUE_5736, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_T76X_3700, + BASE_HW_ISSUE_TMIX_7891, + BASE_HW_ISSUE_TMIX_8042, + BASE_HW_ISSUE_TMIX_8133, + GPUCORE_1619, + BASE_HW_ISSUE_END +}; diff --git a/drivers/gpu/arm/midgard/mali_base_kernel.h b/drivers/gpu/arm/midgard/mali_base_kernel.h index 749dd9a1cc9e..bcb05e40f7fa 100644 --- a/drivers/gpu/arm/midgard/mali_base_kernel.h +++ b/drivers/gpu/arm/midgard/mali_base_kernel.h @@ -45,6 +45,9 @@ /* Support UK10_2 IOCTLS */ #define BASE_LEGACY_UK10_2_SUPPORT 1 +/* Support UK10_4 IOCTLS */ +#define BASE_LEGACY_UK10_4_SUPPORT 1 + typedef struct base_mem_handle { struct { u64 handle; @@ -1807,4 +1810,10 @@ typedef struct base_profiling_controls { u32 profiling_controls[FBDUMP_CONTROL_MAX]; } base_profiling_controls; +/* Enable additional tracepoints for latency measurements (TL_ATOM_READY, + * TL_ATOM_DONE, TL_ATOM_PRIO_CHANGE, TL_ATOM_EVENT_POST) */ +#define BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS (1 << 0) + +#define BASE_TLSTREAM_FLAGS_MASK (BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) + #endif /* _BASE_KERNEL_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase.h b/drivers/gpu/arm/midgard/mali_kbase.h index b6d28fea9987..443d4b13a4fb 100644 --- a/drivers/gpu/arm/midgard/mali_kbase.h +++ b/drivers/gpu/arm/midgard/mali_kbase.h @@ -45,10 +45,15 @@ #include #include +/* + * Include mali_kbase_defs.h first as this provides types needed by other local + * header files. 
+ */ +#include "mali_kbase_defs.h" + +#include "mali_kbase_context.h" #include "mali_kbase_strings.h" -#include "mali_kbase_pm.h" #include "mali_kbase_mem_lowlevel.h" -#include "mali_kbase_defs.h" #include "mali_kbase_trace_timeline.h" #include "mali_kbase_js.h" #include "mali_kbase_mem.h" @@ -105,7 +110,6 @@ u32 kbase_get_profiling_control(struct kbase_device *kbdev, u32 control); struct kbase_context * kbase_create_context(struct kbase_device *kbdev, bool is_compat); void kbase_destroy_context(struct kbase_context *kctx); -int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags); int kbase_jd_init(struct kbase_context *kctx); void kbase_jd_exit(struct kbase_context *kctx); @@ -163,7 +167,7 @@ void kbase_gpu_cacheclean(struct kbase_device *kbdev, * than @katom will be soft stopped and put back in the queue, so that atoms * with higher priority can run. * - * The js_data.runpool_irq.lock must be held when calling this function. + * The hwaccess_lock must be held when calling this function. 
*/ void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx, struct kbase_jd_atom *katom); @@ -546,4 +550,58 @@ void kbasep_trace_dump(struct kbase_device *kbdev); void kbase_set_driver_inactive(struct kbase_device *kbdev, bool inactive); #endif /* CONFIG_MALI_DEBUG */ + +#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI) + +/* kbase_io_history_init - initialize data struct for register access history + * + * @h The register history to initialize + * @n The number of register accesses that the buffer could hold + * + * @return 0 if successfully initialized, failure otherwise + */ +int kbase_io_history_init(struct kbase_io_history *h, u16 n); + +/* kbase_io_history_term - uninit all resources for the register access history + * + * @h The register history to terminate + */ +void kbase_io_history_term(struct kbase_io_history *h); + +/* kbase_io_history_dump - print the register history to the kernel ring buffer + * + * @kbdev Pointer to kbase_device containing the register history to dump + */ +void kbase_io_history_dump(struct kbase_device *kbdev); + +/** + * kbase_io_history_resize - resize the register access history buffer. + * + * @h: Pointer to a valid register history to resize + * @new_size: Number of accesses the buffer could hold + * + * A successful resize will clear all recent register accesses. + * If resizing fails for any reason (e.g., could not allocate memory, invalid + * buffer size) then the original buffer will be kept intact. + * + * @return 0 if the buffer was resized, failure otherwise + */ +int kbase_io_history_resize(struct kbase_io_history *h, u16 new_size); + +#else /* CONFIG_DEBUG_FS */ + +#define kbase_io_history_init(...)
((int)0) + +#define kbase_io_history_term CSTD_NOP + +#define kbase_io_history_dump CSTD_NOP + +#define kbase_io_history_resize CSTD_NOP + +#endif /* CONFIG_DEBUG_FS */ + + #endif + + + diff --git a/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h b/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h index 9b00cce9b2b3..e674cc2ea183 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h +++ b/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h @@ -201,13 +201,13 @@ enum { /* * Default minimum number of scheduling ticks before jobs are hard-stopped */ -#define DEFAULT_JS_HARD_STOP_TICKS_SS (50) /* 5s */ +#define DEFAULT_JS_HARD_STOP_TICKS_SS (100) /* 10s */ #define DEFAULT_JS_HARD_STOP_TICKS_SS_8408 (300) /* 30s */ /* * Default minimum number of scheduling ticks before CL jobs are hard-stopped. */ -#define DEFAULT_JS_HARD_STOP_TICKS_CL (50) /* 5s */ +#define DEFAULT_JS_HARD_STOP_TICKS_CL (100) /* 10s */ /* * Default minimum number of scheduling ticks before jobs are hard-stopped @@ -225,14 +225,14 @@ enum { * Default minimum number of scheduling ticks before the GPU is reset to clear a * "stuck" job */ -#define DEFAULT_JS_RESET_TICKS_SS (55) /* 5.5s */ +#define DEFAULT_JS_RESET_TICKS_SS (105) /* 10.5s */ #define DEFAULT_JS_RESET_TICKS_SS_8408 (450) /* 45s */ /* * Default minimum number of scheduling ticks before the GPU is reset to clear a * "stuck" CL job. 
*/ -#define DEFAULT_JS_RESET_TICKS_CL (55) /* 5.5s */ +#define DEFAULT_JS_RESET_TICKS_CL (105) /* 10.5s */ /* * Default minimum number of scheduling ticks before the GPU is reset to clear a diff --git a/drivers/gpu/arm/midgard/mali_kbase_context.c b/drivers/gpu/arm/midgard/mali_kbase_context.c index 344a1f16de8a..55c5ef613c37 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_context.c +++ b/drivers/gpu/arm/midgard/mali_kbase_context.c @@ -53,13 +53,13 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat) kctx->kbdev = kbdev; kctx->as_nr = KBASEP_AS_NR_INVALID; - kctx->is_compat = is_compat; + if (is_compat) + kbase_ctx_flag_set(kctx, KCTX_COMPAT); #ifdef CONFIG_MALI_TRACE_TIMELINE kctx->timeline.owner_tgid = task_tgid_nr(current); #endif atomic_set(&kctx->setup_complete, 0); atomic_set(&kctx->setup_in_progress, 0); - kctx->infinite_cache_active = 0; spin_lock_init(&kctx->mm_update_lock); kctx->process_mm = NULL; atomic_set(&kctx->nonmapped_pages, 0); @@ -108,11 +108,15 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat) if (err) goto term_dma_fence; - kctx->pgd = kbase_mmu_alloc_pgd(kctx); - if (!kctx->pgd) - goto free_mmu; + do { + err = kbase_mem_pool_grow(&kctx->mem_pool, + MIDGARD_MMU_BOTTOMLEVEL); + if (err) + goto pgd_no_mem; + kctx->pgd = kbase_mmu_alloc_pgd(kctx); + } while (!kctx->pgd); - kctx->aliasing_sink_page = kbase_mem_pool_alloc(&kctx->mem_pool); + kctx->aliasing_sink_page = kbase_mem_alloc_page(kctx->kbdev); if (!kctx->aliasing_sink_page) goto no_sink_page; @@ -162,7 +166,7 @@ no_sink_page: kbase_gpu_vm_lock(kctx); kbase_mmu_free_pgd(kctx); kbase_gpu_vm_unlock(kctx); -free_mmu: +pgd_no_mem: kbase_mmu_term(kctx); term_dma_fence: kbase_dma_fence_term(kctx); @@ -300,17 +304,16 @@ int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags) } mutex_lock(&js_kctx_info->ctx.jsctx_mutex); - spin_lock_irqsave(&kctx->kbdev->js_data.runpool_irq.lock, irq_flags); + spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, 
irq_flags); /* Translate the flags */ if ((flags & BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) == 0) - js_kctx_info->ctx.flags &= ~((u32) KBASE_CTX_FLAG_SUBMIT_DISABLED); + kbase_ctx_flag_clear(kctx, KCTX_SUBMIT_DISABLED); /* Latch the initial attributes into the Job Scheduler */ kbasep_js_ctx_attr_set_initial_attrs(kctx->kbdev, kctx); - spin_unlock_irqrestore(&kctx->kbdev->js_data.runpool_irq.lock, - irq_flags); + spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, irq_flags); mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); out: return err; diff --git a/drivers/gpu/arm/midgard/mali_kbase_context.h b/drivers/gpu/arm/midgard/mali_kbase_context.h new file mode 100644 index 000000000000..a3f5bb0ce0da --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_context.h @@ -0,0 +1,90 @@ +/* + * + * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +#ifndef _KBASE_CONTEXT_H_ +#define _KBASE_CONTEXT_H_ + +#include + + +int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags); + +/** + * kbase_ctx_flag - Check if @flag is set on @kctx + * @kctx: Pointer to kbase context to check + * @flag: Flag to check + * + * Return: true if @flag is set on @kctx, false if not. + */ +static inline bool kbase_ctx_flag(struct kbase_context *kctx, + enum kbase_context_flags flag) +{ + return atomic_read(&kctx->flags) & flag; +} + +/** + * kbase_ctx_flag_clear - Clear @flag on @kctx + * @kctx: Pointer to kbase context + * @flag: Flag to clear + * + * Clear the @flag on @kctx. 
This is done atomically, so other flags being + * cleared or set at the same time will be safe. + * + * Some flags have locking requirements, check the documentation for the + * respective flags. + */ +static inline void kbase_ctx_flag_clear(struct kbase_context *kctx, + enum kbase_context_flags flag) +{ +#if KERNEL_VERSION(4, 3, 0) > LINUX_VERSION_CODE + /* + * Earlier kernel versions don't have atomic_andnot() or + * atomic_and(). atomic_clear_mask() was only available on some + * architectures and was removed in v3.13 on arm and arm64. + * + * Use a compare-exchange loop to clear the flag on pre-4.3 kernels; + * atomic_andnot() is only used from v4.3 onwards. + */ + int old, new; + + do { + old = atomic_read(&kctx->flags); + new = old & ~flag; + + } while (atomic_cmpxchg(&kctx->flags, old, new) != old); +#else + atomic_andnot(flag, &kctx->flags); +#endif +} + +/** + * kbase_ctx_flag_set - Set @flag on @kctx + * @kctx: Pointer to kbase context + * @flag: Flag to set + * + * Set the @flag on @kctx. This is done atomically, so other flags being + * cleared or set at the same time will be safe. + * + * Some flags have locking requirements, check the documentation for the + * respective flags.
+ */ +static inline void kbase_ctx_flag_set(struct kbase_context *kctx, + enum kbase_context_flags flag) +{ + atomic_or(flag, &kctx->flags); +} +#endif /* _KBASE_CONTEXT_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_core_linux.c b/drivers/gpu/arm/midgard/mali_kbase_core_linux.c index ee59504cd4e8..205d44e7a2a1 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_core_linux.c +++ b/drivers/gpu/arm/midgard/mali_kbase_core_linux.c @@ -35,6 +35,7 @@ #if !MALI_CUSTOMER_RELEASE #include "mali_kbase_regs_dump_debugfs.h" #endif /* !MALI_CUSTOMER_RELEASE */ +#include "mali_kbase_regs_history_debugfs.h" #include #include #include @@ -117,39 +118,6 @@ static inline void __compile_time_asserts(void) CSTD_COMPILE_TIME_ASSERT(sizeof(KERNEL_SIDE_DDK_VERSION_STRING) <= KBASE_GET_VERSION_BUFFER_SIZE); } -#ifdef CONFIG_KDS - -struct kbasep_kds_resource_set_file_data { - struct kds_resource_set *lock; -}; - -static int kds_resource_release(struct inode *inode, struct file *file); - -static const struct file_operations kds_resource_fops = { - .release = kds_resource_release -}; - -struct kbase_kds_resource_list_data { - struct kds_resource **kds_resources; - unsigned long *kds_access_bitmap; - int num_elems; -}; - -static int kds_resource_release(struct inode *inode, struct file *file) -{ - struct kbasep_kds_resource_set_file_data *data; - - data = (struct kbasep_kds_resource_set_file_data *)file->private_data; - if (NULL != data) { - if (NULL != data->lock) - kds_resource_set_release(&data->lock); - - kfree(data); - } - return 0; -} -#endif /* CONFIG_KDS */ - static void kbase_create_timeline_objects(struct kbase_context *kctx) { struct kbase_device *kbdev = kctx->kbdev; @@ -291,6 +259,7 @@ enum { inited_debugfs = (1u << 15), inited_gpu_device = (1u << 16), inited_registers_map = (1u << 17), + inited_io_history = (1u << 18), inited_power_control = (1u << 19), inited_buslogger = (1u << 20) }; @@ -402,7 +371,7 @@ static int kbase_dispatch(struct kbase_context *kctx, void * const
args, u32 arg goto bad_size; #if defined(CONFIG_64BIT) - if (!kctx->is_compat) { + if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) { /* force SAME_VA if a 64-bit client */ mem->flags |= BASE_MEM_SAME_VA; } @@ -423,7 +392,7 @@ static int kbase_dispatch(struct kbase_context *kctx, void * const args, u32 arg if (sizeof(*mem_import) != args_size) goto bad_size; #ifdef CONFIG_COMPAT - if (kctx->is_compat) + if (kbase_ctx_flag(kctx, KCTX_COMPAT)) phandle = compat_ptr(mem_import->phandle.compat_value); else #endif @@ -464,7 +433,7 @@ static int kbase_dispatch(struct kbase_context *kctx, void * const args, u32 arg } #ifdef CONFIG_COMPAT - if (kctx->is_compat) + if (kbase_ctx_flag(kctx, KCTX_COMPAT)) user_ai = compat_ptr(alias->ai.compat_value); else #endif @@ -902,14 +871,14 @@ copy_failed: } #ifdef CONFIG_COMPAT - if (kctx->is_compat) + if (kbase_ctx_flag(kctx, KCTX_COMPAT)) user_buf = compat_ptr(add_data->buf.compat_value); else #endif user_buf = add_data->buf.value; buf = kmalloc(add_data->len, GFP_KERNEL); - if (!buf) + if (ZERO_OR_NULL_PTR(buf)) goto out_bad; if (0 != copy_from_user(buf, user_buf, add_data->len)) { @@ -940,7 +909,28 @@ copy_failed: break; } #endif /* CONFIG_MALI_NO_MALI */ +#ifdef BASE_LEGACY_UK10_4_SUPPORT + case KBASE_FUNC_TLSTREAM_ACQUIRE_V10_4: + { + struct kbase_uk_tlstream_acquire_v10_4 *tlstream_acquire + = args; + if (sizeof(*tlstream_acquire) != args_size) + goto bad_size; + + if (0 != kbase_tlstream_acquire( + kctx, + &tlstream_acquire->fd, 0)) { + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + } else if (0 <= tlstream_acquire->fd) { + /* Summary stream was cleared during acquire. + * Create static timeline objects that will be + * read by client. 
*/ + kbase_create_timeline_objects(kctx); + } + break; + } +#endif /* BASE_LEGACY_UK10_4_SUPPORT */ case KBASE_FUNC_TLSTREAM_ACQUIRE: { struct kbase_uk_tlstream_acquire *tlstream_acquire = @@ -949,9 +939,13 @@ copy_failed: if (sizeof(*tlstream_acquire) != args_size) goto bad_size; + if (tlstream_acquire->flags & ~BASE_TLSTREAM_FLAGS_MASK) + goto out_bad; + if (0 != kbase_tlstream_acquire( kctx, - &tlstream_acquire->fd)) { + &tlstream_acquire->fd, + tlstream_acquire->flags)) { ukh->ret = MALI_ERROR_FUNCTION_FAILED; } else if (0 <= tlstream_acquire->fd) { /* Summary stream was cleared during acquire. @@ -1134,6 +1128,63 @@ void kbase_release_device(struct kbase_device *kbdev) } EXPORT_SYMBOL(kbase_release_device); +#if KERNEL_VERSION(4, 4, 0) > LINUX_VERSION_CODE +/* + * Kernels before v4.6 don't have kstrtobool_from_user(). Copy at most + * sizeof(buf) - 1 bytes so the terminating NUL always fits in buf. + */ +static int kstrtobool_from_user(const char __user *s, size_t count, bool *res) +{ + char buf[32]; + + count = min(sizeof(buf) - 1, count); + + if (copy_from_user(buf, s, count)) + return -EFAULT; + buf[count] = '\0'; + + return strtobool(buf, res); +} +#endif + +static ssize_t write_ctx_infinite_cache(struct file *f, const char __user *ubuf, size_t size, loff_t *off) +{ + struct kbase_context *kctx = f->private_data; + int err; + bool value; + + err = kstrtobool_from_user(ubuf, size, &value); + if (err) + return err; + + if (value) + kbase_ctx_flag_set(kctx, KCTX_INFINITE_CACHE); + else + kbase_ctx_flag_clear(kctx, KCTX_INFINITE_CACHE); + + return size; +} + +static ssize_t read_ctx_infinite_cache(struct file *f, char __user *ubuf, size_t size, loff_t *off) +{ + struct kbase_context *kctx = f->private_data; + char buf[32]; + int count; + bool value; + + value = kbase_ctx_flag(kctx, KCTX_INFINITE_CACHE); + + count = scnprintf(buf, sizeof(buf), "%s\n", value ?
"Y" : "N"); + + return simple_read_from_buffer(ubuf, size, off, buf, count); +} + +static const struct file_operations kbase_infinite_cache_fops = { + .open = simple_open, + .write = write_ctx_infinite_cache, + .read = read_ctx_infinite_cache, +}; + static int kbase_open(struct inode *inode, struct file *filp) { struct kbase_device *kbdev = NULL; @@ -1158,7 +1209,8 @@ static int kbase_open(struct inode *inode, struct file *filp) filp->private_data = kctx; kctx->filp = filp; - kctx->infinite_cache_active = kbdev->infinite_cache_active_default; + if (kbdev->infinite_cache_active_default) + kbase_ctx_flag_set(kctx, KCTX_INFINITE_CACHE); #ifdef CONFIG_DEBUG_FS snprintf(kctx_name, 64, "%d_%d", kctx->tgid, kctx->id); @@ -1176,20 +1228,20 @@ static int kbase_open(struct inode *inode, struct file *filp) * infinite cache control support from debugfs. */ #else - debugfs_create_bool("infinite_cache", 0644, kctx->kctx_dentry, - (bool*)&(kctx->infinite_cache_active)); + debugfs_create_file("infinite_cache", 0644, kctx->kctx_dentry, + kctx, &kbase_infinite_cache_fops); #endif /* CONFIG_MALI_COH_USER */ mutex_init(&kctx->mem_profile_lock); - kbasep_jd_debugfs_ctx_add(kctx); + kbasep_jd_debugfs_ctx_init(kctx); kbase_debug_mem_view_init(filp); kbase_debug_job_fault_context_init(kctx); - kbase_mem_pool_debugfs_add(kctx->kctx_dentry, &kctx->mem_pool); + kbase_mem_pool_debugfs_init(kctx->kctx_dentry, &kctx->mem_pool); - kbase_jit_debugfs_add(kctx); + kbase_jit_debugfs_init(kctx); #endif /* CONFIG_DEBUG_FS */ dev_dbg(kbdev->dev, "created base context\n"); @@ -1486,7 +1538,7 @@ static unsigned long kbase_get_unmapped_area(struct file *filp, if (len > TASK_SIZE - SZ_2M) return -ENOMEM; - if (kctx->is_compat) + if (kbase_ctx_flag(kctx, KCTX_COMPAT)) return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags); @@ -1836,13 +1888,12 @@ static ssize_t set_core_mask(struct device *dev, struct device_attribute *attr, new_core_mask[2]) { unsigned long flags; - 
spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbase_pm_set_debug_core_mask(kbdev, new_core_mask[0], new_core_mask[1], new_core_mask[2]); - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, - flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } return count; @@ -1992,7 +2043,7 @@ static ssize_t set_js_timeouts(struct device *dev, struct device_attribute *attr struct kbasep_js_device_data *js_data = &kbdev->js_data; unsigned long flags; - spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); #define UPDATE_TIMEOUT(ticks_name, ms_name, default) do {\ js_data->ticks_name = timeout_ms_to_ticks(kbdev, ms_name, \ @@ -2027,7 +2078,7 @@ static ssize_t set_js_timeouts(struct device *dev, struct device_attribute *attr kbase_js_set_timeouts(kbdev); - spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); return count; } @@ -2176,7 +2227,7 @@ static ssize_t set_js_scheduling_period(struct device *dev, /* Update scheduling timeouts */ mutex_lock(&js_data->runpool_mutex); - spin_lock_irqsave(&js_data->runpool_irq.lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); /* If no contexts have been scheduled since js_timeouts was last written * to, the new timeouts might not have been latched yet. 
So check if an @@ -2206,7 +2257,7 @@ static ssize_t set_js_scheduling_period(struct device *dev, kbase_js_set_timeouts(kbdev); - spin_unlock_irqrestore(&js_data->runpool_irq.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); mutex_unlock(&js_data->runpool_mutex); dev_dbg(kbdev->dev, "JS scheduling period: %dms\n", @@ -2540,6 +2591,8 @@ static ssize_t kbase_show_gpuinfo(struct device *dev, { .id = GPU_ID_PI_TFRX, .name = "Mali-T88x" }, { .id = GPU_ID2_PRODUCT_TMIX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, .name = "Mali-G71" }, + { .id = GPU_ID2_PRODUCT_THEX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, + .name = "Mali-THEx" }, }; const char *product_name = "(Unknown Mali GPU)"; struct kbase_device *kbdev; @@ -3029,7 +3082,8 @@ static int power_control_init(struct platform_device *pdev) #if defined(CONFIG_OF) && defined(CONFIG_PM_OPP) /* Register the OPPs if they are available in device tree */ -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) \ + || defined(LSK_OPPV2_BACKPORT) err = dev_pm_opp_of_add_table(kbdev->dev); #elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)) err = of_init_opp_table(kbdev->dev); @@ -3121,6 +3175,48 @@ MAKE_QUIRK_ACCESSORS(mmu); #endif /* KBASE_GPU_RESET_EN */ +/** + * debugfs_protected_debug_mode_read - "protected_debug_mode" debugfs read + * @file: File object to read is for + * @buf: User buffer to populate with data + * @len: Length of user buffer + * @ppos: Offset within file object + * + * Retrieves the current status of protected debug mode + * (0 = disabled, 1 = enabled) + * + * Return: Number of bytes added to user buffer + */ +static ssize_t debugfs_protected_debug_mode_read(struct file *file, + char __user *buf, size_t len, loff_t *ppos) +{ + struct kbase_device *kbdev = (struct kbase_device *)file->private_data; + u32 gpu_status; + ssize_t ret_val; + + kbase_pm_context_active(kbdev); + gpu_status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS), NULL); + 
kbase_pm_context_idle(kbdev); + + if (gpu_status & GPU_DBGEN) + ret_val = simple_read_from_buffer(buf, len, ppos, "1\n", 2); + else + ret_val = simple_read_from_buffer(buf, len, ppos, "0\n", 2); + + return ret_val; +} + +/* + * struct fops_protected_debug_mode - "protected_debug_mode" debugfs fops + * + * Contains the file operations for the "protected_debug_mode" debugfs file + */ +static const struct file_operations fops_protected_debug_mode = { + .open = simple_open, + .read = debugfs_protected_debug_mode_read, + .llseek = default_llseek, +}; + static int kbase_device_debugfs_init(struct kbase_device *kbdev) { struct dentry *debugfs_ctx_defaults_directory; @@ -3151,8 +3247,9 @@ static int kbase_device_debugfs_init(struct kbase_device *kbdev) } #if !MALI_CUSTOMER_RELEASE - kbasep_regs_dump_debugfs_add(kbdev); + kbasep_regs_dump_debugfs_init(kbdev); #endif /* !MALI_CUSTOMER_RELEASE */ + kbasep_regs_history_debugfs_init(kbdev); kbase_debug_job_fault_debugfs_init(kbdev); kbasep_gpu_memory_debugfs_init(kbdev); @@ -3179,6 +3276,12 @@ static int kbase_device_debugfs_init(struct kbase_device *kbdev) debugfs_ctx_defaults_directory, &kbdev->mem_pool_max_size_default); + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) { + debugfs_create_file("protected_debug_mode", S_IRUGO, + kbdev->mali_debugfs_directory, kbdev, + &fops_protected_debug_mode); + } + #if KBASE_TRACE_ENABLE kbasep_trace_debugfs_init(kbdev); #endif /* KBASE_TRACE_ENABLE */ @@ -3401,6 +3504,11 @@ static int kbase_platform_device_remove(struct platform_device *pdev) kbdev->inited_subsys &= ~inited_backend_early; } + if (kbdev->inited_subsys & inited_io_history) { + kbase_io_history_term(&kbdev->io_history); + kbdev->inited_subsys &= ~inited_io_history; + } + if (kbdev->inited_subsys & inited_power_control) { power_control_term(kbdev); kbdev->inited_subsys &= ~inited_power_control; @@ -3434,6 +3542,10 @@ static void kbase_platform_device_shutdown(struct platform_device *pdev) 
kbase_platform_rk_shutdown(kbdev); } +/* Number of register accesses for the buffer that we allocate during + * initialization time. The buffer size can be changed later via debugfs. */ +#define KBASEP_DEFAULT_REGISTER_HISTORY_SIZE ((u16)512) + static int kbase_platform_device_probe(struct platform_device *pdev) { struct kbase_device *kbdev; @@ -3494,6 +3606,15 @@ static int kbase_platform_device_probe(struct platform_device *pdev) } kbdev->inited_subsys |= inited_power_control; + err = kbase_io_history_init(&kbdev->io_history, + KBASEP_DEFAULT_REGISTER_HISTORY_SIZE); + if (err) { + dev_err(&pdev->dev, "Register access history initialization failed\n"); + kbase_platform_device_remove(pdev); + return -ENOMEM; + } + kbdev->inited_subsys |= inited_io_history; + err = kbase_backend_early_init(kbdev); if (err) { dev_err(kbdev->dev, "Early backend initialization failed\n"); @@ -3673,6 +3794,9 @@ static int kbase_platform_device_probe(struct platform_device *pdev) return err; } +#undef KBASEP_DEFAULT_REGISTER_HISTORY_SIZE + + /** Suspend callback from the OS. * * This is called by Linux when the device should suspend. 
diff --git a/drivers/gpu/arm/midgard/mali_kbase_defs.h b/drivers/gpu/arm/midgard/mali_kbase_defs.h index 4bb8c2c7aec2..edd671106e2c 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_defs.h +++ b/drivers/gpu/arm/midgard/mali_kbase_defs.h @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -227,6 +228,39 @@ struct kbase_jd_atom_dependency { u8 dep_type; }; +/** + * struct kbase_io_access - holds information about 1 register access + * + * @addr: first bit indicates r/w (r=0, w=1) + * @value: value written or read + */ +struct kbase_io_access { + uintptr_t addr; + u32 value; +}; + +/** + * struct kbase_io_history - keeps track of all recent register accesses + * + * @enabled: true if register accesses are recorded, false otherwise + * @lock: spinlock protecting kbase_io_access array + * @count: number of registers read/written + * @size: number of elements in kbase_io_access array + * @buf: array of kbase_io_access + */ +struct kbase_io_history { +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) + bool enabled; +#else + u32 enabled; +#endif + + spinlock_t lock; + size_t count; + u16 size; + struct kbase_io_access *buf; +}; + /** * @brief The function retrieves a read-only reference to the atom field from * the kbase_jd_atom_dependency structure @@ -301,15 +335,17 @@ enum kbase_atom_gpu_rb_state { KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB, /* Atom is in slot ringbuffer but is blocked on a previous atom */ KBASE_ATOM_GPU_RB_WAITING_BLOCKED, - /* Atom is in slot ringbuffer but is waiting for proected mode exit */ - KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_EXIT, + /* Atom is in slot ringbuffer but is waiting for a previous protected + * mode transition to complete */ + KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV, + /* Atom is in slot ringbuffer but is waiting for protected mode + * transition */ + KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION, /* Atom is in slot ringbuffer but is waiting for cores to become * available */
KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE, /* Atom is in slot ringbuffer but is blocked on affinity */ KBASE_ATOM_GPU_RB_WAITING_AFFINITY, - /* Atom is in slot ringbuffer but is waiting for protected mode entry */ - KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_ENTRY, /* Atom is in slot ringbuffer and ready to run */ KBASE_ATOM_GPU_RB_READY, /* Atom is in slot ringbuffer and has been submitted to the GPU */ @@ -319,20 +355,41 @@ enum kbase_atom_gpu_rb_state { KBASE_ATOM_GPU_RB_RETURN_TO_JS }; +enum kbase_atom_enter_protected_state { + /* + * Starting state: + * Check if a transition into protected mode is required. + * + * NOTE: The integer value of this must + * match KBASE_ATOM_EXIT_PROTECTED_CHECK. + */ + KBASE_ATOM_ENTER_PROTECTED_CHECK = 0, + /* Wait for vinstr to suspend. */ + KBASE_ATOM_ENTER_PROTECTED_VINSTR, + /* Wait for the L2 to become idle in preparation for + * the coherency change. */ + KBASE_ATOM_ENTER_PROTECTED_IDLE_L2, + /* End state; + * Prepare coherency change. */ + KBASE_ATOM_ENTER_PROTECTED_FINISHED, +}; + enum kbase_atom_exit_protected_state { /* * Starting state: * Check if a transition out of protected mode is required. + * + * NOTE: The integer value of this must + * match KBASE_ATOM_ENTER_PROTECTED_CHECK. */ - KBASE_ATOM_EXIT_PROTECTED_CHECK, - /* Wait for the L2 to become idle in preparation for the reset. */ + KBASE_ATOM_EXIT_PROTECTED_CHECK = 0, + /* Wait for the L2 to become idle in preparation + * for the reset. */ KBASE_ATOM_EXIT_PROTECTED_IDLE_L2, /* Issue the protected reset. */ KBASE_ATOM_EXIT_PROTECTED_RESET, - /* - * End state; - * Wait for the reset to complete. - */ + /* End state; + * Wait for the reset to complete. */ KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT, }; @@ -497,7 +554,13 @@ struct kbase_jd_atom { * event_code when the atom is processed. 
*/ enum base_jd_event_code will_fail_event_code; - enum kbase_atom_exit_protected_state exit_protected_state; + /* Atoms will only ever be transitioning into, or out of + * protected mode so we do not need two separate fields. + */ + union { + enum kbase_atom_enter_protected_state enter; + enum kbase_atom_exit_protected_state exit; + } protected_state; struct rb_node runnable_tree_node; @@ -600,19 +663,19 @@ struct kbase_as { struct work_struct work_pagefault; struct work_struct work_busfault; enum kbase_mmu_fault_type fault_type; + bool protected_mode; u32 fault_status; u64 fault_addr; u64 fault_extra_addr; - struct mutex transaction_mutex; struct kbase_mmu_setup current_setup; /* BASE_HW_ISSUE_8316 */ struct workqueue_struct *poke_wq; struct work_struct poke_work; - /** Protected by kbasep_js_device_data::runpool_irq::lock */ + /** Protected by hwaccess_lock */ int poke_refcount; - /** Protected by kbasep_js_device_data::runpool_irq::lock */ + /** Protected by hwaccess_lock */ kbase_as_poke_state poke_state; struct hrtimer poke_timer; }; @@ -733,8 +796,7 @@ struct kbase_trace_kbdev_timeline { * But it's kept as an example of how to add global timeline tracking * information * - * The caller must hold kbasep_js_device_data::runpool_irq::lock when - * accessing this */ + * The caller must hold hwaccess_lock when accessing this */ u8 slot_atoms_submitted[BASE_JM_MAX_NR_SLOTS]; /* Last UID for each PM event */ @@ -743,7 +805,7 @@ struct kbase_trace_kbdev_timeline { atomic_t pm_event_uid_counter; /* * L2 transition state - true indicates that the transition is ongoing - * Expected to be protected by pm.power_change_lock */ + * Expected to be protected by hwaccess_lock */ bool l2_transitioning; }; #endif /* CONFIG_MALI_TRACE_TIMELINE */ @@ -784,19 +846,6 @@ struct kbase_pm_device_data { u64 debug_core_mask[BASE_JM_MAX_NR_SLOTS]; u64 debug_core_mask_all; - /** - * Lock protecting the power state of the device. 
- * - * This lock must be held when accessing the shader_available_bitmap, - * tiler_available_bitmap, l2_available_bitmap, shader_inuse_bitmap and - * tiler_inuse_bitmap fields of kbase_device, and the ca_in_transition - * and shader_poweroff_pending fields of kbase_pm_device_data. It is - * also held when the hardware power registers are being written to, to - * ensure that two threads do not conflict over the power transitions - * that the hardware should make. - */ - spinlock_t power_change_lock; - /** * Callback for initializing the runtime power management. * @@ -1108,6 +1157,11 @@ struct kbase_device { /* Total number of created contexts */ atomic_t ctx_num; +#ifdef CONFIG_DEBUG_FS + /* Holds the most recent register accesses */ + struct kbase_io_history io_history; +#endif /* CONFIG_DEBUG_FS */ + struct kbase_hwaccess_data hwaccess; /* Count of page/bus faults waiting for workqueues to process */ @@ -1125,6 +1179,8 @@ struct kbase_device { #endif size_t mem_pool_max_size_default; + /* current gpu coherency mode */ + u32 current_gpu_coherency_mode; /* system coherency mode */ u32 system_coherency; /* Flag to track when cci snoops have been enabled on the interface */ @@ -1171,6 +1227,11 @@ struct kbase_device { /* list of inited sub systems. Used during terminate/error recovery */ u32 inited_subsys; + + spinlock_t hwaccess_lock; + + /* Protects access to MMU operations */ + struct mutex mmu_hw_mutex; }; /** @@ -1181,7 +1242,7 @@ struct kbase_device { * dependencies. Atoms on this list will be moved to the * runnable_tree when the blocking atom completes. * - * runpool_irq.lock must be held when accessing this structure. + * hwaccess_lock must be held when accessing this structure. 
*/ struct jsctx_queue { struct rb_root runnable_tree; @@ -1193,6 +1254,52 @@ struct jsctx_queue { (((minor) & 0xFFF) << 8) | \ ((0 & 0xFF) << 0)) +/** + * enum kbase_context_flags - Flags for kbase contexts + * + * @KCTX_COMPAT: Set when the context process is a compat process, 32-bit + * process on a 64-bit kernel. + * + * @KCTX_RUNNABLE_REF: Set when context is counted in + * kbdev->js_data.nr_contexts_runnable. Must hold queue_mutex when accessing. + * + * @KCTX_ACTIVE: Set when the context is active. + * + * @KCTX_PULLED: Set when last kick() caused atoms to be pulled from this + * context. + * + * @KCTX_MEM_PROFILE_INITIALIZED: Set when the context's memory profile has been + * initialized. + * + * @KCTX_INFINITE_CACHE: Set when infinite cache is to be enabled for new + * allocations. Existing allocations will not change. + * + * @KCTX_SUBMIT_DISABLED: Set to prevent context from submitting any jobs. + * + * @KCTX_PRIVILEGED:Set if the context uses an address space and should be kept + * scheduled in. + * + * @KCTX_SCHEDULED: Set when the context is scheduled on the Run Pool. + * This is only ever updated whilst the jsctx_mutex is held. + * + * @KCTX_DYING: Set when the context process is in the process of being evicted. + * + * All members need to be separate bits. This enum is intended for use in a + * bitmask where multiple values get OR-ed together. 
+ */ +enum kbase_context_flags { + KCTX_COMPAT = 1U << 0, + KCTX_RUNNABLE_REF = 1U << 1, + KCTX_ACTIVE = 1U << 2, + KCTX_PULLED = 1U << 3, + KCTX_MEM_PROFILE_INITIALIZED = 1U << 4, + KCTX_INFINITE_CACHE = 1U << 5, + KCTX_SUBMIT_DISABLED = 1U << 6, + KCTX_PRIVILEGED = 1U << 7, + KCTX_SCHEDULED = 1U << 8, + KCTX_DYING = 1U << 9, +}; + struct kbase_context { struct file *filp; struct kbase_device *kbdev; @@ -1207,7 +1314,7 @@ struct kbase_context { atomic_t event_count; int event_coalesce_count; - bool is_compat; + atomic_t flags; atomic_t setup_complete; atomic_t setup_in_progress; @@ -1251,12 +1358,11 @@ struct kbase_context { /** This is effectively part of the Run Pool, because it only has a valid * setting (!=KBASEP_AS_NR_INVALID) whilst the context is scheduled in * - * The kbasep_js_device_data::runpool_irq::lock must be held whilst accessing - * this. + * The hwaccess_lock must be held whilst accessing this. * * If the context relating to this as_nr is required, you must use * kbasep_js_runpool_retain_ctx() to ensure that the context doesn't disappear - * whilst you're using it. Alternatively, just hold the kbasep_js_device_data::runpool_irq::lock + * whilst you're using it. 
Alternatively, just hold the hwaccess_lock * to ensure the context doesn't disappear (but this has restrictions on what other locks * you can take whilst doing this) */ int as_nr; @@ -1282,8 +1388,7 @@ struct kbase_context { size_t mem_profile_size; /* Mutex guarding memory profile state */ struct mutex mem_profile_lock; - /* Memory profile file created */ - bool mem_profile_initialized; + /* Memory profile directory under debugfs */ struct dentry *kctx_dentry; /* for job fault debug */ @@ -1303,15 +1408,6 @@ struct kbase_context { atomic_t atoms_pulled; /* Number of atoms currently pulled from this context, per slot */ atomic_t atoms_pulled_slot[BASE_JM_MAX_NR_SLOTS]; - /* true if last kick() caused atoms to be pulled from this context */ - bool pulled; - /* true if infinite cache is to be enabled for new allocations. Existing - * allocations will not change. bool stored as a u32 per Linux API */ -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) - bool infinite_cache_active; -#else - u32 infinite_cache_active; -#endif /* Bitmask of slots that can be pulled from */ u32 slots_pullable; @@ -1325,17 +1421,11 @@ struct kbase_context { struct kbase_vinstr_client *vinstr_cli; struct mutex vinstr_cli_lock; - /* Must hold queue_mutex when accessing */ - bool ctx_active; - /* List of completed jobs waiting for events to be posted */ struct list_head completed_jobs; /* Number of work items currently pending on job_done_wq */ atomic_t work_count; - /* true if context is counted in kbdev->js_data.nr_contexts_runnable */ - bool ctx_runnable_ref; - /* Waiting soft-jobs will fail when this timer expires */ struct timer_list soft_job_timeout; diff --git a/drivers/gpu/arm/midgard/mali_kbase_device.c b/drivers/gpu/arm/midgard/mali_kbase_device.c index 62ab0caf9858..7484eec5108f 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_device.c +++ b/drivers/gpu/arm/midgard/mali_kbase_device.c @@ -82,7 +82,6 @@ static int kbase_device_as_init(struct kbase_device *kbdev, int i) if 
(!kbdev->as[i].pf_wq) return -EINVAL; - mutex_init(&kbdev->as[i].transaction_mutex); INIT_WORK(&kbdev->as[i].work_pagefault, page_fault_worker); INIT_WORK(&kbdev->as[i].work_busfault, bus_fault_worker); @@ -151,6 +150,7 @@ int kbase_device_init(struct kbase_device * const kbdev) #endif /* CONFIG_ARM64 */ spin_lock_init(&kbdev->mmu_mask_change); + mutex_init(&kbdev->mmu_hw_mutex); #ifdef CONFIG_ARM64 kbdev->cci_snoop_enabled = false; np = kbdev->dev->of_node; diff --git a/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c b/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c index 4d3836a1d4ab..97bb6c5db76c 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c +++ b/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c @@ -38,6 +38,8 @@ /* Spin lock protecting all Mali fences as fence->lock. */ static DEFINE_SPINLOCK(kbase_dma_fence_lock); +static void +kbase_dma_fence_work(struct work_struct *pwork); static void kbase_dma_fence_waiters_add(struct kbase_jd_atom *katom) @@ -167,9 +169,30 @@ kbase_dma_fence_unlock_reservations(struct kbase_dma_fence_resv_info *info, ww_acquire_fini(ctx); } +/** + * kbase_dma_fence_queue_work() - Queue work to handle @katom + * @katom: Pointer to atom for which to queue work + * + * Queue kbase_dma_fence_work() for @katom to clean up the fence callbacks and + * submit the atom. + */ +static void +kbase_dma_fence_queue_work(struct kbase_jd_atom *katom) +{ + struct kbase_context *kctx = katom->kctx; + bool ret; + + INIT_WORK(&katom->work, kbase_dma_fence_work); + ret = queue_work(kctx->dma_fence.wq, &katom->work); + /* Warn if work was already queued, that should not happen. */ + WARN_ON(!ret); +} + /** * kbase_dma_fence_free_callbacks - Free dma-fence callbacks on a katom * @katom: Pointer to katom + * @queue_worker: Boolean indicating if fence worker is to be queued when + * dep_count reaches 0. * * This function will free all fence callbacks on the katom's list of * callbacks. 
Callbacks that have not yet been called, because their fence @@ -178,7 +201,7 @@ kbase_dma_fence_unlock_reservations(struct kbase_dma_fence_resv_info *info, * Locking: katom->dma_fence.callbacks list assumes jctx.lock is held. */ static void -kbase_dma_fence_free_callbacks(struct kbase_jd_atom *katom) +kbase_dma_fence_free_callbacks(struct kbase_jd_atom *katom, bool queue_worker) { struct kbase_dma_fence_cb *cb, *tmp; @@ -191,10 +214,21 @@ kbase_dma_fence_free_callbacks(struct kbase_jd_atom *katom) /* Cancel callbacks that hasn't been called yet. */ ret = fence_remove_callback(cb->fence, &cb->fence_cb); if (ret) { + int ret; + /* Fence had not signaled, clean up after * canceling. */ - atomic_dec(&katom->dma_fence.dep_count); + ret = atomic_dec_return(&katom->dma_fence.dep_count); + + if (unlikely(queue_worker && ret == 0)) { + /* + * dep_count went to zero and queue_worker is + * true. Queue the worker to handle the + * completion of the katom. + */ + kbase_dma_fence_queue_work(katom); + } } /* @@ -219,7 +253,7 @@ kbase_dma_fence_cancel_atom(struct kbase_jd_atom *katom) lockdep_assert_held(&katom->kctx->jctx.lock); /* Cancel callbacks and clean up. */ - kbase_dma_fence_free_callbacks(katom); + kbase_dma_fence_free_callbacks(katom, false); KBASE_DEBUG_ASSERT(atomic_read(&katom->dma_fence.dep_count) == 0); @@ -264,9 +298,15 @@ kbase_dma_fence_work(struct work_struct *pwork) /* Remove atom from list of dma-fence waiting atoms. */ kbase_dma_fence_waiters_remove(katom); /* Cleanup callbacks. */ - kbase_dma_fence_free_callbacks(katom); - /* Queue atom on GPU. */ - kbase_jd_dep_clear_locked(katom); + kbase_dma_fence_free_callbacks(katom, false); + /* + * Queue atom on GPU, unless it has already completed due to a failing + * dependency. Run jd_done_nolock() on the katom if it is completed. 
+ */ + if (unlikely(katom->status == KBASE_JD_ATOM_STATE_COMPLETED)) + jd_done_nolock(katom, NULL); + else + kbase_jd_dep_clear_locked(katom); out: mutex_unlock(&ctx->lock); @@ -332,20 +372,13 @@ kbase_dma_fence_cb(struct fence *fence, struct fence_cb *cb) struct kbase_dma_fence_cb, fence_cb); struct kbase_jd_atom *katom = kcb->katom; - struct kbase_context *kctx = katom->kctx; /* If the atom is zapped dep_count will be forced to a negative number * preventing this callback from ever scheduling work. Which in turn * would reschedule the atom. */ - if (atomic_dec_and_test(&katom->dma_fence.dep_count)) { - bool ret; - - INIT_WORK(&katom->work, kbase_dma_fence_work); - ret = queue_work(kctx->dma_fence.wq, &katom->work); - /* Warn if work was already queued, that should not happen. */ - WARN_ON(!ret); - } + if (atomic_dec_and_test(&katom->dma_fence.dep_count)) + kbase_dma_fence_queue_work(katom); } static int @@ -406,7 +439,7 @@ out: * On error, cancel and clean up all callbacks that was set up * before the error. */ - kbase_dma_fence_free_callbacks(katom); + kbase_dma_fence_free_callbacks(katom, false); } return err; @@ -499,7 +532,7 @@ end: /* Test if the callbacks are already triggered */ if (atomic_dec_and_test(&katom->dma_fence.dep_count)) { atomic_set(&katom->dma_fence.dep_count, -1); - kbase_dma_fence_free_callbacks(katom); + kbase_dma_fence_free_callbacks(katom, false); } else { /* Add katom to the list of dma-buf fence waiting atoms * only if it is still waiting. @@ -512,7 +545,7 @@ end: * kill it for us), signal the fence, free callbacks and the * fence. 
*/ - kbase_dma_fence_free_callbacks(katom); + kbase_dma_fence_free_callbacks(katom, false); atomic_set(&katom->dma_fence.dep_count, -1); kbase_dma_fence_signal(katom); } @@ -522,10 +555,12 @@ end: void kbase_dma_fence_cancel_all_atoms(struct kbase_context *kctx) { - struct kbase_jd_atom *katom, *katom_tmp; + struct list_head *list = &kctx->dma_fence.waiting_resource; + + while (!list_empty(list)) { + struct kbase_jd_atom *katom; - list_for_each_entry_safe(katom, katom_tmp, - &kctx->dma_fence.waiting_resource, queue) { + katom = list_first_entry(list, struct kbase_jd_atom, queue); kbase_dma_fence_waiters_remove(katom); kbase_dma_fence_cancel_atom(katom); } @@ -534,7 +569,7 @@ void kbase_dma_fence_cancel_all_atoms(struct kbase_context *kctx) void kbase_dma_fence_cancel_callbacks(struct kbase_jd_atom *katom) { /* Cancel callbacks and clean up. */ - kbase_dma_fence_free_callbacks(katom); + kbase_dma_fence_free_callbacks(katom, true); } void kbase_dma_fence_signal(struct kbase_jd_atom *katom) @@ -549,7 +584,7 @@ void kbase_dma_fence_signal(struct kbase_jd_atom *katom) fence_put(katom->dma_fence.fence); katom->dma_fence.fence = NULL; - kbase_dma_fence_free_callbacks(katom); + kbase_dma_fence_free_callbacks(katom, false); } void kbase_dma_fence_term(struct kbase_context *kctx) diff --git a/drivers/gpu/arm/midgard/mali_kbase_event.c b/drivers/gpu/arm/midgard/mali_kbase_event.c index bf8c304610eb..f07406c3d5ab 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_event.c +++ b/drivers/gpu/arm/midgard/mali_kbase_event.c @@ -180,7 +180,7 @@ void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *atom) kbase_event_process_noreport(ctx, atom); return; } - + kbase_tlstream_tl_attrib_atom_state(atom, TL_ATOM_STATE_POSTED); if (atom->core_req & BASE_JD_REQ_EVENT_COALESCE) { /* Don't report the event until other event(s) have completed */ mutex_lock(&ctx->event_mutex); diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_api.c 
b/drivers/gpu/arm/midgard/mali_kbase_gator_api.c index 4af3e4815e95..3292fa95fddd 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_gator_api.c +++ b/drivers/gpu/arm/midgard/mali_kbase_gator_api.c @@ -41,7 +41,6 @@ const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_counters) { const char * const *hardware_counters; struct kbase_device *kbdev; - uint32_t gpu_id; uint32_t product_id; uint32_t count; @@ -53,25 +52,27 @@ const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_counters) if (!kbdev) return NULL; - gpu_id = kbdev->gpu_props.props.core_props.product_id; - product_id = gpu_id & GPU_ID_VERSION_PRODUCT_ID; - product_id >>= GPU_ID_VERSION_PRODUCT_ID_SHIFT; + product_id = kbdev->gpu_props.props.core_props.product_id; if (GPU_ID_IS_NEW_FORMAT(product_id)) { - switch (gpu_id & GPU_ID2_PRODUCT_MODEL) { + switch (GPU_ID2_MODEL_MATCH_VALUE(product_id)) { case GPU_ID2_PRODUCT_TMIX: hardware_counters = hardware_counters_mali_tMIx; count = ARRAY_SIZE(hardware_counters_mali_tMIx); break; + case GPU_ID2_PRODUCT_THEX: + hardware_counters = hardware_counters_mali_tHEx; + count = ARRAY_SIZE(hardware_counters_mali_tHEx); + break; default: hardware_counters = NULL; count = 0; - dev_err(kbdev->dev, "Unrecognized gpu ID: %u\n", - gpu_id); + dev_err(kbdev->dev, "Unrecognized product ID: %u\n", + product_id); break; } } else { - switch (gpu_id) { + switch (product_id) { /* If we are using a Mali-T60x device */ case GPU_ID_PI_T60X: hardware_counters = hardware_counters_mali_t60x; @@ -115,8 +116,8 @@ const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_counters) default: hardware_counters = NULL; count = 0; - dev_err(kbdev->dev, "Unrecognized gpu ID: %u\n", - gpu_id); + dev_err(kbdev->dev, "Unrecognized product ID: %u\n", + product_id); break; } } diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h index c247dd698e19..7ec05c1c7aa3 100644 --- 
a/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h +++ b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h @@ -2158,6 +2158,7 @@ static const char * const hardware_counters_mali_t88x[] = { #include "mali_kbase_gator_hwcnt_names_tmix.h" +#include "mali_kbase_gator_hwcnt_names_thex.h" #endif diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_thex.h b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_thex.h new file mode 100644 index 000000000000..bcceef4fc9bc --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_thex.h @@ -0,0 +1,291 @@ +/* + * + * (C) COPYRIGHT 2016 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +/* + * This header was autogenerated, it should not be edited. 
+ */ + +#ifndef _KBASE_GATOR_HWCNT_NAMES_THEX_H_ +#define _KBASE_GATOR_HWCNT_NAMES_THEX_H_ + +static const char * const hardware_counters_mali_tHEx[] = { + /* Performance counters for the Job Manager */ + "", + "", + "", + "", + "THEx_MESSAGES_SENT", + "THEx_MESSAGES_RECEIVED", + "THEx_GPU_ACTIVE", + "THEx_IRQ_ACTIVE", + "THEx_JS0_JOBS", + "THEx_JS0_TASKS", + "THEx_JS0_ACTIVE", + "", + "THEx_JS0_WAIT_READ", + "THEx_JS0_WAIT_ISSUE", + "THEx_JS0_WAIT_DEPEND", + "THEx_JS0_WAIT_FINISH", + "THEx_JS1_JOBS", + "THEx_JS1_TASKS", + "THEx_JS1_ACTIVE", + "", + "THEx_JS1_WAIT_READ", + "THEx_JS1_WAIT_ISSUE", + "THEx_JS1_WAIT_DEPEND", + "THEx_JS1_WAIT_FINISH", + "THEx_JS2_JOBS", + "THEx_JS2_TASKS", + "THEx_JS2_ACTIVE", + "", + "THEx_JS2_WAIT_READ", + "THEx_JS2_WAIT_ISSUE", + "THEx_JS2_WAIT_DEPEND", + "THEx_JS2_WAIT_FINISH", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + + /* Performance counters for the Tiler */ + "", + "", + "", + "", + "THEx_TILER_ACTIVE", + "THEx_JOBS_PROCESSED", + "THEx_TRIANGLES", + "THEx_LINES", + "THEx_POINTS", + "THEx_FRONT_FACING", + "THEx_BACK_FACING", + "THEx_PRIM_VISIBLE", + "THEx_PRIM_CULLED", + "THEx_PRIM_CLIPPED", + "THEx_PRIM_SAT_CULLED", + "", + "", + "THEx_BUS_READ", + "", + "THEx_BUS_WRITE", + "THEx_LOADING_DESC", + "THEx_IDVS_POS_SHAD_REQ", + "THEx_IDVS_POS_SHAD_WAIT", + "THEx_IDVS_POS_SHAD_STALL", + "THEx_IDVS_POS_FIFO_FULL", + "THEx_PREFETCH_STALL", + "THEx_VCACHE_HIT", + "THEx_VCACHE_MISS", + "THEx_VCACHE_LINE_WAIT", + "THEx_VFETCH_POS_READ_WAIT", + "THEx_VFETCH_VERTEX_WAIT", + "THEx_VFETCH_STALL", + "THEx_PRIMASSY_STALL", + "THEx_BBOX_GEN_STALL", + "THEx_IDVS_VBU_HIT", + "THEx_IDVS_VBU_MISS", + "THEx_IDVS_VBU_LINE_DEALLOCATE", + "THEx_IDVS_VAR_SHAD_REQ", + "THEx_IDVS_VAR_SHAD_STALL", + "THEx_BINNER_STALL", + "THEx_ITER_STALL", + "THEx_COMPRESS_MISS", + "THEx_COMPRESS_STALL", + 
"THEx_PCACHE_HIT", + "THEx_PCACHE_MISS", + "THEx_PCACHE_MISS_STALL", + "THEx_PCACHE_EVICT_STALL", + "THEx_PMGR_PTR_WR_STALL", + "THEx_PMGR_PTR_RD_STALL", + "THEx_PMGR_CMD_WR_STALL", + "THEx_WRBUF_ACTIVE", + "THEx_WRBUF_HIT", + "THEx_WRBUF_MISS", + "THEx_WRBUF_NO_FREE_LINE_STALL", + "THEx_WRBUF_NO_AXI_ID_STALL", + "THEx_WRBUF_AXI_STALL", + "", + "", + "", + "THEx_UTLB_TRANS", + "THEx_UTLB_TRANS_HIT", + "THEx_UTLB_TRANS_STALL", + "THEx_UTLB_TRANS_MISS_DELAY", + "THEx_UTLB_MMU_REQ", + + /* Performance counters for the Shader Core */ + "", + "", + "", + "", + "THEx_FRAG_ACTIVE", + "THEx_FRAG_PRIMITIVES", + "THEx_FRAG_PRIM_RAST", + "THEx_FRAG_FPK_ACTIVE", + "THEx_FRAG_STARVING", + "THEx_FRAG_WARPS", + "THEx_FRAG_PARTIAL_WARPS", + "THEx_FRAG_QUADS_RAST", + "THEx_FRAG_QUADS_EZS_TEST", + "THEx_FRAG_QUADS_EZS_UPDATE", + "THEx_FRAG_QUADS_EZS_KILL", + "THEx_FRAG_LZS_TEST", + "THEx_FRAG_LZS_KILL", + "", + "THEx_FRAG_PTILES", + "THEx_FRAG_TRANS_ELIM", + "THEx_QUAD_FPK_KILLER", + "", + "THEx_COMPUTE_ACTIVE", + "THEx_COMPUTE_TASKS", + "THEx_COMPUTE_WARPS", + "THEx_COMPUTE_STARVING", + "THEx_EXEC_CORE_ACTIVE", + "THEx_EXEC_ACTIVE", + "THEx_EXEC_INSTR_COUNT", + "THEx_EXEC_INSTR_DIVERGED", + "THEx_EXEC_INSTR_STARVING", + "THEx_ARITH_INSTR_SINGLE_FMA", + "THEx_ARITH_INSTR_DOUBLE", + "THEx_ARITH_INSTR_MSG", + "THEx_ARITH_INSTR_MSG_ONLY", + "THEx_TEX_INSTR", + "THEx_TEX_INSTR_MIPMAP", + "THEx_TEX_INSTR_COMPRESSED", + "THEx_TEX_INSTR_3D", + "THEx_TEX_INSTR_TRILINEAR", + "THEx_TEX_COORD_ISSUE", + "THEx_TEX_COORD_STALL", + "THEx_TEX_STARVE_CACHE", + "THEx_TEX_STARVE_FILTER", + "THEx_LS_MEM_READ_FULL", + "THEx_LS_MEM_READ_SHORT", + "THEx_LS_MEM_WRITE_FULL", + "THEx_LS_MEM_WRITE_SHORT", + "THEx_LS_MEM_ATOMIC", + "THEx_VARY_INSTR", + "THEx_VARY_SLOT_32", + "THEx_VARY_SLOT_16", + "THEx_ATTR_INSTR", + "THEx_ARITH_INSTR_FP_MUL", + "THEx_BEATS_RD_FTC", + "THEx_BEATS_RD_FTC_EXT", + "THEx_BEATS_RD_LSC", + "THEx_BEATS_RD_LSC_EXT", + "THEx_BEATS_RD_TEX", + "THEx_BEATS_RD_TEX_EXT", + 
"THEx_BEATS_RD_OTHER", + "THEx_BEATS_WR_LSC", + "THEx_BEATS_WR_TIB", + "", + + /* Performance counters for the Memory System */ + "", + "", + "", + "", + "THEx_MMU_REQUESTS", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "THEx_L2_RD_MSG_IN", + "THEx_L2_RD_MSG_IN_STALL", + "THEx_L2_WR_MSG_IN", + "THEx_L2_WR_MSG_IN_STALL", + "THEx_L2_SNP_MSG_IN", + "THEx_L2_SNP_MSG_IN_STALL", + "THEx_L2_RD_MSG_OUT", + "THEx_L2_RD_MSG_OUT_STALL", + "THEx_L2_WR_MSG_OUT", + "THEx_L2_ANY_LOOKUP", + "THEx_L2_READ_LOOKUP", + "THEx_L2_WRITE_LOOKUP", + "THEx_L2_EXT_SNOOP_LOOKUP", + "THEx_L2_EXT_READ", + "THEx_L2_EXT_READ_NOSNP", + "THEx_L2_EXT_READ_UNIQUE", + "THEx_L2_EXT_READ_BEATS", + "THEx_L2_EXT_AR_STALL", + "THEx_L2_EXT_AR_CNT_Q1", + "THEx_L2_EXT_AR_CNT_Q2", + "THEx_L2_EXT_AR_CNT_Q3", + "THEx_L2_EXT_RRESP_0_127", + "THEx_L2_EXT_RRESP_128_191", + "THEx_L2_EXT_RRESP_192_255", + "THEx_L2_EXT_RRESP_256_319", + "THEx_L2_EXT_RRESP_320_383", + "THEx_L2_EXT_WRITE", + "THEx_L2_EXT_WRITE_NOSNP_FULL", + "THEx_L2_EXT_WRITE_NOSNP_PTL", + "THEx_L2_EXT_WRITE_SNP_FULL", + "THEx_L2_EXT_WRITE_SNP_PTL", + "THEx_L2_EXT_WRITE_BEATS", + "THEx_L2_EXT_W_STALL", + "THEx_L2_EXT_AW_CNT_Q1", + "THEx_L2_EXT_AW_CNT_Q2", + "THEx_L2_EXT_AW_CNT_Q3", + "THEx_L2_EXT_SNOOP", + "THEx_L2_EXT_SNOOP_STALL", + "THEx_L2_EXT_SNOOP_RESP_CLEAN", + "THEx_L2_EXT_SNOOP_RESP_DATA", + "THEx_L2_EXT_SNOOP_INTERNAL", + "", + "", + "", + "", + "", + "", + "", +}; + +#endif /* _KBASE_GATOR_HWCNT_NAMES_THEX_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h b/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h index a962ecb3f9c6..a3377b27d487 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h +++ b/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h @@ -96,6 +96,7 @@ GPU_ID2_PRODUCT_MODEL) #define GPU_ID2_PRODUCT_TMIX GPU_ID2_MODEL_MAKE(6, 0) +#define GPU_ID2_PRODUCT_THEX GPU_ID2_MODEL_MAKE(6, 1) /* Values for GPU_ID_VERSION_STATUS field for PRODUCT_ID GPU_ID_PI_T60X */ #define GPU_ID_S_15DEV0 0x1 diff --git 
a/drivers/gpu/arm/midgard/mali_kbase_hw.c b/drivers/gpu/arm/midgard/mali_kbase_hw.c index de2461fb8de4..1d7e5e9b2c18 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_hw.c +++ b/drivers/gpu/arm/midgard/mali_kbase_hw.c @@ -42,6 +42,9 @@ void kbase_hw_set_features_mask(struct kbase_device *kbdev) case GPU_ID2_PRODUCT_TMIX: features = base_hw_features_tMIx; break; + case GPU_ID2_PRODUCT_THEX: + features = base_hw_features_tHEx; + break; default: features = base_hw_features_generic; break; @@ -106,6 +109,9 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev) if ((gpu_id & GPU_ID2_PRODUCT_MODEL) == GPU_ID2_PRODUCT_TMIX) { issues = base_hw_issues_tMIx_r0p0; + } else if ((gpu_id & GPU_ID2_PRODUCT_MODEL) == + GPU_ID2_PRODUCT_THEX) { + issues = base_hw_issues_tHEx_r0p0; } else { dev_err(kbdev->dev, "Unknown GPU ID %x", gpu_id); @@ -215,6 +221,9 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev) case GPU_ID2_PRODUCT_TMIX: issues = base_hw_issues_model_tMIx; break; + case GPU_ID2_PRODUCT_THEX: + issues = base_hw_issues_model_tHEx; + break; default: dev_err(kbdev->dev, "Unknown GPU ID %x", gpu_id); diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h index 261453e8f1ac..0acf297192fd 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h +++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014, 2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -26,8 +26,7 @@ #include -/* The kbasep_js_device_data::runpool_irq::lock (a spinlock) must be held when - * accessing this structure */ +/* The hwaccess_lock (a spinlock) must be held when accessing this structure */ struct kbase_hwaccess_data { struct kbase_context *active_kctx; diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h index abe66078029f..c2c3909caa7b 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h +++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h @@ -33,6 +33,17 @@ void kbase_backend_run_atom(struct kbase_device *kbdev, struct kbase_jd_atom *katom); +/** + * kbase_backend_slot_update - Update state based on slot ringbuffers + * + * @kbdev: Device pointer + * + * Inspect the jobs in the slot ringbuffers and update state. + * + * This will cause jobs to be submitted to hardware if they are unblocked + */ +void kbase_backend_slot_update(struct kbase_device *kbdev); + /** * kbase_backend_find_free_address_space() - Find a free address space. * @kbdev: Device pointer @@ -88,7 +99,7 @@ bool kbase_backend_use_ctx(struct kbase_device *kbdev, * the context is not scheduled, then kbase_gpu_use_ctx() should be used * instead. 
* - * Caller must hold runpool_irq.lock + * Caller must hold hwaccess_lock * * Return: true if context is now active, false otherwise (ie if context does * not have an address space assigned) @@ -102,7 +113,7 @@ bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev, * @kbdev: Device pointer * @kctx: Context pointer * - * Caller must hold as->transaction_mutex and runpool_irq.lock + * Caller must hold kbase_device->mmu_hw_mutex and hwaccess_lock */ void kbase_backend_release_ctx_irq(struct kbase_device *kbdev, struct kbase_context *kctx); @@ -113,7 +124,7 @@ void kbase_backend_release_ctx_irq(struct kbase_device *kbdev, * @kbdev: Device pointer * @kctx: Context pointer * - * Caller must hold as->transaction_mutex + * Caller must hold kbase_device->mmu_hw_mutex * * This function must perform any operations that could not be performed in IRQ * context by kbase_backend_release_ctx_irq(). diff --git a/drivers/gpu/arm/midgard/mali_kbase_jd.c b/drivers/gpu/arm/midgard/mali_kbase_jd.c index 3e0a5892cc7a..81952e2d146f 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_jd.c +++ b/drivers/gpu/arm/midgard/mali_kbase_jd.c @@ -61,7 +61,7 @@ static void __user * get_compat_pointer(struct kbase_context *kctx, const union kbase_pointer *p) { #ifdef CONFIG_COMPAT - if (kctx->is_compat) + if (kbase_ctx_flag(kctx, KCTX_COMPAT)) return compat_ptr(p->compat_value); #endif return p->value; @@ -581,24 +581,20 @@ static inline void jd_resolve_dep(struct list_head *out_list, dep_count = atomic_read(&dep_atom->dma_fence.dep_count); if (likely(dep_count == -1)) { dep_satisfied = true; - } else if (dep_count == 0) { + } else { /* - * All fences for this atom has signaled, but - * the worker that will queue the atom has not - * yet run. + * There are either still active callbacks, or + * all fences for this @dep_atom have signaled, + * but the worker that will queue the atom has + * not yet run.
* - * Mark the atom as handled by setting - * dep_count to -1 so that the worker doesn't - * queue the atom again. - */ - atomic_set(&dep_atom->dma_fence.dep_count, -1); - /* - * Remove the atom from the list of dma-fence - * waiting atoms. + * Wait for the fences to signal and the fence + * worker to run and handle @dep_atom. If + * @dep_atom was completed due to error on + * @katom, then the fence worker will pick up + * the complete status and error code set on + * @dep_atom above. */ - kbase_dma_fence_waiters_remove(dep_atom); - dep_satisfied = true; - } else { dep_satisfied = false; } #endif /* CONFIG_MALI_DMA_FENCE */ @@ -665,6 +661,40 @@ static void jd_check_force_failure(struct kbase_jd_atom *katom) } #endif +/** + * is_dep_valid - Validate that a dependency is valid for early dependency + * submission + * @katom: Dependency atom to validate + * + * A dependency is valid if any of the following are true : + * - It does not exist (a non-existent dependency does not block submission) + * - It is in the job scheduler + * - It has completed, does not have a failure event code, and has not been + * marked to fail in the future + * + * Return: true if valid, false otherwise + */ +static bool is_dep_valid(struct kbase_jd_atom *katom) +{ + /* If there's no dependency then this is 'valid' from the perspective of + * early dependency submission */ + if (!katom) + return true; + + /* Dependency must have reached the job scheduler */ + if (katom->status < KBASE_JD_ATOM_STATE_IN_JS) + return false; + + /* If dependency has completed and has failed or will fail then it is + * not valid */ + if (katom->status >= KBASE_JD_ATOM_STATE_HW_COMPLETED && + (katom->event_code != BASE_JD_EVENT_DONE || + katom->will_fail_event_code)) + return false; + + return true; +} + static void jd_try_submitting_deps(struct list_head *out_list, struct kbase_jd_atom *node) { @@ -679,14 +709,41 @@ static void jd_try_submitting_deps(struct list_head *out_list, if (IS_GPU_ATOM(dep_atom) && 
!dep_atom->in_jd_list) { /*Check if atom deps look sane*/ - bool dep0_valid = !dep_atom->dep[0].atom || - (dep_atom->dep[0].atom->status - >= KBASE_JD_ATOM_STATE_IN_JS); - bool dep1_valid = !dep_atom->dep[1].atom || - (dep_atom->dep[1].atom->status - >= KBASE_JD_ATOM_STATE_IN_JS); - - if (dep0_valid && dep1_valid) { + bool dep0_valid = is_dep_valid( + dep_atom->dep[0].atom); + bool dep1_valid = is_dep_valid( + dep_atom->dep[1].atom); + bool dep_satisfied = true; +#ifdef CONFIG_MALI_DMA_FENCE + int dep_count; + + dep_count = atomic_read( + &dep_atom->dma_fence.dep_count); + if (likely(dep_count == -1)) { + dep_satisfied = true; + } else { + /* + * There are either still active callbacks, or + * all fences for this @dep_atom have signaled, + * but the worker that will queue the atom has + * not yet run. + * + * Wait for the fences to signal and the fence + * worker to run and handle @dep_atom. If + * @dep_atom was completed due to error on + * @katom, then the fence worker will pick up + * the complete status and error code set on + * @dep_atom above.
+ */ + dep_satisfied = false; + } +#endif /* CONFIG_MALI_DMA_FENCE */ +#ifdef CONFIG_KDS + dep_satisfied = dep_satisfied && + dep_atom->kds_dep_satisfied; +#endif + + if (dep0_valid && dep1_valid && dep_satisfied) { dep_atom->in_jd_list = true; list_add(&dep_atom->jd_item, out_list); } @@ -758,7 +815,7 @@ bool jd_done_nolock(struct kbase_jd_atom *katom, for (i = 0; i < 2; i++) jd_resolve_dep(&runnable_jobs, katom, i, - kctx->jctx.sched_info.ctx.is_dying); + kbase_ctx_flag(kctx, KCTX_DYING)); if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) kbase_jd_post_external_resources(katom); @@ -774,7 +831,7 @@ bool jd_done_nolock(struct kbase_jd_atom *katom, KBASE_DEBUG_ASSERT(node->status != KBASE_JD_ATOM_STATE_UNUSED); if (node->status != KBASE_JD_ATOM_STATE_COMPLETED && - !kctx->jctx.sched_info.ctx.is_dying) { + !kbase_ctx_flag(kctx, KCTX_DYING)) { need_to_try_schedule_context |= jd_run_atom(node); } else { node->event_code = katom->event_code; @@ -919,7 +976,10 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us katom->x_pre_dep = NULL; katom->x_post_dep = NULL; katom->will_fail_event_code = BASE_JD_EVENT_NOT_STARTED; - katom->exit_protected_state = KBASE_ATOM_EXIT_PROTECTED_CHECK; + + /* Implicitly sets katom->protected_state.enter as well. */ + katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK; + katom->age = kctx->age_count++; INIT_LIST_HEAD(&katom->jd_item); @@ -933,6 +993,8 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us atomic_set(&katom->dma_fence.dep_count, -1); #endif + kbase_tlstream_tl_attrib_atom_state(katom, TL_ATOM_STATE_IDLE); + /* Don't do anything if there is a mess up with dependencies. 
This is done in a separate cycle to check both the dependencies at ones, otherwise it will be extra complexity to deal with 1st dependency ( just added to the list ) @@ -1037,10 +1099,17 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us katom->status = KBASE_JD_ATOM_STATE_QUEUED; } + /* For invalid priority, be most lenient and choose the default */ + sched_prio = kbasep_js_atom_prio_to_sched_prio(user_atom->prio); + if (sched_prio == KBASE_JS_ATOM_SCHED_PRIO_INVALID) + sched_prio = KBASE_JS_ATOM_SCHED_PRIO_DEFAULT; + katom->sched_priority = sched_prio; + /* Create a new atom recording all dependencies it was set up with. */ kbase_tlstream_tl_new_atom( katom, kbase_jd_atom_id(kctx, katom)); + kbase_tlstream_tl_attrib_atom_priority(katom, katom->sched_priority); kbase_tlstream_tl_ret_atom_ctx(katom, kctx); for (i = 0; i < 2; i++) if (BASE_JD_DEP_TYPE_INVALID != kbase_jd_katom_dep_type( @@ -1092,12 +1161,6 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us goto out; } - /* For invalid priority, be most lenient and choose the default */ - sched_prio = kbasep_js_atom_prio_to_sched_prio(user_atom->prio); - if (sched_prio == KBASE_JS_ATOM_SCHED_PRIO_INVALID) - sched_prio = KBASE_JS_ATOM_SCHED_PRIO_DEFAULT; - katom->sched_priority = sched_prio; - if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) { /* handle what we need to do to access the external resources */ if (kbase_jd_pre_external_resources(katom, user_atom) != 0) { @@ -1212,7 +1275,7 @@ int kbase_jd_submit(struct kbase_context *kctx, beenthere(kctx, "%s", "Enter"); - if ((kctx->jctx.sched_info.ctx.flags & KBASE_CTX_FLAG_SUBMIT_DISABLED) != 0) { + if (kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) { dev_err(kbdev->dev, "Attempt to submit to a context that has SUBMIT_DISABLED set on it"); return -EINVAL; } @@ -1374,7 +1437,6 @@ void kbase_jd_done_worker(struct work_struct *data) struct kbasep_js_device_data *js_devdata; u64 cache_jc = katom->jc; 
struct kbasep_js_atom_retained_state katom_retained_state; - bool schedule = false; bool context_idle; base_jd_core_req core_req = katom->core_req; u64 affinity = katom->affinity; @@ -1397,6 +1459,7 @@ void kbase_jd_done_worker(struct work_struct *data) * Begin transaction on JD context and JS context */ mutex_lock(&jctx->lock); + kbase_tlstream_tl_attrib_atom_state(katom, TL_ATOM_STATE_DONE); mutex_lock(&js_devdata->queue_mutex); mutex_lock(&js_kctx_info->ctx.jsctx_mutex); @@ -1404,7 +1467,7 @@ void kbase_jd_done_worker(struct work_struct *data) * because it only happens in response to an IRQ from a job that was * running. */ - KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled); + KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); if (katom->event_code == BASE_JD_EVENT_STOPPED) { /* Atom has been promoted to stopped */ @@ -1413,12 +1476,12 @@ void kbase_jd_done_worker(struct work_struct *data) mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); mutex_unlock(&js_devdata->queue_mutex); - spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); katom->status = KBASE_JD_ATOM_STATE_IN_JS; kbase_js_unpull(kctx, katom); - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); mutex_unlock(&jctx->lock); return; @@ -1436,19 +1499,6 @@ void kbase_jd_done_worker(struct work_struct *data) /* Retain state before the katom disappears */ kbasep_js_atom_retained_state_copy(&katom_retained_state, katom); - if (!kbasep_js_has_atom_finished(&katom_retained_state)) { - mutex_lock(&js_devdata->runpool_mutex); - kbasep_js_clear_job_retry_submit(katom); - /* An atom that has been hard-stopped might have previously - * been soft-stopped and has just finished before the hard-stop - * occurred. 
For this reason, clear the hard-stopped flag */ - katom->atom_flags &= ~(KBASE_KATOM_FLAG_BEEN_HARD_STOPPED); - mutex_unlock(&js_devdata->runpool_mutex); - } - - if (kbasep_js_has_atom_finished(&katom_retained_state)) - schedule = true; - context_idle = kbase_js_complete_atom_wq(kctx, katom); KBASE_DEBUG_ASSERT(kbasep_js_has_atom_finished(&katom_retained_state)); @@ -1458,54 +1508,53 @@ void kbase_jd_done_worker(struct work_struct *data) mutex_unlock(&js_devdata->queue_mutex); katom->atom_flags &= ~KBASE_KATOM_FLAG_HOLDING_CTX_REF; /* jd_done_nolock() requires the jsctx_mutex lock to be dropped */ - schedule |= jd_done_nolock(katom, &kctx->completed_jobs); + jd_done_nolock(katom, &kctx->completed_jobs); /* katom may have been freed now, do not use! */ if (context_idle) { unsigned long flags; + context_idle = false; mutex_lock(&js_devdata->queue_mutex); - spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); /* If kbase_sched() has scheduled this context back in then - * ctx_active will have been set after we marked it as inactive, - * and another pm reference will have been taken, so drop our - * reference. But do not call kbase_jm_idle_ctx(), as the - * context is active and fast-starting is allowed. + * KCTX_ACTIVE will have been set after we marked it as + * inactive, and another pm reference will have been taken, so + * drop our reference. But do not call kbase_jm_idle_ctx(), as + * the context is active and fast-starting is allowed. * * If an atom has been fast-started then kctx->atoms_pulled will - * be non-zero but ctx_active will still be false (as the + * be non-zero but KCTX_ACTIVE will still be false (as the * previous pm reference has been inherited). Do NOT drop our * reference, as it has been re-used, and leave the context as * active. 
* - * If no new atoms have been started then ctx_active will still + * If no new atoms have been started then KCTX_ACTIVE will still * be false and atoms_pulled will be zero, so drop the reference * and call kbase_jm_idle_ctx(). * * As the checks are done under both the queue_mutex and - * runpool_irq.lock is should be impossible for this to race + * hwaccess_lock it should be impossible for this to race * with the scheduler code. */ - if (kctx->ctx_active || !atomic_read(&kctx->atoms_pulled)) { + if (kbase_ctx_flag(kctx, KCTX_ACTIVE) || + !atomic_read(&kctx->atoms_pulled)) { /* Calling kbase_jm_idle_ctx() here will ensure that * atoms are not fast-started when we drop the - * runpool_irq.lock. This is not performed if ctx_active - * is set as in that case another pm reference has been - * taken and a fast-start would be valid. + * hwaccess_lock. This is not performed if + * KCTX_ACTIVE is set as in that case another pm + * reference has been taken and a fast-start would be + * valid. */ - if (!kctx->ctx_active) + if (!kbase_ctx_flag(kctx, KCTX_ACTIVE)) kbase_jm_idle_ctx(kbdev, kctx); - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, - flags); - - kbase_pm_context_idle(kbdev); + context_idle = true; } else { - kctx->ctx_active = true; - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, - flags); + kbase_ctx_flag_set(kctx, KCTX_ACTIVE); } + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); mutex_unlock(&js_devdata->queue_mutex); } @@ -1519,8 +1568,7 @@ void kbase_jd_done_worker(struct work_struct *data) kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx, &katom_retained_state); - if (schedule) - kbase_js_sched_all(kbdev); + kbase_js_sched_all(kbdev); if (!atomic_dec_return(&kctx->work_count)) { /* If worker now idle then post all events that jd_done_nolock() @@ -1540,6 +1588,9 @@ void kbase_jd_done_worker(struct work_struct *data) kbase_backend_complete_wq_post_sched(kbdev, core_req, affinity, coreref_state); + if (context_idle) +
kbase_pm_context_idle(kbdev); + KBASE_TRACE_ADD(kbdev, JD_DONE_WORKER_END, kctx, NULL, cache_jc, 0); } @@ -1582,7 +1633,7 @@ static void jd_cancel_worker(struct work_struct *data) * any), nor must we try to schedule out the context (it's already * scheduled out). */ - KBASE_DEBUG_ASSERT(!js_kctx_info->ctx.is_scheduled); + KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED)); /* Scheduler: Remove the job from the system */ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); @@ -1620,7 +1671,7 @@ static void jd_cancel_worker(struct work_struct *data) * * Context: * This can be called safely from atomic context. - * The caller must hold kbasep_js_device_data.runpool_irq.lock + * The caller must hold kbdev->hwaccess_lock */ void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr, ktime_t *end_timestamp, kbasep_js_atom_done_code done_code) @@ -1675,7 +1726,7 @@ void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom) KBASE_TRACE_ADD(kbdev, JD_CANCEL, kctx, katom, katom->jc, 0); /* This should only be done from a context that is not scheduled */ - KBASE_DEBUG_ASSERT(!js_kctx_info->ctx.is_scheduled); + KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED)); WARN_ON(work_pending(&katom->work)); diff --git a/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c b/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c index 0cf75f59c282..6437e4212a29 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c +++ b/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -57,7 +57,7 @@ static int kbasep_jd_debugfs_atoms_show(struct seq_file *sfile, void *data) /* General atom states */ mutex_lock(&kctx->jctx.lock); /* JS-related states */ - spin_lock_irqsave(&kctx->kbdev->js_data.runpool_irq.lock, irq_flags); + spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, irq_flags); for (i = 0; i != BASE_JD_ATOM_COUNT; ++i) { struct kbase_jd_atom *atom = &atoms[i]; s64 start_timestamp = 0; @@ -84,7 +84,7 @@ static int kbasep_jd_debugfs_atoms_show(struct seq_file *sfile, void *data) atom->time_spent_us * 1000 : start_timestamp) ); } - spin_unlock_irqrestore(&kctx->kbdev->js_data.runpool_irq.lock, irq_flags); + spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, irq_flags); mutex_unlock(&kctx->jctx.lock); return 0; @@ -110,7 +110,7 @@ static const struct file_operations kbasep_jd_debugfs_atoms_fops = { .release = single_release, }; -void kbasep_jd_debugfs_ctx_add(struct kbase_context *kctx) +void kbasep_jd_debugfs_ctx_init(struct kbase_context *kctx) { KBASE_DEBUG_ASSERT(kctx != NULL); diff --git a/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h b/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h index bc1878f60e8e..090f81651eb5 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h +++ b/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h @@ -30,10 +30,10 @@ #define MALI_JD_DEBUGFS_VERSION 1 /** - * kbasep_jd_debugfs_ctx_add() - Add debugfs entries for JD system + * kbasep_jd_debugfs_ctx_init() - Add debugfs entries for JD system * * @kctx Pointer to kbase_context */ -void kbasep_jd_debugfs_ctx_add(struct kbase_context *kctx); +void kbasep_jd_debugfs_ctx_init(struct kbase_context *kctx); #endif /*_KBASE_JD_DEBUGFS_H*/ diff --git a/drivers/gpu/arm/midgard/mali_kbase_jm.c b/drivers/gpu/arm/midgard/mali_kbase_jm.c index 63425322452b..0c5c6a6f78cb 100644 --- 
a/drivers/gpu/arm/midgard/mali_kbase_jm.c +++ b/drivers/gpu/arm/midgard/mali_kbase_jm.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -60,7 +60,7 @@ u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask) { u32 ret_mask = 0; - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); while (js_mask) { int js = ffs(js_mask) - 1; @@ -79,7 +79,7 @@ void kbase_jm_try_kick(struct kbase_device *kbdev, u32 js_mask) { struct kbasep_js_device_data *js_devdata = &kbdev->js_data; - lockdep_assert_held(&js_devdata->runpool_irq.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); if (!down_trylock(&js_devdata->schedule_sem)) { kbase_jm_kick(kbdev, js_mask); @@ -91,7 +91,7 @@ void kbase_jm_try_kick_all(struct kbase_device *kbdev) { struct kbasep_js_device_data *js_devdata = &kbdev->js_data; - lockdep_assert_held(&js_devdata->runpool_irq.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); if (!down_trylock(&js_devdata->schedule_sem)) { kbase_jm_kick_all(kbdev); @@ -101,30 +101,31 @@ void kbase_jm_try_kick_all(struct kbase_device *kbdev) void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx) { - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); if (kbdev->hwaccess.active_kctx == kctx) kbdev->hwaccess.active_kctx = NULL; } -void kbase_jm_return_atom_to_js(struct kbase_device *kbdev, +struct kbase_jd_atom *kbase_jm_return_atom_to_js(struct kbase_device *kbdev, struct kbase_jd_atom *katom) { - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); if (katom->event_code != BASE_JD_EVENT_STOPPED && katom->event_code != BASE_JD_EVENT_REMOVED_FROM_NEXT) { - 
kbase_js_complete_atom(katom, NULL); + return kbase_js_complete_atom(katom, NULL); } else { kbase_js_unpull(katom->kctx, katom); + return NULL; } } -void kbase_jm_complete(struct kbase_device *kbdev, struct kbase_jd_atom *katom, - ktime_t *end_timestamp) +struct kbase_jd_atom *kbase_jm_complete(struct kbase_device *kbdev, + struct kbase_jd_atom *katom, ktime_t *end_timestamp) { - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); - kbase_js_complete_atom(katom, end_timestamp); + return kbase_js_complete_atom(katom, end_timestamp); } diff --git a/drivers/gpu/arm/midgard/mali_kbase_jm.h b/drivers/gpu/arm/midgard/mali_kbase_jm.h index 27aca3a699f4..a74ee24c8058 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_jm.h +++ b/drivers/gpu/arm/midgard/mali_kbase_jm.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014, 2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -28,7 +28,7 @@ * @kbdev: Device pointer * @js_mask: Mask of the job slots that can be pulled from. * - * Caller must hold the runpool_irq lock and schedule_sem semaphore + * Caller must hold the hwaccess_lock and schedule_sem semaphore * * Return: Mask of the job slots that can still be submitted to. */ @@ -39,7 +39,7 @@ u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask); * slots. * @kbdev: Device pointer * - * Caller must hold the runpool_irq lock and schedule_sem semaphore + * Caller must hold the hwaccess_lock and schedule_sem semaphore * * Return: Mask of the job slots that can still be submitted to. 
*/ @@ -52,7 +52,7 @@ static inline u32 kbase_jm_kick_all(struct kbase_device *kbdev) * kbase_jm_try_kick - Attempt to call kbase_jm_kick * @kbdev: Device pointer * @js_mask: Mask of the job slots that can be pulled from - * Context: Caller must hold runpool_irq lock + * Context: Caller must hold hwaccess_lock * * If schedule_sem can be immediately obtained then this function will call * kbase_jm_kick() otherwise it will do nothing. @@ -62,7 +62,7 @@ void kbase_jm_try_kick(struct kbase_device *kbdev, u32 js_mask); /** * kbase_jm_try_kick_all() - Attempt to call kbase_jm_kick_all * @kbdev: Device pointer - * Context: Caller must hold runpool_irq lock + * Context: Caller must hold hwaccess_lock * * If schedule_sem can be immediately obtained then this function will call * kbase_jm_kick_all() otherwise it will do nothing. @@ -80,7 +80,7 @@ void kbase_jm_try_kick_all(struct kbase_device *kbdev); * The context should have no atoms currently pulled from it * (kctx->atoms_pulled == 0). * - * Caller must hold the runpool_irq lock + * Caller must hold the hwaccess_lock */ void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx); @@ -90,17 +90,21 @@ void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx); * dependency * @kbdev: Device pointer * @katom: Atom that has been stopped or will be failed + * + * Return: Atom that has now been unblocked and can now be run, or NULL if none */ -void kbase_jm_return_atom_to_js(struct kbase_device *kbdev, - struct kbase_jd_atom *katom); +struct kbase_jd_atom *kbase_jm_return_atom_to_js(struct kbase_device *kbdev, + struct kbase_jd_atom *katom); /** * kbase_jm_complete() - Complete an atom * @kbdev: Device pointer * @katom: Atom that has completed * @end_timestamp: Timestamp of atom completion + * + * Return: Atom that has now been unblocked and can now be run, or NULL if none */ -void kbase_jm_complete(struct kbase_device *kbdev, struct kbase_jd_atom *katom, - ktime_t *end_timestamp); +struct 
kbase_jd_atom *kbase_jm_complete(struct kbase_device *kbdev, + struct kbase_jd_atom *katom, ktime_t *end_timestamp); #endif /* _KBASE_JM_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_js.c b/drivers/gpu/arm/midgard/mali_kbase_js.c index c591ebbcd861..60a7373601c0 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_js.c +++ b/drivers/gpu/arm/midgard/mali_kbase_js.c @@ -89,7 +89,7 @@ static int kbasep_js_trace_get_refcnt(struct kbase_device *kbdev, js_devdata = &kbdev->js_data; - spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); as_nr = kctx->as_nr; if (as_nr != KBASEP_AS_NR_INVALID) { struct kbasep_js_per_as_data *js_per_as_data; @@ -98,7 +98,7 @@ static int kbasep_js_trace_get_refcnt(struct kbase_device *kbdev, refcnt = js_per_as_data->as_busy_refcount; } - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); return refcnt; } @@ -198,7 +198,7 @@ static void kbase_js_sync_timers(struct kbase_device *kbdev) mutex_unlock(&kbdev->js_data.runpool_mutex); } -/* Hold the kbasep_js_device_data::runpool_irq::lock for this */ +/* Hold the hwaccess_lock for this */ bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev, struct kbase_context *kctx) { @@ -248,7 +248,7 @@ jsctx_rb_none_to_pull_prio(struct kbase_context *kctx, int js, int prio) { struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; - lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); return RB_EMPTY_ROOT(&rb->runnable_tree); } @@ -259,7 +259,7 @@ jsctx_rb_none_to_pull_prio(struct kbase_context *kctx, int js, int prio) * @kctx: Pointer to kbase context with ring buffer. * @js: Job slot id to check. * - * Caller must hold runpool_irq.lock + * Caller must hold hwaccess_lock * * Return: true if the ring buffers for all priorities have no pullable atoms, * false otherwise. 
@@ -269,7 +269,7 @@ jsctx_rb_none_to_pull(struct kbase_context *kctx, int js) { int prio; - lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { if (!jsctx_rb_none_to_pull_prio(kctx, js, prio)) @@ -294,8 +294,7 @@ jsctx_rb_none_to_pull(struct kbase_context *kctx, int js) * enumerated when this function returns jsctx->lock must be held when calling * this function. * - * The HW access lock, js_data.runpool_irq.lock, must always be held when - * calling this function. + * The HW access lock must always be held when calling this function. */ static void jsctx_queue_foreach_prio(struct kbase_context *kctx, int js, int prio, @@ -303,7 +302,7 @@ jsctx_queue_foreach_prio(struct kbase_context *kctx, int js, int prio, { struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js]; - lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); while (!RB_EMPTY_ROOT(&queue->runnable_tree)) { struct rb_node *node = rb_first(&queue->runnable_tree); @@ -361,7 +360,7 @@ jsctx_rb_peek_prio(struct kbase_context *kctx, int js, int prio) struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; struct rb_node *node; - lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); node = rb_first(&rb->runnable_tree); if (!node) @@ -379,7 +378,7 @@ jsctx_rb_peek_prio(struct kbase_context *kctx, int js, int prio) * KBASE_JS_ATOM_SCHED_PRIO_HIGH, for the specified @js and @prio and return a * pointer to the next atom, unless all the priority's ring buffers are empty. * - * Caller must hold the runpool_irq.lock. + * Caller must hold the hwaccess_lock. * * Return: Pointer to next atom in buffer, or NULL if there is no atom. 
*/ @@ -388,7 +387,7 @@ jsctx_rb_peek(struct kbase_context *kctx, int js) { int prio; - lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { struct kbase_jd_atom *katom; @@ -417,7 +416,7 @@ jsctx_rb_pull(struct kbase_context *kctx, struct kbase_jd_atom *katom) int js = katom->slot_nr; struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; - lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); /* Atoms must be pulled in the correct order. */ WARN_ON(katom != jsctx_rb_peek_prio(kctx, js, prio)); @@ -435,7 +434,7 @@ jsctx_tree_add(struct kbase_context *kctx, struct kbase_jd_atom *katom) struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js]; struct rb_node **new = &(queue->runnable_tree.rb_node), *parent = NULL; - lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); while (*new) { struct kbase_jd_atom *entry = container_of(*new, @@ -466,7 +465,7 @@ jsctx_tree_add(struct kbase_context *kctx, struct kbase_jd_atom *katom) static inline void jsctx_rb_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom) { - lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); jsctx_tree_add(kctx, katom); } @@ -630,7 +629,7 @@ int kbasep_js_devdata_init(struct kbase_device * const kbdev) mutex_init(&jsdd->runpool_mutex); mutex_init(&jsdd->queue_mutex); - spin_lock_init(&jsdd->runpool_irq.lock); + spin_lock_init(&kbdev->hwaccess_lock); sema_init(&jsdd->schedule_sem, 1); err = kbasep_js_policy_init(kbdev); @@ -699,14 +698,14 @@ int kbasep_js_kctx_init(struct kbase_context * const kctx) KBASE_DEBUG_ASSERT(js_kctx_info->init_status == JS_KCTX_INIT_NONE); js_kctx_info->ctx.nr_jobs = 0; - js_kctx_info->ctx.is_scheduled = false; - js_kctx_info->ctx.is_dying = false; + 
kbase_ctx_flag_clear(kctx, KCTX_SCHEDULED); + kbase_ctx_flag_clear(kctx, KCTX_DYING); memset(js_kctx_info->ctx.ctx_attr_ref_count, 0, sizeof(js_kctx_info->ctx.ctx_attr_ref_count)); /* Initially, the context is disabled from submission until the create * flags are set */ - js_kctx_info->ctx.flags = KBASE_CTX_FLAG_SUBMIT_DISABLED; + kbase_ctx_flag_set(kctx, KCTX_SUBMIT_DISABLED); js_kctx_info->init_status |= JS_KCTX_INIT_CONSTANTS; @@ -753,7 +752,7 @@ void kbasep_js_kctx_term(struct kbase_context *kctx) if ((js_kctx_info->init_status & JS_KCTX_INIT_CONSTANTS)) { /* The caller must de-register all jobs before calling this */ - KBASE_DEBUG_ASSERT(!js_kctx_info->ctx.is_scheduled); + KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED)); KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs == 0); } @@ -763,11 +762,11 @@ void kbasep_js_kctx_term(struct kbase_context *kctx) for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); - if (kctx->ctx_runnable_ref) { + if (kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)) { WARN_ON(atomic_read(&kbdev->js_data.nr_contexts_runnable) <= 0); atomic_dec(&kbdev->js_data.nr_contexts_runnable); update_ctx_count = true; - kctx->ctx_runnable_ref = false; + kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF); } mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex); @@ -789,12 +788,12 @@ void kbasep_js_kctx_term(struct kbase_context *kctx) * kbase_js_ctx_list_add_pullable_nolock - Variant of * kbase_jd_ctx_list_add_pullable() * where the caller must hold - * runpool_irq.lock + * hwaccess_lock * @kbdev: Device pointer * @kctx: Context to add to queue * @js: Job slot to use * - * Caller must hold runpool_irq.lock + * Caller must hold hwaccess_lock * * Return: true if caller should call kbase_backend_ctx_count_changed() */ @@ -804,7 +803,7 @@ static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev, { bool ret = false; - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + 
lockdep_assert_held(&kbdev->hwaccess_lock); if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js])) list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); @@ -816,8 +815,8 @@ static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev, kbdev->js_data.nr_contexts_pullable++; ret = true; if (!atomic_read(&kctx->atoms_pulled)) { - WARN_ON(kctx->ctx_runnable_ref); - kctx->ctx_runnable_ref = true; + WARN_ON(kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); + kbase_ctx_flag_set(kctx, KCTX_RUNNABLE_REF); atomic_inc(&kbdev->js_data.nr_contexts_runnable); } } @@ -830,12 +829,12 @@ static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev, * kbase_js_ctx_list_add_pullable_head_nolock - Variant of * kbase_js_ctx_list_add_pullable_head() * where the caller must hold - * runpool_irq.lock + * hwaccess_lock * @kbdev: Device pointer * @kctx: Context to add to queue * @js: Job slot to use * - * Caller must hold runpool_irq.lock + * Caller must hold hwaccess_lock * * Return: true if caller should call kbase_backend_ctx_count_changed() */ @@ -844,7 +843,7 @@ static bool kbase_js_ctx_list_add_pullable_head_nolock( { bool ret = false; - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js])) list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); @@ -856,8 +855,8 @@ static bool kbase_js_ctx_list_add_pullable_head_nolock( kbdev->js_data.nr_contexts_pullable++; ret = true; if (!atomic_read(&kctx->atoms_pulled)) { - WARN_ON(kctx->ctx_runnable_ref); - kctx->ctx_runnable_ref = true; + WARN_ON(kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); + kbase_ctx_flag_set(kctx, KCTX_RUNNABLE_REF); atomic_inc(&kbdev->js_data.nr_contexts_runnable); } } @@ -888,9 +887,9 @@ static bool kbase_js_ctx_list_add_pullable_head(struct kbase_device *kbdev, bool ret; unsigned long flags; - spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags); + 
spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ret = kbase_js_ctx_list_add_pullable_head_nolock(kbdev, kctx, js); - spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); return ret; } @@ -908,7 +907,7 @@ static bool kbase_js_ctx_list_add_pullable_head(struct kbase_device *kbdev, * This function should be used when a context has been pulled from, and there * are no jobs remaining on the specified slot. * - * Caller must hold runpool_irq.lock + * Caller must hold hwaccess_lock * * Return: true if caller should call kbase_backend_ctx_count_changed() */ @@ -918,7 +917,7 @@ static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev, { bool ret = false; - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js], &kbdev->js_data.ctx_list_unpullable[js]); @@ -927,8 +926,8 @@ static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev, kbdev->js_data.nr_contexts_pullable--; ret = true; if (!atomic_read(&kctx->atoms_pulled)) { - WARN_ON(!kctx->ctx_runnable_ref); - kctx->ctx_runnable_ref = false; + WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); + kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF); atomic_dec(&kbdev->js_data.nr_contexts_runnable); } } @@ -949,7 +948,7 @@ static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev, * This function should be used when a context has no jobs on the GPU, and no * jobs remaining for the specified slot. 
* - * Caller must hold runpool_irq.lock + * Caller must hold hwaccess_lock * * Return: true if caller should call kbase_backend_ctx_count_changed() */ @@ -959,7 +958,7 @@ static bool kbase_js_ctx_list_remove_nolock(struct kbase_device *kbdev, { bool ret = false; - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); WARN_ON(list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js])); @@ -969,8 +968,8 @@ static bool kbase_js_ctx_list_remove_nolock(struct kbase_device *kbdev, kbdev->js_data.nr_contexts_pullable--; ret = true; if (!atomic_read(&kctx->atoms_pulled)) { - WARN_ON(!kctx->ctx_runnable_ref); - kctx->ctx_runnable_ref = false; + WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); + kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF); atomic_dec(&kbdev->js_data.nr_contexts_runnable); } } @@ -982,11 +981,11 @@ static bool kbase_js_ctx_list_remove_nolock(struct kbase_device *kbdev, /** * kbase_js_ctx_list_pop_head_nolock - Variant of kbase_js_ctx_list_pop_head() * where the caller must hold - * runpool_irq.lock + * hwaccess_lock * @kbdev: Device pointer * @js: Job slot to use * - * Caller must hold runpool_irq.lock + * Caller must hold hwaccess_lock * * Return: Context to use for specified slot. 
* NULL if no contexts present for specified slot @@ -997,7 +996,7 @@ static struct kbase_context *kbase_js_ctx_list_pop_head_nolock( { struct kbase_context *kctx; - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); if (list_empty(&kbdev->js_data.ctx_list_pullable[js])) return NULL; @@ -1026,9 +1025,9 @@ static struct kbase_context *kbase_js_ctx_list_pop_head( struct kbase_context *kctx; unsigned long flags; - spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kctx = kbase_js_ctx_list_pop_head_nolock(kbdev, js); - spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); return kctx; } @@ -1040,7 +1039,7 @@ static struct kbase_context *kbase_js_ctx_list_pop_head( * @js: Job slot to use * @is_scheduled: true if the context is currently scheduled * - * Caller must hold runpool_irq.lock + * Caller must hold hwaccess_lock * * Return: true if context can be pulled from on specified slot * false otherwise @@ -1051,7 +1050,7 @@ static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js, struct kbasep_js_device_data *js_devdata; struct kbase_jd_atom *katom; - lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); js_devdata = &kctx->kbdev->js_data; @@ -1244,7 +1243,7 @@ bool kbasep_js_add_job(struct kbase_context *kctx, kbasep_js_clear_job_retry_submit(atom); /* Lock for state available during IRQ */ - spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); if (!kbase_js_dep_validate(kctx, atom)) { /* Dependencies could not be represented */ @@ -1254,12 +1253,13 @@ bool kbasep_js_add_job(struct kbase_context *kctx, * dependencies */ atom->status = KBASE_JD_ATOM_STATE_QUEUED; - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, 
flags); mutex_unlock(&js_devdata->runpool_mutex); goto out_unlock; } + kbase_tlstream_tl_attrib_atom_state(atom, TL_ATOM_STATE_READY); KBASE_TIMELINE_ATOM_READY(kctx, kbase_jd_atom_id(kctx, atom)); enqueue_required = kbase_js_dep_resolved_submit(kctx, atom); @@ -1283,14 +1283,14 @@ bool kbasep_js_add_job(struct kbase_context *kctx, if (enqueue_required && kctx == kbdev->hwaccess.active_kctx) kbase_jm_try_kick(kbdev, 1 << atom->slot_nr); - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); if (timer_sync) kbase_backend_ctx_count_changed(kbdev); mutex_unlock(&js_devdata->runpool_mutex); /* End runpool transaction */ - if (!js_kctx_info->ctx.is_scheduled) { - if (js_kctx_info->ctx.is_dying) { + if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED)) { + if (kbase_ctx_flag(kctx, KCTX_DYING)) { /* A job got added while/after kbase_job_zap_context() * was called on a non-scheduled context (e.g. KDS * dependency resolved). Kill that job by killing the @@ -1300,7 +1300,7 @@ bool kbasep_js_add_job(struct kbase_context *kctx, } else if (js_kctx_info->ctx.nr_jobs == 1) { /* Handle Refcount going from 0 to 1: schedule the * context on the Policy Queue */ - KBASE_DEBUG_ASSERT(!js_kctx_info->ctx.is_scheduled); + KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED)); dev_dbg(kbdev->dev, "JS: Enqueue Context %p", kctx); /* Policy Queue was updated - caller must try to @@ -1356,7 +1356,7 @@ bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev, kbasep_js_atom_retained_state_copy(&katom_retained_state, katom); kbasep_js_remove_job(kbdev, kctx, katom); - spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); /* The atom has 'finished' (will not be re-run), so no need to call * kbasep_js_has_atom_finished(). 
@@ -1366,8 +1366,7 @@ bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev, * whether it was soft-stopped or not */ attr_state_changed = kbasep_js_ctx_attr_ctx_release_atom(kbdev, kctx, &katom_retained_state); - - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); return attr_state_changed; } @@ -1384,9 +1383,9 @@ bool kbasep_js_runpool_retain_ctx(struct kbase_device *kbdev, /* KBASE_TRACE_ADD_REFCOUNT( kbdev, JS_RETAIN_CTX, kctx, NULL, 0, kbasep_js_trace_get_refcnt(kbdev, kctx)); */ - spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); result = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx); - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); return result; } @@ -1404,14 +1403,14 @@ struct kbase_context *kbasep_js_runpool_lookup_ctx(struct kbase_device *kbdev, js_devdata = &kbdev->js_data; js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr]; - spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); found_kctx = js_per_as_data->kctx; if (found_kctx != NULL) ++(js_per_as_data->as_busy_refcount); - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); return found_kctx; } @@ -1426,7 +1425,7 @@ struct kbase_context *kbasep_js_runpool_lookup_ctx_nolock( KBASE_DEBUG_ASSERT(kbdev != NULL); KBASE_DEBUG_ASSERT(0 <= as_nr && as_nr < BASE_MAX_NR_AS); - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); js_devdata = &kbdev->js_data; js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr]; @@ -1448,8 +1447,7 @@ struct kbase_context *kbasep_js_runpool_lookup_ctx_nolock( * @katom_retained_state: Retained state from the atom * @runpool_ctx_attr_change: True if the runpool context attributes have changed 
* - * This collates a set of actions that must happen whilst - * kbasep_js_device_data.runpool_irq.lock is held. + * This collates a set of actions that must happen whilst hwaccess_lock is held. * * This includes running more jobs when: * - The previously released kctx caused a ctx attribute change, @@ -1476,7 +1474,7 @@ static kbasep_js_release_result kbasep_js_run_jobs_after_ctx_and_atom_release( lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex); lockdep_assert_held(&js_devdata->runpool_mutex); - lockdep_assert_held(&js_devdata->runpool_irq.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); if (js_devdata->nr_user_contexts_running != 0) { bool retry_submit = false; @@ -1541,7 +1539,7 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( js_policy = &kbdev->js_data.policy; /* Ensure context really is scheduled in */ - KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled); + KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); /* kctx->as_nr and js_per_as_data are only read from here. The caller's * js_ctx_mutex provides a barrier that ensures they are up-to-date. 
@@ -1561,8 +1559,9 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( */ current_as = &kbdev->as[kctx_as_nr]; mutex_lock(&kbdev->pm.lock); - mutex_lock(&current_as->transaction_mutex); - spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + mutex_lock(&kbdev->mmu_hw_mutex); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + KBASE_DEBUG_ASSERT(kctx_as_nr == kctx->as_nr); KBASE_DEBUG_ASSERT(js_per_as_data->as_busy_refcount > 0); @@ -1577,8 +1576,7 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_RELEASE_CTX, kctx, NULL, 0u, new_ref_count); - if (new_ref_count == 1 && kctx->jctx.sched_info.ctx.flags & - KBASE_CTX_FLAG_PRIVILEGED && + if (new_ref_count == 1 && kbase_ctx_flag(kctx, KCTX_PRIVILEGED) && !kbase_pm_is_suspending(kbdev)) { /* Context is kept scheduled into an address space even when * there are no jobs, in this case we have to handle the @@ -1617,8 +1615,9 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( /* Ctx Attribute handling * * Releasing atoms attributes must either happen before this, or - * after 'is_scheduled' is changed, otherwise we double-decount - * the attributes */ + * after the KCTX_SHEDULED flag is changed, otherwise we + * double-decount the attributes + */ runpool_ctx_attr_change |= kbasep_js_ctx_attr_runpool_release_ctx(kbdev, kctx); @@ -1648,11 +1647,11 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( kctx, slot); } - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); kbase_backend_release_ctx_noirq(kbdev, kctx); - mutex_unlock(&current_as->transaction_mutex); + mutex_unlock(&kbdev->mmu_hw_mutex); mutex_unlock(&kbdev->pm.lock); /* Note: Don't reuse kctx_as_nr now */ @@ -1661,7 +1660,7 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( kbase_backend_ctx_count_changed(kbdev); /* update book-keeping info */ -
js_kctx_info->ctx.is_scheduled = false; + kbase_ctx_flag_clear(kctx, KCTX_SCHEDULED); /* Signal any waiter that the context is not scheduled, so is * safe for termination - once the jsctx_mutex is also dropped, * and jobs have finished. */ @@ -1674,8 +1673,8 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( kbasep_js_run_jobs_after_ctx_and_atom_release(kbdev, kctx, katom_retained_state, runpool_ctx_attr_change); - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); - mutex_unlock(&current_as->transaction_mutex); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + mutex_unlock(&kbdev->mmu_hw_mutex); mutex_unlock(&kbdev->pm.lock); } @@ -1710,9 +1709,9 @@ void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev, /* This is called if and only if you've you've detached the context from * the Runpool or the Policy Queue, and not added it back to the Runpool */ - KBASE_DEBUG_ASSERT(!js_kctx_info->ctx.is_scheduled); + KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED)); - if (js_kctx_info->ctx.is_dying) { + if (kbase_ctx_flag(kctx, KCTX_DYING)) { /* Dying: don't requeue, but kill all jobs on the context.
This * happens asynchronously */ dev_dbg(kbdev->dev, @@ -1806,7 +1805,7 @@ static void kbasep_js_runpool_release_ctx_no_schedule( void kbase_js_set_timeouts(struct kbase_device *kbdev) { - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); kbase_backend_timeouts_changed(kbdev); } @@ -1841,7 +1840,7 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, mutex_lock(&js_devdata->runpool_mutex); /* Check to see if context is dying due to kbase_job_zap_context() */ - if (js_kctx_info->ctx.is_dying) { + if (kbase_ctx_flag(kctx, KCTX_DYING)) { /* Roll back the transaction so far and return */ kbase_backend_release_free_address_space(kbdev, as_nr); @@ -1855,17 +1854,17 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, 0u, kbasep_js_trace_get_refcnt(kbdev, kctx)); - js_kctx_info->ctx.is_scheduled = true; + kbase_ctx_flag_set(kctx, KCTX_SCHEDULED); - mutex_lock(&new_address_space->transaction_mutex); - spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + mutex_lock(&kbdev->mmu_hw_mutex); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); /* Assign context to previously chosen address space */ if (!kbase_backend_use_ctx(kbdev, kctx, as_nr)) { - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); - mutex_unlock(&new_address_space->transaction_mutex); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + mutex_unlock(&kbdev->mmu_hw_mutex); /* Roll back the transaction so far and return */ - js_kctx_info->ctx.is_scheduled = false; + kbase_ctx_flag_clear(kctx, KCTX_SCHEDULED); kbase_backend_release_free_address_space(kbdev, as_nr); @@ -1892,8 +1891,8 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, * we just want it out immediately. * * The DMB required to read the suspend flag was issued recently as part - * of the runpool_irq locking. If a suspend occurs *after* that lock was - * taken (i.e. this condition doesn't execute), then the + * of the hwaccess_lock locking. 
If a suspend occurs *after* that lock + * was taken (i.e. this condition doesn't execute), then the * kbasep_js_suspend() code will cleanup this context instead (by virtue * of it being called strictly after the suspend flag is set, and will * wait for this lock to drop) */ @@ -1909,8 +1908,8 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, } /* Transaction complete */ - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); - mutex_unlock(&new_address_space->transaction_mutex); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + mutex_unlock(&kbdev->mmu_hw_mutex); /* Synchronize with any policy timers */ kbase_backend_ctx_count_changed(kbdev); @@ -1934,18 +1933,18 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, static bool kbase_js_use_ctx(struct kbase_device *kbdev, struct kbase_context *kctx) { - struct kbasep_js_device_data *js_devdata = &kbdev->js_data; unsigned long flags; - spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + if (kbase_backend_use_ctx_sched(kbdev, kctx)) { /* Context already has ASID - mark as active */ kbdev->hwaccess.active_kctx = kctx; - - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); return true; /* Context already scheduled */ } - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); return kbasep_js_schedule_ctx(kbdev, kctx); } @@ -1971,9 +1970,9 @@ void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev, mutex_lock(&js_kctx_info->ctx.jsctx_mutex); /* Mark the context as privileged */ - js_kctx_info->ctx.flags |= KBASE_CTX_FLAG_PRIVILEGED; + kbase_ctx_flag_set(kctx, KCTX_PRIVILEGED); - is_scheduled = js_kctx_info->ctx.is_scheduled; + is_scheduled = kbase_ctx_flag(kctx, KCTX_SCHEDULED); if (!is_scheduled) { /* Add the context to the pullable list */ if 
(kbase_js_ctx_list_add_pullable_head(kbdev, kctx, 0)) @@ -1989,7 +1988,7 @@ void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev, /* Wait for the context to be scheduled in */ wait_event(kctx->jctx.sched_info.ctx.is_scheduled_wait, - kctx->jctx.sched_info.ctx.is_scheduled); + kbase_ctx_flag(kctx, KCTX_SCHEDULED)); } else { /* Already scheduled in - We need to retain it to keep the * corresponding address space */ @@ -2010,7 +2009,7 @@ void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev, /* We don't need to use the address space anymore */ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); - js_kctx_info->ctx.flags &= (~KBASE_CTX_FLAG_PRIVILEGED); + kbase_ctx_flag_clear(kctx, KCTX_PRIVILEGED); mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); /* Release the context - it will be scheduled out */ @@ -2032,7 +2031,7 @@ void kbasep_js_suspend(struct kbase_device *kbdev) KBASE_DEBUG_ASSERT(kbase_pm_is_suspending(kbdev)); js_devdata = &kbdev->js_data; - spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); /* Prevent all contexts from submitting */ js_devdata->runpool_irq.submit_allowed = 0; @@ -2052,15 +2051,15 @@ void kbasep_js_suspend(struct kbase_device *kbdev) /* We can only cope with up to 1 privileged context - * the instrumented context. It'll be suspended by * disabling instrumentation */ - if (kctx->jctx.sched_info.ctx.flags & - KBASE_CTX_FLAG_PRIVILEGED) { + if (kbase_ctx_flag(kctx, KCTX_PRIVILEGED)) { ++nr_privileged_ctx; WARN_ON(nr_privileged_ctx != 1); } } } CSTD_UNUSED(nr_privileged_ctx); - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); /* De-ref the previous retain to ensure each context gets pulled out * sometime later. 
*/ @@ -2103,16 +2102,14 @@ void kbasep_js_resume(struct kbase_device *kbdev) mutex_lock(&js_kctx_info->ctx.jsctx_mutex); mutex_lock(&js_devdata->runpool_mutex); - spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - if (!js_kctx_info->ctx.is_scheduled && + if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED) && kbase_js_ctx_pullable(kctx, js, false)) timer_sync = kbase_js_ctx_list_add_pullable_nolock( kbdev, kctx, js); - - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, - flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); if (timer_sync) kbase_backend_ctx_count_changed(kbdev); mutex_unlock(&js_devdata->runpool_mutex); @@ -2167,7 +2164,7 @@ bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, katom->slot_nr = kbase_js_get_slot(kctx->kbdev, katom); - lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); lockdep_assert_held(&kctx->jctx.lock); /* If slot will transition from unpullable to pullable then add to @@ -2177,9 +2174,6 @@ bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, } else { enqueue_required = false; } - /* Check if there are lower priority jobs to soft stop */ - kbase_job_slot_ctx_priority_check_locked(kctx, katom); - if ((katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) || (katom->pre_dep && (katom->pre_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST))) { @@ -2191,6 +2185,9 @@ bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST; enqueue_required = false; } else { + /* Check if there are lower priority jobs to soft stop */ + kbase_job_slot_ctx_priority_check_locked(kctx, katom); + /* Add atom to ring buffer. 
*/ jsctx_tree_add(kctx, katom); katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_TREE; @@ -2210,7 +2207,7 @@ bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, */ static void kbase_js_move_to_tree(struct kbase_jd_atom *katom) { - lockdep_assert_held(&katom->kctx->kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&katom->kctx->kbdev->hwaccess_lock); while (katom) { WARN_ON(!(katom->atom_flags & @@ -2250,7 +2247,7 @@ static void kbase_js_evict_deps(struct kbase_context *kctx, struct kbase_jd_atom *x_dep = katom->x_post_dep; struct kbase_jd_atom *next_katom = katom->post_dep; - lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); if (next_katom) { KBASE_DEBUG_ASSERT(next_katom->status != @@ -2283,7 +2280,7 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js) KBASE_DEBUG_ASSERT(kctx); js_devdata = &kctx->kbdev->js_data; - lockdep_assert_held(&js_devdata->runpool_irq.lock); + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); if (!kbasep_js_is_submit_allowed(js_devdata, kctx)) return NULL; @@ -2318,11 +2315,12 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js) return NULL; } - kctx->pulled = true; + kbase_ctx_flag_set(kctx, KCTX_PULLED); + pulled = atomic_inc_return(&kctx->atoms_pulled); if (pulled == 1 && !kctx->slots_pullable) { - WARN_ON(kctx->ctx_runnable_ref); - kctx->ctx_runnable_ref = true; + WARN_ON(kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); + kbase_ctx_flag_set(kctx, KCTX_RUNNABLE_REF); atomic_inc(&kctx->kbdev->js_data.nr_contexts_runnable); } atomic_inc(&kctx->atoms_pulled_slot[katom->slot_nr]); @@ -2354,7 +2352,7 @@ static void js_return_worker(struct work_struct *data) u64 affinity = katom->affinity; enum kbase_atom_coreref_state coreref_state = katom->coreref_state; - kbase_tlstream_aux_job_softstop_ex(katom); + kbase_tlstream_tl_event_atom_softstop_ex(katom); kbase_backend_complete_wq(kbdev, katom); @@ -2371,7 +2369,7 @@ static void 
js_return_worker(struct work_struct *data) atomic_dec(&katom->blocked); - spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); if (!atomic_read(&kctx->atoms_pulled_slot[js]) && jsctx_rb_none_to_pull(kctx, js)) @@ -2379,14 +2377,14 @@ static void js_return_worker(struct work_struct *data) if (!atomic_read(&kctx->atoms_pulled)) { if (!kctx->slots_pullable) { - WARN_ON(!kctx->ctx_runnable_ref); - kctx->ctx_runnable_ref = false; + WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); + kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF); atomic_dec(&kbdev->js_data.nr_contexts_runnable); timer_sync = true; } if (kctx->as_nr != KBASEP_AS_NR_INVALID && - !js_kctx_info->ctx.is_dying) { + !kbase_ctx_flag(kctx, KCTX_DYING)) { int num_slots = kbdev->gpu_props.num_job_slots; int slot; @@ -2406,11 +2404,11 @@ static void js_return_worker(struct work_struct *data) context_idle = true; } - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); if (context_idle) { - WARN_ON(!kctx->ctx_active); - kctx->ctx_active = false; + WARN_ON(!kbase_ctx_flag(kctx, KCTX_ACTIVE)); + kbase_ctx_flag_clear(kctx, KCTX_ACTIVE); kbase_pm_context_idle(kbdev); } @@ -2432,7 +2430,7 @@ static void js_return_worker(struct work_struct *data) void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom) { - lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); jsctx_rb_unpull(kctx, katom); @@ -2468,7 +2466,7 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); mutex_lock(&js_devdata->runpool_mutex); - spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); if (katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE) { context_idle = !atomic_dec_return(&kctx->atoms_pulled); @@ -2476,8 +2474,8 @@ bool 
kbase_js_complete_atom_wq(struct kbase_context *kctx, if (!atomic_read(&kctx->atoms_pulled) && !kctx->slots_pullable) { - WARN_ON(!kctx->ctx_runnable_ref); - kctx->ctx_runnable_ref = false; + WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); + kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF); atomic_dec(&kbdev->js_data.nr_contexts_runnable); timer_sync = true; } @@ -2499,7 +2497,7 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, */ if (!kbasep_js_is_submit_allowed(js_devdata, kctx) && !atomic_read(&kctx->atoms_pulled) && - !js_kctx_info->ctx.is_dying) { + !kbase_ctx_flag(kctx, KCTX_DYING)) { int js; kbasep_js_set_submit_allowed(js_devdata, kctx); @@ -2526,9 +2524,9 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, * jd_done_worker(). */ if (context_idle) - kctx->ctx_active = false; + kbase_ctx_flag_clear(kctx, KCTX_ACTIVE); - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); if (timer_sync) kbase_backend_ctx_count_changed(kbdev); mutex_unlock(&js_devdata->runpool_mutex); @@ -2536,7 +2534,8 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, return context_idle; } -void kbase_js_complete_atom(struct kbase_jd_atom *katom, ktime_t *end_timestamp) +struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom, + ktime_t *end_timestamp) { u64 microseconds_spent = 0; struct kbase_device *kbdev; @@ -2548,7 +2547,7 @@ void kbase_js_complete_atom(struct kbase_jd_atom *katom, ktime_t *end_timestamp) js_policy = &kbdev->js_data.policy; - lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); if (katom->will_fail_event_code) katom->event_code = katom->will_fail_event_code; @@ -2599,7 +2598,12 @@ void kbase_js_complete_atom(struct kbase_jd_atom *katom, ktime_t *end_timestamp) false)) kbase_js_ctx_list_add_pullable_nolock(kbdev, kctx, x_dep->slot_nr); + + if (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE) + 
return x_dep; } + + return NULL; } void kbase_js_sched(struct kbase_device *kbdev, int js_mask) @@ -2629,7 +2633,7 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) break; /* No contexts on pullable list */ } - if (!kctx->ctx_active) { + if (!kbase_ctx_flag(kctx, KCTX_ACTIVE)) { context_idle = true; if (kbase_pm_context_active_handle_suspend( @@ -2648,18 +2652,16 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) up(&js_devdata->schedule_sem); return; } - kctx->ctx_active = true; + kbase_ctx_flag_set(kctx, KCTX_ACTIVE); } if (!kbase_js_use_ctx(kbdev, kctx)) { mutex_lock( &kctx->jctx.sched_info.ctx.jsctx_mutex); /* Context can not be used at this time */ - spin_lock_irqsave(&js_devdata->runpool_irq.lock, - flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); if (kbase_js_ctx_pullable(kctx, js, false) - || (kctx->jctx.sched_info.ctx.flags & - KBASE_CTX_FLAG_PRIVILEGED)) + || kbase_ctx_flag(kctx, KCTX_PRIVILEGED)) timer_sync |= kbase_js_ctx_list_add_pullable_head_nolock( kctx->kbdev, kctx, js); @@ -2667,13 +2669,13 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) timer_sync |= kbase_js_ctx_list_add_unpullable_nolock( kctx->kbdev, kctx, js); - spin_unlock_irqrestore( - &js_devdata->runpool_irq.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, + flags); mutex_unlock( &kctx->jctx.sched_info.ctx.jsctx_mutex); if (context_idle) { - WARN_ON(!kctx->ctx_active); - kctx->ctx_active = false; + WARN_ON(!kbase_ctx_flag(kctx, KCTX_ACTIVE)); + kbase_ctx_flag_clear(kctx, KCTX_ACTIVE); kbase_pm_context_idle(kbdev); } @@ -2682,15 +2684,15 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) break; } mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex); - spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kctx->pulled = false; + kbase_ctx_flag_clear(kctx, KCTX_PULLED); if (!kbase_jm_kick(kbdev, 1 << js)) /* No more jobs can be submitted on this slot */ js_mask 
&= ~(1 << js); - if (!kctx->pulled) { + if (!kbase_ctx_flag(kctx, KCTX_PULLED)) { /* Failed to pull jobs - push to head of list */ if (kbase_js_ctx_pullable(kctx, js, true)) timer_sync |= @@ -2706,15 +2708,15 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) if (context_idle) { kbase_jm_idle_ctx(kbdev, kctx); spin_unlock_irqrestore( - &js_devdata->runpool_irq.lock, - flags); - WARN_ON(!kctx->ctx_active); - kctx->ctx_active = false; + &kbdev->hwaccess_lock, + flags); + WARN_ON(!kbase_ctx_flag(kctx, KCTX_ACTIVE)); + kbase_ctx_flag_clear(kctx, KCTX_ACTIVE); kbase_pm_context_idle(kbdev); } else { spin_unlock_irqrestore( - &js_devdata->runpool_irq.lock, - flags); + &kbdev->hwaccess_lock, + flags); } mutex_unlock( &kctx->jctx.sched_info.ctx.jsctx_mutex); @@ -2732,8 +2734,8 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) timer_sync |= kbase_js_ctx_list_add_unpullable_nolock( kctx->kbdev, kctx, js); - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, - flags); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex); } } @@ -2764,7 +2766,7 @@ void kbase_js_zap_context(struct kbase_context *kctx) mutex_lock(&kctx->jctx.lock); mutex_lock(&js_devdata->queue_mutex); mutex_lock(&js_kctx_info->ctx.jsctx_mutex); - js_kctx_info->ctx.is_dying = true; + kbase_ctx_flag_set(kctx, KCTX_DYING); dev_dbg(kbdev->dev, "Zap: Try Evict Ctx %p", kctx); @@ -2804,7 +2806,7 @@ void kbase_js_zap_context(struct kbase_context *kctx) * jobs and releasing the Power manager active reference will be * handled when it leaves the runpool. 
*/ - if (!js_kctx_info->ctx.is_scheduled) { + if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED)) { for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { if (!list_empty( &kctx->jctx.sched_info.ctx.ctx_list_entry[js])) @@ -2824,7 +2826,7 @@ void kbase_js_zap_context(struct kbase_context *kctx) */ KBASE_TRACE_ADD(kbdev, JM_ZAP_NON_SCHEDULED, kctx, NULL, 0u, - js_kctx_info->ctx.is_scheduled); + kbase_ctx_flag(kctx, KCTX_SCHEDULED)); dev_dbg(kbdev->dev, "Zap: Ctx %p scheduled=0", kctx); @@ -2845,11 +2847,11 @@ void kbase_js_zap_context(struct kbase_context *kctx) /* Case c: didn't evict, but it is scheduled - it's in the Run * Pool */ KBASE_TRACE_ADD(kbdev, JM_ZAP_SCHEDULED, kctx, NULL, 0u, - js_kctx_info->ctx.is_scheduled); + kbase_ctx_flag(kctx, KCTX_SCHEDULED)); dev_dbg(kbdev->dev, "Zap: Ctx %p is in RunPool", kctx); /* Disable the ctx from submitting any more jobs */ - spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbasep_js_clear_submit_allowed(js_devdata, kctx); @@ -2870,7 +2872,7 @@ void kbase_js_zap_context(struct kbase_context *kctx) for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) kbase_job_slot_hardstop(kctx, js, NULL); - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); mutex_unlock(&js_devdata->queue_mutex); mutex_unlock(&kctx->jctx.lock); @@ -2942,7 +2944,7 @@ static void kbase_js_foreach_ctx_job(struct kbase_context *kctx, kbdev = kctx->kbdev; js_devdata = &kbdev->js_data; - spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_POLICY_FOREACH_CTX_JOBS, kctx, NULL, 0u, trace_get_refcnt(kbdev, kctx)); @@ -2951,5 +2953,5 @@ static void kbase_js_foreach_ctx_job(struct kbase_context *kctx, for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) jsctx_queue_foreach(kctx, js, callback); - 
spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } diff --git a/drivers/gpu/arm/midgard/mali_kbase_js.h b/drivers/gpu/arm/midgard/mali_kbase_js.h index 66b213293016..8969222c3389 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_js.h +++ b/drivers/gpu/arm/midgard/mali_kbase_js.h @@ -27,6 +27,7 @@ #include "mali_kbase_js_defs.h" #include "mali_kbase_js_policy.h" +#include "mali_kbase_context.h" #include "mali_kbase_defs.h" #include "mali_kbase_debug.h" @@ -151,8 +152,7 @@ void kbasep_js_kctx_term(struct kbase_context *kctx); * * The following locking conditions are made on the caller: * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex. - * - it must \em not hold kbasep_js_device_data::runpool_irq::lock (as this will be - * obtained internally) + * - it must \em not hold hwaccess_lock (as this will be obtained internally) * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be * obtained internally) * - it must \em not hold kbasep_jd_device_data::queue_mutex (again, it's used internally). @@ -213,8 +213,8 @@ void kbasep_js_remove_job(struct kbase_device *kbdev, struct kbase_context *kctx * * The following locking conditions are made on the caller: * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex. - * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, (as this will be - * obtained internally) + * - it must \em not hold the hwaccess_lock, (as this will be obtained + * internally) * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this could be * obtained internally) * @@ -233,8 +233,7 @@ bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev, * @note This function can safely be called from IRQ context. * * The following locking conditions are made on the caller: - * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, because - * it will be used internally. 
+ * - it must \em not hold the hwaccess_lock, because it will be used internally. * * @return value != false if the retain succeeded, and the context will not be scheduled out. * @return false if the retain failed (because the context is being/has been scheduled out). @@ -248,7 +247,7 @@ bool kbasep_js_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_conte * @note This function can safely be called from IRQ context. * * The following locks must be held by the caller: - * - kbasep_js_device_data::runpool_irq::lock + * - hwaccess_lock * * @return value != false if the retain succeeded, and the context will not be scheduled out. * @return false if the retain failed (because the context is being/has been scheduled out). @@ -266,9 +265,9 @@ bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev, struct kbas * @note This function can safely be called from IRQ context. * * The following locking conditions are made on the caller: - * - it must \em not hold the kbasep_js_device_data::runpoool_irq::lock, because - * it will be used internally. If the runpool_irq::lock is already held, then - * the caller should use kbasep_js_runpool_lookup_ctx_nolock() instead. + * - it must \em not hold the hwaccess_lock, because it will be used internally. + * If the hwaccess_lock is already held, then the caller should use + * kbasep_js_runpool_lookup_ctx_nolock() instead. * * @return a valid struct kbase_context on success, which has been refcounted as being busy. * @return NULL on failure, indicating that no context was found in \a as_nr @@ -288,7 +287,7 @@ struct kbase_context *kbasep_js_runpool_lookup_ctx(struct kbase_device *kbdev, i * Note: This function can safely be called from IRQ context. * * The following locking conditions are made on the caller: - * - it must the kbasep_js_device_data::runpoool_irq::lock. + * - it must the hold the hwaccess_lock * * Return: a valid struct kbase_context on success, which has been refcounted as * being busy. 
@@ -362,12 +361,12 @@ void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev, struct kb * scheduled, or that already has a zero refcount. * * The following locking conditions are made on the caller: - * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, because - * it will be used internally. + * - it must \em not hold the hwaccess_lock, because it will be used internally. * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex. * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be * obtained internally) - * - it must \em not hold the kbase_device::as[n].transaction_mutex (as this will be obtained internally) + * - it must \em not hold the kbase_device::mmu_hw_mutex (as this will be + * obtained internally) * - it must \em not hold kbasep_jd_device_data::queue_mutex (as this will be * obtained internally) * @@ -411,11 +410,11 @@ void kbasep_js_runpool_release_ctx_nolock(struct kbase_device *kbdev, * kbasep_js_release_privileged_ctx is called). * * The following locking conditions are made on the caller: - * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, because - * it will be used internally. + * - it must \em not hold the hwaccess_lock, because it will be used internally. * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be * obtained internally) - * - it must \em not hold the kbase_device::as[n].transaction_mutex (as this will be obtained internally) + * - it must \em not hold the kbase_device::mmu_hw_mutex (as this will be + * obtained internally) * - it must \em not hold kbasep_jd_device_data::queue_mutex (again, it's used internally). * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex, because it will * be used internally. @@ -429,12 +428,12 @@ void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev, struct kbase_ * See kbasep_js_runpool_release_ctx for potential side effects. 
* * The following locking conditions are made on the caller: - * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, because - * it will be used internally. + * - it must \em not hold the hwaccess_lock, because it will be used internally. * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex. * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be * obtained internally) - * - it must \em not hold the kbase_device::as[n].transaction_mutex (as this will be obtained internally) + * - it must \em not hold the kbase_device::mmu_hw_mutex (as this will be + * obtained internally) * */ void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev, struct kbase_context *kctx); @@ -444,7 +443,7 @@ void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev, struct kbase_c * * The following locks may be used: * - kbasep_js_device_data::runpool_mutex - * - kbasep_js_device_data::runpool_irq::lock + * - hwaccess_lock */ void kbase_js_try_run_jobs(struct kbase_device *kbdev); @@ -564,8 +563,10 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, * * @param[in] katom Pointer to the atom to complete * @param[in] end_timestamp The time that the atom completed (may be NULL) + * + * Return: Atom that has now been unblocked and can now be run, or NULL if none */ -void kbase_js_complete_atom(struct kbase_jd_atom *katom, +struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom, ktime_t *end_timestamp); /** @@ -630,7 +631,7 @@ void kbase_js_set_timeouts(struct kbase_device *kbdev); * * As with any bool, never test the return value with true. * - * The caller must hold kbasep_js_device_data::runpool_irq::lock. + * The caller must hold hwaccess_lock. 
*/ static inline bool kbasep_js_is_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx) { @@ -638,7 +639,7 @@ static inline bool kbasep_js_is_submit_allowed(struct kbasep_js_device_data *js_ /* Ensure context really is scheduled in */ KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); - KBASE_DEBUG_ASSERT(kctx->jctx.sched_info.ctx.is_scheduled); + KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); test_bit = (u16) (1u << kctx->as_nr); @@ -651,7 +652,7 @@ static inline bool kbasep_js_is_submit_allowed(struct kbasep_js_device_data *js_ * The purpose of this abstraction is to hide the underlying data size, and wrap up * the long repeated line of code. * - * The caller must hold kbasep_js_device_data::runpool_irq::lock. + * The caller must hold hwaccess_lock. */ static inline void kbasep_js_set_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx) { @@ -659,7 +660,7 @@ static inline void kbasep_js_set_submit_allowed(struct kbasep_js_device_data *js /* Ensure context really is scheduled in */ KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); - KBASE_DEBUG_ASSERT(kctx->jctx.sched_info.ctx.is_scheduled); + KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); set_bit = (u16) (1u << kctx->as_nr); @@ -674,7 +675,7 @@ static inline void kbasep_js_set_submit_allowed(struct kbasep_js_device_data *js * The purpose of this abstraction is to hide the underlying data size, and wrap up * the long repeated line of code. * - * The caller must hold kbasep_js_device_data::runpool_irq::lock. + * The caller must hold hwaccess_lock. 
*/ static inline void kbasep_js_clear_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx) { @@ -683,7 +684,7 @@ static inline void kbasep_js_clear_submit_allowed(struct kbasep_js_device_data * /* Ensure context really is scheduled in */ KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); - KBASE_DEBUG_ASSERT(kctx->jctx.sched_info.ctx.is_scheduled); + KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); clear_bit = (u16) (1u << kctx->as_nr); clear_mask = ~clear_bit; @@ -792,7 +793,7 @@ static inline bool kbasep_js_get_atom_retry_submit_slot(const struct kbasep_js_a /** * Debug Check the refcount of a context. Only use within ASSERTs * - * Obtains kbasep_js_device_data::runpool_irq::lock + * Obtains hwaccess_lock * * @return negative value if the context is not scheduled in * @return current refcount of the context if it is scheduled in. The refcount @@ -809,12 +810,12 @@ static inline int kbasep_js_debug_check_ctx_refcount(struct kbase_device *kbdev, KBASE_DEBUG_ASSERT(kctx != NULL); js_devdata = &kbdev->js_data; - spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); as_nr = kctx->as_nr; if (as_nr != KBASEP_AS_NR_INVALID) result = js_devdata->runpool_irq.per_as_data[as_nr].as_busy_refcount; - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); return result; } @@ -829,8 +830,7 @@ static inline int kbasep_js_debug_check_ctx_refcount(struct kbase_device *kbdev, * when there is no ctx in \a as_nr (NULL returned). * * The following locking conditions are made on the caller: - * - it must \em not hold the kbasep_js_device_data::runpoool_irq::lock, because - * it will be used internally. + * - it must \em not hold the hwaccess_lock, because it will be used internally. * * @return a valid struct kbase_context on success, with a refcount that is guarenteed * to be non-zero and unmodified by this function. 
@@ -848,12 +848,12 @@ static inline struct kbase_context *kbasep_js_runpool_lookup_ctx_noretain(struct js_devdata = &kbdev->js_data; js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr]; - spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); found_kctx = js_per_as_data->kctx; KBASE_DEBUG_ASSERT(found_kctx == NULL || js_per_as_data->as_busy_refcount > 0); - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); return found_kctx; } @@ -948,7 +948,7 @@ static inline void kbase_js_runpool_inc_context_count( KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running < S8_MAX); ++(js_devdata->nr_all_contexts_running); - if ((js_kctx_info->ctx.flags & KBASE_CTX_FLAG_SUBMIT_DISABLED) == 0) { + if (!kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) { /* Track contexts that can submit jobs */ KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running < S8_MAX); @@ -981,7 +981,7 @@ static inline void kbase_js_runpool_dec_context_count( --(js_devdata->nr_all_contexts_running); KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running >= 0); - if ((js_kctx_info->ctx.flags & KBASE_CTX_FLAG_SUBMIT_DISABLED) == 0) { + if (!kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) { /* Track contexts that can submit jobs */ --(js_devdata->nr_user_contexts_running); KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running >= 0); diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c b/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c index e6e611b9f415..455b661a1176 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c +++ b/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -51,9 +51,9 @@ static bool kbasep_js_ctx_attr_runpool_retain_attr(struct kbase_device *kbdev, s js_kctx_info = &kctx->jctx.sched_info; lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); - KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled != false); + KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, attribute) != false) { KBASE_DEBUG_ASSERT(js_devdata->runpool_irq.ctx_attr_ref_count[attribute] < S8_MAX); @@ -97,8 +97,8 @@ static bool kbasep_js_ctx_attr_runpool_release_attr(struct kbase_device *kbdev, js_kctx_info = &kctx->jctx.sched_info; lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); - KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled != false); + lockdep_assert_held(&kbdev->hwaccess_lock); + KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, attribute) != false) { KBASE_DEBUG_ASSERT(js_devdata->runpool_irq.ctx_attr_ref_count[attribute] > 0); @@ -136,13 +136,13 @@ static bool kbasep_js_ctx_attr_ctx_retain_attr(struct kbase_device *kbdev, struc KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT); js_kctx_info = &kctx->jctx.sched_info; - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); KBASE_DEBUG_ASSERT(js_kctx_info->ctx.ctx_attr_ref_count[attribute] < U32_MAX); ++(js_kctx_info->ctx.ctx_attr_ref_count[attribute]); - if (js_kctx_info->ctx.is_scheduled != false && js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) { + if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) && js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) { /* Only ref-count the 
attribute on the runpool for the first time this contexts sees this attribute */ KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_ON_CTX, kctx, NULL, 0u, attribute); runpool_state_changed = kbasep_js_ctx_attr_runpool_retain_attr(kbdev, kctx, attribute); @@ -176,8 +176,8 @@ static bool kbasep_js_ctx_attr_ctx_release_attr(struct kbase_device *kbdev, stru lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); KBASE_DEBUG_ASSERT(js_kctx_info->ctx.ctx_attr_ref_count[attribute] > 0); - if (js_kctx_info->ctx.is_scheduled != false && js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) { - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) && js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) { + lockdep_assert_held(&kbdev->hwaccess_lock); /* Only de-ref-count the attribute on the runpool when this is the last ctx-reference to it */ runpool_state_changed = kbasep_js_ctx_attr_runpool_release_attr(kbdev, kctx, attribute); KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_OFF_CTX, kctx, NULL, 0u, attribute); @@ -202,7 +202,7 @@ void kbasep_js_ctx_attr_set_initial_attrs(struct kbase_device *kbdev, struct kba KBASE_DEBUG_ASSERT(kctx != NULL); js_kctx_info = &kctx->jctx.sched_info; - if ((js_kctx_info->ctx.flags & KBASE_CTX_FLAG_SUBMIT_DISABLED) != false) { + if (kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) { /* This context never submits, so don't track any scheduling attributes */ return; } diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_defs.h b/drivers/gpu/arm/midgard/mali_kbase_js_defs.h index e1342045b394..e6a9d41b6a08 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_js_defs.h +++ b/drivers/gpu/arm/midgard/mali_kbase_js_defs.h @@ -44,17 +44,6 @@ struct kbase_device; struct kbase_jd_atom; -/* Types used by the policies must go here */ -enum { - /** Context will not submit any jobs */ - KBASE_CTX_FLAG_SUBMIT_DISABLED = (1u << 0), - - /** Set if the context uses an address space and should be kept scheduled in */ - KBASE_CTX_FLAG_PRIVILEGED = (1u 
<< 1) - - /* NOTE: Add flags for other things, such as 'is scheduled', and 'is dying' */ -}; - typedef u32 kbase_context_flags; struct kbasep_atom_req { @@ -185,9 +174,8 @@ typedef u32 kbasep_js_atom_done_code; /** * Data used by the scheduler that is unique for each Address Space. * - * This is used in IRQ context and kbasep_js_device_data::runpoool_irq::lock - * must be held whilst accessing this data (inculding reads and atomic - * decisions based on the read). + * This is used in IRQ context and hwaccess_lock must be held whilst accessing + * this data (inculding reads and atomic decisions based on the read). */ struct kbasep_js_per_as_data { /** @@ -218,27 +206,9 @@ struct kbasep_js_per_as_data { * to remove masking). */ struct kbasep_js_device_data { - /** Sub-structure to collect together Job Scheduling data used in IRQ context */ + /* Sub-structure to collect together Job Scheduling data used in IRQ + * context. The hwaccess_lock must be held when accessing. */ struct runpool_irq { - /** - * Lock for accessing Job Scheduling data used in IRQ context - * - * This lock must be held whenever this data is accessed (read, or - * write). Even for read-only access, memory barriers would be needed. - * In any case, it is likely that decisions based on only reading must - * also be atomic with respect to data held here and elsewhere in the - * Job Scheduler. - * - * This lock must also be held for accessing: - * - kbase_context::as_nr - * - kbase_device::jm_slots - * - Parts of the kbasep_js_policy, dependent on the policy (refer to - * the policy in question for more information) - * - Parts of kbasep_js_policy_ctx_info, dependent on the policy (refer to - * the policy in question for more information) - */ - spinlock_t lock; - /** Bitvector indicating whether a currently scheduled context is allowed to submit jobs. * When bit 'N' is set in this, it indicates whether the context bound to address space * 'N' (per_as_data[N].kctx) is allowed to submit jobs. 
@@ -383,8 +353,8 @@ struct kbasep_js_kctx_info { * Runpool substructure. This must only be accessed whilst the Run Pool * mutex ( kbasep_js_device_data::runpool_mutex ) is held. * - * In addition, the kbasep_js_device_data::runpool_irq::lock may need to be - * held for certain sub-members. + * In addition, the hwaccess_lock may need to be held for certain + * sub-members. * * @note some of the members could be moved into struct kbasep_js_device_data for * improved d-cache/tlb efficiency. @@ -418,21 +388,11 @@ struct kbasep_js_kctx_info { * the context. **/ u32 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT]; - kbase_context_flags flags; - /* NOTE: Unify the following flags into kbase_context_flags */ /** - * Is the context scheduled on the Run Pool? - * - * This is only ever updated whilst the jsctx_mutex is held. - */ - bool is_scheduled; - /** - * Wait queue to wait for is_scheduled state changes. + * Wait queue to wait for KCTX_SCHEDULED flag state changes. * */ wait_queue_head_t is_scheduled_wait; - bool is_dying; /**< Is the context in the process of being evicted? */ - /** Link implementing JS queues. Context can be present on one * list per job slot */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_policy.h b/drivers/gpu/arm/midgard/mali_kbase_js_policy.h index debd0117d45a..d1f3a0abea1f 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_js_policy.h +++ b/drivers/gpu/arm/midgard/mali_kbase_js_policy.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -531,7 +531,7 @@ void kbasep_js_policy_foreach_ctx_job(union kbasep_js_policy *js_policy, struct * The locking conditions on the caller are as follows: * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
* - it will be holding kbasep_js_device_data::runpool_mutex. - * - it will be holding kbasep_js_device_data::runpool_irq::lock (a spinlock) + * - it will be holding hwaccess_lock (a spinlock) * * Due to a spinlock being held, this function must not call any APIs that sleep. */ @@ -548,7 +548,7 @@ void kbasep_js_policy_runpool_add_ctx(union kbasep_js_policy *js_policy, struct * The locking conditions on the caller are as follows: * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex. * - it will be holding kbasep_js_device_data::runpool_mutex. - * - it will be holding kbasep_js_device_data::runpool_irq::lock (a spinlock) + * - it will be holding hwaccess_lock (a spinlock) * * Due to a spinlock being held, this function must not call any APIs that sleep. */ @@ -558,7 +558,7 @@ void kbasep_js_policy_runpool_remove_ctx(union kbasep_js_policy *js_policy, stru * @brief Indicate whether a context should be removed from the Run Pool * (should be scheduled out). * - * The kbasep_js_device_data::runpool_irq::lock will be held by the caller. + * The hwaccess_lock will be held by the caller. * * @note This API is called from IRQ context. */ @@ -715,7 +715,7 @@ bool kbasep_js_policy_dequeue_job(struct kbase_device *kbdev, int job_slot_idx, * the policy that the job should be run again at some point later. * * The caller has the following conditions on locking: - * - kbasep_js_device_data::runpool_irq::lock (a spinlock) will be held. + * - hwaccess_lock (a spinlock) will be held. * - kbasep_js_device_data::runpool_mutex will be held. * - kbasep_js_kctx_info::ctx::jsctx_mutex will be held. */ @@ -746,7 +746,7 @@ void kbasep_js_policy_enqueue_job(union kbasep_js_policy *js_policy, struct kbas * @note This API is called from IRQ context. * * The caller has the following conditions on locking: - * - kbasep_js_device_data::runpool_irq::lock will be held. + * - hwaccess_lock will be held. 
* * @param js_policy job scheduler policy * @param katom job dispatch atom diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.c b/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.c index 90c13458ec7c..1ac05693b62c 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.c +++ b/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.c @@ -149,9 +149,9 @@ static inline int kbasep_js_policy_trace_get_refcnt(struct kbase_device *kbdev, js_devdata = &kbdev->js_data; - spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); refcnt = kbasep_js_policy_trace_get_refcnt_nolock(kbdev, kctx); - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); return refcnt; } @@ -223,7 +223,7 @@ int kbasep_js_policy_init_ctx(struct kbase_device *kbdev, struct kbase_context * * head_runtime_us occur strictly after this context is initialized */ mutex_lock(&js_devdata->queue_mutex); - /* No need to hold the the runpool_irq.lock here, because we're initializing + /* No need to hold the hwaccess_lock here, because we're initializing * the value, and the context is definitely not being updated in the * runpool at this point. The queue_mutex ensures the memory barrier. */ ctx_info->runtime_us = policy_info->head_runtime_us + priority_weight(ctx_info, (u64) js_devdata->cfs_ctx_runtime_init_slices * (u64) (js_devdata->ctx_timeslice_ns / 1000u)); diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.h b/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.h index b457d8215abe..0a8454c033d8 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.h +++ b/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -54,10 +54,10 @@ struct kbasep_js_policy_cfs { * after ctx init * @process_priority: calling process NICE priority, in the range -20..19 * - * &kbasep_js_device_data.runpool_irq.lock must be held when updating - * @runtime_us. Initializing will occur on context init and context enqueue - * (which can only occur in one thread at a time), but multi-thread access only - * occurs while the context is in the runpool. + * hwaccess_lock must be held when updating @runtime_us. Initializing will occur + * on context init and context enqueue (which can only occur in one thread at a + * time), but multi-thread access only occurs while the context is in the + * runpool. * * Reads are possible without the spinlock, but an older value might be read if * no memory barries are issued beforehand. @@ -72,7 +72,7 @@ struct kbasep_js_policy_cfs_ctx { * struct kbasep_js_policy_cfs_job - per job information for CFS * @ticks: number of ticks that this job has been executing for * - * &kbasep_js_device_data.runpool_irq.lock must be held when accessing @ticks. + * hwaccess_lock must be held when accessing @ticks. 
*/ struct kbasep_js_policy_cfs_job { u32 ticks; diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem.c b/drivers/gpu/arm/midgard/mali_kbase_mem.c index c1851caa95a0..4824b31673d3 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_mem.c +++ b/drivers/gpu/arm/midgard/mali_kbase_mem.c @@ -485,7 +485,7 @@ int kbase_region_tracker_init(struct kbase_context *kctx) #endif #ifdef CONFIG_64BIT - if (kctx->is_compat) + if (kbase_ctx_flag(kctx, KCTX_COMPAT)) same_va_bits = 32; else if (kbase_hw_has_feature(kctx->kbdev, BASE_HW_FEATURE_33BIT_VA)) same_va_bits = 33; @@ -509,7 +509,7 @@ int kbase_region_tracker_init(struct kbase_context *kctx) #ifdef CONFIG_64BIT /* 32-bit clients have exec and custom VA zones */ - if (kctx->is_compat) { + if (kbase_ctx_flag(kctx, KCTX_COMPAT)) { #endif if (gpu_va_limit <= KBASE_REG_ZONE_CUSTOM_VA_BASE) { err = -EINVAL; @@ -573,7 +573,7 @@ int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages) * Nothing to do for 32-bit clients, JIT uses the existing * custom VA zone. */ - if (kctx->is_compat) + if (kbase_ctx_flag(kctx, KCTX_COMPAT)) return 0; #if defined(CONFIG_ARM64) @@ -1730,7 +1730,7 @@ static int kbase_jit_debugfs_phys_get(struct kbase_jit_debugfs_data *data) KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_phys_fops, kbase_jit_debugfs_phys_get); -void kbase_jit_debugfs_add(struct kbase_context *kctx) +void kbase_jit_debugfs_init(struct kbase_context *kctx) { /* Debugfs entry for getting the number of JIT allocations. 
*/ debugfs_create_file("mem_jit_count", S_IRUGO, kctx->kctx_dentry, diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem.h b/drivers/gpu/arm/midgard/mali_kbase_mem.h index 7b2433e868bd..8953c852e765 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_mem.h +++ b/drivers/gpu/arm/midgard/mali_kbase_mem.h @@ -400,7 +400,8 @@ static inline int kbase_reg_prepare_native(struct kbase_va_region *reg, return -ENOMEM; reg->cpu_alloc->imported.kctx = kctx; INIT_LIST_HEAD(&reg->cpu_alloc->evict_node); - if (kctx->infinite_cache_active && (reg->flags & KBASE_REG_CPU_CACHED)) { + if (kbase_ctx_flag(kctx, KCTX_INFINITE_CACHE) + && (reg->flags & KBASE_REG_CPU_CACHED)) { reg->gpu_alloc = kbase_alloc_create(reg->nr_pages, KBASE_MEM_TYPE_NATIVE); reg->gpu_alloc->imported.kctx = kctx; @@ -487,7 +488,7 @@ void kbase_mem_pool_term(struct kbase_mem_pool *pool); * 1. If there are free pages in the pool, allocate a page from @pool. * 2. Otherwise, if @next_pool is not NULL and has free pages, allocate a page * from @next_pool. - * 3. Finally, allocate a page from the kernel. + * 3. Return NULL if no memory in the pool * * Return: Pointer to allocated page, or NULL if allocation failed. */ @@ -572,19 +573,39 @@ static inline size_t kbase_mem_pool_max_size(struct kbase_mem_pool *pool) */ void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size); +/** + * kbase_mem_pool_grow - Grow the pool + * @pool: Memory pool to grow + * @nr_to_grow: Number of pages to add to the pool + * + * Adds @nr_to_grow pages to the pool. Note that this may cause the pool to + * become larger than the maximum size specified.
+ * + * Returns: 0 on success, -ENOMEM if unable to allocate sufficient pages + */ +int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow); + /** * kbase_mem_pool_trim - Grow or shrink the pool to a new size * @pool: Memory pool to trim * @new_size: New number of pages in the pool * * If @new_size > @cur_size, fill the pool with new pages from the kernel, but - * not above @max_size. + * not above the max_size for the pool. * If @new_size < @cur_size, shrink the pool by freeing pages to the kernel. - * - * Return: The new size of the pool */ -size_t kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size); +void kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size); +/* + * kbase_mem_alloc_page - Allocate a new page for a device + * @kbdev: The kbase device + * + * Most uses should use kbase_mem_pool_alloc to allocate a page. However that + * function can fail in the event the pool is empty. + * + * Return: A new page or NULL if no memory + */ +struct page *kbase_mem_alloc_page(struct kbase_device *kbdev); int kbase_region_tracker_init(struct kbase_context *kctx); int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages); @@ -647,8 +668,8 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg); /** * The caller has the following locking conditions: - * - It must hold kbase_as::transaction_mutex on kctx's address space - * - It must hold the kbasep_js_device_data::runpool_irq::lock + * - It must hold kbase_device->mmu_hw_mutex + * - It must hold the hwaccess_lock */ void kbase_mmu_update(struct kbase_context *kctx); @@ -660,8 +681,8 @@ void kbase_mmu_update(struct kbase_context *kctx); * data from provided kbase context from the GPU caches.
* * The caller has the following locking conditions: - * - It must hold kbase_as::transaction_mutex on kctx's address space - * - It must hold the kbasep_js_device_data::runpool_irq::lock + * - It must hold kbase_device->mmu_hw_mutex + * - It must hold the hwaccess_lock */ void kbase_mmu_disable(struct kbase_context *kctx); @@ -674,7 +695,7 @@ void kbase_mmu_disable(struct kbase_context *kctx); * This function must only be called during reset/power-up and it used to * ensure the registers are in a known state. * - * The caller must hold kbdev->as[as_nr].transaction_mutex. + * The caller must hold kbdev->mmu_hw_mutex. */ void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr); @@ -894,10 +915,10 @@ void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle, #ifdef CONFIG_DEBUG_FS /** - * kbase_jit_debugfs_add - Add per context debugfs entry for JIT. + * kbase_jit_debugfs_init - Add per context debugfs entry for JIT. * @kctx: kbase context */ -void kbase_jit_debugfs_add(struct kbase_context *kctx); +void kbase_jit_debugfs_init(struct kbase_context *kctx); #endif /* CONFIG_DEBUG_FS */ /** diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c index f91d3c916355..b6dac5586a7a 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c +++ b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c @@ -30,13 +30,15 @@ #include #include #include -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) - #include -#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0) */ +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) && \ + (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0)) +#include +#endif /* LINUX_VERSION_CODE >= 3.5.0 && < 4.8.0 */ #ifdef CONFIG_DMA_SHARED_BUFFER #include #endif /* defined(CONFIG_DMA_SHARED_BUFFER) */ #include +#include #include #include @@ -112,7 +114,7 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages goto bad_size; #if defined(CONFIG_64BIT) - if 
(kctx->is_compat) + if (kbase_ctx_flag(kctx, KCTX_COMPAT)) cpu_va_bits = 32; #endif @@ -208,18 +210,19 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages } /* - * Pre-10.1 UKU userland calls mmap for us so return the - * unaligned address and skip the map. + * 10.1-10.4 UKU userland relies on the kernel to call mmap. + * For all other versions we can just return the cookie */ - if (kctx->api_version < KBASE_API_VERSION(10, 1)) { + if (kctx->api_version < KBASE_API_VERSION(10, 1) || + kctx->api_version > KBASE_API_VERSION(10, 4)) { *gpu_va = (u64) cookie; return reg; } /* - * GPUCORE-2190: - * - * We still need to return alignment for old userspace. + * To achieve alignment and avoid allocating on large alignment + * (to work around a GPU hardware issue) we must allocate 3 + * times the required size. */ if (*va_alignment) va_map += 3 * (1UL << *va_alignment); @@ -233,8 +236,10 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages MAP_SHARED, cookie); if (IS_ERR_VALUE(cpu_addr)) { + kbase_gpu_vm_lock(kctx); kctx->pending_regions[cookie_nr] = NULL; kctx->cookies |= (1UL << cookie_nr); + kbase_gpu_vm_unlock(kctx); goto no_mmap; } @@ -1036,7 +1041,7 @@ static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx, in shared_zone = true; #ifdef CONFIG_64BIT - if (!kctx->is_compat) { + if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) { /* * 64-bit tasks require us to reserve VA on the CPU that we use * on the GPU. @@ -1133,7 +1138,7 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( shared_zone = true; #ifdef CONFIG_64BIT - if (!kctx->is_compat) { + if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) { /* * 64-bit tasks require us to reserve VA on the CPU that we use * on the GPU. 
@@ -1266,7 +1271,7 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, *num_pages = nents * stride; #ifdef CONFIG_64BIT - if (!kctx->is_compat) { + if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) { /* 64-bit tasks must MMAP anyway, but not expose this address to * clients */ *flags |= BASE_MEM_NEED_MMAP; @@ -1358,7 +1363,7 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, } #ifdef CONFIG_64BIT - if (!kctx->is_compat) { + if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) { /* Bind to a cookie */ if (!kctx->cookies) { dev_err(kctx->kbdev->dev, "No cookies available for allocation!"); @@ -1411,6 +1416,32 @@ bad_flags: return 0; } +static u32 kbase_get_cache_line_alignment(struct kbase_context *kctx) +{ + u32 cpu_cache_line_size = cache_line_size(); + u32 gpu_cache_line_size = + (1UL << kctx->kbdev->gpu_props.props.l2_props.log2_line_size); + + return ((cpu_cache_line_size > gpu_cache_line_size) ? + cpu_cache_line_size : + gpu_cache_line_size); +} + +static int kbase_check_buffer_size(struct kbase_context *kctx, u64 size) +{ + u32 cache_line_align = kbase_get_cache_line_alignment(kctx); + + return (size & (cache_line_align - 1)) == 0 ? 0 : -EINVAL; +} + +static int kbase_check_buffer_cache_alignment(struct kbase_context *kctx, + void __user *ptr) +{ + u32 cache_line_align = kbase_get_cache_line_alignment(kctx); + + return ((uintptr_t)ptr & (cache_line_align - 1)) == 0 ? 
0 : -EINVAL; +} + int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, void __user *phandle, u64 *gpu_va, u64 *va_pages, u64 *flags) @@ -1423,7 +1454,7 @@ int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, KBASE_DEBUG_ASSERT(flags); #ifdef CONFIG_64BIT - if (!kctx->is_compat) + if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) *flags |= BASE_MEM_SAME_VA; #endif @@ -1466,12 +1497,26 @@ int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, reg = NULL; } else { #ifdef CONFIG_COMPAT - if (kctx->is_compat) + if (kbase_ctx_flag(kctx, KCTX_COMPAT)) uptr = compat_ptr(user_buffer.ptr.compat_value); else #endif uptr = user_buffer.ptr.value; + if (0 != kbase_check_buffer_cache_alignment(kctx, + uptr)) { + dev_warn(kctx->kbdev->dev, + "User buffer is not cache line aligned!\n"); + goto no_reg; + } + + if (0 != kbase_check_buffer_size(kctx, + user_buffer.length)) { + dev_warn(kctx->kbdev->dev, + "User buffer size is not multiple of cache line size!\n"); + goto no_reg; + } + reg = kbase_mem_from_user_buffer(kctx, (unsigned long)uptr, user_buffer.length, va_pages, flags); @@ -2146,44 +2191,6 @@ void kbase_os_mem_map_unlock(struct kbase_context *kctx) up_read(&mm->mmap_sem); } -#if defined(CONFIG_DMA_SHARED_BUFFER) && defined(CONFIG_MALI_TRACE_TIMELINE) -/* This section is required only for instrumentation. */ - -static void kbase_dma_buf_vm_open(struct vm_area_struct *vma) -{ - struct kbase_cpu_mapping *map = vma->vm_private_data; - - KBASE_DEBUG_ASSERT(map); - KBASE_DEBUG_ASSERT(map->count > 0); - /* Non-atomic as we're under Linux's mm lock. */ - map->count++; -} - -static void kbase_dma_buf_vm_close(struct vm_area_struct *vma) -{ - struct kbase_cpu_mapping *map = vma->vm_private_data; - - KBASE_DEBUG_ASSERT(map); - KBASE_DEBUG_ASSERT(map->count > 0); - - /* Non-atomic as we're under Linux's mm lock. 
*/ - if (--map->count) - return; - - KBASE_DEBUG_ASSERT(map->kctx); - - kbase_gpu_vm_lock(map->kctx); - list_del(&map->mappings_list); - kbase_gpu_vm_unlock(map->kctx); - kfree(map); -} - -static const struct vm_operations_struct kbase_dma_mmap_ops = { - .open = kbase_dma_buf_vm_open, - .close = kbase_dma_buf_vm_close, -}; -#endif /* CONFIG_DMA_SHARED_BUFFER && CONFIG_MALI_TRACE_TIMELINE */ - int kbase_mmap(struct file *file, struct vm_area_struct *vma) { struct kbase_context *kctx = file->private_data; @@ -2402,35 +2409,6 @@ map: #ifdef CONFIG_DMA_SHARED_BUFFER dma_map: err = dma_buf_mmap(reg->cpu_alloc->imported.umm.dma_buf, vma, vma->vm_pgoff - reg->start_pfn); -#if defined(CONFIG_MALI_TRACE_TIMELINE) - /* This section is required only for instrumentation. */ - /* Add created mapping to imported region mapping list. - * It is important to make it visible to dumping infrastructure. - * Add mapping only if vm_ops structure is not used by memory owner. */ - WARN_ON(vma->vm_ops); - WARN_ON(vma->vm_private_data); - if (!err && !vma->vm_ops && !vma->vm_private_data) { - struct kbase_cpu_mapping *map = kzalloc( - sizeof(*map), - GFP_KERNEL); - - if (map) { - map->kctx = reg->kctx; - map->region = NULL; - map->page_off = vma->vm_pgoff; - map->vm_start = vma->vm_start; - map->vm_end = vma->vm_end; - map->count = 1; /* start with one ref */ - - vma->vm_ops = &kbase_dma_mmap_ops; - vma->vm_private_data = map; - - list_add( - &map->mappings_list, - ®->cpu_alloc->mappings); - } - } -#endif /* CONFIG_MALI_TRACE_TIMELINE */ #endif /* CONFIG_DMA_SHARED_BUFFER */ out_unlock: kbase_gpu_vm_unlock(kctx); @@ -2719,7 +2697,9 @@ void *kbase_va_alloc(struct kbase_context *kctx, u32 size, struct kbase_hwc_dma_ dma_addr_t dma_pa; struct kbase_va_region *reg; phys_addr_t *page_array; -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0)) + unsigned long attrs = DMA_ATTR_WRITE_COMBINE; +#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) 
DEFINE_DMA_ATTRS(attrs); #endif @@ -2735,9 +2715,13 @@ void *kbase_va_alloc(struct kbase_context *kctx, u32 size, struct kbase_hwc_dma_ goto err; /* All the alloc calls return zeroed memory */ -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0)) + va = dma_alloc_attrs(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL, + attrs); +#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); - va = dma_alloc_attrs(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL, &attrs); + va = dma_alloc_attrs(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL, + &attrs); #else va = dma_alloc_writecombine(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL); #endif @@ -2784,7 +2768,9 @@ no_mmap: no_alloc: kfree(reg); no_reg: -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0)) + dma_free_attrs(kctx->kbdev->dev, size, va, dma_pa, attrs); +#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) dma_free_attrs(kctx->kbdev->dev, size, va, dma_pa, &attrs); #else dma_free_writecombine(kctx->kbdev->dev, size, va, dma_pa); @@ -2798,7 +2784,8 @@ void kbase_va_free(struct kbase_context *kctx, struct kbase_hwc_dma_mapping *han { struct kbase_va_region *reg; int err; -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) && \ + (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0)) DEFINE_DMA_ATTRS(attrs); #endif @@ -2816,7 +2803,10 @@ void kbase_va_free(struct kbase_context *kctx, struct kbase_hwc_dma_mapping *han kbase_mem_phy_alloc_put(reg->gpu_alloc); kfree(reg); -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0)) + dma_free_attrs(kctx->kbdev->dev, handle->size, + handle->cpu_va, handle->dma_pa, DMA_ATTR_WRITE_COMBINE); +#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); dma_free_attrs(kctx->kbdev->dev, handle->size, handle->cpu_va, 
handle->dma_pa, &attrs); diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c b/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c index 957061893b00..9a3f9b571d96 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c +++ b/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c @@ -175,11 +175,11 @@ static void kbase_mem_pool_spill(struct kbase_mem_pool *next_pool, kbase_mem_pool_add(next_pool, p); } -static struct page *kbase_mem_pool_alloc_page(struct kbase_mem_pool *pool) +struct page *kbase_mem_alloc_page(struct kbase_device *kbdev) { struct page *p; gfp_t gfp; - struct device *dev = pool->kbdev->dev; + struct device *dev = kbdev->dev; dma_addr_t dma_addr; #if defined(CONFIG_ARM) && !defined(CONFIG_HAVE_DMA_ATTRS) && \ @@ -210,8 +210,6 @@ static struct page *kbase_mem_pool_alloc_page(struct kbase_mem_pool *pool) kbase_set_dma_addr(p, dma_addr); - pool_dbg(pool, "alloced page from kernel\n"); - return p; } @@ -256,36 +254,35 @@ static size_t kbase_mem_pool_shrink(struct kbase_mem_pool *pool, return nr_freed; } -static size_t kbase_mem_pool_grow(struct kbase_mem_pool *pool, +int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow) { struct page *p; size_t i; - for (i = 0; i < nr_to_grow && !kbase_mem_pool_is_full(pool); i++) { - p = kbase_mem_pool_alloc_page(pool); + for (i = 0; i < nr_to_grow; i++) { + p = kbase_mem_alloc_page(pool->kbdev); if (!p) - break; + return -ENOMEM; kbase_mem_pool_add(pool, p); } - return i; + return 0; } -size_t kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size) +void kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size) { size_t cur_size; cur_size = kbase_mem_pool_size(pool); + if (new_size > pool->max_size) + new_size = pool->max_size; + if (new_size < cur_size) kbase_mem_pool_shrink(pool, cur_size - new_size); else if (new_size > cur_size) kbase_mem_pool_grow(pool, new_size - cur_size); - - cur_size = kbase_mem_pool_size(pool); - - return cur_size; } void kbase_mem_pool_set_max_size(struct 
kbase_mem_pool *pool, size_t max_size) @@ -428,21 +425,17 @@ struct page *kbase_mem_pool_alloc(struct kbase_mem_pool *pool) { struct page *p; - pool_dbg(pool, "alloc()\n"); - - p = kbase_mem_pool_remove(pool); + do { + pool_dbg(pool, "alloc()\n"); + p = kbase_mem_pool_remove(pool); - if (!p && pool->next_pool) { - /* Allocate via next pool */ - return kbase_mem_pool_alloc(pool->next_pool); - } + if (p) + return p; - if (!p) { - /* Get page from kernel */ - p = kbase_mem_pool_alloc_page(pool); - } + pool = pool->next_pool; + } while (pool); - return p; + return NULL; } void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *p, @@ -499,7 +492,7 @@ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_pages, /* Get any remaining pages from kernel */ for (; i < nr_pages; i++) { - p = kbase_mem_pool_alloc_page(pool); + p = kbase_mem_alloc_page(pool->kbdev); if (!p) goto err_rollback; pages[i] = page_to_phys(p); diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c b/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c index 493665b7e607..585fba036c9e 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c +++ b/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -68,7 +68,7 @@ DEFINE_SIMPLE_ATTRIBUTE(kbase_mem_pool_debugfs_max_size_fops, kbase_mem_pool_debugfs_max_size_set, "%llu\n"); -void kbase_mem_pool_debugfs_add(struct dentry *parent, +void kbase_mem_pool_debugfs_init(struct dentry *parent, struct kbase_mem_pool *pool) { debugfs_create_file("mem_pool_size", S_IRUGO | S_IWUSR, parent, diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h b/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h index 458f3f09e697..1442854e8956 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h +++ b/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -21,7 +21,7 @@ #include /** - * kbase_mem_pool_debugfs_add - add debugfs knobs for @pool + * kbase_mem_pool_debugfs_init - add debugfs knobs for @pool * @parent: Parent debugfs dentry * @pool: Memory pool to control * @@ -29,7 +29,7 @@ * - mem_pool_size: get/set the current size of @pool * - mem_pool_max_size: get/set the max size of @pool */ -void kbase_mem_pool_debugfs_add(struct dentry *parent, +void kbase_mem_pool_debugfs_init(struct dentry *parent, struct kbase_mem_pool *pool); #endif /*_KBASE_MEM_POOL_DEBUGFS_H*/ diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c index 03594102f7ef..092da9a96ea5 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c +++ b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c @@ -67,26 +67,27 @@ int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data, 
mutex_lock(&kctx->mem_profile_lock); dev_dbg(kctx->kbdev->dev, "initialised: %d", - kctx->mem_profile_initialized); + kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)); - if (!kctx->mem_profile_initialized) { + if (!kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)) { if (!debugfs_create_file("mem_profile", S_IRUGO, kctx->kctx_dentry, kctx, &kbasep_mem_profile_debugfs_fops)) { err = -EAGAIN; } else { - kctx->mem_profile_initialized = true; + kbase_ctx_flag_set(kctx, + KCTX_MEM_PROFILE_INITIALIZED); } } - if (kctx->mem_profile_initialized) { + if (kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)) { kfree(kctx->mem_profile_data); kctx->mem_profile_data = data; kctx->mem_profile_size = size; } dev_dbg(kctx->kbdev->dev, "returning: %d, initialised: %d", - err, kctx->mem_profile_initialized); + err, kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)); mutex_unlock(&kctx->mem_profile_lock); @@ -98,7 +99,7 @@ void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx) mutex_lock(&kctx->mem_profile_lock); dev_dbg(kctx->kbdev->dev, "initialised: %d", - kctx->mem_profile_initialized); + kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)); kfree(kctx->mem_profile_data); kctx->mem_profile_data = NULL; diff --git a/drivers/gpu/arm/midgard/mali_kbase_mmu.c b/drivers/gpu/arm/midgard/mali_kbase_mmu.c index 48d53723a9b4..5c1b9c3b84c4 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_mmu.c +++ b/drivers/gpu/arm/midgard/mali_kbase_mmu.c @@ -41,6 +41,7 @@ #include #include #include +#include #define KBASE_MMU_PAGE_ENTRIES 512 @@ -139,6 +140,16 @@ void page_fault_worker(struct work_struct *data) KBASE_DEBUG_ASSERT(kctx->kbdev == kbdev); + if (unlikely(faulting_as->protected_mode)) + { + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Protected mode fault"); + kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx, + KBASE_MMU_FAULT_TYPE_PAGE); + + goto fault_done; + } + fault_status = faulting_as->fault_status; switch (fault_status & AS_FAULTSTATUS_EXCEPTION_CODE_MASK) { @@ 
-153,7 +164,7 @@ void page_fault_worker(struct work_struct *data) case AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT: kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Tranlation table bus fault"); + "Translation table bus fault"); goto fault_done; case AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG: @@ -219,6 +230,8 @@ void page_fault_worker(struct work_struct *data) region->start_pfn + kbase_reg_current_backed_size(region)); + mutex_lock(&kbdev->mmu_hw_mutex); + kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx, KBASE_MMU_FAULT_TYPE_PAGE); /* [1] in case another page fault occurred while we were @@ -231,6 +244,9 @@ void page_fault_worker(struct work_struct *data) */ kbase_mmu_hw_do_operation(kbdev, faulting_as, NULL, 0, 0, AS_COMMAND_UNLOCK, 1); + + mutex_unlock(&kbdev->mmu_hw_mutex); + kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx, KBASE_MMU_FAULT_TYPE_PAGE); kbase_gpu_vm_unlock(kctx); @@ -249,12 +265,17 @@ void page_fault_worker(struct work_struct *data) kbase_reg_current_backed_size(region); if (0 == new_pages) { + mutex_lock(&kbdev->mmu_hw_mutex); + /* Duplicate of a fault we've already handled, nothing to do */ kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx, KBASE_MMU_FAULT_TYPE_PAGE); /* See comment [1] about UNLOCK usage */ kbase_mmu_hw_do_operation(kbdev, faulting_as, NULL, 0, 0, AS_COMMAND_UNLOCK, 1); + + mutex_unlock(&kbdev->mmu_hw_mutex); + kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx, KBASE_MMU_FAULT_TYPE_PAGE); kbase_gpu_vm_unlock(kctx); @@ -314,7 +335,7 @@ void page_fault_worker(struct work_struct *data) kbase_tlstream_aux_pagefault(kctx->id, (u64)new_pages); /* AS transaction begin */ - mutex_lock(&faulting_as->transaction_mutex); + mutex_lock(&kbdev->mmu_hw_mutex); /* flush L2 and unlock the VA (resumes the MMU) */ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6367)) @@ -338,7 +359,7 @@ void page_fault_worker(struct work_struct *data) new_pages, op, 1); - mutex_unlock(&faulting_as->transaction_mutex); + 
mutex_unlock(&kbdev->mmu_hw_mutex); /* AS transaction end */ /* reenable this in the mask */ @@ -406,14 +427,17 @@ sub_pages: KBASE_EXPORT_TEST_API(kbase_mmu_alloc_pgd); -/* Given PGD PFN for level N, return PGD PFN for level N+1 */ -static phys_addr_t mmu_get_next_pgd(struct kbase_context *kctx, phys_addr_t pgd, u64 vpfn, int level) +/* Given PGD PFN for level N, return PGD PFN for level N+1, allocating the + * new table from the pool if needed and possible + */ +static int mmu_get_next_pgd(struct kbase_context *kctx, + phys_addr_t *pgd, u64 vpfn, int level) { u64 *page; phys_addr_t target_pgd; struct page *p; - KBASE_DEBUG_ASSERT(pgd); + KBASE_DEBUG_ASSERT(*pgd); KBASE_DEBUG_ASSERT(NULL != kctx); lockdep_assert_held(&kctx->mmu_lock); @@ -425,11 +449,11 @@ static phys_addr_t mmu_get_next_pgd(struct kbase_context *kctx, phys_addr_t pgd, vpfn >>= (3 - level) * 9; vpfn &= 0x1FF; - p = pfn_to_page(PFN_DOWN(pgd)); + p = pfn_to_page(PFN_DOWN(*pgd)); page = kmap(p); if (NULL == page) { dev_warn(kctx->kbdev->dev, "mmu_get_next_pgd: kmap failure\n"); - return 0; + return -EINVAL; } target_pgd = kctx->kbdev->mmu_mode->pte_to_phy_addr(page[vpfn]); @@ -437,9 +461,9 @@ static phys_addr_t mmu_get_next_pgd(struct kbase_context *kctx, phys_addr_t pgd, if (!target_pgd) { target_pgd = kbase_mmu_alloc_pgd(kctx); if (!target_pgd) { - dev_warn(kctx->kbdev->dev, "mmu_get_next_pgd: kbase_mmu_alloc_pgd failure\n"); + dev_dbg(kctx->kbdev->dev, "mmu_get_next_pgd: kbase_mmu_alloc_pgd failure\n"); kunmap(p); - return 0; + return -ENOMEM; } kctx->kbdev->mmu_mode->entry_set_pte(&page[vpfn], target_pgd); @@ -449,10 +473,13 @@ static phys_addr_t mmu_get_next_pgd(struct kbase_context *kctx, phys_addr_t pgd, } kunmap(p); - return target_pgd; + *pgd = target_pgd; + + return 0; } -static phys_addr_t mmu_get_bottom_pgd(struct kbase_context *kctx, u64 vpfn) +static int mmu_get_bottom_pgd(struct kbase_context *kctx, + u64 vpfn, phys_addr_t *out_pgd) { phys_addr_t pgd; int l; @@ -461,15 +488,17 @@ 
static phys_addr_t mmu_get_bottom_pgd(struct kbase_context *kctx, u64 vpfn) pgd = kctx->pgd; for (l = MIDGARD_MMU_TOPLEVEL; l < MIDGARD_MMU_BOTTOMLEVEL; l++) { - pgd = mmu_get_next_pgd(kctx, pgd, vpfn, l); + int err = mmu_get_next_pgd(kctx, &pgd, vpfn, l); /* Handle failure condition */ - if (!pgd) { - dev_warn(kctx->kbdev->dev, "mmu_get_bottom_pgd: mmu_get_next_pgd failure\n"); - return 0; + if (err) { + dev_dbg(kctx->kbdev->dev, "mmu_get_bottom_pgd: mmu_get_next_pgd failure\n"); + return err; } } - return pgd; + *out_pgd = pgd; + + return 0; } static phys_addr_t mmu_insert_pages_recover_get_next_pgd(struct kbase_context *kctx, phys_addr_t pgd, u64 vpfn, int level) @@ -608,8 +637,19 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, * On the other hand, it's only a gain when we map more than * 256 pages at once (on average). Do we really care? */ - pgd = mmu_get_bottom_pgd(kctx, vpfn); - if (!pgd) { + do { + err = mmu_get_bottom_pgd(kctx, vpfn, &pgd); + if (err != -ENOMEM) + break; + /* Fill the memory pool with enough pages for + * the page walk to succeed + */ + mutex_unlock(&kctx->mmu_lock); + err = kbase_mem_pool_grow(&kctx->mem_pool, + MIDGARD_MMU_BOTTOMLEVEL); + mutex_lock(&kctx->mmu_lock); + } while (!err); + if (err) { dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: mmu_get_bottom_pgd failure\n"); if (recover_required) { /* Invalidate the pages we have partially @@ -618,7 +658,6 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, recover_vpfn, recover_count); } - err = -EINVAL; goto fail_unlock; } @@ -710,8 +749,19 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_context *kctx, u64 vpfn, * On the other hand, it's only a gain when we map more than * 256 pages at once (on average). Do we really care? 
*/ - pgd = mmu_get_bottom_pgd(kctx, vpfn); - if (!pgd) { + do { + err = mmu_get_bottom_pgd(kctx, vpfn, &pgd); + if (err != -ENOMEM) + break; + /* Fill the memory pool with enough pages for + * the page walk to succeed + */ + mutex_unlock(&kctx->mmu_lock); + err = kbase_mem_pool_grow(&kctx->mem_pool, + MIDGARD_MMU_BOTTOMLEVEL); + mutex_lock(&kctx->mmu_lock); + } while (!err); + if (err) { dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: mmu_get_bottom_pgd failure\n"); if (recover_required) { /* Invalidate the pages we have partially @@ -720,7 +770,6 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_context *kctx, u64 vpfn, recover_vpfn, recover_count); } - err = -EINVAL; goto fail_unlock; } @@ -869,8 +918,7 @@ static void kbase_mmu_flush_invalidate(struct kbase_context *kctx, u32 op; /* AS transaction begin */ - mutex_lock(&kbdev->as[ - kctx->as_nr].transaction_mutex); + mutex_lock(&kbdev->mmu_hw_mutex); if (sync) op = AS_COMMAND_FLUSH_MEM; @@ -893,8 +941,7 @@ static void kbase_mmu_flush_invalidate(struct kbase_context *kctx, } #endif /* KBASE_GPU_RESET_EN */ - mutex_unlock(&kbdev->as[ - kctx->as_nr].transaction_mutex); + mutex_unlock(&kbdev->mmu_hw_mutex); /* AS transaction end */ #ifndef CONFIG_MALI_NO_MALI @@ -921,13 +968,13 @@ static void kbase_mmu_flush_invalidate(struct kbase_context *kctx, void kbase_mmu_update(struct kbase_context *kctx) { - lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + lockdep_assert_held(&kctx->kbdev->mmu_hw_mutex); /* ASSERT that the context has a valid as_nr, which is only the case * when it's scheduled in. 
* - * as_nr won't change because the caller has the runpool_irq lock */ + * as_nr won't change because the caller has the hwaccess_lock */ KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); - lockdep_assert_held(&kctx->kbdev->as[kctx->as_nr].transaction_mutex); kctx->kbdev->mmu_mode->update(kctx); } @@ -935,8 +982,8 @@ KBASE_EXPORT_TEST_API(kbase_mmu_update); void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr) { - lockdep_assert_held(&kbdev->as[as_nr].transaction_mutex); - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->mmu_hw_mutex); kbdev->mmu_mode->disable_as(kbdev, as_nr); } @@ -946,11 +993,10 @@ void kbase_mmu_disable(struct kbase_context *kctx) /* ASSERT that the context has a valid as_nr, which is only the case * when it's scheduled in. * - * as_nr won't change because the caller has the runpool_irq lock */ + * as_nr won't change because the caller has the hwaccess_lock */ KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); - lockdep_assert_held(&kctx->kbdev->as[kctx->as_nr].transaction_mutex); - lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); /* * The address space is being disabled, drain all knowledge of it out @@ -1008,8 +1054,8 @@ int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr) if (count > nr) count = nr; - pgd = mmu_get_bottom_pgd(kctx, vpfn); - if (!pgd) { + err = mmu_get_bottom_pgd(kctx, vpfn, &pgd); + if (err) { dev_warn(kbdev->dev, "kbase_mmu_teardown_pages: mmu_get_bottom_pgd failure\n"); err = -EINVAL; goto fail_unlock; @@ -1092,10 +1138,20 @@ int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, phys_addr_t *ph if (count > nr) count = nr; - pgd = mmu_get_bottom_pgd(kctx, vpfn); - if (!pgd) { + do { + err = mmu_get_bottom_pgd(kctx, vpfn, &pgd); + if (err != -ENOMEM) + break; + /* Fill the memory pool with enough pages for + * the page 
walk to succeed + */ + mutex_unlock(&kctx->mmu_lock); + err = kbase_mem_pool_grow(&kctx->mem_pool, + MIDGARD_MMU_BOTTOMLEVEL); + mutex_lock(&kctx->mmu_lock); + } while (!err); + if (err) { dev_warn(kctx->kbdev->dev, "mmu_get_bottom_pgd failure\n"); - err = -EINVAL; goto fail_unlock; } @@ -1316,13 +1372,13 @@ void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages) return NULL; } - mutex_lock(&kctx->mmu_lock); - size_left = nr_pages * PAGE_SIZE; KBASE_DEBUG_ASSERT(0 != size_left); kaddr = vmalloc_user(size_left); + mutex_lock(&kctx->mmu_lock); + if (kaddr) { u64 end_marker = 0xFFULL; char *buffer; @@ -1407,6 +1463,18 @@ void bus_fault_worker(struct work_struct *data) return; } + if (unlikely(faulting_as->protected_mode)) + { + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Permission failure"); + kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx, + KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); + kbasep_js_runpool_release_ctx(kbdev, kctx); + atomic_dec(&kbdev->faults_pending); + return; + + } + #if KBASE_GPU_RESET_EN if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) { /* Due to H/W issue 8245 we need to reset the GPU after using UNMAPPED mode. 
@@ -1423,15 +1491,14 @@ void bus_fault_worker(struct work_struct *data) /* switch to UNMAPPED mode, will abort all jobs and stop any hw counter dumping */ /* AS transaction begin */ - mutex_lock(&kbdev->as[as_no].transaction_mutex); + mutex_lock(&kbdev->mmu_hw_mutex); /* Set the MMU into unmapped mode */ - spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbase_mmu_disable(kctx); - spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, - flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - mutex_unlock(&kbdev->as[as_no].transaction_mutex); + mutex_unlock(&kbdev->mmu_hw_mutex); /* AS transaction end */ kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx, @@ -1666,7 +1733,7 @@ static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, dev_err(kbdev->dev, "Unhandled Page fault in AS%d at VA 0x%016llX\n" "Reason: %s\n" - "raw fault status 0x%X\n" + "raw fault status: 0x%X\n" "decoded fault status: %s\n" "exception type 0x%X: %s\n" "access type 0x%X: %s\n" @@ -1695,15 +1762,15 @@ static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, /* Stop the kctx from submitting more jobs and cause it to be scheduled * out/rescheduled - this will occur on releasing the context's refcount */ - spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbasep_js_clear_submit_allowed(js_devdata, kctx); - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); /* Kill any running jobs from the context. 
Submit is disallowed, so no more jobs from this * context can appear in the job slots from this point on */ kbase_backend_jm_kill_jobs_from_kctx(kctx); /* AS transaction begin */ - mutex_lock(&as->transaction_mutex); + mutex_lock(&kbdev->mmu_hw_mutex); #if KBASE_GPU_RESET_EN if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) { /* Due to H/W issue 8245 we need to reset the GPU after using UNMAPPED mode. @@ -1715,11 +1782,11 @@ static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, } #endif /* KBASE_GPU_RESET_EN */ /* switch to UNMAPPED mode, will abort all jobs and stop any hw counter dumping */ - spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbase_mmu_disable(kctx); - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - mutex_unlock(&as->transaction_mutex); + mutex_unlock(&kbdev->mmu_hw_mutex); /* AS transaction end */ /* Clear down the fault */ kbase_mmu_hw_clear_fault(kbdev, as, kctx, @@ -1756,20 +1823,20 @@ void kbasep_as_do_poke(struct work_struct *work) kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as->number); /* AS transaction begin */ - mutex_lock(&as->transaction_mutex); + mutex_lock(&kbdev->mmu_hw_mutex); /* Force a uTLB invalidate */ kbase_mmu_hw_do_operation(kbdev, as, kctx, 0, 0, AS_COMMAND_UNLOCK, 0); - mutex_unlock(&as->transaction_mutex); + mutex_unlock(&kbdev->mmu_hw_mutex); /* AS transaction end */ - spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); if (as->poke_refcount && !(as->poke_state & KBASE_AS_POKE_STATE_KILLING_POKE)) { /* Only queue up the timer if we need it, and we're not trying to kill it */ hrtimer_start(&as->poke_timer, HR_TIMER_DELAY_MSEC(5), HRTIMER_MODE_REL); } - spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } enum hrtimer_restart 
kbasep_as_poke_timer_callback(struct hrtimer *timer) @@ -1793,7 +1860,7 @@ enum hrtimer_restart kbasep_as_poke_timer_callback(struct hrtimer *timer) * This must only be called on a context that's scheduled in, and an atom * that's running on the GPU. * - * The caller must hold kbasep_js_device_data::runpool_irq::lock + * The caller must hold hwaccess_lock * * This can be called safely from atomic context */ @@ -1805,7 +1872,7 @@ void kbase_as_poking_timer_retain_atom(struct kbase_device *kbdev, struct kbase_ KBASE_DEBUG_ASSERT(kctx); KBASE_DEBUG_ASSERT(katom); KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); if (katom->poking) return; @@ -1813,7 +1880,7 @@ void kbase_as_poking_timer_retain_atom(struct kbase_device *kbdev, struct kbase_ katom->poking = 1; /* It's safe to work on the as/as_nr without an explicit reference, - * because the caller holds the runpool_irq lock, and the atom itself + * because the caller holds the hwaccess_lock, and the atom itself * was also running and had already taken a reference */ as = &kbdev->as[kctx->as_nr]; @@ -1850,18 +1917,18 @@ void kbase_as_poking_timer_release_atom(struct kbase_device *kbdev, struct kbase as = &kbdev->as[kctx->as_nr]; - spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); KBASE_DEBUG_ASSERT(as->poke_refcount > 0); KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT); if (--(as->poke_refcount) == 0) { as->poke_state |= KBASE_AS_POKE_STATE_KILLING_POKE; - spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); hrtimer_cancel(&as->poke_timer); flush_workqueue(as->poke_wq); - spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); /* Re-check whether it's still needed */ if (as->poke_refcount) { @@ -1886,7 +1953,7 
@@ void kbase_as_poking_timer_release_atom(struct kbase_device *kbdev, struct kbase * the context until new atoms are run */ } } - spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); katom->poking = 0; } @@ -1895,7 +1962,7 @@ void kbase_mmu_interrupt_process(struct kbase_device *kbdev, struct kbase_contex { struct kbasep_js_device_data *js_devdata = &kbdev->js_data; - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); if (!kctx) { dev_warn(kbdev->dev, "%s in AS%d at 0x%016llx with no context present! Suprious IRQ or SW Design Error?\n", diff --git a/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.c b/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.c new file mode 100644 index 000000000000..c970650069cd --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.c @@ -0,0 +1,130 @@ +/* + * + * (C) COPYRIGHT 2016 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + + + +#include "mali_kbase.h" + +#include "mali_kbase_regs_history_debugfs.h" + +#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI) + +#include <linux/debugfs.h> + + +static int regs_history_size_get(void *data, u64 *val) +{ + struct kbase_io_history *const h = data; + + *val = h->size; + + return 0; +} + +static int regs_history_size_set(void *data, u64 val) +{ + struct kbase_io_history *const h = data; + + return kbase_io_history_resize(h, (u16)val); +} + + +DEFINE_SIMPLE_ATTRIBUTE(regs_history_size_fops, + regs_history_size_get, + regs_history_size_set, + "%llu\n"); + + +/** + * regs_history_show - show callback for the register access history file. + * + * @sfile: The debugfs entry + * @data: Data associated with the entry + * + * This function is called to dump all recent accesses to the GPU registers. + * + * @return 0 if successfully prints data in debugfs entry file, failure + * otherwise + */ +static int regs_history_show(struct seq_file *sfile, void *data) +{ + struct kbase_io_history *const h = sfile->private; + u16 i; + size_t iters; + unsigned long flags; + + if (!h->enabled) { + seq_puts(sfile, "The register access history is disabled\n"); + goto out; + } + + spin_lock_irqsave(&h->lock, flags); + + iters = (h->size > h->count) ? h->count : h->size; + seq_printf(sfile, "Last %zu register accesses of %zu total:\n", iters, + h->count); + for (i = 0; i < iters; ++i) { + struct kbase_io_access *io = + &h->buf[(h->count - iters + i) % h->size]; + char const access = (io->addr & 1) ? 
'w' : 'r'; + + seq_printf(sfile, "%6i: %c: reg 0x%p val %08x\n", i, access, + (void *)(io->addr & ~0x1), io->value); + } + + spin_unlock_irqrestore(&h->lock, flags); + +out: + return 0; +} + + +/** + * regs_history_open - open operation for regs_history debugfs file + * + * @in: &struct inode pointer + * @file: &struct file pointer + * + * @return file descriptor + */ +static int regs_history_open(struct inode *in, struct file *file) +{ + return single_open(file, &regs_history_show, in->i_private); +} + + +static const struct file_operations regs_history_fops = { + .open = &regs_history_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + + +void kbasep_regs_history_debugfs_init(struct kbase_device *kbdev) +{ + debugfs_create_bool("regs_history_enabled", S_IRUGO | S_IWUSR, + kbdev->mali_debugfs_directory, + &kbdev->io_history.enabled); + debugfs_create_file("regs_history_size", S_IRUGO | S_IWUSR, + kbdev->mali_debugfs_directory, + &kbdev->io_history, &regs_history_size_fops); + debugfs_create_file("regs_history", S_IRUGO, + kbdev->mali_debugfs_directory, &kbdev->io_history, + &regs_history_fops); +} + + +#endif /* CONFIG_DEBUG_FS */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.h b/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.h new file mode 100644 index 000000000000..f10837002330 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.h @@ -0,0 +1,50 @@ +/* + * + * (C) COPYRIGHT 2016 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + + + +/** + * Header file for register access history support via debugfs + * + * This interface is made available via /sys/kernel/debug/mali#/regs_history*. + * + * Usage: + * - regs_history_enabled: whether recording of register accesses is enabled. + * Write 'y' to enable, 'n' to disable. + * - regs_history_size: size of the register history buffer, must be > 0 + * - regs_history: return the information about last accesses to the registers. + */ + +#ifndef _KBASE_REGS_HISTORY_DEBUGFS_H +#define _KBASE_REGS_HISTORY_DEBUGFS_H + +struct kbase_device; + +#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI) + +/** + * kbasep_regs_history_debugfs_init - add debugfs entries for register history + * + * @kbdev: Pointer to kbase_device containing the register history + */ +void kbasep_regs_history_debugfs_init(struct kbase_device *kbdev); + +#else /* CONFIG_DEBUG_FS */ + +#define kbasep_regs_history_debugfs_init CSTD_NOP + +#endif /* CONFIG_DEBUG_FS */ + +#endif /*_KBASE_REGS_HISTORY_DEBUGFS_H*/ diff --git a/drivers/gpu/arm/midgard/mali_kbase_replay.c b/drivers/gpu/arm/midgard/mali_kbase_replay.c index d3a3dbfa5241..84aa3316e435 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_replay.c +++ b/drivers/gpu/arm/midgard/mali_kbase_replay.c @@ -1098,7 +1098,6 @@ static bool kbase_replay_fault_check(struct kbase_jd_atom *katom) bool kbase_replay_process(struct kbase_jd_atom *katom) { struct kbase_context *kctx = katom->kctx; - struct kbase_jd_context *jctx = &kctx->jctx; struct kbase_device *kbdev = kctx->kbdev; /* Don't replay this atom if these issues are not present in the @@ -1122,7 +1121,7 @@ bool kbase_replay_process(struct kbase_jd_atom *katom) return false; } - if (jctx->sched_info.ctx.is_dying) { + if (kbase_ctx_flag(kctx, KCTX_DYING)) { dev_dbg(kbdev->dev, "Not replaying; context is dying\n"); if (katom->retry_count) diff --git a/drivers/gpu/arm/midgard/mali_kbase_softjobs.c b/drivers/gpu/arm/midgard/mali_kbase_softjobs.c index 
07b862546f80..88b91a2efba2 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_softjobs.c +++ b/drivers/gpu/arm/midgard/mali_kbase_softjobs.c @@ -639,8 +639,7 @@ struct kbase_debug_copy_buffer { struct page **pages; int nr_pages; size_t offset; - /*To find memory region*/ - u64 gpu_addr; + struct kbase_mem_phy_alloc *gpu_alloc; struct page **extres_pages; int nr_extres_pages; @@ -677,10 +676,7 @@ static void kbase_debug_copy_finish(struct kbase_jd_atom *katom) kbase_gpu_vm_lock(katom->kctx); for (i = 0; i < nr; i++) { int p; - struct kbase_va_region *reg; - - reg = kbase_region_tracker_find_region_enclosing_address( - katom->kctx, buffers[i].gpu_addr); + struct kbase_mem_phy_alloc *gpu_alloc = buffers[i].gpu_alloc; if (!buffers[i].pages) break; @@ -691,8 +687,8 @@ static void kbase_debug_copy_finish(struct kbase_jd_atom *katom) put_page(pg); } kfree(buffers[i].pages); - if (reg && reg->gpu_alloc) { - switch (reg->gpu_alloc->type) { + if (gpu_alloc) { + switch (gpu_alloc->type) { case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { free_user_buffer(&buffers[i]); @@ -702,7 +698,7 @@ static void kbase_debug_copy_finish(struct kbase_jd_atom *katom) /* Nothing to be done. 
*/ break; } - kbase_mem_phy_alloc_put(reg->gpu_alloc); + kbase_mem_phy_alloc_put(gpu_alloc); } } kbase_gpu_vm_unlock(katom->kctx); @@ -790,20 +786,20 @@ static int kbase_debug_copy_prepare(struct kbase_jd_atom *katom) goto out_cleanup; } - buffers[i].gpu_addr = user_extres.ext_resource & - ~BASE_EXT_RES_ACCESS_EXCLUSIVE; kbase_gpu_vm_lock(katom->kctx); reg = kbase_region_tracker_find_region_enclosing_address( - katom->kctx, buffers[i].gpu_addr); + katom->kctx, user_extres.ext_resource & + ~BASE_EXT_RES_ACCESS_EXCLUSIVE); - if (NULL == reg || NULL == reg->cpu_alloc || + if (NULL == reg || NULL == reg->gpu_alloc || (reg->flags & KBASE_REG_FREE)) { ret = -EINVAL; goto out_unlock; } - kbase_mem_phy_alloc_get(reg->gpu_alloc); + buffers[i].gpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); buffers[i].nr_extres_pages = reg->nr_pages; + if (reg->nr_pages*PAGE_SIZE != buffers[i].size) dev_warn(katom->kctx->kbdev->dev, "Copy buffer is not of same size as the external resource to copy.\n"); @@ -909,25 +905,22 @@ static int kbase_mem_copy_from_extres(struct kbase_context *kctx, { unsigned int i; unsigned int target_page_nr = 0; - struct kbase_va_region *reg; struct page **pages = buf_data->pages; u64 offset = buf_data->offset; size_t extres_size = buf_data->nr_extres_pages*PAGE_SIZE; size_t to_copy = min(extres_size, buf_data->size); + struct kbase_mem_phy_alloc *gpu_alloc = buf_data->gpu_alloc; int ret = 0; KBASE_DEBUG_ASSERT(pages != NULL); kbase_gpu_vm_lock(kctx); - reg = kbase_region_tracker_find_region_enclosing_address( - kctx, buf_data->gpu_addr); - - if (!reg) { + if (!gpu_alloc) { ret = -EINVAL; goto out_unlock; } - switch (reg->gpu_alloc->type) { + switch (gpu_alloc->type) { case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { for (i = 0; i < buf_data->nr_extres_pages; i++) { @@ -950,14 +943,14 @@ static int kbase_mem_copy_from_extres(struct kbase_context *kctx, break; #ifdef CONFIG_DMA_SHARED_BUFFER case KBASE_MEM_TYPE_IMPORTED_UMM: { - struct dma_buf *dma_buf = 
reg->gpu_alloc->imported.umm.dma_buf; + struct dma_buf *dma_buf = gpu_alloc->imported.umm.dma_buf; KBASE_DEBUG_ASSERT(dma_buf != NULL); KBASE_DEBUG_ASSERT(dma_buf->size == buf_data->nr_extres_pages * PAGE_SIZE); ret = dma_buf_begin_cpu_access(dma_buf, -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) && !defined(CONFIG_CHROMEOS) 0, buf_data->nr_extres_pages*PAGE_SIZE, #endif DMA_FROM_DEVICE); @@ -980,7 +973,7 @@ static int kbase_mem_copy_from_extres(struct kbase_context *kctx, break; } dma_buf_end_cpu_access(dma_buf, -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) && !defined(CONFIG_CHROMEOS) 0, buf_data->nr_extres_pages*PAGE_SIZE, #endif DMA_FROM_DEVICE); @@ -1329,9 +1322,11 @@ int kbase_process_soft_job(struct kbase_jd_atom *katom) break; } case BASE_JD_REQ_SOFT_JIT_ALLOC: + return -EINVAL; /* Temporarily disabled */ kbase_jit_allocate_process(katom); break; case BASE_JD_REQ_SOFT_JIT_FREE: + return -EINVAL; /* Temporarily disabled */ kbase_jit_free_process(katom); break; case BASE_JD_REQ_SOFT_EXT_RES_MAP: diff --git a/drivers/gpu/arm/midgard/mali_kbase_tlstream.c b/drivers/gpu/arm/midgard/mali_kbase_tlstream.c index ad88b7b69018..4c1535fc0139 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_tlstream.c +++ b/drivers/gpu/arm/midgard/mali_kbase_tlstream.c @@ -142,7 +142,13 @@ enum tl_msg_id_obj { KBASE_TL_NDEP_ATOM_ATOM, KBASE_TL_RDEP_ATOM_ATOM, KBASE_TL_ATTRIB_ATOM_CONFIG, + KBASE_TL_ATTRIB_ATOM_PRIORITY, + KBASE_TL_ATTRIB_ATOM_STATE, + KBASE_TL_ATTRIB_ATOM_PRIORITY_CHANGE, KBASE_TL_ATTRIB_AS_CONFIG, + KBASE_TL_EVENT_LPU_SOFTSTOP, + KBASE_TL_EVENT_ATOM_SOFTSTOP_EX, + KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE, /* Job dump specific events. */ KBASE_JD_GPU_SOFT_RESET @@ -151,11 +157,9 @@ enum tl_msg_id_obj { /* Message ids of trace events that are recorded in the auxiliary stream. 
*/ enum tl_msg_id_aux { KBASE_AUX_PM_STATE, - KBASE_AUX_ISSUE_JOB_SOFTSTOP, - KBASE_AUX_JOB_SOFTSTOP, - KBASE_AUX_JOB_SOFTSTOP_EX, KBASE_AUX_PAGEFAULT, - KBASE_AUX_PAGESALLOC + KBASE_AUX_PAGESALLOC, + KBASE_AUX_DEVFREQ_TARGET }; /*****************************************************************************/ @@ -429,6 +433,27 @@ static const struct tp_desc tp_desc_obj[] = { "@pLLI", "atom,descriptor,affinity,config" }, + { + KBASE_TL_ATTRIB_ATOM_PRIORITY, + __stringify(KBASE_TL_ATTRIB_ATOM_PRIORITY), + "atom priority", + "@pI", + "atom,prio" + }, + { + KBASE_TL_ATTRIB_ATOM_STATE, + __stringify(KBASE_TL_ATTRIB_ATOM_STATE), + "atom state", + "@pI", + "atom,state" + }, + { + KBASE_TL_ATTRIB_ATOM_PRIORITY_CHANGE, + __stringify(KBASE_TL_ATTRIB_ATOM_PRIORITY_CHANGE), + "atom caused priority change", + "@p", + "atom" + }, { KBASE_TL_ATTRIB_AS_CONFIG, __stringify(KBASE_TL_ATTRIB_AS_CONFIG), @@ -436,6 +461,27 @@ static const struct tp_desc tp_desc_obj[] = { "@pLLL", "address_space,transtab,memattr,transcfg" }, + { + KBASE_TL_EVENT_LPU_SOFTSTOP, + __stringify(KBASE_TL_EVENT_LPU_SOFTSTOP), + "softstop event on given lpu", + "@p", + "lpu" + }, + { + KBASE_TL_EVENT_ATOM_SOFTSTOP_EX, + __stringify(KBASE_TL_EVENT_ATOM_SOFTSTOP_EX), + "atom softstopped", + "@p", + "atom" + }, + { + KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE, + __stringify(KBASE_TL_EVENT_SOFTSTOP_ISSUE), + "atom softstop issued", + "@p", + "atom" + }, { KBASE_JD_GPU_SOFT_RESET, __stringify(KBASE_JD_GPU_SOFT_RESET), @@ -454,27 +500,6 @@ static const struct tp_desc tp_desc_aux[] = { "@IL", "core_type,core_state_bitset" }, - { - KBASE_AUX_ISSUE_JOB_SOFTSTOP, - __stringify(KBASE_AUX_ISSUE_JOB_SOFTSTOP), - "Issuing job soft stop", - "@p", - "atom" - }, - { - KBASE_AUX_JOB_SOFTSTOP, - __stringify(KBASE_AUX_JOB_SOFTSTOP), - "Job soft stop", - "@I", - "tag_id" - }, - { - KBASE_AUX_JOB_SOFTSTOP_EX, - __stringify(KBASE_AUX_JOB_SOFTSTOP_EX), - "Job soft stop, more details", - "@pI", - "atom,job_type" - }, { KBASE_AUX_PAGEFAULT, 
__stringify(KBASE_AUX_PAGEFAULT), @@ -488,6 +513,13 @@ static const struct tp_desc tp_desc_aux[] = { "Total alloc pages change", "@IL", "ctx_nr,page_cnt" + }, + { + KBASE_AUX_DEVFREQ_TARGET, + __stringify(KBASE_AUX_DEVFREQ_TARGET), + "New device frequency target", + "@L", + "target_freq" } }; @@ -1085,9 +1117,10 @@ static ssize_t kbasep_tlstream_read( ssize_t copy_len = 0; KBASE_DEBUG_ASSERT(filp); - KBASE_DEBUG_ASSERT(buffer); KBASE_DEBUG_ASSERT(f_pos); - CSTD_UNUSED(filp); + + if (!buffer) + return -EINVAL; if ((0 > *f_pos) || (PACKET_SIZE > size)) return -EINVAL; @@ -1315,9 +1348,11 @@ void kbase_tlstream_term(void) } } -int kbase_tlstream_acquire(struct kbase_context *kctx, int *fd) +int kbase_tlstream_acquire(struct kbase_context *kctx, int *fd, u32 flags) { - if (0 == atomic_cmpxchg(&kbase_tlstream_enabled, 0, 1)) { + u32 tlstream_enabled = TLSTREAM_ENABLED | flags; + + if (0 == atomic_cmpxchg(&kbase_tlstream_enabled, 0, tlstream_enabled)) { int rcode; *fd = anon_inode_getfd( @@ -1581,8 +1616,8 @@ void __kbase_tlstream_tl_new_ctx(void *context, u32 nr, u32 tgid) void __kbase_tlstream_tl_new_atom(void *atom, u32 nr) { const u32 msg_id = KBASE_TL_NEW_ATOM; - const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(nr); + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(atom) + + sizeof(nr); unsigned long flags; char *buffer; size_t pos = 0; @@ -2011,6 +2046,79 @@ void __kbase_tlstream_tl_attrib_atom_config( kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } +void __kbase_tlstream_tl_attrib_atom_priority(void *atom, u32 prio) +{ + const u32 msg_id = KBASE_TL_ATTRIB_ATOM_PRIORITY; + const size_t msg_size = + sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(prio); + unsigned long flags; + char *buffer; + size_t pos = 0; + + buffer = kbasep_tlstream_msgbuf_acquire( + TL_STREAM_TYPE_OBJ, + msg_size, &flags); + KBASE_DEBUG_ASSERT(buffer); + + pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + 
pos = kbasep_tlstream_write_timestamp(buffer, pos); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &atom, sizeof(atom)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &prio, sizeof(prio)); + KBASE_DEBUG_ASSERT(msg_size == pos); + + kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); +} + +void __kbase_tlstream_tl_attrib_atom_state(void *atom, u32 state) +{ + const u32 msg_id = KBASE_TL_ATTRIB_ATOM_STATE; + const size_t msg_size = + sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(state); + unsigned long flags; + char *buffer; + size_t pos = 0; + + buffer = kbasep_tlstream_msgbuf_acquire( + TL_STREAM_TYPE_OBJ, + msg_size, &flags); + KBASE_DEBUG_ASSERT(buffer); + + pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_tlstream_write_timestamp(buffer, pos); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &atom, sizeof(atom)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &state, sizeof(state)); + KBASE_DEBUG_ASSERT(msg_size == pos); + + kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); +} + +void __kbase_tlstream_tl_attrib_atom_priority_change(void *atom) +{ + const u32 msg_id = KBASE_TL_ATTRIB_ATOM_PRIORITY_CHANGE; + const size_t msg_size = + sizeof(msg_id) + sizeof(u64) + sizeof(atom); + unsigned long flags; + char *buffer; + size_t pos = 0; + + buffer = kbasep_tlstream_msgbuf_acquire( + TL_STREAM_TYPE_OBJ, + msg_size, &flags); + KBASE_DEBUG_ASSERT(buffer); + + pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_tlstream_write_timestamp(buffer, pos); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &atom, sizeof(atom)); + KBASE_DEBUG_ASSERT(msg_size == pos); + + kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); +} + void __kbase_tlstream_tl_attrib_as_config( void *as, u64 transtab, u64 memattr, u64 transcfg) { @@ -2042,11 +2150,11 @@ void __kbase_tlstream_tl_attrib_as_config( kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void 
__kbase_tlstream_jd_gpu_soft_reset(void *gpu) +void __kbase_tlstream_tl_event_lpu_softstop(void *lpu) { - const u32 msg_id = KBASE_JD_GPU_SOFT_RESET; + const u32 msg_id = KBASE_TL_EVENT_LPU_SOFTSTOP; const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(gpu); + sizeof(msg_id) + sizeof(u64) + sizeof(lpu); unsigned long flags; char *buffer; size_t pos = 0; @@ -2059,141 +2167,108 @@ void __kbase_tlstream_jd_gpu_soft_reset(void *gpu) pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); pos = kbasep_tlstream_write_timestamp(buffer, pos); pos = kbasep_tlstream_write_bytes( - buffer, pos, &gpu, sizeof(gpu)); + buffer, pos, &lpu, sizeof(lpu)); KBASE_DEBUG_ASSERT(msg_size == pos); kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -/*****************************************************************************/ - -void __kbase_tlstream_aux_pm_state(u32 core_type, u64 state) +void __kbase_tlstream_tl_event_atom_softstop_ex(void *atom) { - const u32 msg_id = KBASE_AUX_PM_STATE; + const u32 msg_id = KBASE_TL_EVENT_ATOM_SOFTSTOP_EX; const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(core_type) + - sizeof(state); + sizeof(msg_id) + sizeof(u64) + sizeof(atom); unsigned long flags; char *buffer; size_t pos = 0; buffer = kbasep_tlstream_msgbuf_acquire( - TL_STREAM_TYPE_AUX, + TL_STREAM_TYPE_OBJ, msg_size, &flags); KBASE_DEBUG_ASSERT(buffer); pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); pos = kbasep_tlstream_write_timestamp(buffer, pos); pos = kbasep_tlstream_write_bytes( - buffer, pos, &core_type, sizeof(core_type)); - pos = kbasep_tlstream_write_bytes(buffer, pos, &state, sizeof(state)); + buffer, pos, &atom, sizeof(atom)); KBASE_DEBUG_ASSERT(msg_size == pos); - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); + kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void __kbase_tlstream_aux_issue_job_softstop(void *katom) +void __kbase_tlstream_tl_event_atom_softstop_issue(void *atom) 
{ - const u32 msg_id = KBASE_AUX_ISSUE_JOB_SOFTSTOP; + const u32 msg_id = KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE; const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(katom); + sizeof(msg_id) + sizeof(u64) + sizeof(atom); unsigned long flags; char *buffer; size_t pos = 0; buffer = kbasep_tlstream_msgbuf_acquire( - TL_STREAM_TYPE_AUX, msg_size, &flags); + TL_STREAM_TYPE_OBJ, + msg_size, &flags); KBASE_DEBUG_ASSERT(buffer); pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes(buffer, pos, &katom, sizeof(katom)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &atom, sizeof(atom)); KBASE_DEBUG_ASSERT(msg_size == pos); - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); + kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void __kbase_tlstream_aux_job_softstop(u32 js_id) +void __kbase_tlstream_jd_gpu_soft_reset(void *gpu) { - const u32 msg_id = KBASE_AUX_JOB_SOFTSTOP; + const u32 msg_id = KBASE_JD_GPU_SOFT_RESET; const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(js_id); + sizeof(msg_id) + sizeof(u64) + sizeof(gpu); unsigned long flags; char *buffer; size_t pos = 0; buffer = kbasep_tlstream_msgbuf_acquire( - TL_STREAM_TYPE_AUX, + TL_STREAM_TYPE_OBJ, msg_size, &flags); KBASE_DEBUG_ASSERT(buffer); pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes(buffer, pos, &js_id, sizeof(js_id)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &gpu, sizeof(gpu)); KBASE_DEBUG_ASSERT(msg_size == pos); - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); + kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -/** - * __kbase_tlstream_aux_job_softstop_ex_record - record the trace point - * @katom: the atom that has been soft-stopped - * @job_type: the job type - */ -static void 
__kbase_tlstream_aux_job_softstop_ex_record( - void *katom, u32 job_type) +/*****************************************************************************/ + +void __kbase_tlstream_aux_pm_state(u32 core_type, u64 state) { - const u32 msg_id = KBASE_AUX_JOB_SOFTSTOP_EX; + const u32 msg_id = KBASE_AUX_PM_STATE; const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(katom) + sizeof(job_type); + sizeof(msg_id) + sizeof(u64) + sizeof(core_type) + + sizeof(state); unsigned long flags; char *buffer; size_t pos = 0; buffer = kbasep_tlstream_msgbuf_acquire( - TL_STREAM_TYPE_AUX, msg_size, &flags); + TL_STREAM_TYPE_AUX, + msg_size, &flags); KBASE_DEBUG_ASSERT(buffer); pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes(buffer, pos, &katom, sizeof(katom)); pos = kbasep_tlstream_write_bytes( - buffer, pos, &job_type, sizeof(job_type)); + buffer, pos, &core_type, sizeof(core_type)); + pos = kbasep_tlstream_write_bytes(buffer, pos, &state, sizeof(state)); KBASE_DEBUG_ASSERT(msg_size == pos); kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); } -void __kbase_tlstream_aux_job_softstop_ex(struct kbase_jd_atom *katom) -{ - struct kbase_context *kctx = katom->kctx; - u64 jd = katom->jc; - - while (jd != 0) { - struct job_descriptor_header *job; - struct kbase_vmap_struct map; - - job = kbase_vmap(kctx, jd, sizeof(*job), &map); - if (!job) { - dev_err(kctx->kbdev->dev, - "__kbase_tlstream_aux_job_softstop_ex: failed to map job descriptor 0x%llx for atom 0x%p\n", - jd, (void *)katom); - break; - } - if (job->exception_status != BASE_JD_EVENT_STOPPED) { - kbase_vunmap(kctx, &map); - break; - } - - __kbase_tlstream_aux_job_softstop_ex_record( - katom, job->job_type); - - jd = job->job_descriptor_size ? 
- job->next_job._64 : job->next_job._32; - kbase_vunmap(kctx, &map); - } -} - void __kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change) { const u32 msg_id = KBASE_AUX_PAGEFAULT; @@ -2243,3 +2318,25 @@ void __kbase_tlstream_aux_pagesalloc(u32 ctx_nr, u64 page_count) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); } +void __kbase_tlstream_aux_devfreq_target(u64 target_freq) +{ + const u32 msg_id = KBASE_AUX_DEVFREQ_TARGET; + const size_t msg_size = + sizeof(msg_id) + sizeof(u64) + sizeof(target_freq); + unsigned long flags; + char *buffer; + size_t pos = 0; + + buffer = kbasep_tlstream_msgbuf_acquire( + TL_STREAM_TYPE_AUX, msg_size, &flags); + KBASE_DEBUG_ASSERT(buffer); + + pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_tlstream_write_timestamp(buffer, pos); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &target_freq, sizeof(target_freq)); + KBASE_DEBUG_ASSERT(msg_size == pos); + + kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); +} + diff --git a/drivers/gpu/arm/midgard/mali_kbase_tlstream.h b/drivers/gpu/arm/midgard/mali_kbase_tlstream.h index 22a0d96f9a72..e29be71c142f 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_tlstream.h +++ b/drivers/gpu/arm/midgard/mali_kbase_tlstream.h @@ -37,8 +37,9 @@ void kbase_tlstream_term(void); /** * kbase_tlstream_acquire - acquire timeline stream file descriptor - * @kctx: kernel common context - * @fd: timeline stream file descriptor + * @kctx: kernel common context + * @fd: timeline stream file descriptor + * @flags: timeline stream flags * * This descriptor is meant to be used by userspace timeline to gain access to * kernel timeline stream. 
This stream is later broadcasted by user space to the @@ -50,7 +51,7 @@ void kbase_tlstream_term(void); * Return: zero on success (this does not necessarily mean that stream * descriptor could be returned), negative number on error */ -int kbase_tlstream_acquire(struct kbase_context *kctx, int *fd); +int kbase_tlstream_acquire(struct kbase_context *kctx, int *fd, u32 flags); /** * kbase_tlstream_flush_streams - flush timeline streams. @@ -102,6 +103,11 @@ void kbase_tlstream_stats(u32 *bytes_collected, u32 *bytes_generated); /*****************************************************************************/ +#define TL_ATOM_STATE_IDLE 0 +#define TL_ATOM_STATE_READY 1 +#define TL_ATOM_STATE_DONE 2 +#define TL_ATOM_STATE_POSTED 3 + void __kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr, u32 tgid); void __kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count); void __kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn); @@ -128,25 +134,38 @@ void __kbase_tlstream_tl_ndep_atom_atom(void *atom1, void *atom2); void __kbase_tlstream_tl_rdep_atom_atom(void *atom1, void *atom2); void __kbase_tlstream_tl_attrib_atom_config( void *atom, u64 jd, u64 affinity, u32 config); +void __kbase_tlstream_tl_attrib_atom_priority(void *atom, u32 prio); +void __kbase_tlstream_tl_attrib_atom_state(void *atom, u32 state); +void __kbase_tlstream_tl_attrib_atom_priority_change(void *atom); void __kbase_tlstream_tl_attrib_as_config( void *as, u64 transtab, u64 memattr, u64 transcfg); +void __kbase_tlstream_tl_event_atom_softstop_ex(void *atom); +void __kbase_tlstream_tl_event_lpu_softstop(void *lpu); +void __kbase_tlstream_tl_event_atom_softstop_issue(void *atom); void __kbase_tlstream_jd_gpu_soft_reset(void *gpu); void __kbase_tlstream_aux_pm_state(u32 core_type, u64 state); -void __kbase_tlstream_aux_issue_job_softstop(void *katom); -void __kbase_tlstream_aux_job_softstop(u32 js_id); -void __kbase_tlstream_aux_job_softstop_ex(struct kbase_jd_atom *katom); void 
__kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change); void __kbase_tlstream_aux_pagesalloc(u32 ctx_nr, u64 page_count); +void __kbase_tlstream_aux_devfreq_target(u64 target_freq); + +#define TLSTREAM_ENABLED (1 << 31) extern atomic_t kbase_tlstream_enabled; #define __TRACE_IF_ENABLED(trace_name, ...) \ do { \ int enabled = atomic_read(&kbase_tlstream_enabled); \ - if (enabled) \ + if (enabled & TLSTREAM_ENABLED) \ __kbase_tlstream_##trace_name(__VA_ARGS__); \ } while (0) +#define __TRACE_IF_ENABLED_LATENCY(trace_name, ...) \ + do { \ + int enabled = atomic_read(&kbase_tlstream_enabled); \ + if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \ + __kbase_tlstream_##trace_name(__VA_ARGS__); \ + } while (0) + /*****************************************************************************/ /** @@ -429,6 +448,35 @@ extern atomic_t kbase_tlstream_enabled; #define kbase_tlstream_tl_attrib_atom_config(atom, jd, affinity, config) \ __TRACE_IF_ENABLED(tl_attrib_atom_config, atom, jd, affinity, config) +/** + * kbase_tlstream_tl_attrib_atom_priority - atom priority + * @atom: name of the atom object + * @prio: atom priority + * + * Function emits a timeline message containing atom priority. + */ +#define kbase_tlstream_tl_attrib_atom_priority(atom, prio) \ + __TRACE_IF_ENABLED_LATENCY(tl_attrib_atom_priority, atom, prio) + +/** + * kbase_tlstream_tl_attrib_atom_state - atom state + * @atom: name of the atom object + * @state: atom state + * + * Function emits a timeline message containing atom state. 
+ */ +#define kbase_tlstream_tl_attrib_atom_state(atom, state) \ + __TRACE_IF_ENABLED_LATENCY(tl_attrib_atom_state, atom, state) + +/** + * kbase_tlstream_tl_attrib_atom_priority_change - atom caused priority change + * @atom: name of the atom object + * + * Function emits a timeline message signalling priority change + */ +#define kbase_tlstream_tl_attrib_atom_priority_change(atom) \ + __TRACE_IF_ENABLED_LATENCY(tl_attrib_atom_priority_change, atom) + /** * kbase_tlstream_tl_attrib_as_config - address space attributes * @as: assigned address space @@ -441,6 +489,27 @@ extern atomic_t kbase_tlstream_enabled; #define kbase_tlstream_tl_attrib_as_config(as, transtab, memattr, transcfg) \ __TRACE_IF_ENABLED(tl_attrib_as_config, as, transtab, memattr, transcfg) +/** + * kbase_tlstream_tl_event_atom_softstop_ex + * @atom: atom identifier + */ +#define kbase_tlstream_tl_event_atom_softstop_ex(atom) \ + __TRACE_IF_ENABLED(tl_event_atom_softstop_ex, atom) + +/** + * kbase_tlstream_tl_event_lpu_softstop + * @lpu: name of the LPU object + */ +#define kbase_tlstream_tl_event_lpu_softstop(lpu) \ + __TRACE_IF_ENABLED(tl_event_lpu_softstop, lpu) + +/** + * kbase_tlstream_tl_event_atom_softstop_issue + * @atom: atom identifier + */ +#define kbase_tlstream_tl_event_atom_softstop_issue(atom) \ + __TRACE_IF_ENABLED(tl_event_atom_softstop_issue, atom) + /** * kbase_tlstream_jd_gpu_soft_reset - The GPU is being soft reset * @gpu: name of the GPU object @@ -459,34 +528,6 @@ extern atomic_t kbase_tlstream_enabled; #define kbase_tlstream_aux_pm_state(core_type, state) \ __TRACE_IF_ENABLED(aux_pm_state, core_type, state) -/** - * kbase_tlstream_aux_issue_job_softstop - a soft-stop command is being issued - * @katom: the atom that is being soft-stopped - */ -#define kbase_tlstream_aux_issue_job_softstop(katom) \ - __TRACE_IF_ENABLED(aux_issue_job_softstop, katom) - -/** - * kbase_tlstream_aux_job_softstop - soft job stop occurred - * @js_id: job slot id - */ -#define 
kbase_tlstream_aux_job_softstop(js_id) \ - __TRACE_IF_ENABLED(aux_job_softstop, js_id) - -/** - * kbase_tlstream_aux_job_softstop_ex - extra info about soft-stopped atom - * @katom: the atom that has been soft-stopped - * - * This trace point adds more details about the soft-stopped atom. These details - * can't be safety collected inside the interrupt handler so we're doing it - * inside a worker. - * - * Note: this is not the same information that is recorded in the trace point, - * refer to __kbase_tlstream_aux_job_softstop_ex() for more details. - */ -#define kbase_tlstream_aux_job_softstop_ex(katom) \ - __TRACE_IF_ENABLED(aux_job_softstop_ex, katom) - /** * kbase_tlstream_aux_pagefault - timeline message: MMU page fault event * resulting in new pages being mapped @@ -505,5 +546,13 @@ extern atomic_t kbase_tlstream_enabled; #define kbase_tlstream_aux_pagesalloc(ctx_nr, page_count) \ __TRACE_IF_ENABLED(aux_pagesalloc, ctx_nr, page_count) +/** + * kbase_tlstream_aux_devfreq_target - timeline message: new target DVFS + * frequency + * @target_freq: new target frequency + */ +#define kbase_tlstream_aux_devfreq_target(target_freq) \ + __TRACE_IF_ENABLED(aux_devfreq_target, target_freq) + #endif /* _KBASE_TLSTREAM_H */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c index a606ae810656..5830e87f0818 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c +++ b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c @@ -105,6 +105,8 @@ static const struct file_operations kbasep_trace_timeline_debugfs_fops = { .release = seq_release, }; +#ifdef CONFIG_DEBUG_FS + void kbasep_trace_timeline_debugfs_init(struct kbase_device *kbdev) { debugfs_create_file("mali_timeline_defs", @@ -112,10 +114,12 @@ void kbasep_trace_timeline_debugfs_init(struct kbase_device *kbdev) &kbasep_trace_timeline_debugfs_fops); } +#endif /* CONFIG_DEBUG_FS */ + void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, 
struct kbase_context *kctx, struct kbase_jd_atom *katom, int js) { - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); if (kbdev->timeline.slot_atoms_submitted[js] > 0) { KBASE_TIMELINE_JOB_START_NEXT(kctx, js, 1); @@ -134,7 +138,7 @@ void kbase_timeline_job_slot_done(struct kbase_device *kbdev, struct kbase_conte struct kbase_jd_atom *katom, int js, kbasep_js_atom_done_code done_code) { - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); if (done_code & KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT) { KBASE_TIMELINE_JOB_START_NEXT(kctx, js, 0); @@ -213,7 +217,7 @@ void kbase_timeline_pm_handle_event(struct kbase_device *kbdev, enum kbase_timel void kbase_timeline_pm_l2_transition_start(struct kbase_device *kbdev) { - lockdep_assert_held(&kbdev->pm.power_change_lock); + lockdep_assert_held(&kbdev->hwaccess_lock); /* Simply log the start of the transition */ kbdev->timeline.l2_transitioning = true; KBASE_TIMELINE_POWERING_L2(kbdev); @@ -221,7 +225,7 @@ void kbase_timeline_pm_l2_transition_start(struct kbase_device *kbdev) void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev) { - lockdep_assert_held(&kbdev->pm.power_change_lock); + lockdep_assert_held(&kbdev->hwaccess_lock); /* Simply log the end of the transition */ if (kbdev->timeline.l2_transitioning) { kbdev->timeline.l2_transitioning = false; diff --git a/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h index 22a36494e72e..619072f3215c 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h +++ b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h @@ -28,9 +28,17 @@ enum kbase_trace_timeline_code { #undef KBASE_TIMELINE_TRACE_CODE }; +#ifdef CONFIG_DEBUG_FS + /** Initialize Timeline DebugFS entries */ void kbasep_trace_timeline_debugfs_init(struct kbase_device *kbdev); +#else /* CONFIG_DEBUG_FS */ + +#define 
kbasep_trace_timeline_debugfs_init CSTD_NOP + +#endif /* CONFIG_DEBUG_FS */ + /* mali_timeline.h defines kernel tracepoints used by the KBASE_TIMELINE * functions. * Output is timestamped by either sched_clock() (default), local_clock(), or @@ -232,13 +240,12 @@ void kbasep_trace_timeline_debugfs_init(struct kbase_device *kbdev); count); \ } while (0) - /* NOTE: kbase_timeline_pm_cores_func() is in mali_kbase_pm_policy.c */ /** * Trace that an atom is starting on a job slot * - * The caller must be holding kbasep_js_device_data::runpool_irq::lock + * The caller must be holding hwaccess_lock */ void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom, int js); @@ -257,7 +264,7 @@ void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_con * - kbasep_jm_dequeue_submit_slot() * - kbasep_jm_dequeue_tail_submit_slot() * - * The caller must be holding kbasep_js_device_data::runpool_irq::lock + * The caller must be holding hwaccess_lock */ void kbase_timeline_job_slot_done(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom, int js, @@ -321,14 +328,14 @@ void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev); static inline void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom, int js) { - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); } static inline void kbase_timeline_job_slot_done(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom, int js, kbasep_js_atom_done_code done_code) { - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kbdev->hwaccess_lock); } static inline void kbase_timeline_pm_send_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event_sent) diff --git a/drivers/gpu/arm/midgard/mali_kbase_uku.h b/drivers/gpu/arm/midgard/mali_kbase_uku.h 
index e880d9663d0e..711b09138db3 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_uku.h +++ b/drivers/gpu/arm/midgard/mali_kbase_uku.h @@ -56,9 +56,15 @@ * * 10.4: * - Removed KBASE_FUNC_EXT_BUFFER_LOCK used only in internal tests + * + * 10.5: + * - Reverted to performing mmap in user space so that tools like valgrind work. + * + * 10.6: + * - Add flags input variable to KBASE_FUNC_TLSTREAM_ACQUIRE */ #define BASE_UK_VERSION_MAJOR 10 -#define BASE_UK_VERSION_MINOR 4 +#define BASE_UK_VERSION_MINOR 6 struct kbase_uk_mem_alloc { union uk_header header; @@ -319,6 +325,7 @@ struct kbase_uk_profiling_controls { struct kbase_uk_debugfs_mem_profile_add { union uk_header header; u32 len; + u32 padding; union kbase_pointer buf; }; @@ -333,12 +340,30 @@ struct kbase_uk_context_id { /** * struct kbase_uk_tlstream_acquire - User/Kernel space data exchange structure * @header: UK structure header + * @flags: timeline stream flags * @fd: timeline stream file descriptor * - * This structure is used used when performing a call to acquire kernel side - * timeline stream file descriptor. + * This structure is used when performing a call to acquire kernel side timeline + * stream file descriptor. */ struct kbase_uk_tlstream_acquire { + union uk_header header; + /* IN */ + u32 flags; + /* OUT */ + s32 fd; +}; + +/** + * struct kbase_uk_tlstream_acquire_v10_4 - User/Kernel space data exchange + * structure + * @header: UK structure header + * @fd: timeline stream file descriptor + * + * This structure is used when performing a call to acquire kernel side timeline + * stream file descriptor. 
+ */ +struct kbase_uk_tlstream_acquire_v10_4 { union uk_header header; /* IN */ /* OUT */ @@ -497,7 +522,7 @@ enum kbase_uk_function_id { #if (defined(MALI_MIPE_ENABLED) && MALI_MIPE_ENABLED) || \ !defined(MALI_MIPE_ENABLED) - KBASE_FUNC_TLSTREAM_ACQUIRE = (UK_FUNC_ID + 32), + KBASE_FUNC_TLSTREAM_ACQUIRE_V10_4 = (UK_FUNC_ID + 32), #if MALI_UNIT_TEST KBASE_FUNC_TLSTREAM_TEST = (UK_FUNC_ID + 33), KBASE_FUNC_TLSTREAM_STATS = (UK_FUNC_ID + 34), @@ -515,6 +540,11 @@ enum kbase_uk_function_id { KBASE_FUNC_MEM_JIT_INIT = (UK_FUNC_ID + 39), +#if (defined(MALI_MIPE_ENABLED) && MALI_MIPE_ENABLED) || \ + !defined(MALI_MIPE_ENABLED) + KBASE_FUNC_TLSTREAM_ACQUIRE = (UK_FUNC_ID + 40), +#endif /* MALI_MIPE_ENABLED */ + KBASE_FUNC_MAX }; diff --git a/drivers/gpu/arm/midgard/mali_kbase_vinstr.c b/drivers/gpu/arm/midgard/mali_kbase_vinstr.c index bd6095f77480..3adb06ddb266 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_vinstr.c +++ b/drivers/gpu/arm/midgard/mali_kbase_vinstr.c @@ -29,6 +29,7 @@ #include #include +#include #include #include #include @@ -1636,11 +1637,9 @@ static int kbasep_vinstr_hwcnt_reader_mmap(struct file *filp, if (vma->vm_pgoff > (size >> PAGE_SHIFT)) return -EINVAL; - if (vm_size > size) - return -EINVAL; offset = vma->vm_pgoff << PAGE_SHIFT; - if ((vm_size + offset) > size) + if (vm_size > size - offset) return -EINVAL; addr = __pa((unsigned long)cli->dump_buffers + offset); @@ -1687,9 +1686,9 @@ static void kbasep_vinstr_kick_scheduler(struct kbase_device *kbdev) unsigned long flags; down(&js_devdata->schedule_sem); - spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); - kbase_jm_kick_all(kbdev); - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_backend_slot_update(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); up(&js_devdata->schedule_sem); } diff --git a/drivers/gpu/arm/midgard/mali_midg_regmap.h b/drivers/gpu/arm/midgard/mali_midg_regmap.h index 
de6c206f3c71..de3053b88da8 100644 --- a/drivers/gpu/arm/midgard/mali_midg_regmap.h +++ b/drivers/gpu/arm/midgard/mali_midg_regmap.h @@ -61,6 +61,7 @@ #define LATEST_FLUSH 0x038 /* (RO) */ #define GROUPS_L2_COHERENT (1 << 0) /* Cores groups are l2 coherent */ +#define GPU_DBGEN (1 << 8) /* DBGEN wire status */ #define GPU_FAULTSTATUS 0x03C /* (RO) GPU exception type and fault status */ #define GPU_FAULTADDRESS_LO 0x040 /* (RO) GPU exception fault address, low word */ diff --git a/drivers/gpu/arm/midgard/sconscript b/drivers/gpu/arm/midgard/sconscript index c36e92dd0d7a..7b7ec772ecf3 100644 --- a/drivers/gpu/arm/midgard/sconscript +++ b/drivers/gpu/arm/midgard/sconscript @@ -13,13 +13,9 @@ # - -import os -import re import sys Import('env') - if Glob('tests/sconscript'): SConscript( 'tests/sconscript' ) @@ -33,18 +29,12 @@ else: fake_platform_device = 1 # Source files required for kbase. -kbase_src = [Glob('#kernel/drivers/gpu/arm/midgard/*.c'), - Glob('#kernel/drivers/gpu/arm/midgard/*.c'), - Glob('#kernel/drivers/gpu/arm/midgard/backend/*/*.c'), - Glob('#kernel/drivers/gpu/arm/midgard/backend/*/*.h'), - Glob('#kernel/drivers/gpu/arm/midgard/platform/%s/*.c' % (env['platform_config'])), - Glob('#kernel/drivers/gpu/arm/midgard/*.h'), - Glob('#kernel/drivers/gpu/arm/midgard/*.h'), - Glob('#kernel/drivers/gpu/arm/midgard/Makefile', - Glob('#kernel/drivers/gpu/arm/midgard/K*')) - ] - -kbase_src += [Glob('#kernel/drivers/gpu/arm/midgard/internal/*/*.c')] +kbase_src = [ + Glob('*.c'), + Glob('backend/*/*.c'), + Glob('internal/*/*.c'), + Glob('platform/%s/*.c' % env['platform_config']), +] if Glob('#kernel/drivers/gpu/arm/midgard/tests/internal/src/mock') and env['unit'] == '1': kbase_src += [Glob('#kernel/drivers/gpu/arm/midgard/tests/internal/src/mock/*.c')] @@ -53,75 +43,43 @@ if Glob('#kernel/drivers/gpu/arm/midgard/tests/internal/src/mock') and env['unit # we need platform config for GPL version using fake platform if fake_platform_device==1: # Check if we are 
compiling for PBX - linux_config_file = os.path.normpath(os.environ['KDIR']) + '/.config' - search_term = '^[\ ]*CONFIG_MACH_REALVIEW_PBX[\ ]*=[\ ]*y' - REALVIEW_PBX = 0 - for line in open(linux_config_file, 'r'): - if re.search(search_term, line): - REALVIEW_PBX = 1 - break - if REALVIEW_PBX == 1 and (env['platform_config'] == 'vexpress' or env['platform_config'] == 'vexpress_6xvirtex7_10mhz'): + if env.KernelConfigEnabled("CONFIG_MACH_REALVIEW_PBX") and \ + env["platform_config"] in {"vexpress", "vexpress_6xvirtex7_10mhz"}: sys.stderr.write("WARNING: Building for a PBX kernel but with platform_config=vexpress*\n") # if the file platform config file is in the tpip directory then use that, otherwise use the default config directory if Glob('#kernel/drivers/gpu/arm/midgard/config/tpip/*%s.c' % (env['platform_config'])): kbase_src += Glob('#kernel/drivers/gpu/arm/midgard/config/tpip/*%s.c' % (env['platform_config'])) else: kbase_src += Glob('#kernel/drivers/gpu/arm/midgard/config/*%s.c' % (env['platform_config'])) - -# Note: cleaning via the Linux kernel build system does not yet work -if env.GetOption('clean') : - env.Execute(Action("make clean", '[clean] kbase')) - cmd = env.Command(['$STATIC_LIB_PATH/mali_kbase.ko', '$STATIC_LIB_PATH/mali_platform_fake.ko'], kbase_src, []) -else: - if env['os'] == 'android': - env['android'] = 1 - else: - env['android'] = 0 - - if env['unit'] == '1': - env['kernel_test'] = 1 - else: - env['kernel_test'] = 0 - - #Extract environment options, note the trailing spaces are important - env_options = \ - "PLATFORM=${platform} " +\ - "MALI_ERROR_INJECT_ON=${error_inject} " +\ - "MALI_ANDROID=${android} " +\ - "MALI_KERNEL_TEST_API=${kernel_test} " +\ - "MALI_UNIT_TEST=${unit} " +\ - "MALI_RELEASE_NAME=\"${mali_release_name}\" "+\ - "MALI_MOCK_TEST=%s " % mock_test +\ - "MALI_CUSTOMER_RELEASE=${release} " +\ - "MALI_INSTRUMENTATION_LEVEL=${instr} " +\ - "MALI_COVERAGE=${coverage} " +\ - "MALI_BUS_LOG=${buslog} " - - make_action_start = 
"cd ${SOURCE.dir} && make -j%d " % GetOption('num_jobs') - make_action_end = "%s && cp mali_kbase.ko $STATIC_LIB_PATH/mali_kbase.ko" % env.kernel_get_config_defines(fake_platform_device) - make_action = make_action_start + env_options + make_action_end - makeAction=Action(make_action, '$MAKECOMSTR') - cmd = env.Command('$STATIC_LIB_PATH/mali_kbase.ko', kbase_src, [makeAction]) +make_args = env.kernel_get_config_defines(ret_list = True, + fake = fake_platform_device) + [ + 'PLATFORM=%s' % env['platform'], + 'MALI_ERROR_INJECT_ON=%s' % env['error_inject'], + 'MALI_KERNEL_TEST_API=%s' % env['unit'], + 'MALI_UNIT_TEST=%s' % env['unit'], + 'MALI_RELEASE_NAME=%s' % env['mali_release_name'], + 'MALI_MOCK_TEST=%s' % mock_test, + 'MALI_CUSTOMER_RELEASE=%s' % env['release'], + 'MALI_INSTRUMENTATION_LEVEL=%s' % env['instr'], + 'MALI_COVERAGE=%s' % env['coverage'], + 'MALI_BUS_LOG=%s' % env['buslog'], +] + +kbase = env.BuildKernelModule('$STATIC_LIB_PATH/mali_kbase.ko', kbase_src, + make_args = make_args) # Add a dependency on kds.ko. # Only necessary when KDS is not built into the kernel. # if env['os'] != 'android': - linux_config_file = os.path.normpath(os.environ['KDIR']) + '/.config' - search_term = '^[\ ]*CONFIG_KDS[\ ]*=[\ ]*y' - kds_in_kernel = 0 - for line in open(linux_config_file, 'r'): - if re.search(search_term, line): - # KDS in kernel. - kds_in_kernel = 1 - if not kds_in_kernel: - env.Depends('$STATIC_LIB_PATH/mali_kbase.ko', '$STATIC_LIB_PATH/kds.ko') + if not env.KernelConfigEnabled("CONFIG_KDS"): + env.Depends(kbase, '$STATIC_LIB_PATH/kds.ko') # need Module.symvers from ump.ko build if int(env['ump']) == 1: - env.Depends('$STATIC_LIB_PATH/mali_kbase.ko', '$STATIC_LIB_PATH/ump.ko') + env.Depends(kbase, '$STATIC_LIB_PATH/ump.ko') -env.KernelObjTarget('kbase', cmd) +env.KernelObjTarget('kbase', kbase) env.AppendUnique(BASE=['cutils_linked_list'])