From: chenzhen <chenzhen@rock-chips.com>
Date: Thu, 12 Jan 2017 09:18:47 +0000 (+0800)
Subject: Revert "MALI: rockchip: upgrade midgard DDK to r14p0-01rel0"
X-Git-Tag: firefly_0821_release~753
X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=7f166ae12d17f9dd42874b5182f3ad0cf1e95ff2;p=firefly-linux-kernel-4.4.55.git

Revert "MALI: rockchip: upgrade midgard DDK to r14p0-01rel0"

This reverts commit d1637ff80953fd46692f923f3ee7b656fb917081.

Taking the midgard DDK back from r14p0-01rel0 to r13p0-00rel0 restores
the r13p0 locking scheme in the job-manager backend (the per-subsystem
js_data.runpool_irq.lock and pm.power_change_lock spinlocks instead of
the consolidated hwaccess_lock), removes the register I/O history
debugfs support, and returns to the r13p0 protected-mode entry/exit
handling.

Change-Id: Ib99bae99fe7246142bfa7369b8e79ebbfae1e736
Signed-off-by: chenzhen <chenzhen@rock-chips.com>
---
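
Note for reviewers (this sits below the fold and is not applied): most of
this revert is mechanical. Besides dropping the r14p0 register I/O history
debugfs code and the reworked protected-mode state machine, it swaps the
consolidated kbdev->hwaccess_lock back for the r13p0 per-subsystem spinlocks.
A minimal sketch of the restored locking idiom, using only names that appear
in the diff below (kernel code, illustrative only, not buildable on its own):

    unsigned long flags;
    struct kbasep_js_device_data *js_devdata = &kbdev->js_data;

    /* Job-scheduler state (slot ringbuffers, runpool bookkeeping) is
     * again guarded by the JS runpool IRQ spinlock ... */
    spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
    /* ... inspect or update the per-slot ringbuffers here ... */
    spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);

    /* ... while GPU_IRQ_MASK read-modify-write sequences go back under
     * the PM power_change_lock rather than hwaccess_lock. */
    spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
    /* ... kbase_reg_read()/kbase_reg_write() of GPU_IRQ_MASK here ... */
    spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);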

diff --git a/drivers/gpu/arm/midgard/Kbuild b/drivers/gpu/arm/midgard/Kbuild
index 4348704e7640..8a47cddf7936 100644
--- a/drivers/gpu/arm/midgard/Kbuild
+++ b/drivers/gpu/arm/midgard/Kbuild
@@ -15,7 +15,7 @@
 
 
 # Driver version string which is returned to userspace via an ioctl
-MALI_RELEASE_NAME ?= "r14p0-01rel0"
+MALI_RELEASE_NAME ?= "r13p0-00rel0"
 
 # Paths required for build
 KBASE_PATH = $(src)
@@ -113,8 +113,7 @@ SRC := \
 	mali_kbase_mem_pool_debugfs.c \
 	mali_kbase_tlstream.c \
 	mali_kbase_strings.c \
-	mali_kbase_as_fault_debugfs.c \
-	mali_kbase_regs_history_debugfs.c
+	mali_kbase_as_fault_debugfs.c
 
 ifeq ($(MALI_UNIT_TEST),1)
 	SRC += mali_kbase_tlstream_test.c
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c
index fef9a2cb743e..c6862539c8dd 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c
@@ -21,8 +21,6 @@
 void kbase_cache_set_coherency_mode(struct kbase_device *kbdev,
 		u32 mode)
 {
-	kbdev->current_gpu_coherency_mode = mode;
-
 	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG))
 		kbase_reg_write(kbdev, COHERENCY_ENABLE, mode, NULL);
 }
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c
index 805dbd658906..083810f853ae 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c
@@ -19,7 +19,6 @@
 
 
 #include <mali_kbase.h>
-#include <mali_kbase_tlstream.h>
 #include <mali_kbase_config_defaults.h>
 #include <backend/gpu/mali_kbase_pm_internal.h>
 #ifdef CONFIG_DEVFREQ_THERMAL
@@ -124,8 +123,6 @@ kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags)
 
 	kbdev->current_voltage = voltage;
 
-	kbase_tlstream_aux_devfreq_target((u64)freq);
-
 	kbase_pm_reset_dvfs_utilisation(kbdev);
 
 	return err;
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c
index dcdf15cdc3e8..b9238a305177 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c
@@ -26,145 +26,16 @@
 #include <backend/gpu/mali_kbase_device_internal.h>
 
 #if !defined(CONFIG_MALI_NO_MALI)
-
-
-#ifdef CONFIG_DEBUG_FS
-
-
-int kbase_io_history_resize(struct kbase_io_history *h, u16 new_size)
-{
-	struct kbase_io_access *old_buf;
-	struct kbase_io_access *new_buf;
-	unsigned long flags;
-
-	if (!new_size)
-		goto out_err; /* The new size must not be 0 */
-
-	new_buf = vmalloc(new_size * sizeof(*h->buf));
-	if (!new_buf)
-		goto out_err;
-
-	spin_lock_irqsave(&h->lock, flags);
-
-	old_buf = h->buf;
-
-	/* Note: we won't bother with copying the old data over. The dumping
-	 * logic wouldn't work properly as it relies on 'count' both as a
-	 * counter and as an index to the buffer which would have changed with
-	 * the new array. This is a corner case that we don't need to support.
-	 */
-	h->count = 0;
-	h->size = new_size;
-	h->buf = new_buf;
-
-	spin_unlock_irqrestore(&h->lock, flags);
-
-	vfree(old_buf);
-
-	return 0;
-
-out_err:
-	return -1;
-}
-
-
-int kbase_io_history_init(struct kbase_io_history *h, u16 n)
-{
-	h->enabled = false;
-	spin_lock_init(&h->lock);
-	h->count = 0;
-	h->size = 0;
-	h->buf = NULL;
-	if (kbase_io_history_resize(h, n))
-		return -1;
-
-	return 0;
-}
-
-
-void kbase_io_history_term(struct kbase_io_history *h)
-{
-	vfree(h->buf);
-	h->buf = NULL;
-}
-
-
-/* kbase_io_history_add - add new entry to the register access history
- *
- * @h: Pointer to the history data structure
- * @addr: Register address
- * @value: The value that is either read from or written to the register
- * @write: 1 if it's a register write, 0 if it's a read
- */
-static void kbase_io_history_add(struct kbase_io_history *h,
-		void __iomem const *addr, u32 value, u8 write)
-{
-	struct kbase_io_access *io;
-	unsigned long flags;
-
-	spin_lock_irqsave(&h->lock, flags);
-
-	io = &h->buf[h->count % h->size];
-	io->addr = (uintptr_t)addr | write;
-	io->value = value;
-	++h->count;
-	/* If count overflows, move the index by the buffer size so the entire
-	 * buffer will still be dumped later */
-	if (unlikely(!h->count))
-		h->count = h->size;
-
-	spin_unlock_irqrestore(&h->lock, flags);
-}
-
-
-void kbase_io_history_dump(struct kbase_device *kbdev)
-{
-	struct kbase_io_history *const h = &kbdev->io_history;
-	u16 i;
-	size_t iters;
-	unsigned long flags;
-
-	if (!unlikely(h->enabled))
-		return;
-
-	spin_lock_irqsave(&h->lock, flags);
-
-	dev_err(kbdev->dev, "Register IO History:");
-	iters = (h->size > h->count) ? h->count : h->size;
-	dev_err(kbdev->dev, "Last %zu register accesses of %zu total:\n", iters,
-			h->count);
-	for (i = 0; i < iters; ++i) {
-		struct kbase_io_access *io =
-			&h->buf[(h->count - iters + i) % h->size];
-		char const access = (io->addr & 1) ? 'w' : 'r';
-
-		dev_err(kbdev->dev, "%6i: %c: reg 0x%p val %08x\n", i, access,
-				(void *)(io->addr & ~0x1), io->value);
-	}
-
-	spin_unlock_irqrestore(&h->lock, flags);
-}
-
-
-#endif /* CONFIG_DEBUG_FS */
-
-
 void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value,
 						struct kbase_context *kctx)
 {
 	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
 	KBASE_DEBUG_ASSERT(kctx == NULL || kctx->as_nr != KBASEP_AS_NR_INVALID);
 	KBASE_DEBUG_ASSERT(kbdev->dev != NULL);
+	dev_dbg(kbdev->dev, "w: reg %04x val %08x", offset, value);
 
 	writel(value, kbdev->reg + offset);
 
-#ifdef CONFIG_DEBUG_FS
-	if (unlikely(kbdev->io_history.enabled))
-		kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset,
-				value, 1);
-#endif /* CONFIG_DEBUG_FS */
-	dev_dbg(kbdev->dev, "w: reg %04x val %08x", offset, value);
-
 	if (kctx && kctx->jctx.tb)
 		kbase_device_trace_register_access(kctx, REG_WRITE, offset,
 									value);
@@ -182,13 +53,7 @@ u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset,
 
 	val = readl(kbdev->reg + offset);
 
-#ifdef CONFIG_DEBUG_FS
-	if (unlikely(kbdev->io_history.enabled))
-		kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset,
-				val, 0);
-#endif /* CONFIG_DEBUG_FS */
 	dev_dbg(kbdev->dev, "r: reg %04x val %08x", offset, val);
-
 	if (kctx && kctx->jctx.tb)
 		kbase_device_trace_register_access(kctx, REG_READ, offset, val);
 	return val;
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c
index 7ad309e8d7f4..3f06a10f7fed 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c
@@ -45,11 +45,11 @@ static void kbasep_instr_hwcnt_cacheclean(struct kbase_device *kbdev)
 					KBASE_INSTR_STATE_REQUEST_CLEAN);
 
 	/* Enable interrupt */
-	spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags);
 	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
 				irq_mask | CLEAN_CACHES_COMPLETED, NULL);
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags);
 
 	/* clean&invalidate the caches so we're sure the mmu tables for the dump
 	 * buffer is valid */
@@ -96,11 +96,11 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
 	}
 
 	/* Enable interrupt */
-	spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags);
 	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask |
 						PRFCNT_SAMPLE_COMPLETED, NULL);
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags);
 
 	/* In use, this context is the owner */
 	kbdev->hwcnt.kctx = kctx;
@@ -185,9 +185,7 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
 	dev_dbg(kbdev->dev, "HW counters dumping set-up for context %p", kctx);
 	return err;
  out_unrequest_cores:
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 	kbase_pm_unrequest_cores(kbdev, true, shader_cores_needed);
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
  out_err:
 	return err;
 }
@@ -228,10 +226,11 @@ int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx)
 	kbdev->hwcnt.backend.triggered = 0;
 
 	/* Disable interrupt */
-	spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags);
 	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
 				irq_mask & ~PRFCNT_SAMPLE_COMPLETED, NULL);
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags);
 
 	/* Disable the counters */
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0, kctx);
@@ -244,11 +243,10 @@ int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx)
 	kbase_pm_unrequest_cores(kbdev, true,
 		kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER));
 
-	kbase_pm_release_l2_caches(kbdev);
-
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
 	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
 
+	kbase_pm_release_l2_caches(kbdev);
+
 	dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p",
 									kctx);
 
@@ -393,12 +391,12 @@ void kbase_clean_caches_done(struct kbase_device *kbdev)
 
 		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
 		/* Disable interrupt */
-		spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
+		spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags);
 		irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
 									NULL);
 		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
 				irq_mask & ~CLEAN_CACHES_COMPLETED, NULL);
-		spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
+		spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags);
 
 		/* Wakeup... */
 		if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) {
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c
index 8416b80e8b77..b891b12a3299 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c
@@ -148,8 +148,6 @@ static irqreturn_t kbase_gpu_irq_handler(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
-KBASE_EXPORT_TEST_API(kbase_gpu_irq_handler);
-
 static irq_handler_t kbase_handler_table[] = {
 	[JOB_IRQ_TAG] = kbase_job_irq_handler,
 	[MMU_IRQ_TAG] = kbase_mmu_irq_handler,
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c
index 202dcfa384a2..f2167887229b 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -52,7 +52,8 @@ static void assign_and_activate_kctx_addr_space(struct kbase_device *kbdev,
 
 	lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
 	lockdep_assert_held(&js_devdata->runpool_mutex);
-	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&current_as->transaction_mutex);
+	lockdep_assert_held(&js_devdata->runpool_irq.lock);
 
 	js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
 
@@ -141,7 +142,8 @@ void kbase_backend_release_ctx_irq(struct kbase_device *kbdev,
 		return;
 	}
 
-	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&kbdev->as[as_nr].transaction_mutex);
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
 
 	js_per_as_data = &kbdev->js_data.runpool_irq.per_as_data[kctx->as_nr];
 	if (js_per_as_data->as_busy_refcount != 0) {
@@ -217,7 +219,8 @@ static bool check_is_runpool_full(struct kbase_device *kbdev,
 	is_runpool_full = (bool) (js_devdata->nr_all_contexts_running >=
 						kbdev->nr_hw_address_spaces);
 
-	if (kctx && !kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) {
+	if (kctx != NULL && (kctx->jctx.sched_info.ctx.flags &
+					KBASE_CTX_FLAG_SUBMIT_DISABLED) == 0) {
 		lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
 		/* Contexts that submit might use less of the address spaces
 		 * available, due to HW workarounds.  In which case, the runpool
@@ -264,7 +267,7 @@ int kbase_backend_find_free_address_space(struct kbase_device *kbdev,
 		return i;
 	}
 
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
 
 	/* No address space currently free, see if we can release one */
 	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
@@ -278,14 +281,16 @@ int kbase_backend_find_free_address_space(struct kbase_device *kbdev,
 
 		/* Don't release privileged or active contexts, or contexts with
 		 * jobs running */
-		if (as_kctx && !kbase_ctx_flag(as_kctx, KCTX_PRIVILEGED) &&
+		if (as_kctx && !(as_kctx->jctx.sched_info.ctx.flags &
+						KBASE_CTX_FLAG_PRIVILEGED) &&
 			js_per_as_data->as_busy_refcount == 0) {
 			if (!kbasep_js_runpool_retain_ctx_nolock(kbdev,
 								as_kctx)) {
 				WARN(1, "Failed to retain active context\n");
 
-				spin_unlock_irqrestore(&kbdev->hwaccess_lock,
-						flags);
+				spin_unlock_irqrestore(
+						&js_devdata->runpool_irq.lock,
+									flags);
 				mutex_unlock(&js_devdata->runpool_mutex);
 				mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
 
@@ -298,7 +303,8 @@ int kbase_backend_find_free_address_space(struct kbase_device *kbdev,
 			 * context we're about to release without violating lock
 			 * ordering
 			 */
-			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+			spin_unlock_irqrestore(&js_devdata->runpool_irq.lock,
+									flags);
 			mutex_unlock(&js_devdata->runpool_mutex);
 			mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
 
@@ -309,7 +315,7 @@ int kbase_backend_find_free_address_space(struct kbase_device *kbdev,
 
 			kbasep_js_runpool_release_ctx_nolock(kbdev, as_kctx);
 
-			if (!kbase_ctx_flag(as_kctx, KCTX_SCHEDULED)) {
+			if (!as_js_kctx_info->ctx.is_scheduled) {
 				kbasep_js_runpool_requeue_or_kill_ctx(kbdev,
 								as_kctx,
 								true);
@@ -330,11 +336,11 @@ int kbase_backend_find_free_address_space(struct kbase_device *kbdev,
 
 			mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
 			mutex_lock(&js_devdata->runpool_mutex);
-			spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+			spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
 		}
 	}
 
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
 
 	mutex_unlock(&js_devdata->runpool_mutex);
 	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
@@ -363,11 +369,12 @@ bool kbase_backend_use_ctx(struct kbase_device *kbdev,
 	new_address_space = &kbdev->as[as_nr];
 
 	lockdep_assert_held(&js_devdata->runpool_mutex);
-	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&new_address_space->transaction_mutex);
+	lockdep_assert_held(&js_devdata->runpool_irq.lock);
 
 	assign_and_activate_kctx_addr_space(kbdev, kctx, new_address_space);
 
-	if (kbase_ctx_flag(kctx, KCTX_PRIVILEGED)) {
+	if ((js_kctx_info->ctx.flags & KBASE_CTX_FLAG_PRIVILEGED) != 0) {
 		/* We need to retain it to keep the corresponding address space
 		 */
 		kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h
index 08a7400e66d5..83d477898c5e 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h
@@ -73,7 +73,8 @@ struct slot_rb {
  * @reset_timer:		Timeout for soft-stops before the reset
  * @timeouts_updated:           Have timeout values just been updated?
  *
- * The hwaccess_lock (a spinlock) must be held when accessing this structure
+ * The kbasep_js_device_data::runpool_irq::lock (a spinlock) must be held when
+ * accessing this structure
  */
 struct kbase_backend_data {
 	struct slot_rb slot_rb[BASE_JM_MAX_NR_SLOTS];
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c
index 668258b4f303..00900a99a898 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c
@@ -221,40 +221,19 @@ static void kbasep_job_slot_update_head_start_timestamp(
 /**
  * kbasep_trace_tl_nret_atom_lpu - Call nret_atom_lpu timeline tracepoint
  * @kbdev: kbase device
- * @js: job slot
+ * @i: job slot
  *
  * Get kbase atom by calling kbase_gpu_inspect for given job slot.
  * Then use obtained katom and name of slot associated with the given
  * job slot number in tracepoint call to the instrumentation module
  * informing that given atom is no longer executed on given lpu (job slot).
  */
-static void kbasep_trace_tl_nret_atom_lpu(struct kbase_device *kbdev, int js)
+static void kbasep_trace_tl_nret_atom_lpu(struct kbase_device *kbdev, int i)
 {
-	int i;
-	for (i = 0;
-	     i < kbase_backend_nr_atoms_submitted(kbdev, js);
-	     i++) {
-		struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
-
-		kbase_tlstream_tl_nret_atom_lpu(katom,
-			&kbdev->gpu_props.props.raw_props.js_features[js]);
-	}
-}
+	struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, i, 0);
 
-/**
- * kbasep_trace_tl_event_lpu_softstop - Call event_lpu_softstop timeline
- * tracepoint
- * @kbdev: kbase device
- * @js: job slot
- *
- * Make a tracepoint call to the instrumentation module informing that
- * softstop happened on given lpu (job slot).
- */
-static void kbasep_trace_tl_event_lpu_softstop(struct kbase_device *kbdev,
-					int js)
-{
-	kbase_tlstream_tl_event_lpu_softstop(
-		&kbdev->gpu_props.props.raw_props.js_features[js]);
+	kbase_tlstream_tl_nret_atom_lpu(katom,
+		&kbdev->gpu_props.props.raw_props.js_features[i]);
 }
 
 void kbase_job_done(struct kbase_device *kbdev, u32 done)
@@ -283,7 +262,7 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done)
 						irq_throttle_cycles, NULL);
 	}
 
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
 
 	while (done) {
 		u32 failed = done >> 16;
@@ -318,8 +297,7 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done)
 								NULL, 0);
 #endif
 
-					kbasep_trace_tl_event_lpu_softstop(
-						kbdev, i);
+					kbase_tlstream_aux_job_softstop(i);
 
 					kbasep_trace_tl_nret_atom_lpu(
 						kbdev, i);
@@ -478,7 +456,7 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done)
 								end_timestamp);
 	}
 
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
 #if KBASE_GPU_RESET_EN
 	if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
 						KBASE_RESET_GPU_COMMITTED) {
@@ -561,7 +539,7 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
 		target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED;
 
 		/* Mark the point where we issue the soft-stop command */
-		kbase_tlstream_tl_event_atom_softstop_issue(target_katom);
+		kbase_tlstream_aux_issue_job_softstop(target_katom);
 
 		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) {
 			int i;
@@ -747,7 +725,7 @@ void kbase_backend_jm_kill_jobs_from_kctx(struct kbase_context *kctx)
 
 	/* Cancel any remaining running jobs for this kctx  */
 	mutex_lock(&kctx->jctx.lock);
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
 
 	/* Invalidate all jobs in context, to prevent re-submitting */
 	for (i = 0; i < BASE_JD_ATOM_COUNT; i++) {
@@ -759,7 +737,7 @@ void kbase_backend_jm_kill_jobs_from_kctx(struct kbase_context *kctx)
 	for (i = 0; i < kbdev->gpu_props.num_job_slots; i++)
 		kbase_job_slot_hardstop(kctx, i, NULL);
 
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
 	mutex_unlock(&kctx->jctx.lock);
 }
 
@@ -770,13 +748,12 @@ void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
 	int js = target_katom->slot_nr;
 	int priority = target_katom->sched_priority;
 	int i;
-	bool stop_sent = false;
 
 	KBASE_DEBUG_ASSERT(kctx != NULL);
 	kbdev = kctx->kbdev;
 	KBASE_DEBUG_ASSERT(kbdev != NULL);
 
-	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
 
 	for (i = 0; i < kbase_backend_nr_atoms_on_slot(kbdev, js); i++) {
 		struct kbase_jd_atom *katom;
@@ -788,14 +765,8 @@ void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
 		if (katom->kctx != kctx)
 			continue;
 
-		if (katom->sched_priority > priority) {
-			if (!stop_sent)
-				kbase_tlstream_tl_attrib_atom_priority_change(
-						target_katom);
-
+		if (katom->sched_priority > priority)
 			kbase_job_slot_softstop(kbdev, js, katom);
-			stop_sent = true;
-		}
 	}
 }
 
@@ -864,7 +835,7 @@ void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx)
 	 * policy queue either */
 	wait_event(kctx->jctx.zero_jobs_wait, kctx->jctx.job_nr == 0);
 	wait_event(kctx->jctx.sched_info.ctx.is_scheduled_wait,
-		   !kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+			kctx->jctx.sched_info.ctx.is_scheduled == false);
 
 	spin_lock_irqsave(&reset_data.lock, flags);
 	if (reset_data.stage == 1) {
@@ -974,7 +945,7 @@ static bool kbasep_check_for_afbc_on_slot(struct kbase_device *kbdev,
 	bool ret = false;
 	int i;
 
-	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
 
 	/* When we have an atom the decision can be made straight away. */
 	if (target_katom)
@@ -1063,7 +1034,7 @@ void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
 #if KBASE_GPU_RESET_EN
 	/* We make the check for AFBC before evicting/stopping atoms.  Note
 	 * that no other thread can modify the slots whilst we have the
-	 * hwaccess_lock. */
+	 * runpool_irq lock. */
 	int needs_workaround_for_afbc =
 			kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3542)
 			&& kbasep_check_for_afbc_on_slot(kbdev, kctx, js,
@@ -1159,8 +1130,6 @@ static void kbase_debug_dump_registers(struct kbase_device *kbdev)
 {
 	int i;
 
-	kbase_io_history_dump(kbdev);
-
 	dev_err(kbdev->dev, "Register state:");
 	dev_err(kbdev->dev, "  GPU_IRQ_RAWSTAT=0x%08x GPU_STATUS=0x%08x",
 		kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL),
@@ -1193,14 +1162,13 @@ static void kbase_debug_dump_registers(struct kbase_device *kbdev)
 
 static void kbasep_reset_timeout_worker(struct work_struct *data)
 {
-	unsigned long flags;
+	unsigned long flags, mmu_flags;
 	struct kbase_device *kbdev;
 	int i;
 	ktime_t end_timestamp = ktime_get();
 	struct kbasep_js_device_data *js_devdata;
 	bool try_schedule = false;
 	bool silent = false;
-	u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS;
 
 	KBASE_DEBUG_ASSERT(data);
 
@@ -1238,20 +1206,18 @@ static void kbasep_reset_timeout_worker(struct work_struct *data)
 
 	KBASE_DEBUG_ASSERT(kbdev->irq_reset_flush == false);
 
-	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
-	spin_lock(&kbdev->hwaccess_lock);
-	spin_lock(&kbdev->mmu_mask_change);
+	spin_lock_irqsave(&kbdev->mmu_mask_change, mmu_flags);
 	/* We're about to flush out the IRQs and their bottom half's */
 	kbdev->irq_reset_flush = true;
 
 	/* Disable IRQ to avoid IRQ handlers to kick in after releasing the
 	 * spinlock; this also clears any outstanding interrupts */
-	kbase_pm_disable_interrupts_nolock(kbdev);
-
-	spin_unlock(&kbdev->mmu_mask_change);
-	spin_unlock(&kbdev->hwaccess_lock);
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+	kbase_pm_disable_interrupts(kbdev);
 	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
 
+	spin_unlock_irqrestore(&kbdev->mmu_mask_change, mmu_flags);
+
 	/* Ensure that any IRQ handlers have finished
 	 * Must be done without any locks IRQ handlers will take */
 	kbase_synchronize_irqs(kbdev);
@@ -1262,16 +1228,6 @@ static void kbasep_reset_timeout_worker(struct work_struct *data)
 	/* The flush has completed so reset the active indicator */
 	kbdev->irq_reset_flush = false;
 
-	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TMIX_8463)) {
-		/* Ensure that L2 is not transitioning when we send the reset
-		 * command */
-		while (--max_loops && kbase_pm_get_trans_cores(kbdev,
-				KBASE_PM_CORE_L2))
-			;
-
-		WARN(!max_loops, "L2 power transition timed out while trying to reset\n");
-	}
-
 	mutex_lock(&kbdev->pm.lock);
 	/* We hold the pm lock, so there ought to be a current policy */
 	KBASE_DEBUG_ASSERT(kbdev->pm.backend.pm_current_policy);
@@ -1294,19 +1250,21 @@ static void kbasep_reset_timeout_worker(struct work_struct *data)
 	kbase_pm_init_hw(kbdev, 0);
 
 	/* Complete any jobs that were still on the GPU */
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
 	kbase_backend_reset(kbdev, &end_timestamp);
 	kbase_pm_metrics_update(kbdev, NULL);
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
 
 	mutex_unlock(&kbdev->pm.lock);
 
 	mutex_lock(&js_devdata->runpool_mutex);
 
-	mutex_lock(&kbdev->mmu_hw_mutex);
 	/* Reprogram the GPU's MMU */
 	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
-		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		struct kbase_as *as = &kbdev->as[i];
+
+		mutex_lock(&as->transaction_mutex);
+		spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
 
 		if (js_devdata->runpool_irq.per_as_data[i].kctx)
 			kbase_mmu_update(
@@ -1314,9 +1272,9 @@ static void kbasep_reset_timeout_worker(struct work_struct *data)
 		else
 			kbase_mmu_disable_as(kbdev, i);
 
-		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+		mutex_unlock(&as->transaction_mutex);
 	}
-	mutex_unlock(&kbdev->mmu_hw_mutex);
 
 	kbase_pm_enable_interrupts(kbdev);
 
@@ -1424,9 +1382,9 @@ static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev)
 	struct kbasep_js_device_data *js_devdata;
 
 	js_devdata = &kbdev->js_data;
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
 	kbasep_try_reset_gpu_early_locked(kbdev);
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
 }
 
 /**
@@ -1471,9 +1429,9 @@ bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev)
 	struct kbasep_js_device_data *js_devdata;
 
 	js_devdata = &kbdev->js_data;
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
 	ret = kbase_prepare_to_reset_gpu_locked(kbdev);
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
 
 	return ret;
 }
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h
index 89b1288a1f88..8f1e5615ea43 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h
@@ -40,7 +40,7 @@
  * calling this.
  *
  * The following locking conditions are made on the caller:
- * - it must hold the hwaccess_lock
+ * - it must hold the kbasep_js_device_data::runpool_irq::lock
  */
 void kbase_job_submit_nolock(struct kbase_device *kbdev,
 					struct kbase_jd_atom *katom, int js);
@@ -74,7 +74,7 @@ static inline char *kbasep_make_job_slot_string(int js, char *js_string)
  * calling this.
  *
  * The following locking conditions are made on the caller:
- * - it must hold the hwaccess_lock
+ * - it must hold the kbasep_js_device_data::runpool_irq::lock
  */
 void kbase_job_hw_submit(struct kbase_device *kbdev,
 				struct kbase_jd_atom *katom,
@@ -91,7 +91,7 @@ void kbase_job_hw_submit(struct kbase_device *kbdev,
  * @target_katom:	Atom to stop
  *
  * The following locking conditions are made on the caller:
- * - it must hold the hwaccess_lock
+ * - it must hold the kbasep_js_device_data::runpool_irq::lock
  */
 void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
 					int js,
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c
index d7b4d3f10592..da7c4df7d277 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c
@@ -26,7 +26,6 @@
 #include <mali_kbase_js.h>
 #include <mali_kbase_tlstream.h>
 #include <mali_kbase_10969_workaround.h>
-#include <backend/gpu/mali_kbase_cache_policy_backend.h>
 #include <backend/gpu/mali_kbase_device_internal.h>
 #include <backend/gpu/mali_kbase_jm_internal.h>
 #include <backend/gpu/mali_kbase_js_affinity.h>
@@ -57,7 +56,7 @@ static void kbase_gpu_enqueue_atom(struct kbase_device *kbdev,
 
 	WARN_ON(SLOT_RB_ENTRIES(rb) >= SLOT_RB_SIZE);
 
-	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
 
 	rb->entries[rb->write_idx & SLOT_RB_MASK].katom = katom;
 	rb->write_idx++;
@@ -89,7 +88,7 @@ static struct kbase_jd_atom *kbase_gpu_dequeue_atom(struct kbase_device *kbdev,
 		return NULL;
 	}
 
-	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
 
 	katom = rb->entries[rb->read_idx & SLOT_RB_MASK].katom;
 
@@ -109,7 +108,7 @@ struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js,
 {
 	struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js];
 
-	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
 
 	if ((SLOT_RB_ENTRIES(rb) - 1) < idx)
 		return NULL; /* idx out of range */
@@ -147,7 +146,7 @@ static bool kbase_gpu_atoms_submitted(struct kbase_device *kbdev, int js)
 {
 	int i;
 
-	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
 
 	for (i = 0; i < SLOT_RB_SIZE; i++) {
 		struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
@@ -174,7 +173,7 @@ static bool kbase_gpu_atoms_submitted_any(struct kbase_device *kbdev)
 	int js;
 	int i;
 
-	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
 
 	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
 		for (i = 0; i < SLOT_RB_SIZE; i++) {
@@ -192,7 +191,7 @@ int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js)
 	int nr = 0;
 	int i;
 
-	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
 
 	for (i = 0; i < SLOT_RB_SIZE; i++) {
 		struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
@@ -210,7 +209,7 @@ int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js)
 	int nr = 0;
 	int i;
 
-	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
 
 	for (i = 0; i < SLOT_RB_SIZE; i++) {
 		if (kbase_gpu_inspect(kbdev, js, i))
@@ -226,7 +225,7 @@ static int kbase_gpu_nr_atoms_on_slot_min(struct kbase_device *kbdev, int js,
 	int nr = 0;
 	int i;
 
-	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
 
 	for (i = 0; i < SLOT_RB_SIZE; i++) {
 		struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
@@ -238,56 +237,6 @@ static int kbase_gpu_nr_atoms_on_slot_min(struct kbase_device *kbdev, int js,
 	return nr;
 }
 
-/**
- * check_secure_atom - Check if the given atom is in the given secure state and
- *                     has a ringbuffer state of at least
- *                     KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION
- * @katom:  Atom pointer
- * @secure: Desired secure state
- *
- * Return: true if atom is in the given state, false otherwise
- */
-static bool check_secure_atom(struct kbase_jd_atom *katom, bool secure)
-{
-	if (katom->gpu_rb_state >=
-			KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION &&
-			((kbase_jd_katom_is_protected(katom) && secure) ||
-			(!kbase_jd_katom_is_protected(katom) && !secure)))
-		return true;
-
-	return false;
-}
-
-/**
- * kbase_gpu_check_secure_atoms - Check if there are any atoms in the given
- *                                secure state in the ringbuffers of at least
- *                                state
- *                                KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE
- * @kbdev:  Device pointer
- * @secure: Desired secure state
- *
- * Return: true if any atoms are in the given state, false otherwise
- */
-static bool kbase_gpu_check_secure_atoms(struct kbase_device *kbdev,
-		bool secure)
-{
-	int js, i;
-
-	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
-		for (i = 0; i < SLOT_RB_SIZE; i++) {
-			struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev,
-					js, i);
-
-			if (katom) {
-				if (check_secure_atom(katom, secure))
-					return true;
-			}
-		}
-	}
-
-	return false;
-}
-
 int kbase_backend_slot_free(struct kbase_device *kbdev, int js)
 {
 	if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) !=
@@ -490,7 +439,7 @@ static bool kbasep_js_job_check_ref_cores(struct kbase_device *kbdev,
 						recently_chosen_affinity);
 
 			/* Note: this is where the caller must've taken the
-			 * hwaccess_lock */
+			 * runpool_irq.lock */
 
 			/* Check for affinity violations - if there are any,
 			 * then we just ask the caller to requeue and try again
@@ -637,12 +586,15 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev,
 		kbase_pm_metrics_update(kbdev, end_timestamp);
 
 		if (katom->core_req & BASE_JD_REQ_PERMON)
-			kbase_pm_release_gpu_cycle_counter_nolock(kbdev);
+			kbase_pm_release_gpu_cycle_counter(kbdev);
 		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
 
 	case KBASE_ATOM_GPU_RB_READY:
 		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
 
+	case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_ENTRY:
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
 	case KBASE_ATOM_GPU_RB_WAITING_AFFINITY:
 		kbase_js_affinity_release_slot_cores(kbdev, katom->slot_nr,
 							katom->affinity);
@@ -651,21 +603,7 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev,
 	case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE:
 		break;
 
-	case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION:
-		if (katom->protected_state.enter !=
-				KBASE_ATOM_ENTER_PROTECTED_CHECK ||
-				katom->protected_state.exit !=
-				KBASE_ATOM_EXIT_PROTECTED_CHECK)
-			kbdev->protected_mode_transition = false;
-
-		if (kbase_jd_katom_is_protected(katom) &&
-				(katom->protected_state.enter ==
-				KBASE_ATOM_ENTER_PROTECTED_IDLE_L2))
-			kbase_vinstr_resume(kbdev->vinstr_ctx);
-
-		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
-
-	case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV:
+	case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_EXIT:
 		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
 
 	case KBASE_ATOM_GPU_RB_WAITING_BLOCKED:
@@ -728,19 +666,11 @@ static int kbase_gpu_protected_mode_enter(struct kbase_device *kbdev)
 {
 	int err = -EINVAL;
 
-	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
 
 	WARN_ONCE(!kbdev->protected_ops,
 			"Cannot enter protected mode: protected callbacks not specified.\n");
 
-	/*
-	 * When entering into protected mode, we must ensure that the
-	 * GPU is not operating in coherent mode as well. This is to
-	 * ensure that no protected memory can be leaked.
-	 */
-	if (kbdev->system_coherency == COHERENCY_ACE)
-		kbase_cache_set_coherency_mode(kbdev, COHERENCY_ACE_LITE);
-
 	if (kbdev->protected_ops) {
 		/* Switch GPU to protected mode */
 		err = kbdev->protected_ops->protected_mode_enter(kbdev);
@@ -757,7 +687,7 @@ static int kbase_gpu_protected_mode_enter(struct kbase_device *kbdev)
 
 static int kbase_gpu_protected_mode_reset(struct kbase_device *kbdev)
 {
-	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
 
 	WARN_ONCE(!kbdev->protected_ops,
 			"Cannot exit protected mode: protected callbacks not specified.\n");
@@ -765,146 +695,53 @@ static int kbase_gpu_protected_mode_reset(struct kbase_device *kbdev)
 	if (!kbdev->protected_ops)
 		return -EINVAL;
 
+	kbdev->protected_mode_transition = true;
 	kbase_reset_gpu_silent(kbdev);
 
 	return 0;
 }
 
-static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev,
+static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev,
 		struct kbase_jd_atom **katom, int idx, int js)
 {
 	int err = 0;
 
-	switch (katom[idx]->protected_state.enter) {
-	case KBASE_ATOM_ENTER_PROTECTED_CHECK:
-		/* The checks in KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV
-		 * should ensure that we are not already transitiong, and that
-		 * there are no atoms currently on the GPU. */
-		WARN_ON(kbdev->protected_mode_transition);
-		WARN_ON(kbase_gpu_atoms_submitted_any(kbdev));
-
-		kbdev->protected_mode_transition = true;
-		katom[idx]->protected_state.enter =
-			KBASE_ATOM_ENTER_PROTECTED_VINSTR;
-
-		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
-
-	case KBASE_ATOM_ENTER_PROTECTED_VINSTR:
-		if (kbase_vinstr_try_suspend(kbdev->vinstr_ctx) < 0) {
-			/*
-			 * We can't switch now because
-			 * the vinstr core state switch
-			 * is not done yet.
-			 */
-			return -EAGAIN;
-		}
-
-		/* Once reaching this point GPU must be
-		 * switched to protected mode or vinstr
-		 * re-enabled. */
-
+	switch (katom[idx]->exit_protected_state) {
+	case KBASE_ATOM_EXIT_PROTECTED_CHECK:
 		/*
-		 * Not in correct mode, begin protected mode switch.
-		 * Entering protected mode requires us to power down the L2,
-		 * and drop out of fully coherent mode.
+		 * If the atom ahead of this one hasn't got to being
+		 * submitted yet then bail.
 		 */
-		katom[idx]->protected_state.enter =
-			KBASE_ATOM_ENTER_PROTECTED_IDLE_L2;
-
-		kbase_pm_update_cores_state_nolock(kbdev);
-
-		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
-
-	case KBASE_ATOM_ENTER_PROTECTED_IDLE_L2:
-		/* Avoid unnecessary waiting on non-ACE platforms. */
-		if (kbdev->current_gpu_coherency_mode == COHERENCY_ACE) {
-			if (kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2) ||
-				kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_L2)) {
-				/*
-				* The L2 is still powered, wait for all the users to
-				* finish with it before doing the actual reset.
-				*/
-				return -EAGAIN;
-			}
-		}
-
-		katom[idx]->protected_state.enter =
-			KBASE_ATOM_ENTER_PROTECTED_FINISHED;
-
-		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
-
-	case KBASE_ATOM_ENTER_PROTECTED_FINISHED:
+		if (idx == 1 &&
+			(katom[0]->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED &&
+			katom[0]->gpu_rb_state != KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB))
+			return -EAGAIN;
 
-		/* No jobs running, so we can switch GPU mode right now. */
-		err = kbase_gpu_protected_mode_enter(kbdev);
+		/* If we're not exiting protected mode then we're done here. */
+		if (!(kbase_gpu_in_protected_mode(kbdev) &&
+				!kbase_jd_katom_is_protected(katom[idx])))
+			return 0;
 
 		/*
-		 * Regardless of result, we are no longer transitioning
-		 * the GPU.
+		 * If there is a transition in progress, or work still
+		 * on the GPU try again later.
 		 */
-		kbdev->protected_mode_transition = false;
-
-		if (err) {
-			/*
-			 * Failed to switch into protected mode, resume
-			 * vinstr core and fail atom.
-			 */
-			kbase_vinstr_resume(kbdev->vinstr_ctx);
-			katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID;
-			kbase_gpu_mark_atom_for_return(kbdev, katom[idx]);
-			/* Only return if head atom or previous atom
-			 * already removed - as atoms must be returned
-			 * in order. */
-			if (idx == 0 || katom[0]->gpu_rb_state ==
-					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
-				kbase_gpu_dequeue_atom(kbdev, js, NULL);
-				kbase_jm_return_atom_to_js(kbdev, katom[idx]);
-			}
-			return -EINVAL;
-		}
-
-		/* Protected mode sanity checks. */
-		KBASE_DEBUG_ASSERT_MSG(
-			kbase_jd_katom_is_protected(katom[idx]) ==
-			kbase_gpu_in_protected_mode(kbdev),
-			"Protected mode of atom (%d) doesn't match protected mode of GPU (%d)",
-			kbase_jd_katom_is_protected(katom[idx]),
-			kbase_gpu_in_protected_mode(kbdev));
-		katom[idx]->gpu_rb_state =
-			KBASE_ATOM_GPU_RB_READY;
-	}
-
-	return 0;
-}
-
-static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev,
-		struct kbase_jd_atom **katom, int idx, int js)
-{
-	int err = 0;
-
-
-	switch (katom[idx]->protected_state.exit) {
-	case KBASE_ATOM_EXIT_PROTECTED_CHECK:
-		/* The checks in KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV
-		 * should ensure that we are not already transitiong, and that
-		 * there are no atoms currently on the GPU. */
-		WARN_ON(kbdev->protected_mode_transition);
-		WARN_ON(kbase_gpu_atoms_submitted_any(kbdev));
+		if (kbdev->protected_mode_transition ||
+				kbase_gpu_atoms_submitted_any(kbdev))
+			return -EAGAIN;
 
 		/*
 		 * Exiting protected mode requires a reset, but first the L2
 		 * needs to be powered down to ensure it's not active when the
 		 * reset is issued.
 		 */
-		katom[idx]->protected_state.exit =
+		katom[idx]->exit_protected_state =
 				KBASE_ATOM_EXIT_PROTECTED_IDLE_L2;
 
-		kbdev->protected_mode_transition = true;
-		kbase_pm_update_cores_state_nolock(kbdev);
-
 		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
 	case KBASE_ATOM_EXIT_PROTECTED_IDLE_L2:
-		if (kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2) ||
+		if (kbase_pm_get_active_cores(kbdev, KBASE_PM_CORE_L2) ||
 				kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_L2)) {
 			/*
 			 * The L2 is still powered, wait for all the users to
@@ -912,7 +749,7 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev,
 			 */
 			return -EAGAIN;
 		}
-		katom[idx]->protected_state.exit =
+		katom[idx]->exit_protected_state =
 				KBASE_ATOM_EXIT_PROTECTED_RESET;
 
 		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
@@ -920,10 +757,7 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev,
 	case KBASE_ATOM_EXIT_PROTECTED_RESET:
 		/* Issue the reset to the GPU */
 		err = kbase_gpu_protected_mode_reset(kbdev);
-
 		if (err) {
-			kbdev->protected_mode_transition = false;
-
 			/* Failed to exit protected mode, fail atom */
 			katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID;
 			kbase_gpu_mark_atom_for_return(kbdev, katom[idx]);
@@ -941,7 +775,7 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev,
 			return -EINVAL;
 		}
 
-		katom[idx]->protected_state.exit =
+		katom[idx]->exit_protected_state =
 				KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT;
 
 		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
@@ -950,9 +784,6 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev,
 		if (kbase_reset_gpu_active(kbdev))
 			return -EAGAIN;
 
-		kbdev->protected_mode_transition = false;
-		kbdev->protected_mode = false;
-
 		/* protected mode sanity checks */
 		KBASE_DEBUG_ASSERT_MSG(
 			kbase_jd_katom_is_protected(katom[idx]) == kbase_gpu_in_protected_mode(kbdev),
@@ -967,11 +798,11 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev,
 	return 0;
 }
 
-void kbase_backend_slot_update(struct kbase_device *kbdev)
+void kbase_gpu_slot_update(struct kbase_device *kbdev)
 {
 	int js;
 
-	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
 
 	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
 		struct kbase_jd_atom *katom[2];
@@ -1000,26 +831,11 @@ void kbase_backend_slot_update(struct kbase_device *kbdev)
 					break;
 
 				katom[idx]->gpu_rb_state =
-				KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV;
+					KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_EXIT;
 
 			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
 
-			case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV:
-				if (kbase_gpu_check_secure_atoms(kbdev,
-						!kbase_jd_katom_is_protected(
-						katom[idx])))
-					break;
-
-				if (kbdev->protected_mode_transition)
-					break;
-
-				katom[idx]->gpu_rb_state =
-					KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION;
-
-			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
-
-			case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION:
-
+			case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_EXIT:
 				/*
 				 * Exiting protected mode must be done before
 				 * the references on the cores are taken as
@@ -1027,26 +843,10 @@ void kbase_backend_slot_update(struct kbase_device *kbdev)
 				 * can't happen after the references for this
 				 * atom are taken.
 				 */
-
-				if (!kbase_gpu_in_protected_mode(kbdev) &&
-					kbase_jd_katom_is_protected(katom[idx])) {
-					/* Atom needs to transition into protected mode. */
-					ret = kbase_jm_enter_protected_mode(kbdev,
-							katom, idx, js);
-					if (ret)
-						break;
-				} else if (kbase_gpu_in_protected_mode(kbdev) &&
-					!kbase_jd_katom_is_protected(katom[idx])) {
-					/* Atom needs to transition out of protected mode. */
-					ret = kbase_jm_exit_protected_mode(kbdev,
-							katom, idx, js);
-					if (ret)
-						break;
-				}
-				katom[idx]->protected_state.exit =
-						KBASE_ATOM_EXIT_PROTECTED_CHECK;
-
-				/* Atom needs no protected mode transition. */
+				ret = kbase_jm_exit_protected_mode(kbdev,
+						katom, idx, js);
+				if (ret)
+					break;
 
 				katom[idx]->gpu_rb_state =
 					KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE;
@@ -1072,6 +872,7 @@ void kbase_backend_slot_update(struct kbase_device *kbdev)
 					break;
 				}
 
+
 				cores_ready =
 					kbasep_js_job_check_ref_cores(kbdev, js,
 								katom[idx]);
@@ -1097,6 +898,81 @@ void kbase_backend_slot_update(struct kbase_device *kbdev)
 				if (!kbase_gpu_rmu_workaround(kbdev, js))
 					break;
 
+				katom[idx]->gpu_rb_state =
+					KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_ENTRY;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_ENTRY:
+
+				/* Only submit if head atom or previous atom
+				 * already submitted */
+				if (idx == 1 &&
+					(katom[0]->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED &&
+					katom[0]->gpu_rb_state != KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB))
+					break;
+
+				/*
+				 * If the GPU is transitioning protected mode
+				 * then bail now and we'll be called when the
+				 * new state has settled.
+				 */
+				if (kbdev->protected_mode_transition)
+					break;
+
+				if (!kbase_gpu_in_protected_mode(kbdev) && kbase_jd_katom_is_protected(katom[idx])) {
+					int err = 0;
+
+					/* Not in correct mode, take action */
+					if (kbase_gpu_atoms_submitted_any(kbdev)) {
+						/*
+						 * We are not in the correct
+						 * GPU mode for this job, and
+						 * we can't switch now because
+						 * there are jobs already
+						 * running.
+						 */
+						break;
+					}
+					if (kbase_vinstr_try_suspend(kbdev->vinstr_ctx) < 0) {
+						/*
+						 * We can't switch now because
+						 * the vinstr core state switch
+						 * is not done yet.
+						 */
+						break;
+					}
+					/* Once reaching this point GPU must be
+					 * switched to protected mode or vinstr
+					 * re-enabled. */
+
+					/* No jobs running, so we can switch GPU mode right now */
+					err = kbase_gpu_protected_mode_enter(kbdev);
+					if (err) {
+						/*
+						 * Failed to switch into protected mode, resume
+						 * vinstr core and fail atom.
+						 */
+						kbase_vinstr_resume(kbdev->vinstr_ctx);
+						katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID;
+						kbase_gpu_mark_atom_for_return(kbdev, katom[idx]);
+						/* Only return if head atom or previous atom
+						 * already removed - as atoms must be returned
+						 * in order */
+						if (idx == 0 || katom[0]->gpu_rb_state ==
+								KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+							kbase_gpu_dequeue_atom(kbdev, js, NULL);
+							kbase_jm_return_atom_to_js(kbdev, katom[idx]);
+						}
+						break;
+					}
+				}
+
+				/* Protected mode sanity checks */
+				KBASE_DEBUG_ASSERT_MSG(
+					kbase_jd_katom_is_protected(katom[idx]) == kbase_gpu_in_protected_mode(kbdev),
+					"Protected mode of atom (%d) doesn't match protected mode of GPU (%d)",
+					kbase_jd_katom_is_protected(katom[idx]), kbase_gpu_in_protected_mode(kbdev));
 				katom[idx]->gpu_rb_state =
 					KBASE_ATOM_GPU_RB_READY;
 
@@ -1161,9 +1037,10 @@ void kbase_backend_slot_update(struct kbase_device *kbdev)
 void kbase_backend_run_atom(struct kbase_device *kbdev,
 				struct kbase_jd_atom *katom)
 {
-	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
 	kbase_gpu_enqueue_atom(kbdev, katom);
-	kbase_backend_slot_update(kbdev);
+	kbase_gpu_slot_update(kbdev);
 }
 
 bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js)
@@ -1171,7 +1048,7 @@ bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js)
 	struct kbase_jd_atom *katom;
 	struct kbase_jd_atom *next_katom;
 
-	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
 
 	katom = kbase_gpu_inspect(kbdev, js, 0);
 	next_katom = kbase_gpu_inspect(kbdev, js, 1);
@@ -1199,7 +1076,7 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
 	struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0);
 	struct kbase_context *kctx = katom->kctx;
 
-	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
 
 	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6787) &&
 			completion_code != BASE_JD_EVENT_DONE &&
@@ -1364,42 +1241,31 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
 #endif
 
 	if (completion_code == BASE_JD_EVENT_STOPPED)
-		katom = kbase_jm_return_atom_to_js(kbdev, katom);
+		kbase_jm_return_atom_to_js(kbdev, katom);
 	else
-		katom = kbase_jm_complete(kbdev, katom, end_timestamp);
-
-	if (katom) {
-		/* Cross-slot dependency has now become runnable. Try to submit
-		 * it. */
-
-		/* Check if there are lower priority jobs to soft stop */
-		kbase_job_slot_ctx_priority_check_locked(kctx, katom);
-
-		kbase_jm_try_kick(kbdev, 1 << katom->slot_nr);
-	}
+		kbase_jm_complete(kbdev, katom, end_timestamp);
 
 	/* Job completion may have unblocked other atoms. Try to update all job
 	 * slots */
-	kbase_backend_slot_update(kbdev);
+	kbase_gpu_slot_update(kbdev);
 }
 
 void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp)
 {
 	int js;
 
-	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
 
 	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
-		int atom_idx = 0;
 		int idx;
 
-		for (idx = 0; idx < SLOT_RB_SIZE; idx++) {
+		for (idx = 0; idx < 2; idx++) {
 			struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev,
-					js, atom_idx);
+									js, 0);
 			bool keep_in_jm_rb = false;
 
 			if (!katom)
-				break;
+				continue;
 
 			if (katom->gpu_rb_state < KBASE_ATOM_GPU_RB_SUBMITTED)
 				keep_in_jm_rb = true;
@@ -1413,12 +1279,7 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp)
 			 */
 			if (keep_in_jm_rb) {
 				katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
-				katom->affinity = 0;
-				katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK;
-				/* As the atom was not removed, increment the
-				 * index so that we read the correct atom in the
-				 * next iteration. */
-				atom_idx++;
+				katom->exit_protected_state = KBASE_ATOM_EXIT_PROTECTED_CHECK;
 				continue;
 			}
 
@@ -1431,9 +1292,6 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp)
 			kbase_jm_complete(kbdev, katom, end_timestamp);
 		}
 	}
-
-	kbdev->protected_mode_transition = false;
-	kbdev->protected_mode = false;
 }
 
 static inline void kbase_gpu_stop_atom(struct kbase_device *kbdev,
@@ -1503,7 +1361,7 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
 
 	int stop_x_dep_idx0 = -1, stop_x_dep_idx1 = -1;
 
-	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
 
 	katom_idx0 = kbase_gpu_inspect(kbdev, js, 0);
 	katom_idx1 = kbase_gpu_inspect(kbdev, js, 1);
@@ -1690,7 +1548,6 @@ void kbase_gpu_cacheclean(struct kbase_device *kbdev,
 	/* Limit the number of loops to avoid a hang if the interrupt is missed
 	 */
 	u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS;
-	unsigned long flags;
 
 	mutex_lock(&kbdev->cacheclean_lock);
 
@@ -1717,10 +1574,8 @@ void kbase_gpu_cacheclean(struct kbase_device *kbdev,
 
 	mutex_unlock(&kbdev->cacheclean_lock);
 
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 	kbase_pm_unrequest_cores(kbdev, false,
 					katom->need_cache_flush_cores_retained);
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 }
 
 void kbase_backend_complete_wq(struct kbase_device *kbdev,
@@ -1765,12 +1620,8 @@ void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev,
 		base_jd_core_req core_req, u64 affinity,
 		enum kbase_atom_coreref_state coreref_state)
 {
-	unsigned long flags;
-
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 	kbasep_js_job_check_deref_cores_nokatom(kbdev, core_req, affinity,
 			coreref_state);
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 
 	if (!kbdev->pm.active_count) {
 		mutex_lock(&kbdev->js_data.runpool_mutex);
@@ -1789,7 +1640,7 @@ void kbase_gpu_dump_slots(struct kbase_device *kbdev)
 
 	js_devdata = &kbdev->js_data;
 
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
 
 	dev_info(kbdev->dev, "kbase_gpu_dump_slots:\n");
 
@@ -1811,7 +1662,7 @@ void kbase_gpu_dump_slots(struct kbase_device *kbdev)
 		}
 	}
 
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
 }
 
 
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h
index 1e0e05ad3ea4..102d94be93d1 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -66,6 +66,17 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
 struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js,
 					int idx);
 
+/**
+ * kbase_gpu_slot_update - Update state based on slot ringbuffers
+ *
+ * @kbdev:  Device pointer
+ *
+ * Inspect the jobs in the slot ringbuffers and update state.
+ *
+ * This will cause jobs to be submitted to hardware if they are unblocked
+ */
+void kbase_gpu_slot_update(struct kbase_device *kbdev);
+
 /**
  * kbase_gpu_dump_slots - Print the contents of the slot ringbuffers
  *
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c
index 54d8ddd80097..d665420ab380 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c
@@ -94,8 +94,9 @@ bool kbase_js_choose_affinity(u64 * const affinity,
 	base_jd_core_req core_req = katom->core_req;
 	unsigned int num_core_groups = kbdev->gpu_props.num_core_groups;
 	u64 core_availability_mask;
+	unsigned long flags;
 
-	lockdep_assert_held(&kbdev->hwaccess_lock);
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
 
 	core_availability_mask = kbase_pm_ca_get_core_mask(kbdev);
 
@@ -104,6 +105,7 @@ bool kbase_js_choose_affinity(u64 * const affinity,
 	 * transitioning) then fail.
 	 */
 	if (0 == core_availability_mask) {
+		spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
 		*affinity = 0;
 		return false;
 	}
@@ -112,6 +114,7 @@ bool kbase_js_choose_affinity(u64 * const affinity,
 
 	if ((core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) ==
 								BASE_JD_REQ_T) {
+		spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
 		 /* If the hardware supports XAFFINITY then we'll only enable
 		  * the tiler (which is the default so this is a no-op),
 		  * otherwise enable shader core 0. */
@@ -166,6 +169,8 @@ bool kbase_js_choose_affinity(u64 * const affinity,
 		}
 	}
 
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
 	/*
 	 * If no cores are currently available in the desired core group(s)
 	 * (core availability policy is transitioning) then fail.
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h
index 35d9781ae092..fbffa3b40962 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h
@@ -38,9 +38,10 @@
  * violated.
  *
  * The following locking conditions are made on the caller
- * - it must hold hwaccess_lock
+ * - it must hold kbasep_js_device_data.runpool_irq.lock
  */
-bool kbase_js_can_run_job_on_slot_no_lock(struct kbase_device *kbdev, int js);
+bool kbase_js_can_run_job_on_slot_no_lock(struct kbase_device *kbdev,
+									int js);
 
 /**
  * kbase_js_choose_affinity - Compute affinity for a given job.
@@ -70,7 +71,7 @@ bool kbase_js_choose_affinity(u64 * const affinity,
  * @affinity: The affinity mask to test
  *
  * The following locks must be held by the caller
- * - hwaccess_lock
+ * - kbasep_js_device_data.runpool_irq.lock
  *
  * Return: true if the affinity would violate the restrictions
  */
@@ -86,7 +87,7 @@ bool kbase_js_affinity_would_violate(struct kbase_device *kbdev, int js,
  * @affinity: The cores to retain
  *
  * The following locks must be held by the caller
- * - hwaccess_lock
+ * - kbasep_js_device_data.runpool_irq.lock
  */
 void kbase_js_affinity_retain_slot_cores(struct kbase_device *kbdev, int js,
 								u64 affinity);
@@ -105,7 +106,7 @@ void kbase_js_affinity_retain_slot_cores(struct kbase_device *kbdev, int js,
  * %BASE_JM_SUBMIT_SLOTS.
  *
  * The following locks must be held by the caller
- * - hwaccess_lock
+ * - kbasep_js_device_data.runpool_irq.lock
  */
 void kbase_js_affinity_release_slot_cores(struct kbase_device *kbdev, int js,
 								u64 affinity);
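
Illustrative sketch, not part of this patch: a caller honouring the locking
rule restored above (kbasep_js_device_data.runpool_irq.lock held) would
check for an affinity violation before retaining the slot cores. The helper
name is hypothetical; error handling is elided.

	static bool example_retain_if_allowed(struct kbase_device *kbdev,
						int js, u64 affinity)
	{
		/* Contract from this header: runpool_irq.lock must be held */
		lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);

		if (kbase_js_affinity_would_violate(kbdev, js, affinity))
			return false;

		kbase_js_affinity_retain_slot_cores(kbdev, js, affinity);
		/* ... run the job, then call
		 * kbase_js_affinity_release_slot_cores() with the same
		 * affinity when it completes ... */
		return true;
	}
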
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c
index b09d491c0ec5..a23deb4ca20c 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c
@@ -104,7 +104,7 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer)
 	js_devdata = &kbdev->js_data;
 
 	/* Loop through the slots */
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
 	for (s = 0; s < kbdev->gpu_props.num_job_slots; s++) {
 		struct kbase_jd_atom *atom = NULL;
 
@@ -168,8 +168,8 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer)
 					 * However, if it's about to be
 					 * increased then the new context can't
 					 * run any jobs until they take the
-					 * hwaccess_lock, so it's OK to observe
-					 * the older value.
+					 * runpool_irq lock, so it's OK to
+					 * observe the older value.
 					 *
 					 * Similarly, if it's about to be
 					 * decreased, the last job from another
@@ -270,7 +270,7 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer)
 
 	backend->timeouts_updated = false;
 
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
 
 	return HRTIMER_NORESTART;
 }
@@ -285,9 +285,9 @@ void kbase_backend_ctx_count_changed(struct kbase_device *kbdev)
 
 	if (!timer_callback_should_run(kbdev)) {
 		/* Take spinlock to force synchronisation with timer */
-		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
 		backend->timer_running = false;
-		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
 		/* From now on, return value of timer_callback_should_run() will
 		 * also cause the timer to not requeue itself. Its return value
 		 * cannot change, because it depends on variables updated with
@@ -298,9 +298,9 @@ void kbase_backend_ctx_count_changed(struct kbase_device *kbdev)
 
 	if (timer_callback_should_run(kbdev) && !backend->timer_running) {
 		/* Take spinlock to force synchronisation with timer */
-		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
 		backend->timer_running = true;
-		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
 		hrtimer_start(&backend->scheduling_timer,
 			HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns),
 							HRTIMER_MODE_REL);
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c
index 08eea1c104e0..4a3572d971a6 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c
@@ -97,30 +97,6 @@ static int write_cmd(struct kbase_device *kbdev, int as_nr, u32 cmd,
 	return status;
 }
 
-static void validate_protected_page_fault(struct kbase_device *kbdev,
-		struct kbase_context *kctx)
-{
-	/* GPUs which support (native) protected mode shall not report page
-	 * fault addresses unless it has protected debug mode and protected
-	 * debug mode is turned on */
-	u32 protected_debug_mode = 0;
-
-	if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE))
-		return;
-
-	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) {
-		protected_debug_mode = kbase_reg_read(kbdev,
-				GPU_CONTROL_REG(GPU_STATUS),
-				kctx) & GPU_DBGEN;
-	}
-
-	if (!protected_debug_mode) {
-		/* fault_addr should never be reported in protected mode.
-		 * However, we just continue by printing an error message */
-		dev_err(kbdev->dev, "Fault address reported in protected mode\n");
-	}
-}
-
 void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat)
 {
 	const int num_as = 16;
@@ -165,7 +141,6 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat)
 		 */
 		kctx = kbasep_js_runpool_lookup_ctx(kbdev, as_no);
 
-
 		/* find faulting address */
 		as->fault_addr = kbase_reg_read(kbdev,
 						MMU_AS_REG(as_no,
@@ -177,15 +152,6 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat)
 							AS_FAULTADDRESS_LO),
 						kctx);
 
-		/* Mark the fault protected or not */
-		as->protected_mode = kbdev->protected_mode;
-
-		if (kbdev->protected_mode && as->fault_addr)
-		{
-			/* check if address reporting is allowed */
-			validate_protected_page_fault(kbdev, kctx);
-		}
-
 		/* report the fault to debugfs */
 		kbase_as_fault_debugfs_new(kbdev, as_no);
 
@@ -229,9 +195,10 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat)
 		}
 
 		/* Process the interrupt for this address space */
-		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags);
 		kbase_mmu_interrupt_process(kbdev, kctx, as);
-		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock,
+				flags);
 	}
 
 	/* reenable interrupts */
@@ -301,8 +268,6 @@ int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as,
 {
 	int ret;
 
-	lockdep_assert_held(&kbdev->mmu_hw_mutex);
-
 	if (op == AS_COMMAND_UNLOCK) {
 		/* Unlock doesn't require a lock first */
 		ret = write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx);
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c
index f36fdeafdb1e..0919969da523 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c
@@ -31,7 +31,7 @@
 #endif /* CONFIG_MALI_PLATFORM_DEVICETREE */
 
 #include <mali_kbase_pm.h>
-#include <mali_kbase_hwaccess_jm.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
 #include <backend/gpu/mali_kbase_js_internal.h>
 #include <backend/gpu/mali_kbase_pm_internal.h>
 
@@ -65,8 +65,6 @@ static int rk_restore_clk_gpu(struct kbase_device *kbdev)
 	return ret;
 }
 
-static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data);
-
 void kbase_pm_register_access_enable(struct kbase_device *kbdev)
 {
 	struct kbase_pm_callback_conf *callbacks;
@@ -100,14 +98,6 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev)
 
 	mutex_init(&kbdev->pm.lock);
 
-	kbdev->pm.backend.gpu_poweroff_wait_wq = alloc_workqueue("kbase_pm_poweroff_wait",
-			WQ_HIGHPRI | WQ_UNBOUND, 1);
-	if (!kbdev->pm.backend.gpu_poweroff_wait_wq)
-		return -ENOMEM;
-
-	INIT_WORK(&kbdev->pm.backend.gpu_poweroff_wait_work,
-			kbase_pm_gpu_poweroff_wait_wq);
-
 	kbdev->pm.backend.gpu_powered = false;
 	kbdev->pm.suspending = false;
 #ifdef CONFIG_MALI_DEBUG
@@ -162,11 +152,10 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev)
 	init_waitqueue_head(&kbdev->pm.zero_active_count_wait);
 	kbdev->pm.active_count = 0;
 
+	spin_lock_init(&kbdev->pm.power_change_lock);
 	spin_lock_init(&kbdev->pm.backend.gpu_cycle_counter_requests_lock);
 	spin_lock_init(&kbdev->pm.backend.gpu_powered_lock);
 
-	init_waitqueue_head(&kbdev->pm.backend.poweroff_wait);
-
 	if (kbase_pm_ca_init(kbdev) != 0)
 		goto workq_fail;
 
@@ -204,126 +193,50 @@ void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume)
 	rk_restore_clk_gpu(kbdev);
 }
 
-static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data)
+bool kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend)
 {
-	struct kbase_device *kbdev = container_of(data, struct kbase_device,
-			pm.backend.gpu_poweroff_wait_work);
-	struct kbase_pm_device_data *pm = &kbdev->pm;
-	struct kbase_pm_backend_data *backend = &pm->backend;
-	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
 	unsigned long flags;
+	bool cores_are_available;
 
 	lockdep_assert_held(&kbdev->pm.lock);
 
 	D("to slowdown clk_gpu before poweroff pm_cores.");
 	rk_slowdown_clk_gpu_before_poweroff_cores(kbdev);
 
-	/* Wait for power transitions to complete. We do this with no locks held
-	 * so that we don't deadlock with any pending workqueues */
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	/* Force all cores off */
+	kbdev->pm.backend.desired_shader_state = 0;
+	kbdev->pm.backend.desired_tiler_state = 0;
+
+	/* Force all cores to be unavailable, in the situation where
+	 * transitions are in progress for some cores but not others,
+	 * and kbase_pm_check_transitions_nolock can not immediately
+	 * power off the cores */
+	kbdev->shader_available_bitmap = 0;
+	kbdev->tiler_available_bitmap = 0;
+	kbdev->l2_available_bitmap = 0;
+
 	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
 				SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_START);
-	kbase_pm_check_transitions_sync(kbdev);
+	cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
 	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
 				SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_END);
+	/* Don't need 'cores_are_available', because we don't return anything */
+	CSTD_UNUSED(cores_are_available);
 
-	mutex_lock(&js_devdata->runpool_mutex);
-	mutex_lock(&kbdev->pm.lock);
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
 
-	if (!backend->poweron_required) {
-		WARN_ON(kbdev->l2_available_bitmap ||
-				kbdev->shader_available_bitmap ||
-				kbdev->tiler_available_bitmap);
-
-		/* Consume any change-state events */
-		kbase_timeline_pm_check_handle_event(kbdev,
-					KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
-
-		/* Disable interrupts and turn the clock off */
-		if (!kbase_pm_clock_off(kbdev, backend->poweroff_is_suspend)) {
-			/*
-			 * Page/bus faults are pending, must drop locks to
-			 * process.  Interrupts are disabled so no more faults
-			 * should be generated at this point.
-			 */
-			mutex_unlock(&kbdev->pm.lock);
-			mutex_unlock(&js_devdata->runpool_mutex);
-			kbase_flush_mmu_wqs(kbdev);
-			mutex_lock(&js_devdata->runpool_mutex);
-			mutex_lock(&kbdev->pm.lock);
-
-			 * Turn off clock now that faults have been handled. We
-			 * dropped locks so poweron_required may have changed -
-			 * power back on if this is the case.*/
-			if (backend->poweron_required)
-				kbase_pm_clock_on(kbdev, false);
-			else
-				WARN_ON(!kbase_pm_clock_off(kbdev,
-						backend->poweroff_is_suspend));
-		}
-	}
-
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-	backend->poweroff_wait_in_progress = false;
-	if (backend->poweron_required) {
-		backend->poweron_required = false;
-		kbase_pm_update_cores_state_nolock(kbdev);
-	}
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-
-	mutex_unlock(&kbdev->pm.lock);
-	mutex_unlock(&js_devdata->runpool_mutex);
-
-	wake_up(&kbdev->pm.backend.poweroff_wait);
-}
-
-void kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend)
-{
-	unsigned long flags;
+	/* NOTE: We won't wait to reach the cores' desired state, even if we're
+	 * powering off the GPU itself too. It's safe to cut the power whilst
+	 * they're transitioning to off, because the cores should be idle and
+	 * all cache flushes should already have occurred */
 
-	lockdep_assert_held(&kbdev->pm.lock);
-
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-	if (!kbdev->pm.backend.poweroff_wait_in_progress) {
-		/* Force all cores off */
-		kbdev->pm.backend.desired_shader_state = 0;
-		kbdev->pm.backend.desired_tiler_state = 0;
-
-		/* Force all cores to be unavailable, in the situation where
-		 * transitions are in progress for some cores but not others,
-		 * and kbase_pm_check_transitions_nolock can not immediately
-		 * power off the cores */
-		kbdev->shader_available_bitmap = 0;
-		kbdev->tiler_available_bitmap = 0;
-		kbdev->l2_available_bitmap = 0;
-
-		kbdev->pm.backend.poweroff_wait_in_progress = true;
-		kbdev->pm.backend.poweroff_is_suspend = is_suspend;
-
-		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-		/*Kick off wq here. Callers will have to wait*/
-		queue_work(kbdev->pm.backend.gpu_poweroff_wait_wq,
-				&kbdev->pm.backend.gpu_poweroff_wait_work);
-	} else {
-		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-	}
-}
-
-static bool is_poweroff_in_progress(struct kbase_device *kbdev)
-{
-	bool ret;
-	unsigned long flags;
-
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-	ret = (kbdev->pm.backend.poweroff_wait_in_progress == false);
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-
-	return ret;
-}
-
-void kbase_pm_wait_for_poweroff_complete(struct kbase_device *kbdev)
-{
-	wait_event_killable(kbdev->pm.backend.poweroff_wait,
-			is_poweroff_in_progress(kbdev));
+	/* Consume any change-state events */
+	kbase_timeline_pm_check_handle_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+	/* Disable interrupts and turn the clock off */
+	return kbase_pm_clock_off(kbdev, is_suspend);
 }
 
 int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev,
@@ -395,7 +308,15 @@ void kbase_hwaccess_pm_halt(struct kbase_device *kbdev)
 
 	mutex_lock(&kbdev->pm.lock);
 	kbase_pm_cancel_deferred_poweroff(kbdev);
-	kbase_pm_do_poweroff(kbdev, false);
+	if (!kbase_pm_do_poweroff(kbdev, false)) {
+		/* Page/bus faults are pending, must drop pm.lock to process.
+		 * Interrupts are disabled so no more faults should be
+		 * generated at this point */
+		mutex_unlock(&kbdev->pm.lock);
+		kbase_flush_mmu_wqs(kbdev);
+		mutex_lock(&kbdev->pm.lock);
+		WARN_ON(!kbase_pm_do_poweroff(kbdev, false));
+	}
 	mutex_unlock(&kbdev->pm.lock);
 }
 
@@ -413,8 +334,6 @@ void kbase_hwaccess_pm_term(struct kbase_device *kbdev)
 
 	/* Shut down the metrics subsystem */
 	kbasep_pm_metrics_term(kbdev);
-
-	destroy_workqueue(kbdev->pm.backend.gpu_poweroff_wait_wq);
 }
 
 void kbase_pm_power_changed(struct kbase_device *kbdev)
@@ -424,8 +343,9 @@ void kbase_pm_power_changed(struct kbase_device *kbdev)
 
 	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
 				SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_START);
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
 	cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
 	KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
 				SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_END);
 
@@ -435,9 +355,10 @@ void kbase_pm_power_changed(struct kbase_device *kbdev)
 		kbase_timeline_pm_handle_event(kbdev,
 				KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
 
-		kbase_backend_slot_update(kbdev);
+		spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags);
+		kbase_gpu_slot_update(kbdev);
+		spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags);
 	}
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 }
 
 void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev,
@@ -472,16 +393,21 @@ void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev)
 	 * off prematurely) */
 	mutex_lock(&js_devdata->runpool_mutex);
 	mutex_lock(&kbdev->pm.lock);
-
 	kbase_pm_cancel_deferred_poweroff(kbdev);
-	kbase_pm_do_poweroff(kbdev, true);
+	if (!kbase_pm_do_poweroff(kbdev, true)) {
+		/* Page/bus faults are pending, must drop pm.lock to process.
+		 * Interrupts are disabled so no more faults should be
+		 * generated at this point */
+		mutex_unlock(&kbdev->pm.lock);
+		kbase_flush_mmu_wqs(kbdev);
+		mutex_lock(&kbdev->pm.lock);
+		WARN_ON(!kbase_pm_do_poweroff(kbdev, false));
+	}
 
 	kbase_backend_timer_suspend(kbdev);
 
 	mutex_unlock(&kbdev->pm.lock);
 	mutex_unlock(&js_devdata->runpool_mutex);
-
-	kbase_pm_wait_for_poweroff_complete(kbdev);
 }
 
 void kbase_hwaccess_pm_resume(struct kbase_device *kbdev)
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c
index e8cd8cbd35af..4b903cca020b 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2013-2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -95,10 +95,10 @@ void kbase_pm_ca_set_policy(struct kbase_device *kbdev,
 	mutex_lock(&kbdev->pm.lock);
 
 	/* Remove the policy to prevent IRQ handlers from working on it */
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
 	old_policy = kbdev->pm.backend.ca_current_policy;
 	kbdev->pm.backend.ca_current_policy = NULL;
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
 
 	if (old_policy->term)
 		old_policy->term(kbdev);
@@ -106,7 +106,7 @@ void kbase_pm_ca_set_policy(struct kbase_device *kbdev,
 	if (new_policy->init)
 		new_policy->init(kbdev);
 
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
 	kbdev->pm.backend.ca_current_policy = new_policy;
 
 	/* If any core power state changes were previously attempted, but
@@ -118,7 +118,7 @@ void kbase_pm_ca_set_policy(struct kbase_device *kbdev,
 					kbdev->shader_ready_bitmap,
 					kbdev->shader_transitioning_bitmap);
 
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
 
 	mutex_unlock(&kbdev->pm.lock);
 
@@ -131,7 +131,7 @@ KBASE_EXPORT_TEST_API(kbase_pm_ca_set_policy);
 
 u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev)
 {
-	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&kbdev->pm.power_change_lock);
 
 	/* All cores must be enabled when instrumentation is in use */
 	if (kbdev->pm.backend.instr_enabled)
@@ -151,7 +151,7 @@ KBASE_EXPORT_TEST_API(kbase_pm_ca_get_core_mask);
 void kbase_pm_ca_update_core_status(struct kbase_device *kbdev, u64 cores_ready,
 							u64 cores_transitioning)
 {
-	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&kbdev->pm.power_change_lock);
 
 	if (kbdev->pm.backend.ca_current_policy != NULL)
 		kbdev->pm.backend.ca_current_policy->update_core_status(kbdev,
@@ -163,17 +163,20 @@ void kbase_pm_ca_instr_enable(struct kbase_device *kbdev)
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
 	kbdev->pm.backend.instr_enabled = true;
 
 	kbase_pm_update_cores_state_nolock(kbdev);
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
 }
 
 void kbase_pm_ca_instr_disable(struct kbase_device *kbdev)
 {
-	lockdep_assert_held(&kbdev->hwaccess_lock);
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
 	kbdev->pm.backend.instr_enabled = false;
 
 	kbase_pm_update_cores_state_nolock(kbdev);
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
 }
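
Illustrative sketch, not part of this patch: with this hunk reverted, both
helpers take pm.power_change_lock internally, so an instrumentation client
simply brackets its counter collection with them and holds no PM spinlock
of its own. The function name is made up.

	static void example_instr_session(struct kbase_device *kbdev)
	{
		/* While instr_enabled is set, the core-availability code keeps
		 * all shader cores enabled (see kbase_pm_ca_get_core_mask()) */
		kbase_pm_ca_instr_enable(kbdev);

		/* ... dump hardware counters here ... */

		kbase_pm_ca_instr_disable(kbdev);
	}
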
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h
index 99fb62d006bc..e8f96fe6c514 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h
@@ -207,17 +207,6 @@ union kbase_pm_ca_policy_data {
  *                          power_change_lock should be held when accessing,
  *                          unless there is no way the timer can be running (eg
  *                          hrtimer_cancel() was called immediately before)
- * @poweroff_wait_in_progress: true if a wait for GPU power off is in progress.
- *                             hwaccess_lock must be held when accessing
- * @poweron_required: true if a GPU power on is required. Should only be set
- *                    when poweroff_wait_in_progress is true, and therefore the
- *                    GPU can not immediately be powered on. pm.lock must be
- *                    held when accessing
- * @poweroff_is_suspend: true if the GPU is being powered off due to a suspend
- *                       request. pm.lock must be held when accessing
- * @gpu_poweroff_wait_wq: workqueue for waiting for GPU to power off
- * @gpu_poweroff_wait_work: work item for use with @gpu_poweroff_wait_wq
- * @poweroff_wait: waitqueue for waiting for @gpu_poweroff_wait_work to complete
  * @callback_power_on: Callback when the GPU needs to be turned on. See
  *                     &struct kbase_pm_callback_conf
  * @callback_power_off: Callback when the GPU may be turned off. See
@@ -292,15 +281,6 @@ struct kbase_pm_backend_data {
 	bool poweroff_timer_needed;
 	bool poweroff_timer_running;
 
-	bool poweroff_wait_in_progress;
-	bool poweron_required;
-	bool poweroff_is_suspend;
-
-	struct workqueue_struct *gpu_poweroff_wait_wq;
-	struct work_struct gpu_poweroff_wait_work;
-
-	wait_queue_head_t poweroff_wait;
-
 	int (*callback_power_on)(struct kbase_device *kbdev);
 	void (*callback_power_off)(struct kbase_device *kbdev);
 	void (*callback_power_suspend)(struct kbase_device *kbdev);
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c
index a162ff8bc0e4..046ebcb7b508 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c
@@ -151,7 +151,7 @@ static void kbase_pm_invoke(struct kbase_device *kbdev,
 	u32 lo = cores & 0xFFFFFFFF;
 	u32 hi = (cores >> 32) & 0xFFFFFFFF;
 
-	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&kbdev->pm.power_change_lock);
 
 	reg = core_type_to_reg(core_type, action);
 
@@ -407,7 +407,7 @@ static bool kbase_pm_transition_core_type(struct kbase_device *kbdev,
 	u64 powering_on_trans;
 	u64 desired_state_in_use;
 
-	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&kbdev->pm.power_change_lock);
 
 	/* Get current state */
 	present = kbase_pm_get_present_cores(kbdev, type);
@@ -451,7 +451,7 @@ static bool kbase_pm_transition_core_type(struct kbase_device *kbdev,
 			kbdev->pm.backend.l2_powered = 0;
 	}
 
-	if (desired_state == ready && (trans == 0))
+	if (desired_state_in_use == ready && (trans == 0))
 		return true;
 
 	/* Restrict the cores to those that are actually present */
@@ -562,7 +562,7 @@ MOCKABLE(kbase_pm_check_transitions_nolock) (struct kbase_device *kbdev)
 	u64 prev_l2_available_bitmap;
 
 	KBASE_DEBUG_ASSERT(NULL != kbdev);
-	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&kbdev->pm.power_change_lock);
 
 	spin_lock(&kbdev->pm.backend.gpu_powered_lock);
 	if (kbdev->pm.backend.gpu_powered == false) {
@@ -734,7 +734,6 @@ MOCKABLE(kbase_pm_check_transitions_nolock) (struct kbase_device *kbdev)
 				KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
 		/* Wake slow-path waiters. Job scheduler does not use this. */
 		KBASE_TRACE_ADD(kbdev, PM_WAKE_WAITERS, NULL, NULL, 0u, 0);
-
 		wake_up(&kbdev->pm.backend.gpu_in_desired_state_wait);
 	}
 
@@ -791,13 +790,11 @@ void kbase_pm_check_transitions_sync(struct kbase_device *kbdev)
 
 	/* Force the transition to be checked and reported - the cores may be
 	 * 'available' (for job submission) but not fully powered up. */
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
 	cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
-
 	/* Don't need 'cores_are_available', because we don't return anything */
 	CSTD_UNUSED(cores_are_available);
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
 
 	timeout = jiffies + PM_TIMEOUT;
 
@@ -868,12 +865,12 @@ void kbase_pm_enable_interrupts(struct kbase_device *kbdev)
 	 * Clear all interrupts,
 	 * and unmask them all.
 	 */
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL,
 									NULL);
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), GPU_IRQ_REG_ALL,
 									NULL);
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
 
 	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF,
 									NULL);
@@ -885,18 +882,21 @@ void kbase_pm_enable_interrupts(struct kbase_device *kbdev)
 
 KBASE_EXPORT_TEST_API(kbase_pm_enable_interrupts);
 
-void kbase_pm_disable_interrupts_nolock(struct kbase_device *kbdev)
+void kbase_pm_disable_interrupts(struct kbase_device *kbdev)
 {
+	unsigned long flags;
+
 	KBASE_DEBUG_ASSERT(NULL != kbdev);
 	/*
 	 * Mask all interrupts,
 	 * and clear them all.
 	 */
-	lockdep_assert_held(&kbdev->hwaccess_lock);
-
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), 0, NULL);
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL,
 									NULL);
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
 	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0, NULL);
 	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF,
 									NULL);
@@ -905,18 +905,8 @@ void kbase_pm_disable_interrupts_nolock(struct kbase_device *kbdev)
 	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF, NULL);
 }
 
-void kbase_pm_disable_interrupts(struct kbase_device *kbdev)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-	kbase_pm_disable_interrupts_nolock(kbdev);
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-}
-
 KBASE_EXPORT_TEST_API(kbase_pm_disable_interrupts);
 
-
 /*
  * pmu layout:
  * 0x0000: PMU TAG (RO) (0xCAFECAFE)
@@ -969,10 +959,12 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume)
 		kbase_pm_init_hw(kbdev, PM_ENABLE_IRQS);
 	}
 
-	mutex_lock(&kbdev->mmu_hw_mutex);
 	/* Reprogram the GPU's MMU */
 	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
-		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		struct kbase_as *as = &kbdev->as[i];
+
+		mutex_lock(&as->transaction_mutex);
+		spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
 
 		if (js_devdata->runpool_irq.per_as_data[i].kctx)
 			kbase_mmu_update(
@@ -980,9 +972,9 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume)
 		else
 			kbase_mmu_disable_as(kbdev, i);
 
-		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+		mutex_unlock(&as->transaction_mutex);
 	}
-	mutex_unlock(&kbdev->mmu_hw_mutex);
 
 	/* Lastly, enable the interrupts */
 	kbase_pm_enable_interrupts(kbdev);
@@ -1219,7 +1211,7 @@ static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev)
 
 void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev)
 {
-	if ((kbdev->current_gpu_coherency_mode == COHERENCY_ACE) &&
+	if ((kbdev->system_coherency == COHERENCY_ACE) &&
 		!kbdev->cci_snoop_enabled) {
 #ifdef CONFIG_ARM64
 		if (kbdev->snoop_enable_smc != 0)
@@ -1232,7 +1224,8 @@ void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev)
 
 void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev)
 {
-	if (kbdev->cci_snoop_enabled) {
+	if ((kbdev->system_coherency == COHERENCY_ACE) &&
+		kbdev->cci_snoop_enabled) {
 #ifdef CONFIG_ARM64
 		if (kbdev->snoop_disable_smc != 0) {
 			mali_cci_flush_l2(kbdev);
@@ -1362,7 +1355,7 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags)
 	kbdev->pm.backend.reset_done = false;
 
 	/* The cores should be made unavailable due to the reset */
-	spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, irq_flags);
 	if (kbdev->shader_available_bitmap != 0u)
 			KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL,
 						NULL, 0u, (u32)0u);
@@ -1372,7 +1365,7 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags)
 	kbdev->shader_available_bitmap = 0u;
 	kbdev->tiler_available_bitmap = 0u;
 	kbdev->l2_available_bitmap = 0u;
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, irq_flags);
 
 	/* Soft reset the GPU */
 	if (kbdev->protected_mode_support &&
@@ -1381,11 +1374,12 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags)
 	else
 		err = kbase_pm_reset_do_normal(kbdev);
 
-	spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
+	spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, irq_flags);
 	if (kbdev->protected_mode)
 		resume_vinstr = true;
+	kbdev->protected_mode_transition = false;
 	kbdev->protected_mode = false;
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
+	spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, irq_flags);
 
 	if (err)
 		goto exit;
@@ -1394,6 +1388,7 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags)
 		kbase_pm_hw_issues_detect(kbdev);
 
 	kbase_pm_hw_issues_apply(kbdev);
+
 	kbase_cache_set_coherency_mode(kbdev, kbdev->system_coherency);
 
 	/* Sanity check protected mode was left after reset */
@@ -1423,10 +1418,7 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags)
 			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
 								irq_flags);
 
-		spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
 		kbase_pm_release_l2_caches(kbdev);
-		spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
-
 		kbase_pm_disable_interrupts(kbdev);
 	}
 
@@ -1508,14 +1500,12 @@ void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev)
 
 KBASE_EXPORT_TEST_API(kbase_pm_request_gpu_cycle_counter_l2_is_on);
 
-void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev)
+void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev)
 {
 	unsigned long flags;
 
 	KBASE_DEBUG_ASSERT(kbdev != NULL);
 
-	lockdep_assert_held(&kbdev->hwaccess_lock);
-
 	spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
 									flags);
 
@@ -1534,15 +1524,4 @@ void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev)
 	kbase_pm_release_l2_caches(kbdev);
 }
 
-void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-
-	kbase_pm_release_gpu_cycle_counter_nolock(kbdev);
-
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-}
-
 KBASE_EXPORT_TEST_API(kbase_pm_release_gpu_cycle_counter);
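
Illustrative sketch, not part of this patch: the cycle-counter request and
release calls restored above are expected to be paired, and the release
only turns the counters off for the last user. The example assumes the L2
is already powered (as the _l2_is_on variant requires); the helper name is
made up.

	static void example_read_cycle_counter(struct kbase_device *kbdev)
	{
		/* A request must precede the release */
		kbase_pm_request_gpu_cycle_counter_l2_is_on(kbdev);

		/* ... read the GPU cycle/timestamp registers here ... */

		/* The last releaser disables the cycle counters again */
		kbase_pm_release_gpu_cycle_counter(kbdev);
	}
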
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h
index ad2667ae7c21..aa51b8cdef8f 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h
@@ -167,16 +167,6 @@ void kbase_pm_enable_interrupts(struct kbase_device *kbdev);
  */
 void kbase_pm_disable_interrupts(struct kbase_device *kbdev);
 
-/**
- * kbase_pm_disable_interrupts_nolock - Version of kbase_pm_disable_interrupts()
- *                                      that does not take the hwaccess_lock
- *
- * Caller must hold the hwaccess_lock.
- *
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
- */
-void kbase_pm_disable_interrupts_nolock(struct kbase_device *kbdev);
-
 /**
  * kbase_pm_init_hw - Initialize the hardware.
  * @kbdev: The kbase device structure for the device (must be a valid pointer)
@@ -383,35 +373,14 @@ void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev);
  * kbase_pm_release_gpu_cycle_counter - Mark that the GPU cycle counter is no
  *                                      longer in use
  *
- * If the caller is the last caller then the GPU cycle counters will be
- * disabled. A request must have been made before a call to this.
- *
- * Caller must not hold the hwaccess_lock, as it will be taken in this function.
- * If the caller is already holding this lock then
- * kbase_pm_release_gpu_cycle_counter_nolock() must be used instead.
+ * If the caller is the
+ * last caller then the GPU cycle counters will be disabled. A request must have
+ * been made before a call to this.
  *
  * @kbdev: The kbase device structure for the device (must be a valid pointer)
  */
 void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev);
 
-/**
- * kbase_pm_release_gpu_cycle_counter_nolock - Version of kbase_pm_release_gpu_cycle_counter()
- *                                             that does not take hwaccess_lock
- *
- * Caller must hold the hwaccess_lock.
- *
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
- */
-void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev);
-
-/**
- * kbase_pm_wait_for_poweroff_complete - Wait for the poweroff workqueue to
- *                                       complete
- *
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
- */
-void kbase_pm_wait_for_poweroff_complete(struct kbase_device *kbdev);
-
 /**
  * kbase_pm_register_access_enable - Enable access to GPU registers
  *
@@ -485,8 +454,12 @@ void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume);
  *              pointer)
  * @is_suspend: true if power off due to suspend,
  *              false otherwise
+ * Return:
+ *         true      if power was turned off, else
+ *         false     if power can not be turned off due to pending page/bus
+ *                   fault workers. Caller must flush MMU workqueues and retry
  */
-void kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend);
+bool kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend);
 
 #ifdef CONFIG_PM_DEVFREQ
 void kbase_pm_get_dvfs_utilisation(struct kbase_device *kbdev,
@@ -523,7 +496,7 @@ void kbase_pm_power_changed(struct kbase_device *kbdev);
  * @kbdev: The kbase device structure for the device (must be a valid pointer)
  * @now:   Pointer to the timestamp of the change, or NULL to use current time
  *
- * Caller must hold hwaccess_lock
+ * Caller must hold runpool_irq.lock
  */
 void kbase_pm_metrics_update(struct kbase_device *kbdev,
 				ktime_t *now);
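
Illustrative sketch, not part of this patch: the bool return restored for
kbase_pm_do_poweroff() (documented above) obliges callers to flush the MMU
workqueues and retry when pending page/bus faults block the power-off.
This mirrors the callers restored in mali_kbase_pm_backend.c; the wrapper
name is hypothetical and pm.lock is assumed to be held, as in those
callers.

	static void example_poweroff_with_retry(struct kbase_device *kbdev)
	{
		lockdep_assert_held(&kbdev->pm.lock);

		if (!kbase_pm_do_poweroff(kbdev, false)) {
			/* Faults pending: drop pm.lock so the fault workers
			 * can run, flush them, then retry once. */
			mutex_unlock(&kbdev->pm.lock);
			kbase_flush_mmu_wqs(kbdev);
			mutex_lock(&kbdev->pm.lock);
			WARN_ON(!kbase_pm_do_poweroff(kbdev, false));
		}
	}
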
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c
index 7613e1d39fdf..ae632564b96a 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -362,15 +362,14 @@ static void kbase_pm_metrics_active_calc(struct kbase_device *kbdev)
 				int device_nr = (katom->core_req &
 					BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)
 						? katom->device_nr : 0;
-				if (!WARN_ON(device_nr >= 2))
-					kbdev->pm.backend.metrics.
-						active_cl_ctx[device_nr] = 1;
+				WARN_ON(device_nr >= 2);
+				kbdev->pm.backend.metrics.active_cl_ctx[
+						device_nr] = 1;
 			} else {
 				/* Slot 2 should not be running non-compute
 				 * atoms */
-				if (!WARN_ON(js >= 2))
-					kbdev->pm.backend.metrics.
-						active_gl_ctx[js] = 1;
+				WARN_ON(js >= 2);
+				kbdev->pm.backend.metrics.active_gl_ctx[js] = 1;
 			}
 			kbdev->pm.backend.metrics.gpu_active = true;
 		}
@@ -383,7 +382,7 @@ void kbase_pm_metrics_update(struct kbase_device *kbdev, ktime_t *timestamp)
 	unsigned long flags;
 	ktime_t now;
 
-	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
 
 	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
 
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c
index 92457e8c0054..4d006028089a 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c
@@ -156,7 +156,7 @@ static void kbasep_pm_do_poweroff_cores(struct kbase_device *kbdev)
 	u64 prev_shader_state = kbdev->pm.backend.desired_shader_state;
 	u64 prev_tiler_state = kbdev->pm.backend.desired_tiler_state;
 
-	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&kbdev->pm.power_change_lock);
 
 	kbdev->pm.backend.desired_shader_state &=
 			~kbdev->pm.backend.shader_poweroff_pending;
@@ -193,7 +193,7 @@ kbasep_pm_do_gpu_poweroff_callback(struct hrtimer *timer)
 	kbdev = container_of(timer, struct kbase_device,
 						pm.backend.gpu_poweroff_timer);
 
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
 
 	/* It is safe for this call to do nothing if the work item is already
 	 * queued. The worker function will read the most up-to-date state of
@@ -220,7 +220,7 @@ kbasep_pm_do_gpu_poweroff_callback(struct hrtimer *timer)
 	}
 
 	if (kbdev->pm.backend.poweroff_timer_needed) {
-		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
 
 		hrtimer_add_expires(timer, kbdev->pm.gpu_poweroff_time);
 
@@ -228,7 +228,7 @@ kbasep_pm_do_gpu_poweroff_callback(struct hrtimer *timer)
 	}
 
 	kbdev->pm.backend.poweroff_timer_running = false;
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
 
 	return HRTIMER_NORESTART;
 }
@@ -258,13 +258,13 @@ static void kbasep_pm_do_gpu_poweroff_wq(struct work_struct *data)
 
 	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_poweroff_pending == 0);
 
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
 
 	/* Only power off the GPU if a request is still pending */
 	if (!kbdev->pm.backend.pm_current_policy->get_core_active(kbdev))
 		do_poweroff = true;
 
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
 
 	if (do_poweroff) {
 		kbdev->pm.backend.poweroff_timer_needed = false;
@@ -272,7 +272,14 @@ static void kbasep_pm_do_gpu_poweroff_wq(struct work_struct *data)
 		kbdev->pm.backend.poweroff_timer_running = false;
 
 		/* Power off the GPU */
-		kbase_pm_do_poweroff(kbdev, false);
+		if (!kbase_pm_do_poweroff(kbdev, false)) {
+			/* GPU can not be powered off at present */
+			kbdev->pm.backend.poweroff_timer_needed = true;
+			kbdev->pm.backend.poweroff_timer_running = true;
+			hrtimer_start(&kbdev->pm.backend.gpu_poweroff_timer,
+					kbdev->pm.gpu_poweroff_time,
+					HRTIMER_MODE_REL);
+		}
 	}
 
 	mutex_unlock(&kbdev->pm.lock);
@@ -318,7 +325,7 @@ void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev)
 
 	kbdev->pm.backend.poweroff_timer_needed = false;
 	hrtimer_cancel(&kbdev->pm.backend.gpu_poweroff_timer);
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
 	kbdev->pm.backend.poweroff_timer_running = false;
 
 	/* If wq is already running but is held off by pm.lock, make sure it has
@@ -329,7 +336,7 @@ void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev)
 	kbdev->pm.backend.tiler_poweroff_pending = 0;
 	kbdev->pm.backend.shader_poweroff_pending_time = 0;
 
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
 }
 
 void kbase_pm_update_active(struct kbase_device *kbdev)
@@ -344,7 +351,7 @@ void kbase_pm_update_active(struct kbase_device *kbdev)
 	/* pm_current_policy will never be NULL while pm.lock is held */
 	KBASE_DEBUG_ASSERT(backend->pm_current_policy);
 
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	spin_lock_irqsave(&pm->power_change_lock, flags);
 
 	active = backend->pm_current_policy->get_core_active(kbdev);
 
@@ -356,7 +363,7 @@ void kbase_pm_update_active(struct kbase_device *kbdev)
 			/* If a request was pending then the GPU was still
 			 * powered, so no need to continue */
 			if (!kbdev->poweroff_pending) {
-				spin_unlock_irqrestore(&kbdev->hwaccess_lock,
+				spin_unlock_irqrestore(&pm->power_change_lock,
 						flags);
 				return;
 			}
@@ -372,14 +379,10 @@ void kbase_pm_update_active(struct kbase_device *kbdev)
 					HRTIMER_MODE_REL);
 		}
 
+		spin_unlock_irqrestore(&pm->power_change_lock, flags);
+
 		/* Power on the GPU and any cores requested by the policy */
-		if (pm->backend.poweroff_wait_in_progress) {
-			pm->backend.poweron_required = true;
-			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-		} else {
-			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-			kbase_pm_do_poweron(kbdev, false);
-		}
+		kbase_pm_do_poweron(kbdev, false);
 	} else {
 		/* It is an error for the power policy to power off the GPU
 		 * when there are contexts active */
@@ -411,17 +414,35 @@ void kbase_pm_update_active(struct kbase_device *kbdev)
 						pm->gpu_poweroff_time,
 						HRTIMER_MODE_REL);
 				}
-				spin_unlock_irqrestore(&kbdev->hwaccess_lock,
+				spin_unlock_irqrestore(&pm->power_change_lock,
 						flags);
 			} else {
-				spin_unlock_irqrestore(&kbdev->hwaccess_lock,
+				spin_unlock_irqrestore(&pm->power_change_lock,
 						flags);
 
 				/* Power off the GPU immediately */
-				kbase_pm_do_poweroff(kbdev, false);
+				if (!kbase_pm_do_poweroff(kbdev, false)) {
+					/* GPU can not be powered off at present
+					 */
+					spin_lock_irqsave(
+							&pm->power_change_lock,
+							flags);
+					backend->poweroff_timer_needed = true;
+					if (!backend->poweroff_timer_running) {
+						backend->poweroff_timer_running
+								= true;
+						hrtimer_start(
+						&backend->gpu_poweroff_timer,
+							pm->gpu_poweroff_time,
+							HRTIMER_MODE_REL);
+					}
+					spin_unlock_irqrestore(
+							&pm->power_change_lock,
+							flags);
+				}
 			}
 		} else {
-			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+			spin_unlock_irqrestore(&pm->power_change_lock, flags);
 		}
 	}
 }
@@ -433,37 +454,25 @@ void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev)
 	bool cores_are_available;
 	bool do_poweroff = false;
 
-	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&kbdev->pm.power_change_lock);
 
 	if (kbdev->pm.backend.pm_current_policy == NULL)
 		return;
-	if (kbdev->pm.backend.poweroff_wait_in_progress)
-		return;
 
-	if (kbdev->protected_mode_transition &&	!kbdev->shader_needed_bitmap &&
-			!kbdev->shader_inuse_bitmap && !kbdev->tiler_needed_cnt
-			&& !kbdev->tiler_inuse_cnt) {
-		/* We are trying to change in/out of protected mode - force all
-		 * cores off so that the L2 powers down */
-		desired_bitmap = 0;
-		desired_tiler_bitmap = 0;
-	} else {
-		desired_bitmap =
+	desired_bitmap =
 		kbdev->pm.backend.pm_current_policy->get_core_mask(kbdev);
-		desired_bitmap &= kbase_pm_ca_get_core_mask(kbdev);
+	desired_bitmap &= kbase_pm_ca_get_core_mask(kbdev);
+
+	if (kbdev->tiler_needed_cnt > 0 || kbdev->tiler_inuse_cnt > 0)
+		desired_tiler_bitmap = 1;
+	else
+		desired_tiler_bitmap = 0;
 
+	if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY)) {
+		/* Unless XAFFINITY is supported, enable core 0 if tiler
+		 * required, regardless of core availability */
 		if (kbdev->tiler_needed_cnt > 0 || kbdev->tiler_inuse_cnt > 0)
-			desired_tiler_bitmap = 1;
-		else
-			desired_tiler_bitmap = 0;
-
-		if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY)) {
-			/* Unless XAFFINITY is supported, enable core 0 if tiler
-			 * required, regardless of core availability */
-			if (kbdev->tiler_needed_cnt > 0 ||
-					kbdev->tiler_inuse_cnt > 0)
-				desired_bitmap |= 1;
-		}
+			desired_bitmap |= 1;
 	}
 
 	if (kbdev->pm.backend.desired_shader_state != desired_bitmap)
@@ -486,8 +495,7 @@ void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev)
 				(kbdev->pm.backend.desired_tiler_state &
 							~desired_tiler_bitmap);
 
-			if (kbdev->pm.poweroff_shader_ticks &&
-					!kbdev->protected_mode_transition)
+			if (kbdev->pm.poweroff_shader_ticks)
 				kbdev->pm.backend.shader_poweroff_pending_time =
 						kbdev->pm.poweroff_shader_ticks;
 			else
@@ -509,8 +517,7 @@ void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev)
 		kbdev->pm.backend.tiler_poweroff_pending |=
 				(kbdev->pm.backend.desired_tiler_state &
 							~desired_tiler_bitmap);
-		if (kbdev->pm.poweroff_shader_ticks &&
-				!kbdev->protected_mode_transition)
+		if (kbdev->pm.poweroff_shader_ticks)
 			kbdev->pm.backend.shader_poweroff_pending_time =
 					kbdev->pm.poweroff_shader_ticks;
 		else
@@ -556,11 +563,11 @@ void kbase_pm_update_cores_state(struct kbase_device *kbdev)
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
 
 	kbase_pm_update_cores_state_nolock(kbdev);
 
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
 }
 
 int kbase_pm_list_policies(const struct kbase_pm_policy * const **list)
@@ -605,10 +612,10 @@ void kbase_pm_set_policy(struct kbase_device *kbdev,
 	mutex_lock(&kbdev->pm.lock);
 
 	/* Remove the policy to prevent IRQ handlers from working on it */
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
 	old_policy = kbdev->pm.backend.pm_current_policy;
 	kbdev->pm.backend.pm_current_policy = NULL;
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
 
 	KBASE_TRACE_ADD(kbdev, PM_CURRENT_POLICY_TERM, NULL, NULL, 0u,
 								old_policy->id);
@@ -620,9 +627,9 @@ void kbase_pm_set_policy(struct kbase_device *kbdev,
 	if (new_policy->init)
 		new_policy->init(kbdev);
 
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
 	kbdev->pm.backend.pm_current_policy = new_policy;
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
 
 	/* If any core power state changes were previously attempted, but
 	 * couldn't be made because the policy was changing (current_policy was
@@ -657,13 +664,14 @@ kbase_pm_trace_check_and_finish_state_change(struct kbase_device *kbdev)
 void kbase_pm_request_cores(struct kbase_device *kbdev,
 				bool tiler_required, u64 shader_cores)
 {
+	unsigned long flags;
 	u64 cores;
 
 	kbase_pm_change_state change_gpu_state = 0u;
 
 	KBASE_DEBUG_ASSERT(kbdev != NULL);
 
-	lockdep_assert_held(&kbdev->hwaccess_lock);
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
 
 	cores = shader_cores;
 	while (cores) {
@@ -706,6 +714,8 @@ void kbase_pm_request_cores(struct kbase_device *kbdev,
 					KBASE_PM_FUNC_ID_REQUEST_CORES_END,
 							change_gpu_state);
 	}
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
 }
 
 KBASE_EXPORT_TEST_API(kbase_pm_request_cores);
@@ -713,11 +723,13 @@ KBASE_EXPORT_TEST_API(kbase_pm_request_cores);
 void kbase_pm_unrequest_cores(struct kbase_device *kbdev,
 				bool tiler_required, u64 shader_cores)
 {
+	unsigned long flags;
+
 	kbase_pm_change_state change_gpu_state = 0u;
 
 	KBASE_DEBUG_ASSERT(kbdev != NULL);
 
-	lockdep_assert_held(&kbdev->hwaccess_lock);
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
 
 	while (shader_cores) {
 		int bitnum = fls64(shader_cores) - 1;
@@ -758,6 +770,8 @@ void kbase_pm_unrequest_cores(struct kbase_device *kbdev,
 		 * - no-one will wait on the state change */
 		kbase_pm_trace_check_and_finish_state_change(kbdev);
 	}
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
 }
 
 KBASE_EXPORT_TEST_API(kbase_pm_unrequest_cores);
@@ -766,10 +780,11 @@ enum kbase_pm_cores_ready
 kbase_pm_register_inuse_cores(struct kbase_device *kbdev,
 				bool tiler_required, u64 shader_cores)
 {
+	unsigned long flags;
 	u64 prev_shader_needed;	/* Just for tracing */
 	u64 prev_shader_inuse;	/* Just for tracing */
 
-	lockdep_assert_held(&kbdev->hwaccess_lock);
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
 
 	prev_shader_needed = kbdev->shader_needed_bitmap;
 	prev_shader_inuse = kbdev->shader_inuse_bitmap;
@@ -780,15 +795,16 @@ kbase_pm_register_inuse_cores(struct kbase_device *kbdev,
 	 * be chosen */
 	if ((kbdev->pm.backend.desired_shader_state & shader_cores) !=
 							shader_cores) {
-		return (kbdev->pm.backend.poweroff_wait_in_progress ||
-				kbdev->pm.backend.pm_current_policy == NULL) ?
-				KBASE_CORES_NOT_READY : KBASE_NEW_AFFINITY;
+		spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+		return KBASE_NEW_AFFINITY;
 	}
 
 	if ((kbdev->shader_available_bitmap & shader_cores) != shader_cores ||
 	    (tiler_required && !kbdev->tiler_available_bitmap)) {
 		/* Trace ongoing core transition */
 		kbase_timeline_pm_l2_transition_start(kbdev);
+		spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
 		return KBASE_CORES_NOT_READY;
 	}
 
@@ -837,6 +853,8 @@ kbase_pm_register_inuse_cores(struct kbase_device *kbdev,
 		KBASE_TRACE_ADD(kbdev, PM_REGISTER_CHANGE_SHADER_INUSE, NULL,
 				NULL, 0u, (u32) kbdev->shader_inuse_bitmap);
 
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
 	return KBASE_CORES_READY;
 }
 
@@ -845,11 +863,12 @@ KBASE_EXPORT_TEST_API(kbase_pm_register_inuse_cores);
 void kbase_pm_release_cores(struct kbase_device *kbdev,
 				bool tiler_required, u64 shader_cores)
 {
+	unsigned long flags;
 	kbase_pm_change_state change_gpu_state = 0u;
 
 	KBASE_DEBUG_ASSERT(kbdev != NULL);
 
-	lockdep_assert_held(&kbdev->hwaccess_lock);
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
 
 	while (shader_cores) {
 		int bitnum = fls64(shader_cores) - 1;
@@ -894,6 +913,8 @@ void kbase_pm_release_cores(struct kbase_device *kbdev,
 		/* Trace that any state change completed immediately */
 		kbase_pm_trace_check_and_finish_state_change(kbdev);
 	}
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
 }
 
 KBASE_EXPORT_TEST_API(kbase_pm_release_cores);
@@ -902,13 +923,7 @@ void kbase_pm_request_cores_sync(struct kbase_device *kbdev,
 					bool tiler_required,
 					u64 shader_cores)
 {
-	unsigned long flags;
-
-	kbase_pm_wait_for_poweroff_complete(kbdev);
-
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 	kbase_pm_request_cores(kbdev, tiler_required, shader_cores);
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 
 	kbase_pm_check_transitions_sync(kbdev);
 }
@@ -920,7 +935,7 @@ void kbase_pm_request_l2_caches(struct kbase_device *kbdev)
 	unsigned long flags;
 	u32 prior_l2_users_count;
 
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
 
 	prior_l2_users_count = kbdev->l2_users_count++;
 
@@ -932,7 +947,7 @@ void kbase_pm_request_l2_caches(struct kbase_device *kbdev)
 	if (!prior_l2_users_count || !kbdev->l2_available_bitmap)
 		kbase_pm_check_transitions_nolock(kbdev);
 
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
 	wait_event(kbdev->pm.backend.l2_powered_wait,
 					kbdev->pm.backend.l2_powered == 1);
 
@@ -944,16 +959,22 @@ KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches);
 
 void kbase_pm_request_l2_caches_l2_is_on(struct kbase_device *kbdev)
 {
-	lockdep_assert_held(&kbdev->hwaccess_lock);
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
 
 	kbdev->l2_users_count++;
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
 }
 
 KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches_l2_is_on);
 
 void kbase_pm_release_l2_caches(struct kbase_device *kbdev)
 {
-	lockdep_assert_held(&kbdev->hwaccess_lock);
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
 
 	KBASE_DEBUG_ASSERT(kbdev->l2_users_count > 0);
 
@@ -964,6 +985,8 @@ void kbase_pm_release_l2_caches(struct kbase_device *kbdev)
 		/* Trace that any state change completed immediately */
 		kbase_pm_trace_check_and_finish_state_change(kbdev);
 	}
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
 }
 
 KBASE_EXPORT_TEST_API(kbase_pm_release_l2_caches);
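
Illustrative sketch, not part of this patch: the request/register/release
helpers above implement the sequence the job-slot code uses to power shader
cores before submitting work. With this revert they take
pm.power_change_lock internally, so the (hypothetical) caller below holds
no PM spinlock itself; the real callers retry register-inuse on
KBASE_CORES_NOT_READY rather than backing out immediately.

	static bool example_power_and_use_cores(struct kbase_device *kbdev,
						u64 shader_cores)
	{
		enum kbase_pm_cores_ready ready;

		/* Mark the shader cores (and the tiler) as needed */
		kbase_pm_request_cores(kbdev, true, shader_cores);

		ready = kbase_pm_register_inuse_cores(kbdev, true,
							shader_cores);
		if (ready != KBASE_CORES_READY) {
			/* Simplified: drop the request instead of retrying or
			 * choosing a new affinity (KBASE_NEW_AFFINITY) */
			kbase_pm_unrequest_cores(kbdev, true, shader_cores);
			return false;
		}

		/* ... submit work to the now-powered cores here ... */

		kbase_pm_release_cores(kbdev, true, shader_cores);
		return true;
	}
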
diff --git a/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h b/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h
index 8b07cbc5eae8..f7c0ff674906 100644
--- a/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h
+++ b/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h
@@ -50,7 +50,6 @@ enum base_hw_feature {
 	BASE_HW_FEATURE_FLUSH_REDUCTION,
 	BASE_HW_FEATURE_PROTECTED_MODE,
 	BASE_HW_FEATURE_COHERENCY_REG,
-	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
 	BASE_HW_FEATURE_END
 };
 
@@ -193,31 +192,6 @@ static const enum base_hw_feature base_hw_features_tMIx[] = {
 	BASE_HW_FEATURE_END
 };
 
-static const enum base_hw_feature base_hw_features_tHEx[] = {
-	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
-	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
-	BASE_HW_FEATURE_XAFFINITY,
-	BASE_HW_FEATURE_WARPING,
-	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
-	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
-	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
-	BASE_HW_FEATURE_BRNDOUT_CC,
-	BASE_HW_FEATURE_BRNDOUT_KILL,
-	BASE_HW_FEATURE_LD_ST_LEA_TEX,
-	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
-	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
-	BASE_HW_FEATURE_MRT,
-	BASE_HW_FEATURE_MSAA_16X,
-	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
-	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
-	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
-	BASE_HW_FEATURE_TEST4_DATUM_MODE,
-	BASE_HW_FEATURE_FLUSH_REDUCTION,
-	BASE_HW_FEATURE_PROTECTED_MODE,
-	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
-	BASE_HW_FEATURE_COHERENCY_REG,
-	BASE_HW_FEATURE_END
-};
 
 
 #endif /* _BASE_HWCONFIG_FEATURES_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h b/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h
index 4d95b4f9f649..149f44cb8674 100644
--- a/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h
+++ b/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h
@@ -113,8 +113,6 @@ enum base_hw_issue {
 	BASE_HW_ISSUE_TMIX_8138,
 	BASE_HW_ISSUE_TMIX_8206,
 	BASE_HW_ISSUE_TMIX_8343,
-	BASE_HW_ISSUE_TMIX_8463,
-	BASE_HW_ISSUE_TMIX_8456,
 	GPUCORE_1619,
 	BASE_HW_ISSUE_END
 };
@@ -946,8 +944,6 @@ static const enum base_hw_issue base_hw_issues_tMIx_r0p0_05dev0[] = {
 	BASE_HW_ISSUE_TMIX_8133,
 	BASE_HW_ISSUE_TMIX_8138,
 	BASE_HW_ISSUE_TMIX_8343,
-	BASE_HW_ISSUE_TMIX_8463,
-	BASE_HW_ISSUE_TMIX_8456,
 	BASE_HW_ISSUE_END
 };
 
@@ -965,8 +961,6 @@ static const enum base_hw_issue base_hw_issues_tMIx_r0p0[] = {
 	BASE_HW_ISSUE_TMIX_8138,
 	BASE_HW_ISSUE_TMIX_8206,
 	BASE_HW_ISSUE_TMIX_8343,
-	BASE_HW_ISSUE_TMIX_8463,
-	BASE_HW_ISSUE_TMIX_8456,
 	BASE_HW_ISSUE_END
 };
 
@@ -982,32 +976,11 @@ static const enum base_hw_issue base_hw_issues_model_tMIx[] = {
 	BASE_HW_ISSUE_TMIX_8138,
 	BASE_HW_ISSUE_TMIX_8206,
 	BASE_HW_ISSUE_TMIX_8343,
-	BASE_HW_ISSUE_TMIX_8456,
 	GPUCORE_1619,
 	BASE_HW_ISSUE_END
 };
 
-static const enum base_hw_issue base_hw_issues_tHEx_r0p0[] = {
-	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_10682,
-	BASE_HW_ISSUE_10821,
-	BASE_HW_ISSUE_T76X_3700,
-	BASE_HW_ISSUE_TMIX_7891,
-	BASE_HW_ISSUE_TMIX_8042,
-	BASE_HW_ISSUE_TMIX_8133,
-	BASE_HW_ISSUE_END
-};
 
-static const enum base_hw_issue base_hw_issues_model_tHEx[] = {
-	BASE_HW_ISSUE_5736,
-	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_T76X_3700,
-	BASE_HW_ISSUE_TMIX_7891,
-	BASE_HW_ISSUE_TMIX_8042,
-	BASE_HW_ISSUE_TMIX_8133,
-	GPUCORE_1619,
-	BASE_HW_ISSUE_END
-};
 
 
 
diff --git a/drivers/gpu/arm/midgard/mali_base_kernel.h b/drivers/gpu/arm/midgard/mali_base_kernel.h
index bcb05e40f7fa..749dd9a1cc9e 100644
--- a/drivers/gpu/arm/midgard/mali_base_kernel.h
+++ b/drivers/gpu/arm/midgard/mali_base_kernel.h
@@ -45,9 +45,6 @@
 /* Support UK10_2 IOCTLS */
 #define BASE_LEGACY_UK10_2_SUPPORT 1
 
-/* Support UK10_4 IOCTLS */
-#define BASE_LEGACY_UK10_4_SUPPORT 1
-
 typedef struct base_mem_handle {
 	struct {
 		u64 handle;
@@ -1810,10 +1807,4 @@ typedef struct base_profiling_controls {
 	u32 profiling_controls[FBDUMP_CONTROL_MAX];
 } base_profiling_controls;
 
-/* Enable additional tracepoints for latency measurements (TL_ATOM_READY,
- * TL_ATOM_DONE, TL_ATOM_PRIO_CHANGE, TL_ATOM_EVENT_POST) */
-#define BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS (1 << 0)
-
-#define BASE_TLSTREAM_FLAGS_MASK (BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS)
-
 #endif				/* _BASE_KERNEL_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase.h b/drivers/gpu/arm/midgard/mali_kbase.h
index 443d4b13a4fb..b6d28fea9987 100644
--- a/drivers/gpu/arm/midgard/mali_kbase.h
+++ b/drivers/gpu/arm/midgard/mali_kbase.h
@@ -45,15 +45,10 @@
 #include <mali_kbase_uku.h>
 #include <mali_kbase_linux.h>
 
-/*
- * Include mali_kbase_defs.h first as this provides types needed by other local
- * header files.
- */
-#include "mali_kbase_defs.h"
-
-#include "mali_kbase_context.h"
 #include "mali_kbase_strings.h"
+#include "mali_kbase_pm.h"
 #include "mali_kbase_mem_lowlevel.h"
+#include "mali_kbase_defs.h"
 #include "mali_kbase_trace_timeline.h"
 #include "mali_kbase_js.h"
 #include "mali_kbase_mem.h"
@@ -110,6 +105,7 @@ u32 kbase_get_profiling_control(struct kbase_device *kbdev, u32 control);
 struct kbase_context *
 kbase_create_context(struct kbase_device *kbdev, bool is_compat);
 void kbase_destroy_context(struct kbase_context *kctx);
+int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags);
 
 int kbase_jd_init(struct kbase_context *kctx);
 void kbase_jd_exit(struct kbase_context *kctx);
@@ -167,7 +163,7 @@ void kbase_gpu_cacheclean(struct kbase_device *kbdev,
  * than @katom will be soft stopped and put back in the queue, so that atoms
  * with higher priority can run.
  *
- * The hwaccess_lock must be held when calling this function.
+ * The js_data.runpool_irq.lock must be held when calling this function.
  */
 void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
 				struct kbase_jd_atom *katom);
@@ -550,58 +546,4 @@ void kbasep_trace_dump(struct kbase_device *kbdev);
 void kbase_set_driver_inactive(struct kbase_device *kbdev, bool inactive);
 #endif /* CONFIG_MALI_DEBUG */
 
-
-#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI)
-
-/* kbase_io_history_init - initialize data struct for register access history
- *
- * @kbdev The register history to initialize
- * @n The number of register accesses that the buffer could hold
- *
- * @return 0 if successfully initialized, failure otherwise
- */
-int kbase_io_history_init(struct kbase_io_history *h, u16 n);
-
-/* kbase_io_history_term - uninit all resources for the register access history
- *
- * @h The register history to terminate
- */
-void kbase_io_history_term(struct kbase_io_history *h);
-
-/* kbase_io_history_dump - print the register history to the kernel ring buffer
- *
- * @kbdev Pointer to kbase_device containing the register history to dump
- */
-void kbase_io_history_dump(struct kbase_device *kbdev);
-
-/**
- * kbase_io_history_resize - resize the register access history buffer.
- *
- * @h: Pointer to a valid register history to resize
- * @new_size: Number of accesses the buffer could hold
- *
- * A successful resize will clear all recent register accesses.
- * If resizing fails for any reason (e.g., could not allocate memory, invalid
- * buffer size) then the original buffer will be kept intact.
- *
- * @return 0 if the buffer was resized, failure otherwise
- */
-int kbase_io_history_resize(struct kbase_io_history *h, u16 new_size);
-
-#else /* CONFIG_DEBUG_FS */
-
-#define kbase_io_history_init(...) ((int)0)
-
-#define kbase_io_history_term CSTD_NOP
-
-#define kbase_io_history_dump CSTD_NOP
-
-#define kbase_io_history_resize CSTD_NOP
-
-#endif /* CONFIG_DEBUG_FS */
-
-
 #endif
-
-
-
diff --git a/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h b/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h
index e674cc2ea183..9b00cce9b2b3 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h
@@ -201,13 +201,13 @@ enum {
 /*
  * Default minimum number of scheduling ticks before jobs are hard-stopped
  */
-#define DEFAULT_JS_HARD_STOP_TICKS_SS    (100) /* 10s */
+#define DEFAULT_JS_HARD_STOP_TICKS_SS    (50) /* 5s */
 #define DEFAULT_JS_HARD_STOP_TICKS_SS_8408  (300) /* 30s */
 
 /*
  * Default minimum number of scheduling ticks before CL jobs are hard-stopped.
  */
-#define DEFAULT_JS_HARD_STOP_TICKS_CL    (100) /* 10s */
+#define DEFAULT_JS_HARD_STOP_TICKS_CL    (50) /* 5s */
 
 /*
  * Default minimum number of scheduling ticks before jobs are hard-stopped
@@ -225,14 +225,14 @@ enum {
  * Default minimum number of scheduling ticks before the GPU is reset to clear a
  * "stuck" job
  */
-#define DEFAULT_JS_RESET_TICKS_SS           (105) /* 10.5s */
+#define DEFAULT_JS_RESET_TICKS_SS           (55) /* 5.5s */
 #define DEFAULT_JS_RESET_TICKS_SS_8408     (450) /* 45s */
 
 /*
  * Default minimum number of scheduling ticks before the GPU is reset to clear a
  * "stuck" CL job.
  */
-#define DEFAULT_JS_RESET_TICKS_CL        (105) /* 10.5s */
+#define DEFAULT_JS_RESET_TICKS_CL        (55) /* 5.5s */
 
 /*
  * Default minimum number of scheduling ticks before the GPU is reset to clear a
diff --git a/drivers/gpu/arm/midgard/mali_kbase_context.c b/drivers/gpu/arm/midgard/mali_kbase_context.c
index 55c5ef613c37..344a1f16de8a 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_context.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_context.c
@@ -53,13 +53,13 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat)
 
 	kctx->kbdev = kbdev;
 	kctx->as_nr = KBASEP_AS_NR_INVALID;
-	if (is_compat)
-		kbase_ctx_flag_set(kctx, KCTX_COMPAT);
+	kctx->is_compat = is_compat;
 #ifdef CONFIG_MALI_TRACE_TIMELINE
 	kctx->timeline.owner_tgid = task_tgid_nr(current);
 #endif
 	atomic_set(&kctx->setup_complete, 0);
 	atomic_set(&kctx->setup_in_progress, 0);
+	kctx->infinite_cache_active = 0;
 	spin_lock_init(&kctx->mm_update_lock);
 	kctx->process_mm = NULL;
 	atomic_set(&kctx->nonmapped_pages, 0);
@@ -108,15 +108,11 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat)
 	if (err)
 		goto term_dma_fence;
 
-	do {
-		err = kbase_mem_pool_grow(&kctx->mem_pool,
-				MIDGARD_MMU_BOTTOMLEVEL);
-		if (err)
-			goto pgd_no_mem;
-		kctx->pgd = kbase_mmu_alloc_pgd(kctx);
-	} while (!kctx->pgd);
+	kctx->pgd = kbase_mmu_alloc_pgd(kctx);
+	if (!kctx->pgd)
+		goto free_mmu;
 
-	kctx->aliasing_sink_page = kbase_mem_alloc_page(kctx->kbdev);
+	kctx->aliasing_sink_page = kbase_mem_pool_alloc(&kctx->mem_pool);
 	if (!kctx->aliasing_sink_page)
 		goto no_sink_page;
 
@@ -166,7 +162,7 @@ no_sink_page:
 	kbase_gpu_vm_lock(kctx);
 	kbase_mmu_free_pgd(kctx);
 	kbase_gpu_vm_unlock(kctx);
-pgd_no_mem:
+free_mmu:
 	kbase_mmu_term(kctx);
 term_dma_fence:
 	kbase_dma_fence_term(kctx);
@@ -304,16 +300,17 @@ int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags)
 	}
 
 	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
-	spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, irq_flags);
+	spin_lock_irqsave(&kctx->kbdev->js_data.runpool_irq.lock, irq_flags);
 
 	/* Translate the flags */
 	if ((flags & BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) == 0)
-		kbase_ctx_flag_clear(kctx, KCTX_SUBMIT_DISABLED);
+		js_kctx_info->ctx.flags &= ~((u32) KBASE_CTX_FLAG_SUBMIT_DISABLED);
 
 	/* Latch the initial attributes into the Job Scheduler */
 	kbasep_js_ctx_attr_set_initial_attrs(kctx->kbdev, kctx);
 
-	spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, irq_flags);
+	spin_unlock_irqrestore(&kctx->kbdev->js_data.runpool_irq.lock,
+			irq_flags);
 	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
  out:
 	return err;
diff --git a/drivers/gpu/arm/midgard/mali_kbase_context.h b/drivers/gpu/arm/midgard/mali_kbase_context.h
deleted file mode 100644
index a3f5bb0ce0da..000000000000
--- a/drivers/gpu/arm/midgard/mali_kbase_context.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- *
- * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
- *
- * A copy of the licence is included with the program, and can also be obtained
- * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
- * Boston, MA  02110-1301, USA.
- *
- */
-
-
-
-#ifndef _KBASE_CONTEXT_H_
-#define _KBASE_CONTEXT_H_
-
-#include <linux/atomic.h>
-
-
-int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags);
-
-/**
- * kbase_ctx_flag - Check if @flag is set on @kctx
- * @kctx: Pointer to kbase context to check
- * @flag: Flag to check
- *
- * Return: true if @flag is set on @kctx, false if not.
- */
-static inline bool kbase_ctx_flag(struct kbase_context *kctx,
-				      enum kbase_context_flags flag)
-{
-	return atomic_read(&kctx->flags) & flag;
-}
-
-/**
- * kbase_ctx_flag_clear - Clear @flag on @kctx
- * @kctx: Pointer to kbase context
- * @flag: Flag to clear
- *
- * Clear the @flag on @kctx. This is done atomically, so other flags being
- * cleared or set at the same time will be safe.
- *
- * Some flags have locking requirements, check the documentation for the
- * respective flags.
- */
-static inline void kbase_ctx_flag_clear(struct kbase_context *kctx,
-					enum kbase_context_flags flag)
-{
-#if KERNEL_VERSION(4, 3, 0) > LINUX_VERSION_CODE
-	/*
-	 * Earlier kernel versions doesn't have atomic_andnot() or
-	 * atomic_and(). atomic_clear_mask() was only available on some
-	 * architectures and removed on arm in v3.13 on arm and arm64.
-	 *
-	 * Use a compare-exchange loop to clear the flag on pre 4.3 kernels,
-	 * when atomic_andnot() becomes available.
-	 */
-	int old, new;
-
-	do {
-		old = atomic_read(&kctx->flags);
-		new = old & ~flag;
-
-	} while (atomic_cmpxchg(&kctx->flags, old, new) != old);
-#else
-	atomic_andnot(flag, &kctx->flags);
-#endif
-}
-
-/**
- * kbase_ctx_flag_set - Set @flag on @kctx
- * @kctx: Pointer to kbase context
- * @flag: Flag to clear
- *
- * Set the @flag on @kctx. This is done atomically, so other flags being
- * cleared or set at the same time will be safe.
- *
- * Some flags have locking requirements, check the documentation for the
- * respective flags.
- */
-static inline void kbase_ctx_flag_set(struct kbase_context *kctx,
-				      enum kbase_context_flags flag)
-{
-	atomic_or(flag, &kctx->flags);
-}
-#endif /* _KBASE_CONTEXT_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_core_linux.c b/drivers/gpu/arm/midgard/mali_kbase_core_linux.c
index da6b8e9ef4b1..499ef46a0fe2 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_core_linux.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_core_linux.c
@@ -35,7 +35,6 @@
 #if !MALI_CUSTOMER_RELEASE
 #include "mali_kbase_regs_dump_debugfs.h"
 #endif /* !MALI_CUSTOMER_RELEASE */
-#include "mali_kbase_regs_history_debugfs.h"
 #include <mali_kbase_hwaccess_backend.h>
 #include <mali_kbase_hwaccess_jm.h>
 #include <backend/gpu/mali_kbase_device_internal.h>
@@ -118,6 +117,39 @@ static inline void __compile_time_asserts(void)
 	CSTD_COMPILE_TIME_ASSERT(sizeof(KERNEL_SIDE_DDK_VERSION_STRING) <= KBASE_GET_VERSION_BUFFER_SIZE);
 }
 
+#ifdef CONFIG_KDS
+
+struct kbasep_kds_resource_set_file_data {
+	struct kds_resource_set *lock;
+};
+
+static int kds_resource_release(struct inode *inode, struct file *file);
+
+static const struct file_operations kds_resource_fops = {
+	.release = kds_resource_release
+};
+
+struct kbase_kds_resource_list_data {
+	struct kds_resource **kds_resources;
+	unsigned long *kds_access_bitmap;
+	int num_elems;
+};
+
+static int kds_resource_release(struct inode *inode, struct file *file)
+{
+	struct kbasep_kds_resource_set_file_data *data;
+
+	data = (struct kbasep_kds_resource_set_file_data *)file->private_data;
+	if (NULL != data) {
+		if (NULL != data->lock)
+			kds_resource_set_release(&data->lock);
+
+		kfree(data);
+	}
+	return 0;
+}
+#endif /* CONFIG_KDS */
+
 static void kbase_create_timeline_objects(struct kbase_context *kctx)
 {
 	struct kbase_device             *kbdev = kctx->kbdev;
@@ -259,7 +291,6 @@ enum {
 	inited_debugfs = (1u << 15),
 	inited_gpu_device = (1u << 16),
 	inited_registers_map = (1u << 17),
-	inited_io_history = (1u << 18),
 	inited_power_control = (1u << 19),
 	inited_buslogger = (1u << 20)
 };
@@ -371,7 +402,7 @@ static int kbase_dispatch(struct kbase_context *kctx, void * const args, u32 arg
 				goto bad_size;
 
 #if defined(CONFIG_64BIT)
-			if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+			if (!kctx->is_compat) {
 				/* force SAME_VA if a 64-bit client */
 				mem->flags |= BASE_MEM_SAME_VA;
 			}
@@ -392,7 +423,7 @@ static int kbase_dispatch(struct kbase_context *kctx, void * const args, u32 arg
 			if (sizeof(*mem_import) != args_size)
 				goto bad_size;
 #ifdef CONFIG_COMPAT
-			if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+			if (kctx->is_compat)
 				phandle = compat_ptr(mem_import->phandle.compat_value);
 			else
 #endif
@@ -433,7 +464,7 @@ static int kbase_dispatch(struct kbase_context *kctx, void * const args, u32 arg
 			}
 
 #ifdef CONFIG_COMPAT
-			if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+			if (kctx->is_compat)
 				user_ai = compat_ptr(alias->ai.compat_value);
 			else
 #endif
@@ -871,14 +902,14 @@ copy_failed:
 			}
 
 #ifdef CONFIG_COMPAT
-			if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+			if (kctx->is_compat)
 				user_buf = compat_ptr(add_data->buf.compat_value);
 			else
 #endif
 				user_buf = add_data->buf.value;
 
 			buf = kmalloc(add_data->len, GFP_KERNEL);
-			if (ZERO_OR_NULL_PTR(buf))
+			if (!buf)
 				goto out_bad;
 
 			if (0 != copy_from_user(buf, user_buf, add_data->len)) {
@@ -909,28 +940,7 @@ copy_failed:
 			break;
 		}
 #endif /* CONFIG_MALI_NO_MALI */
-#ifdef BASE_LEGACY_UK10_4_SUPPORT
-	case KBASE_FUNC_TLSTREAM_ACQUIRE_V10_4:
-		{
-			struct kbase_uk_tlstream_acquire_v10_4 *tlstream_acquire
-					= args;
 
-			if (sizeof(*tlstream_acquire) != args_size)
-				goto bad_size;
-
-			if (0 != kbase_tlstream_acquire(
-						kctx,
-						&tlstream_acquire->fd, 0)) {
-				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
-			} else if (0 <= tlstream_acquire->fd) {
-				/* Summary stream was cleared during acquire.
-				 * Create static timeline objects that will be
-				 * read by client. */
-				kbase_create_timeline_objects(kctx);
-			}
-			break;
-		}
-#endif /* BASE_LEGACY_UK10_4_SUPPORT */
 	case KBASE_FUNC_TLSTREAM_ACQUIRE:
 		{
 			struct kbase_uk_tlstream_acquire *tlstream_acquire =
@@ -939,13 +949,9 @@ copy_failed:
 			if (sizeof(*tlstream_acquire) != args_size)
 				goto bad_size;
 
-			if (tlstream_acquire->flags & ~BASE_TLSTREAM_FLAGS_MASK)
-				goto out_bad;
-
 			if (0 != kbase_tlstream_acquire(
 						kctx,
-						&tlstream_acquire->fd,
-						tlstream_acquire->flags)) {
+						&tlstream_acquire->fd)) {
 				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
 			} else if (0 <= tlstream_acquire->fd) {
 				/* Summary stream was cleared during acquire.
@@ -1128,63 +1134,6 @@ void kbase_release_device(struct kbase_device *kbdev)
 }
 EXPORT_SYMBOL(kbase_release_device);
 
-#if KERNEL_VERSION(4, 4, 0) > LINUX_VERSION_CODE
-/*
- * Older versions, before v4.6, of the kernel doesn't have
- * kstrtobool_from_user().
- */
-static int kstrtobool_from_user(const char __user *s, size_t count, bool *res)
-{
-	char buf[32];
-
-	count = min(sizeof(buf), count);
-
-	if (copy_from_user(buf, s, count))
-		return -EFAULT;
-	buf[count] = '\0';
-
-	return strtobool(buf, res);
-}
-#endif
-
-static ssize_t write_ctx_infinite_cache(struct file *f, const char __user *ubuf, size_t size, loff_t *off)
-{
-	struct kbase_context *kctx = f->private_data;
-	int err;
-	bool value;
-
-	err = kstrtobool_from_user(ubuf, size, &value);
-	if (err)
-		return err;
-
-	if (value)
-		kbase_ctx_flag_set(kctx, KCTX_INFINITE_CACHE);
-	else
-		kbase_ctx_flag_clear(kctx, KCTX_INFINITE_CACHE);
-
-	return size;
-}
-
-static ssize_t read_ctx_infinite_cache(struct file *f, char __user *ubuf, size_t size, loff_t *off)
-{
-	struct kbase_context *kctx = f->private_data;
-	char buf[32];
-	int count;
-	bool value;
-
-	value = kbase_ctx_flag(kctx, KCTX_INFINITE_CACHE);
-
-	count = scnprintf(buf, sizeof(buf), "%s\n", value ? "Y" : "N");
-
-	return simple_read_from_buffer(ubuf, size, off, buf, count);
-}
-
-static const struct file_operations kbase_infinite_cache_fops = {
-	.open = simple_open,
-	.write = write_ctx_infinite_cache,
-	.read = read_ctx_infinite_cache,
-};
-
 static int kbase_open(struct inode *inode, struct file *filp)
 {
 	struct kbase_device *kbdev = NULL;
@@ -1209,8 +1158,7 @@ static int kbase_open(struct inode *inode, struct file *filp)
 	filp->private_data = kctx;
 	kctx->filp = filp;
 
-	if (kbdev->infinite_cache_active_default)
-		kbase_ctx_flag_set(kctx, KCTX_INFINITE_CACHE);
+	kctx->infinite_cache_active = kbdev->infinite_cache_active_default;
 
 #ifdef CONFIG_DEBUG_FS
 	snprintf(kctx_name, 64, "%d_%d", kctx->tgid, kctx->id);
@@ -1228,20 +1176,20 @@ static int kbase_open(struct inode *inode, struct file *filp)
 	  * infinite cache control support from debugfs.
 	  */
 #else
-	debugfs_create_file("infinite_cache", 0644, kctx->kctx_dentry,
-			    kctx, &kbase_infinite_cache_fops);
+	debugfs_create_bool("infinite_cache", 0644, kctx->kctx_dentry,
+			(bool*)&(kctx->infinite_cache_active));
 #endif /* CONFIG_MALI_COH_USER */
 
 	mutex_init(&kctx->mem_profile_lock);
 
-	kbasep_jd_debugfs_ctx_init(kctx);
+	kbasep_jd_debugfs_ctx_add(kctx);
 	kbase_debug_mem_view_init(filp);
 
 	kbase_debug_job_fault_context_init(kctx);
 
-	kbase_mem_pool_debugfs_init(kctx->kctx_dentry, &kctx->mem_pool);
+	kbase_mem_pool_debugfs_add(kctx->kctx_dentry, &kctx->mem_pool);
 
-	kbase_jit_debugfs_init(kctx);
+	kbase_jit_debugfs_add(kctx);
 #endif /* CONFIG_DEBUG_FS */
 
 	dev_dbg(kbdev->dev, "created base context\n");
@@ -1538,7 +1486,7 @@ static unsigned long kbase_get_unmapped_area(struct file *filp,
 	if (len > TASK_SIZE - SZ_2M)
 		return -ENOMEM;
 
-	if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+	if (kctx->is_compat)
 		return current->mm->get_unmapped_area(filp, addr, len, pgoff,
 				flags);
 
@@ -1888,12 +1836,13 @@ static ssize_t set_core_mask(struct device *dev, struct device_attribute *attr,
 						new_core_mask[2]) {
 			unsigned long flags;
 
-			spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+			spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
 
 			kbase_pm_set_debug_core_mask(kbdev, new_core_mask[0],
 					new_core_mask[1], new_core_mask[2]);
 
-			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+			spin_unlock_irqrestore(&kbdev->pm.power_change_lock,
+					flags);
 		}
 
 		return count;
@@ -2043,7 +1992,7 @@ static ssize_t set_js_timeouts(struct device *dev, struct device_attribute *attr
 		struct kbasep_js_device_data *js_data = &kbdev->js_data;
 		unsigned long flags;
 
-		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags);
 
 #define UPDATE_TIMEOUT(ticks_name, ms_name, default) do {\
 	js_data->ticks_name = timeout_ms_to_ticks(kbdev, ms_name, \
@@ -2078,7 +2027,7 @@ static ssize_t set_js_timeouts(struct device *dev, struct device_attribute *attr
 
 		kbase_js_set_timeouts(kbdev);
 
-		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags);
 
 		return count;
 	}
@@ -2227,7 +2176,7 @@ static ssize_t set_js_scheduling_period(struct device *dev,
 
 	/* Update scheduling timeouts */
 	mutex_lock(&js_data->runpool_mutex);
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	spin_lock_irqsave(&js_data->runpool_irq.lock, flags);
 
 	/* If no contexts have been scheduled since js_timeouts was last written
 	 * to, the new timeouts might not have been latched yet. So check if an
@@ -2257,7 +2206,7 @@ static ssize_t set_js_scheduling_period(struct device *dev,
 
 	kbase_js_set_timeouts(kbdev);
 
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	spin_unlock_irqrestore(&js_data->runpool_irq.lock, flags);
 	mutex_unlock(&js_data->runpool_mutex);
 
 	dev_dbg(kbdev->dev, "JS scheduling period: %dms\n",
@@ -2591,8 +2540,6 @@ static ssize_t kbase_show_gpuinfo(struct device *dev,
 		{ .id = GPU_ID_PI_TFRX, .name = "Mali-T88x" },
 		{ .id = GPU_ID2_PRODUCT_TMIX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
 		  .name = "Mali-G71" },
-		{ .id = GPU_ID2_PRODUCT_THEX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
-		  .name = "Mali-THEx" },
 	};
 	const char *product_name = "(Unknown Mali GPU)";
 	struct kbase_device *kbdev;
@@ -3085,8 +3032,7 @@ static int power_control_init(struct platform_device *pdev)
 
 #if defined(CONFIG_OF) && defined(CONFIG_PM_OPP)
 	/* Register the OPPs if they are available in device tree */
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) \
-	|| defined(LSK_OPPV2_BACKPORT)
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0))
 	err = dev_pm_opp_of_add_table(kbdev->dev);
 #elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
 	err = of_init_opp_table(kbdev->dev);
@@ -3178,48 +3124,6 @@ MAKE_QUIRK_ACCESSORS(mmu);
 
 #endif /* KBASE_GPU_RESET_EN */
 
-/**
- * debugfs_protected_debug_mode_read - "protected_debug_mode" debugfs read
- * @file: File object to read is for
- * @buf:  User buffer to populate with data
- * @len:  Length of user buffer
- * @ppos: Offset within file object
- *
- * Retrieves the current status of protected debug mode
- * (0 = disabled, 1 = enabled)
- *
- * Return: Number of bytes added to user buffer
- */
-static ssize_t debugfs_protected_debug_mode_read(struct file *file,
-				char __user *buf, size_t len, loff_t *ppos)
-{
-	struct kbase_device *kbdev = (struct kbase_device *)file->private_data;
-	u32 gpu_status;
-	ssize_t ret_val;
-
-	kbase_pm_context_active(kbdev);
-	gpu_status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS), NULL);
-	kbase_pm_context_idle(kbdev);
-
-	if (gpu_status & GPU_DBGEN)
-		ret_val = simple_read_from_buffer(buf, len, ppos, "1\n", 2);
-	else
-		ret_val = simple_read_from_buffer(buf, len, ppos, "0\n", 2);
-
-	return ret_val;
-}
-
-/*
- * struct fops_protected_debug_mode - "protected_debug_mode" debugfs fops
- *
- * Contains the file operations for the "protected_debug_mode" debugfs file
- */
-static const struct file_operations fops_protected_debug_mode = {
-	.open = simple_open,
-	.read = debugfs_protected_debug_mode_read,
-	.llseek = default_llseek,
-};
-
 static int kbase_device_debugfs_init(struct kbase_device *kbdev)
 {
 	struct dentry *debugfs_ctx_defaults_directory;
@@ -3250,9 +3154,8 @@ static int kbase_device_debugfs_init(struct kbase_device *kbdev)
 	}
 
 #if !MALI_CUSTOMER_RELEASE
-	kbasep_regs_dump_debugfs_init(kbdev);
+	kbasep_regs_dump_debugfs_add(kbdev);
 #endif /* !MALI_CUSTOMER_RELEASE */
-	kbasep_regs_history_debugfs_init(kbdev);
 
 	kbase_debug_job_fault_debugfs_init(kbdev);
 	kbasep_gpu_memory_debugfs_init(kbdev);
@@ -3279,12 +3182,6 @@ static int kbase_device_debugfs_init(struct kbase_device *kbdev)
 			debugfs_ctx_defaults_directory,
 			&kbdev->mem_pool_max_size_default);
 
-	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) {
-		debugfs_create_file("protected_debug_mode", S_IRUGO,
-				kbdev->mali_debugfs_directory, kbdev,
-				&fops_protected_debug_mode);
-	}
-
 #if KBASE_TRACE_ENABLE
 	kbasep_trace_debugfs_init(kbdev);
 #endif /* KBASE_TRACE_ENABLE */
@@ -3507,11 +3404,6 @@ static int kbase_platform_device_remove(struct platform_device *pdev)
 		kbdev->inited_subsys &= ~inited_backend_early;
 	}
 
-	if (kbdev->inited_subsys & inited_io_history) {
-		kbase_io_history_term(&kbdev->io_history);
-		kbdev->inited_subsys &= ~inited_io_history;
-	}
-
 	if (kbdev->inited_subsys & inited_power_control) {
 		power_control_term(kbdev);
 		kbdev->inited_subsys &= ~inited_power_control;
@@ -3545,10 +3437,6 @@ static void kbase_platform_device_shutdown(struct platform_device *pdev)
 	kbase_platform_rk_shutdown(kbdev);
 }
 
-/* Number of register accesses for the buffer that we allocate during
- * initialization time. The buffer size can be changed later via debugfs. */
-#define KBASEP_DEFAULT_REGISTER_HISTORY_SIZE ((u16)512)
-
 static int kbase_platform_device_probe(struct platform_device *pdev)
 {
 	struct kbase_device *kbdev;
@@ -3609,15 +3497,6 @@ static int kbase_platform_device_probe(struct platform_device *pdev)
 	}
 	kbdev->inited_subsys |= inited_power_control;
 
-	err = kbase_io_history_init(&kbdev->io_history,
-			KBASEP_DEFAULT_REGISTER_HISTORY_SIZE);
-	if (err) {
-		dev_err(&pdev->dev, "Register access history initialization failed\n");
-		kbase_platform_device_remove(pdev);
-		return -ENOMEM;
-	}
-	kbdev->inited_subsys |= inited_io_history;
-
 	err = kbase_backend_early_init(kbdev);
 	if (err) {
 		dev_err(kbdev->dev, "Early backend initialization failed\n");
@@ -3797,9 +3676,6 @@ static int kbase_platform_device_probe(struct platform_device *pdev)
 	return err;
 }
 
-#undef KBASEP_DEFAULT_REGISTER_HISTORY_SIZE
-
-
 /** Suspend callback from the OS.
  *
  * This is called by Linux when the device should suspend.
diff --git a/drivers/gpu/arm/midgard/mali_kbase_defs.h b/drivers/gpu/arm/midgard/mali_kbase_defs.h
index 845b4713ea1a..f5775bcbb248 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_defs.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_defs.h
@@ -34,7 +34,6 @@
 #include <mali_kbase_mmu_hw.h>
 #include <mali_kbase_mmu_mode.h>
 #include <mali_kbase_instr_defs.h>
-#include <mali_kbase_pm.h>
 
 #include <linux/atomic.h>
 #include <linux/mempool.h>
@@ -228,39 +227,6 @@ struct kbase_jd_atom_dependency {
 	u8 dep_type;
 };
 
-/**
- * struct kbase_io_access - holds information about 1 register access
- *
- * @addr: first bit indicates r/w (r=0, w=1)
- * @value: value written or read
- */
-struct kbase_io_access {
-	uintptr_t addr;
-	u32 value;
-};
-
-/**
- * struct kbase_io_history - keeps track of all recent register accesses
- *
- * @enabled: true if register accesses are recorded, false otherwise
- * @lock: spinlock protecting kbase_io_access array
- * @count: number of registers read/written
- * @size: number of elements in kbase_io_access array
- * @buf: array of kbase_io_access
- */
-struct kbase_io_history {
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0))
-	bool enabled;
-#else
-	u32 enabled;
-#endif
-
-	spinlock_t lock;
-	size_t count;
-	u16 size;
-	struct kbase_io_access *buf;
-};
-
 /**
  * @brief The function retrieves a read-only reference to the atom field from
  * the  kbase_jd_atom_dependency structure
@@ -335,17 +301,15 @@ enum kbase_atom_gpu_rb_state {
 	KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB,
 	/* Atom is in slot ringbuffer but is blocked on a previous atom */
 	KBASE_ATOM_GPU_RB_WAITING_BLOCKED,
-	/* Atom is in slot ringbuffer but is waiting for a previous protected
-	 * mode transition to complete */
-	KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV,
-	/* Atom is in slot ringbuffer but is waiting for proected mode
-	 * transition */
-	KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION,
+	/* Atom is in slot ringbuffer but is waiting for protected mode exit */
+	KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_EXIT,
 	/* Atom is in slot ringbuffer but is waiting for cores to become
 	 * available */
 	KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE,
 	/* Atom is in slot ringbuffer but is blocked on affinity */
 	KBASE_ATOM_GPU_RB_WAITING_AFFINITY,
+	/* Atom is in slot ringbuffer but is waiting for protected mode entry */
+	KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_ENTRY,
 	/* Atom is in slot ringbuffer and ready to run */
 	KBASE_ATOM_GPU_RB_READY,
 	/* Atom is in slot ringbuffer and has been submitted to the GPU */
@@ -355,41 +319,20 @@ enum kbase_atom_gpu_rb_state {
 	KBASE_ATOM_GPU_RB_RETURN_TO_JS
 };
 
-enum kbase_atom_enter_protected_state {
-	/*
-	 * Starting state:
-	 * Check if a transition into protected mode is required.
-	 *
-	 * NOTE: The integer value of this must
-	 *       match KBASE_ATOM_EXIT_PROTECTED_CHECK.
-	 */
-	KBASE_ATOM_ENTER_PROTECTED_CHECK = 0,
-	/* Wait for vinstr to suspend. */
-	KBASE_ATOM_ENTER_PROTECTED_VINSTR,
-	/* Wait for the L2 to become idle in preparation for
-	 * the coherency change. */
-	KBASE_ATOM_ENTER_PROTECTED_IDLE_L2,
-	/* End state;
-	 * Prepare coherency change. */
-	KBASE_ATOM_ENTER_PROTECTED_FINISHED,
-};
-
 enum kbase_atom_exit_protected_state {
 	/*
 	 * Starting state:
 	 * Check if a transition out of protected mode is required.
-	 *
-	 * NOTE: The integer value of this must
-	 *       match KBASE_ATOM_ENTER_PROTECTED_CHECK.
 	 */
-	KBASE_ATOM_EXIT_PROTECTED_CHECK = 0,
-	/* Wait for the L2 to become idle in preparation
-	 * for the reset. */
+	KBASE_ATOM_EXIT_PROTECTED_CHECK,
+	/* Wait for the L2 to become idle in preparation for the reset. */
 	KBASE_ATOM_EXIT_PROTECTED_IDLE_L2,
 	/* Issue the protected reset. */
 	KBASE_ATOM_EXIT_PROTECTED_RESET,
-	/* End state;
-	 * Wait for the reset to complete. */
+	/*
+	 * End state;
+	 * Wait for the reset to complete.
+	 */
 	KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT,
 };
 
@@ -554,13 +497,7 @@ struct kbase_jd_atom {
 	 * event_code when the atom is processed. */
 	enum base_jd_event_code will_fail_event_code;
 
-	/* Atoms will only ever be transitioning into, or out of
-	 * protected mode so we do not need two separate fields.
-	 */
-	union {
-		enum kbase_atom_enter_protected_state enter;
-		enum kbase_atom_exit_protected_state exit;
-	} protected_state;
+	enum kbase_atom_exit_protected_state exit_protected_state;
 
 	struct rb_node runnable_tree_node;
 
@@ -663,19 +600,19 @@ struct kbase_as {
 	struct work_struct work_pagefault;
 	struct work_struct work_busfault;
 	enum kbase_mmu_fault_type fault_type;
-	bool protected_mode;
 	u32 fault_status;
 	u64 fault_addr;
 	u64 fault_extra_addr;
+	struct mutex transaction_mutex;
 
 	struct kbase_mmu_setup current_setup;
 
 	/* BASE_HW_ISSUE_8316  */
 	struct workqueue_struct *poke_wq;
 	struct work_struct poke_work;
-	/** Protected by hwaccess_lock */
+	/** Protected by kbasep_js_device_data::runpool_irq::lock */
 	int poke_refcount;
-	/** Protected by hwaccess_lock */
+	/** Protected by kbasep_js_device_data::runpool_irq::lock */
 	kbase_as_poke_state poke_state;
 	struct hrtimer poke_timer;
 };
@@ -796,7 +733,8 @@ struct kbase_trace_kbdev_timeline {
 	 * But it's kept as an example of how to add global timeline tracking
 	 * information
 	 *
-	 * The caller must hold hwaccess_lock when accessing this */
+	 * The caller must hold kbasep_js_device_data::runpool_irq::lock when
+	 * accessing this */
 	u8 slot_atoms_submitted[BASE_JM_MAX_NR_SLOTS];
 
 	/* Last UID for each PM event */
@@ -805,7 +743,7 @@ struct kbase_trace_kbdev_timeline {
 	atomic_t pm_event_uid_counter;
 	/*
 	 * L2 transition state - true indicates that the transition is ongoing
-	 * Expected to be protected by hwaccess_lock */
+	 * Expected to be protected by pm.power_change_lock */
 	bool l2_transitioning;
 };
 #endif /* CONFIG_MALI_TRACE_TIMELINE */
@@ -846,6 +784,19 @@ struct kbase_pm_device_data {
 	u64 debug_core_mask[BASE_JM_MAX_NR_SLOTS];
 	u64 debug_core_mask_all;
 
+	/**
+	 * Lock protecting the power state of the device.
+	 *
+	 * This lock must be held when accessing the shader_available_bitmap,
+	 * tiler_available_bitmap, l2_available_bitmap, shader_inuse_bitmap and
+	 * tiler_inuse_bitmap fields of kbase_device, and the ca_in_transition
+	 * and shader_poweroff_pending fields of kbase_pm_device_data. It is
+	 * also held when the hardware power registers are being written to, to
+	 * ensure that two threads do not conflict over the power transitions
+	 * that the hardware should make.
+	 */
+	spinlock_t power_change_lock;
+
 	/**
 	 * Callback for initializing the runtime power management.
 	 *
@@ -1168,11 +1119,6 @@ struct kbase_device {
 	/* Total number of created contexts */
 	atomic_t ctx_num;
 
-#ifdef CONFIG_DEBUG_FS
-	/* Holds the most recent register accesses */
-	struct kbase_io_history io_history;
-#endif /* CONFIG_DEBUG_FS */
-
 	struct kbase_hwaccess_data hwaccess;
 
 	/* Count of page/bus faults waiting for workqueues to process */
@@ -1190,8 +1136,6 @@ struct kbase_device {
 #endif
 	size_t mem_pool_max_size_default;
 
-	/* current gpu coherency mode */
-	u32 current_gpu_coherency_mode;
 	/* system coherency mode  */
 	u32 system_coherency;
 	/* Flag to track when cci snoops have been enabled on the interface */
@@ -1238,11 +1182,6 @@ struct kbase_device {
 
 	/* list of inited sub systems. Used during terminate/error recovery */
 	u32 inited_subsys;
-
-	spinlock_t hwaccess_lock;
-
-	/* Protects access to MMU operations */
-	struct mutex mmu_hw_mutex;
 };
 
 /**
@@ -1253,7 +1192,7 @@ struct kbase_device {
  *                 dependencies. Atoms on this list will be moved to the
  *                 runnable_tree when the blocking atom completes.
  *
- * hwaccess_lock must be held when accessing this structure.
+ * runpool_irq.lock must be held when accessing this structure.
  */
 struct jsctx_queue {
 	struct rb_root runnable_tree;
@@ -1265,52 +1204,6 @@ struct jsctx_queue {
 					 (((minor) & 0xFFF) << 8) | \
 					 ((0 & 0xFF) << 0))
 
-/**
- * enum kbase_context_flags - Flags for kbase contexts
- *
- * @KCTX_COMPAT: Set when the context process is a compat process, 32-bit
- * process on a 64-bit kernel.
- *
- * @KCTX_RUNNABLE_REF: Set when context is counted in
- * kbdev->js_data.nr_contexts_runnable. Must hold queue_mutex when accessing.
- *
- * @KCTX_ACTIVE: Set when the context is active.
- *
- * @KCTX_PULLED: Set when last kick() caused atoms to be pulled from this
- * context.
- *
- * @KCTX_MEM_PROFILE_INITIALIZED: Set when the context's memory profile has been
- * initialized.
- *
- * @KCTX_INFINITE_CACHE: Set when infinite cache is to be enabled for new
- * allocations. Existing allocations will not change.
- *
- * @KCTX_SUBMIT_DISABLED: Set to prevent context from submitting any jobs.
- *
- * @KCTX_PRIVILEGED:Set if the context uses an address space and should be kept
- * scheduled in.
- *
- * @KCTX_SCHEDULED: Set when the context is scheduled on the Run Pool.
- * This is only ever updated whilst the jsctx_mutex is held.
- *
- * @KCTX_DYING: Set when the context process is in the process of being evicted.
- *
- * All members need to be separate bits. This enum is intended for use in a
- * bitmask where multiple values get OR-ed together.
- */
-enum kbase_context_flags {
-	KCTX_COMPAT = 1U << 0,
-	KCTX_RUNNABLE_REF = 1U << 1,
-	KCTX_ACTIVE = 1U << 2,
-	KCTX_PULLED = 1U << 3,
-	KCTX_MEM_PROFILE_INITIALIZED = 1U << 4,
-	KCTX_INFINITE_CACHE = 1U << 5,
-	KCTX_SUBMIT_DISABLED = 1U << 6,
-	KCTX_PRIVILEGED = 1U << 7,
-	KCTX_SCHEDULED = 1U << 8,
-	KCTX_DYING = 1U << 9,
-};
-
 struct kbase_context {
 	struct file *filp;
 	struct kbase_device *kbdev;
@@ -1325,7 +1218,7 @@ struct kbase_context {
 	atomic_t event_count;
 	int event_coalesce_count;
 
-	atomic_t flags;
+	bool is_compat;
 
 	atomic_t                setup_complete;
 	atomic_t                setup_in_progress;
@@ -1369,11 +1262,12 @@ struct kbase_context {
 	/** This is effectively part of the Run Pool, because it only has a valid
 	 * setting (!=KBASEP_AS_NR_INVALID) whilst the context is scheduled in
 	 *
-	 * The hwaccess_lock must be held whilst accessing this.
+	 * The kbasep_js_device_data::runpool_irq::lock must be held whilst accessing
+	 * this.
 	 *
 	 * If the context relating to this as_nr is required, you must use
 	 * kbasep_js_runpool_retain_ctx() to ensure that the context doesn't disappear
-	 * whilst you're using it. Alternatively, just hold the hwaccess_lock
+	 * whilst you're using it. Alternatively, just hold the kbasep_js_device_data::runpool_irq::lock
 	 * to ensure the context doesn't disappear (but this has restrictions on what other locks
 	 * you can take whilst doing this) */
 	int as_nr;
@@ -1399,7 +1293,8 @@ struct kbase_context {
 	size_t mem_profile_size;
 	/* Mutex guarding memory profile state */
 	struct mutex mem_profile_lock;
-	/* Memory profile directory under debugfs */
+	/* Memory profile file created */
+	bool mem_profile_initialized;
 	struct dentry *kctx_dentry;
 
 	/* for job fault debug */
@@ -1419,6 +1314,15 @@ struct kbase_context {
 	atomic_t atoms_pulled;
 	/* Number of atoms currently pulled from this context, per slot */
 	atomic_t atoms_pulled_slot[BASE_JM_MAX_NR_SLOTS];
+	/* true if last kick() caused atoms to be pulled from this context */
+	bool pulled;
+	/* true if infinite cache is to be enabled for new allocations. Existing
+	 * allocations will not change. bool stored as a u32 per Linux API */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0))
+	bool infinite_cache_active;
+#else
+	u32 infinite_cache_active;
+#endif
 	/* Bitmask of slots that can be pulled from */
 	u32 slots_pullable;
 
@@ -1432,11 +1336,17 @@ struct kbase_context {
 	struct kbase_vinstr_client *vinstr_cli;
 	struct mutex vinstr_cli_lock;
 
+	/* Must hold queue_mutex when accessing */
+	bool ctx_active;
+
 	/* List of completed jobs waiting for events to be posted */
 	struct list_head completed_jobs;
 	/* Number of work items currently pending on job_done_wq */
 	atomic_t work_count;
 
+	/* true if context is counted in kbdev->js_data.nr_contexts_runnable */
+	bool ctx_runnable_ref;
+
 	/* Waiting soft-jobs will fail when this timer expires */
 	struct timer_list soft_job_timeout;
 
diff --git a/drivers/gpu/arm/midgard/mali_kbase_device.c b/drivers/gpu/arm/midgard/mali_kbase_device.c
index 7484eec5108f..62ab0caf9858 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_device.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_device.c
@@ -82,6 +82,7 @@ static int kbase_device_as_init(struct kbase_device *kbdev, int i)
 	if (!kbdev->as[i].pf_wq)
 		return -EINVAL;
 
+	mutex_init(&kbdev->as[i].transaction_mutex);
 	INIT_WORK(&kbdev->as[i].work_pagefault, page_fault_worker);
 	INIT_WORK(&kbdev->as[i].work_busfault, bus_fault_worker);
 
@@ -150,7 +151,6 @@ int kbase_device_init(struct kbase_device * const kbdev)
 #endif /* CONFIG_ARM64 */
 
 	spin_lock_init(&kbdev->mmu_mask_change);
-	mutex_init(&kbdev->mmu_hw_mutex);
 #ifdef CONFIG_ARM64
 	kbdev->cci_snoop_enabled = false;
 	np = kbdev->dev->of_node;
diff --git a/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c b/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c
index 97bb6c5db76c..4d3836a1d4ab 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c
@@ -38,8 +38,6 @@
 /* Spin lock protecting all Mali fences as fence->lock. */
 static DEFINE_SPINLOCK(kbase_dma_fence_lock);
 
-static void
-kbase_dma_fence_work(struct work_struct *pwork);
 
 static void
 kbase_dma_fence_waiters_add(struct kbase_jd_atom *katom)
@@ -169,30 +167,9 @@ kbase_dma_fence_unlock_reservations(struct kbase_dma_fence_resv_info *info,
 	ww_acquire_fini(ctx);
 }
 
-/**
- * kbase_dma_fence_queue_work() - Queue work to handle @katom
- * @katom: Pointer to atom for which to queue work
- *
- * Queue kbase_dma_fence_work() for @katom to clean up the fence callbacks and
- * submit the atom.
- */
-static void
-kbase_dma_fence_queue_work(struct kbase_jd_atom *katom)
-{
-	struct kbase_context *kctx = katom->kctx;
-	bool ret;
-
-	INIT_WORK(&katom->work, kbase_dma_fence_work);
-	ret = queue_work(kctx->dma_fence.wq, &katom->work);
-	/* Warn if work was already queued, that should not happen. */
-	WARN_ON(!ret);
-}
-
 /**
  * kbase_dma_fence_free_callbacks - Free dma-fence callbacks on a katom
  * @katom: Pointer to katom
- * @queue_worker: Boolean indicating if fence worker is to be queued when
- *                dep_count reaches 0.
  *
  * This function will free all fence callbacks on the katom's list of
  * callbacks. Callbacks that have not yet been called, because their fence
@@ -201,7 +178,7 @@ kbase_dma_fence_queue_work(struct kbase_jd_atom *katom)
  * Locking: katom->dma_fence.callbacks list assumes jctx.lock is held.
  */
 static void
-kbase_dma_fence_free_callbacks(struct kbase_jd_atom *katom, bool queue_worker)
+kbase_dma_fence_free_callbacks(struct kbase_jd_atom *katom)
 {
 	struct kbase_dma_fence_cb *cb, *tmp;
 
@@ -214,21 +191,10 @@ kbase_dma_fence_free_callbacks(struct kbase_jd_atom *katom, bool queue_worker)
 		/* Cancel callbacks that hasn't been called yet. */
 		ret = fence_remove_callback(cb->fence, &cb->fence_cb);
 		if (ret) {
-			int ret;
-
 			/* Fence had not signaled, clean up after
 			 * canceling.
 			 */
-			ret = atomic_dec_return(&katom->dma_fence.dep_count);
-
-			if (unlikely(queue_worker && ret == 0)) {
-				/*
-				 * dep_count went to zero and queue_worker is
-				 * true. Queue the worker to handle the
-				 * completion of the katom.
-				 */
-				kbase_dma_fence_queue_work(katom);
-			}
+			atomic_dec(&katom->dma_fence.dep_count);
 		}
 
 		/*
@@ -253,7 +219,7 @@ kbase_dma_fence_cancel_atom(struct kbase_jd_atom *katom)
 	lockdep_assert_held(&katom->kctx->jctx.lock);
 
 	/* Cancel callbacks and clean up. */
-	kbase_dma_fence_free_callbacks(katom, false);
+	kbase_dma_fence_free_callbacks(katom);
 
 	KBASE_DEBUG_ASSERT(atomic_read(&katom->dma_fence.dep_count) == 0);
 
@@ -298,15 +264,9 @@ kbase_dma_fence_work(struct work_struct *pwork)
 	/* Remove atom from list of dma-fence waiting atoms. */
 	kbase_dma_fence_waiters_remove(katom);
 	/* Cleanup callbacks. */
-	kbase_dma_fence_free_callbacks(katom, false);
-	/*
-	 * Queue atom on GPU, unless it has already completed due to a failing
-	 * dependency. Run jd_done_nolock() on the katom if it is completed.
-	 */
-	if (unlikely(katom->status == KBASE_JD_ATOM_STATE_COMPLETED))
-		jd_done_nolock(katom, NULL);
-	else
-		kbase_jd_dep_clear_locked(katom);
+	kbase_dma_fence_free_callbacks(katom);
+	/* Queue atom on GPU. */
+	kbase_jd_dep_clear_locked(katom);
 
 out:
 	mutex_unlock(&ctx->lock);
@@ -372,13 +332,20 @@ kbase_dma_fence_cb(struct fence *fence, struct fence_cb *cb)
 				struct kbase_dma_fence_cb,
 				fence_cb);
 	struct kbase_jd_atom *katom = kcb->katom;
+	struct kbase_context *kctx = katom->kctx;
 
 	/* If the atom is zapped dep_count will be forced to a negative number
 	 * preventing this callback from ever scheduling work. Which in turn
 	 * would reschedule the atom.
 	 */
-	if (atomic_dec_and_test(&katom->dma_fence.dep_count))
-		kbase_dma_fence_queue_work(katom);
+	if (atomic_dec_and_test(&katom->dma_fence.dep_count)) {
+		bool ret;
+
+		INIT_WORK(&katom->work, kbase_dma_fence_work);
+		ret = queue_work(kctx->dma_fence.wq, &katom->work);
+		/* Warn if work was already queued, that should not happen. */
+		WARN_ON(!ret);
+	}
 }
 
 static int
@@ -439,7 +406,7 @@ out:
 		 * On error, cancel and clean up all callbacks that was set up
 		 * before the error.
 		 */
-		kbase_dma_fence_free_callbacks(katom, false);
+		kbase_dma_fence_free_callbacks(katom);
 	}
 
 	return err;
@@ -532,7 +499,7 @@ end:
 		/* Test if the callbacks are already triggered */
 		if (atomic_dec_and_test(&katom->dma_fence.dep_count)) {
 			atomic_set(&katom->dma_fence.dep_count, -1);
-			kbase_dma_fence_free_callbacks(katom, false);
+			kbase_dma_fence_free_callbacks(katom);
 		} else {
 			/* Add katom to the list of dma-buf fence waiting atoms
 			 * only if it is still waiting.
@@ -545,7 +512,7 @@ end:
 		 * kill it for us), signal the fence, free callbacks and the
 		 * fence.
 		 */
-		kbase_dma_fence_free_callbacks(katom, false);
+		kbase_dma_fence_free_callbacks(katom);
 		atomic_set(&katom->dma_fence.dep_count, -1);
 		kbase_dma_fence_signal(katom);
 	}
@@ -555,12 +522,10 @@ end:
 
 void kbase_dma_fence_cancel_all_atoms(struct kbase_context *kctx)
 {
-	struct list_head *list = &kctx->dma_fence.waiting_resource;
-
-	while (!list_empty(list)) {
-		struct kbase_jd_atom *katom;
+	struct kbase_jd_atom *katom, *katom_tmp;
 
-		katom = list_first_entry(list, struct kbase_jd_atom, queue);
+	list_for_each_entry_safe(katom, katom_tmp,
+				 &kctx->dma_fence.waiting_resource, queue) {
 		kbase_dma_fence_waiters_remove(katom);
 		kbase_dma_fence_cancel_atom(katom);
 	}
@@ -569,7 +534,7 @@ void kbase_dma_fence_cancel_all_atoms(struct kbase_context *kctx)
 void kbase_dma_fence_cancel_callbacks(struct kbase_jd_atom *katom)
 {
 	/* Cancel callbacks and clean up. */
-	kbase_dma_fence_free_callbacks(katom, true);
+	kbase_dma_fence_free_callbacks(katom);
 }
 
 void kbase_dma_fence_signal(struct kbase_jd_atom *katom)
@@ -584,7 +549,7 @@ void kbase_dma_fence_signal(struct kbase_jd_atom *katom)
 	fence_put(katom->dma_fence.fence);
 	katom->dma_fence.fence = NULL;
 
-	kbase_dma_fence_free_callbacks(katom, false);
+	kbase_dma_fence_free_callbacks(katom);
 }
 
 void kbase_dma_fence_term(struct kbase_context *kctx)
diff --git a/drivers/gpu/arm/midgard/mali_kbase_event.c b/drivers/gpu/arm/midgard/mali_kbase_event.c
index f07406c3d5ab..bf8c304610eb 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_event.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_event.c
@@ -180,7 +180,7 @@ void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *atom)
 		kbase_event_process_noreport(ctx, atom);
 		return;
 	}
-	kbase_tlstream_tl_attrib_atom_state(atom, TL_ATOM_STATE_POSTED);
+
 	if (atom->core_req & BASE_JD_REQ_EVENT_COALESCE) {
 		/* Don't report the event until other event(s) have completed */
 		mutex_lock(&ctx->event_mutex);
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_api.c b/drivers/gpu/arm/midgard/mali_kbase_gator_api.c
index 3292fa95fddd..4af3e4815e95 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_gator_api.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_gator_api.c
@@ -41,6 +41,7 @@ const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_counters)
 {
 	const char * const *hardware_counters;
 	struct kbase_device *kbdev;
+	uint32_t gpu_id;
 	uint32_t product_id;
 	uint32_t count;
 
@@ -52,27 +53,25 @@ const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_counters)
 	if (!kbdev)
 		return NULL;
 
-	product_id = kbdev->gpu_props.props.core_props.product_id;
+	gpu_id = kbdev->gpu_props.props.core_props.product_id;
+	product_id = gpu_id & GPU_ID_VERSION_PRODUCT_ID;
+	product_id >>= GPU_ID_VERSION_PRODUCT_ID_SHIFT;
 
 	if (GPU_ID_IS_NEW_FORMAT(product_id)) {
-		switch (GPU_ID2_MODEL_MATCH_VALUE(product_id)) {
+		switch (gpu_id & GPU_ID2_PRODUCT_MODEL) {
 		case GPU_ID2_PRODUCT_TMIX:
 			hardware_counters = hardware_counters_mali_tMIx;
 			count = ARRAY_SIZE(hardware_counters_mali_tMIx);
 			break;
-		case GPU_ID2_PRODUCT_THEX:
-			hardware_counters = hardware_counters_mali_tHEx;
-			count = ARRAY_SIZE(hardware_counters_mali_tHEx);
-			break;
 		default:
 			hardware_counters = NULL;
 			count = 0;
-			dev_err(kbdev->dev, "Unrecognized product ID: %u\n",
-				product_id);
+			dev_err(kbdev->dev, "Unrecognized gpu ID: %u\n",
+				gpu_id);
 			break;
 		}
 	} else {
-		switch (product_id) {
+		switch (gpu_id) {
 			/* If we are using a Mali-T60x device */
 		case GPU_ID_PI_T60X:
 			hardware_counters = hardware_counters_mali_t60x;
@@ -116,8 +115,8 @@ const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_counters)
 		default:
 			hardware_counters = NULL;
 			count = 0;
-			dev_err(kbdev->dev, "Unrecognized product ID: %u\n",
-				product_id);
+			dev_err(kbdev->dev, "Unrecognized gpu ID: %u\n",
+				gpu_id);
 			break;
 		}
 	}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h
index 7ec05c1c7aa3..c247dd698e19 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h
@@ -2158,7 +2158,6 @@ static const char * const hardware_counters_mali_t88x[] = {
 
 #include "mali_kbase_gator_hwcnt_names_tmix.h"
 
-#include "mali_kbase_gator_hwcnt_names_thex.h"
 
 
 #endif
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_thex.h b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_thex.h
deleted file mode 100644
index bcceef4fc9bc..000000000000
--- a/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_thex.h
+++ /dev/null
@@ -1,291 +0,0 @@
-/*
- *
- * (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
- *
- * A copy of the licence is included with the program, and can also be obtained
- * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
- * Boston, MA  02110-1301, USA.
- *
- */
-
-
-
-/*
- * This header was autogenerated, it should not be edited.
- */
-
-#ifndef _KBASE_GATOR_HWCNT_NAMES_THEX_H_
-#define _KBASE_GATOR_HWCNT_NAMES_THEX_H_
-
-static const char * const hardware_counters_mali_tHEx[] = {
-	/* Performance counters for the Job Manager */
-	"",
-	"",
-	"",
-	"",
-	"THEx_MESSAGES_SENT",
-	"THEx_MESSAGES_RECEIVED",
-	"THEx_GPU_ACTIVE",
-	"THEx_IRQ_ACTIVE",
-	"THEx_JS0_JOBS",
-	"THEx_JS0_TASKS",
-	"THEx_JS0_ACTIVE",
-	"",
-	"THEx_JS0_WAIT_READ",
-	"THEx_JS0_WAIT_ISSUE",
-	"THEx_JS0_WAIT_DEPEND",
-	"THEx_JS0_WAIT_FINISH",
-	"THEx_JS1_JOBS",
-	"THEx_JS1_TASKS",
-	"THEx_JS1_ACTIVE",
-	"",
-	"THEx_JS1_WAIT_READ",
-	"THEx_JS1_WAIT_ISSUE",
-	"THEx_JS1_WAIT_DEPEND",
-	"THEx_JS1_WAIT_FINISH",
-	"THEx_JS2_JOBS",
-	"THEx_JS2_TASKS",
-	"THEx_JS2_ACTIVE",
-	"",
-	"THEx_JS2_WAIT_READ",
-	"THEx_JS2_WAIT_ISSUE",
-	"THEx_JS2_WAIT_DEPEND",
-	"THEx_JS2_WAIT_FINISH",
-	"",
-	"",
-	"",
-	"",
-	"",
-	"",
-	"",
-	"",
-	"",
-	"",
-	"",
-	"",
-	"",
-	"",
-	"",
-	"",
-	"",
-	"",
-	"",
-	"",
-	"",
-	"",
-	"",
-	"",
-	"",
-	"",
-	"",
-	"",
-	"",
-	"",
-	"",
-	"",
-
-	/* Performance counters for the Tiler */
-	"",
-	"",
-	"",
-	"",
-	"THEx_TILER_ACTIVE",
-	"THEx_JOBS_PROCESSED",
-	"THEx_TRIANGLES",
-	"THEx_LINES",
-	"THEx_POINTS",
-	"THEx_FRONT_FACING",
-	"THEx_BACK_FACING",
-	"THEx_PRIM_VISIBLE",
-	"THEx_PRIM_CULLED",
-	"THEx_PRIM_CLIPPED",
-	"THEx_PRIM_SAT_CULLED",
-	"",
-	"",
-	"THEx_BUS_READ",
-	"",
-	"THEx_BUS_WRITE",
-	"THEx_LOADING_DESC",
-	"THEx_IDVS_POS_SHAD_REQ",
-	"THEx_IDVS_POS_SHAD_WAIT",
-	"THEx_IDVS_POS_SHAD_STALL",
-	"THEx_IDVS_POS_FIFO_FULL",
-	"THEx_PREFETCH_STALL",
-	"THEx_VCACHE_HIT",
-	"THEx_VCACHE_MISS",
-	"THEx_VCACHE_LINE_WAIT",
-	"THEx_VFETCH_POS_READ_WAIT",
-	"THEx_VFETCH_VERTEX_WAIT",
-	"THEx_VFETCH_STALL",
-	"THEx_PRIMASSY_STALL",
-	"THEx_BBOX_GEN_STALL",
-	"THEx_IDVS_VBU_HIT",
-	"THEx_IDVS_VBU_MISS",
-	"THEx_IDVS_VBU_LINE_DEALLOCATE",
-	"THEx_IDVS_VAR_SHAD_REQ",
-	"THEx_IDVS_VAR_SHAD_STALL",
-	"THEx_BINNER_STALL",
-	"THEx_ITER_STALL",
-	"THEx_COMPRESS_MISS",
-	"THEx_COMPRESS_STALL",
-	"THEx_PCACHE_HIT",
-	"THEx_PCACHE_MISS",
-	"THEx_PCACHE_MISS_STALL",
-	"THEx_PCACHE_EVICT_STALL",
-	"THEx_PMGR_PTR_WR_STALL",
-	"THEx_PMGR_PTR_RD_STALL",
-	"THEx_PMGR_CMD_WR_STALL",
-	"THEx_WRBUF_ACTIVE",
-	"THEx_WRBUF_HIT",
-	"THEx_WRBUF_MISS",
-	"THEx_WRBUF_NO_FREE_LINE_STALL",
-	"THEx_WRBUF_NO_AXI_ID_STALL",
-	"THEx_WRBUF_AXI_STALL",
-	"",
-	"",
-	"",
-	"THEx_UTLB_TRANS",
-	"THEx_UTLB_TRANS_HIT",
-	"THEx_UTLB_TRANS_STALL",
-	"THEx_UTLB_TRANS_MISS_DELAY",
-	"THEx_UTLB_MMU_REQ",
-
-	/* Performance counters for the Shader Core */
-	"",
-	"",
-	"",
-	"",
-	"THEx_FRAG_ACTIVE",
-	"THEx_FRAG_PRIMITIVES",
-	"THEx_FRAG_PRIM_RAST",
-	"THEx_FRAG_FPK_ACTIVE",
-	"THEx_FRAG_STARVING",
-	"THEx_FRAG_WARPS",
-	"THEx_FRAG_PARTIAL_WARPS",
-	"THEx_FRAG_QUADS_RAST",
-	"THEx_FRAG_QUADS_EZS_TEST",
-	"THEx_FRAG_QUADS_EZS_UPDATE",
-	"THEx_FRAG_QUADS_EZS_KILL",
-	"THEx_FRAG_LZS_TEST",
-	"THEx_FRAG_LZS_KILL",
-	"",
-	"THEx_FRAG_PTILES",
-	"THEx_FRAG_TRANS_ELIM",
-	"THEx_QUAD_FPK_KILLER",
-	"",
-	"THEx_COMPUTE_ACTIVE",
-	"THEx_COMPUTE_TASKS",
-	"THEx_COMPUTE_WARPS",
-	"THEx_COMPUTE_STARVING",
-	"THEx_EXEC_CORE_ACTIVE",
-	"THEx_EXEC_ACTIVE",
-	"THEx_EXEC_INSTR_COUNT",
-	"THEx_EXEC_INSTR_DIVERGED",
-	"THEx_EXEC_INSTR_STARVING",
-	"THEx_ARITH_INSTR_SINGLE_FMA",
-	"THEx_ARITH_INSTR_DOUBLE",
-	"THEx_ARITH_INSTR_MSG",
-	"THEx_ARITH_INSTR_MSG_ONLY",
-	"THEx_TEX_INSTR",
-	"THEx_TEX_INSTR_MIPMAP",
-	"THEx_TEX_INSTR_COMPRESSED",
-	"THEx_TEX_INSTR_3D",
-	"THEx_TEX_INSTR_TRILINEAR",
-	"THEx_TEX_COORD_ISSUE",
-	"THEx_TEX_COORD_STALL",
-	"THEx_TEX_STARVE_CACHE",
-	"THEx_TEX_STARVE_FILTER",
-	"THEx_LS_MEM_READ_FULL",
-	"THEx_LS_MEM_READ_SHORT",
-	"THEx_LS_MEM_WRITE_FULL",
-	"THEx_LS_MEM_WRITE_SHORT",
-	"THEx_LS_MEM_ATOMIC",
-	"THEx_VARY_INSTR",
-	"THEx_VARY_SLOT_32",
-	"THEx_VARY_SLOT_16",
-	"THEx_ATTR_INSTR",
-	"THEx_ARITH_INSTR_FP_MUL",
-	"THEx_BEATS_RD_FTC",
-	"THEx_BEATS_RD_FTC_EXT",
-	"THEx_BEATS_RD_LSC",
-	"THEx_BEATS_RD_LSC_EXT",
-	"THEx_BEATS_RD_TEX",
-	"THEx_BEATS_RD_TEX_EXT",
-	"THEx_BEATS_RD_OTHER",
-	"THEx_BEATS_WR_LSC",
-	"THEx_BEATS_WR_TIB",
-	"",
-
-	/* Performance counters for the Memory System */
-	"",
-	"",
-	"",
-	"",
-	"THEx_MMU_REQUESTS",
-	"",
-	"",
-	"",
-	"",
-	"",
-	"",
-	"",
-	"",
-	"",
-	"",
-	"",
-	"THEx_L2_RD_MSG_IN",
-	"THEx_L2_RD_MSG_IN_STALL",
-	"THEx_L2_WR_MSG_IN",
-	"THEx_L2_WR_MSG_IN_STALL",
-	"THEx_L2_SNP_MSG_IN",
-	"THEx_L2_SNP_MSG_IN_STALL",
-	"THEx_L2_RD_MSG_OUT",
-	"THEx_L2_RD_MSG_OUT_STALL",
-	"THEx_L2_WR_MSG_OUT",
-	"THEx_L2_ANY_LOOKUP",
-	"THEx_L2_READ_LOOKUP",
-	"THEx_L2_WRITE_LOOKUP",
-	"THEx_L2_EXT_SNOOP_LOOKUP",
-	"THEx_L2_EXT_READ",
-	"THEx_L2_EXT_READ_NOSNP",
-	"THEx_L2_EXT_READ_UNIQUE",
-	"THEx_L2_EXT_READ_BEATS",
-	"THEx_L2_EXT_AR_STALL",
-	"THEx_L2_EXT_AR_CNT_Q1",
-	"THEx_L2_EXT_AR_CNT_Q2",
-	"THEx_L2_EXT_AR_CNT_Q3",
-	"THEx_L2_EXT_RRESP_0_127",
-	"THEx_L2_EXT_RRESP_128_191",
-	"THEx_L2_EXT_RRESP_192_255",
-	"THEx_L2_EXT_RRESP_256_319",
-	"THEx_L2_EXT_RRESP_320_383",
-	"THEx_L2_EXT_WRITE",
-	"THEx_L2_EXT_WRITE_NOSNP_FULL",
-	"THEx_L2_EXT_WRITE_NOSNP_PTL",
-	"THEx_L2_EXT_WRITE_SNP_FULL",
-	"THEx_L2_EXT_WRITE_SNP_PTL",
-	"THEx_L2_EXT_WRITE_BEATS",
-	"THEx_L2_EXT_W_STALL",
-	"THEx_L2_EXT_AW_CNT_Q1",
-	"THEx_L2_EXT_AW_CNT_Q2",
-	"THEx_L2_EXT_AW_CNT_Q3",
-	"THEx_L2_EXT_SNOOP",
-	"THEx_L2_EXT_SNOOP_STALL",
-	"THEx_L2_EXT_SNOOP_RESP_CLEAN",
-	"THEx_L2_EXT_SNOOP_RESP_DATA",
-	"THEx_L2_EXT_SNOOP_INTERNAL",
-	"",
-	"",
-	"",
-	"",
-	"",
-	"",
-	"",
-};
-
-#endif /* _KBASE_GATOR_HWCNT_NAMES_THEX_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h b/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h
index a3377b27d487..a962ecb3f9c6 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h
@@ -96,7 +96,6 @@
 		    GPU_ID2_PRODUCT_MODEL)
 
 #define GPU_ID2_PRODUCT_TMIX              GPU_ID2_MODEL_MAKE(6, 0)
-#define GPU_ID2_PRODUCT_THEX              GPU_ID2_MODEL_MAKE(6, 1)
 
 /* Values for GPU_ID_VERSION_STATUS field for PRODUCT_ID GPU_ID_PI_T60X */
 #define GPU_ID_S_15DEV0                   0x1
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hw.c b/drivers/gpu/arm/midgard/mali_kbase_hw.c
index 1d7e5e9b2c18..de2461fb8de4 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_hw.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_hw.c
@@ -42,9 +42,6 @@ void kbase_hw_set_features_mask(struct kbase_device *kbdev)
 		case GPU_ID2_PRODUCT_TMIX:
 			features = base_hw_features_tMIx;
 			break;
-		case GPU_ID2_PRODUCT_THEX:
-			features = base_hw_features_tHEx;
-			break;
 		default:
 			features = base_hw_features_generic;
 			break;
@@ -109,9 +106,6 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev)
 				if ((gpu_id & GPU_ID2_PRODUCT_MODEL) ==
 							GPU_ID2_PRODUCT_TMIX) {
 					issues = base_hw_issues_tMIx_r0p0;
-				} else if ((gpu_id & GPU_ID2_PRODUCT_MODEL) ==
-							GPU_ID2_PRODUCT_THEX) {
-					issues = base_hw_issues_tHEx_r0p0;
 				} else {
 					dev_err(kbdev->dev,
 						"Unknown GPU ID %x", gpu_id);
@@ -221,9 +215,6 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev)
 			case GPU_ID2_PRODUCT_TMIX:
 				issues = base_hw_issues_model_tMIx;
 				break;
-			case GPU_ID2_PRODUCT_THEX:
-				issues = base_hw_issues_model_tHEx;
-				break;
 			default:
 				dev_err(kbdev->dev,
 					"Unknown GPU ID %x", gpu_id);
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h
index 0acf297192fd..261453e8f1ac 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014, 2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -26,7 +26,8 @@
 
 #include <mali_kbase_jm_defs.h>
 
-/* The hwaccess_lock (a spinlock) must be held when accessing this structure */
+/* The kbasep_js_device_data::runpool_irq::lock (a spinlock) must be held when
+ * accessing this structure */
 struct kbase_hwaccess_data {
 	struct kbase_context *active_kctx;
 
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h
index c2c3909caa7b..abe66078029f 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h
@@ -33,17 +33,6 @@
 void kbase_backend_run_atom(struct kbase_device *kbdev,
 				struct kbase_jd_atom *katom);
 
-/**
- * kbase_backend_slot_update - Update state based on slot ringbuffers
- *
- * @kbdev:  Device pointer
- *
- * Inspect the jobs in the slot ringbuffers and update state.
- *
- * This will cause jobs to be submitted to hardware if they are unblocked
- */
-void kbase_backend_slot_update(struct kbase_device *kbdev);
-
 /**
  * kbase_backend_find_free_address_space() - Find a free address space.
  * @kbdev:	Device pointer
@@ -99,7 +88,7 @@ bool kbase_backend_use_ctx(struct kbase_device *kbdev,
  * the context is not scheduled, then kbase_gpu_use_ctx() should be used
  * instead.
  *
- * Caller must hold hwaccess_lock
+ * Caller must hold runpool_irq.lock
  *
  * Return: true if context is now active, false otherwise (ie if context does
  *	   not have an address space assigned)
@@ -113,7 +102,7 @@ bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev,
  * @kbdev: Device pointer
  * @kctx:  Context pointer
  *
- * Caller must hold kbase_device->mmu_hw_mutex and hwaccess_lock
+ * Caller must hold as->transaction_mutex and runpool_irq.lock
  */
 void kbase_backend_release_ctx_irq(struct kbase_device *kbdev,
 				struct kbase_context *kctx);
@@ -124,7 +113,7 @@ void kbase_backend_release_ctx_irq(struct kbase_device *kbdev,
  * @kbdev: Device pointer
  * @kctx:  Context pointer
  *
- * Caller must hold kbase_device->mmu_hw_mutex
+ * Caller must hold as->transaction_mutex
  *
  * This function must perform any operations that could not be performed in IRQ
  * context by kbase_backend_release_ctx_irq().
diff --git a/drivers/gpu/arm/midgard/mali_kbase_jd.c b/drivers/gpu/arm/midgard/mali_kbase_jd.c
index 81952e2d146f..3e0a5892cc7a 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_jd.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_jd.c
@@ -61,7 +61,7 @@ static void __user *
 get_compat_pointer(struct kbase_context *kctx, const union kbase_pointer *p)
 {
 #ifdef CONFIG_COMPAT
-	if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+	if (kctx->is_compat)
 		return compat_ptr(p->compat_value);
 #endif
 	return p->value;
@@ -581,20 +581,24 @@ static inline void jd_resolve_dep(struct list_head *out_list,
 			dep_count = atomic_read(&dep_atom->dma_fence.dep_count);
 			if (likely(dep_count == -1)) {
 				dep_satisfied = true;
-			} else {
+			} else if (dep_count == 0) {
 				/*
-				 * There are either still active callbacks, or
-				 * all fences for this @dep_atom has signaled,
-				 * but the worker that will queue the atom has
-				 * not yet run.
+				 * All fences for this atom have signaled, but
+				 * the worker that will queue the atom has not
+				 * yet run.
 				 *
-				 * Wait for the fences to signal and the fence
-				 * worker to run and handle @dep_atom. If
-				 * @dep_atom was completed due to error on
-				 * @katom, then the fence worker will pick up
-				 * the complete status and error code set on
-				 * @dep_atom above.
+				 * Mark the atom as handled by setting
+				 * dep_count to -1 so that the worker doesn't
+				 * queue the atom again.
+				 */
+				atomic_set(&dep_atom->dma_fence.dep_count, -1);
+				/*
+				 * Remove the atom from the list of dma-fence
+				 * waiting atoms.
 				 */
+				kbase_dma_fence_waiters_remove(dep_atom);
+				dep_satisfied = true;
+			} else {
 				dep_satisfied = false;
 			}
 #endif /* CONFIG_MALI_DMA_FENCE */
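
The restored branch above treats dma_fence.dep_count as a three-state counter: -1 means the dependency is already satisfied, 0 means every fence has signaled but the queuing worker has not yet run, and any positive value means callbacks are still outstanding. A minimal standalone C sketch of that decision, using C11 atomics in place of the kernel's atomic_t and hypothetical model_* names:

#include <stdatomic.h>
#include <stdbool.h>

/*
 * Simplified model of the dep_count states handled above:
 *   -1  dependency already satisfied (no fence callbacks outstanding)
 *    0  all fences signaled, but the queuing worker has not run yet
 *   >0  fences still pending, dependency not satisfied
 */
struct model_dep {
	atomic_int dep_count;
	bool on_waiter_list;
};

static bool model_resolve_dep(struct model_dep *dep)
{
	int count = atomic_load(&dep->dep_count);

	if (count == -1)
		return true;		/* already satisfied */

	if (count == 0) {
		/* Claim the atom so the fence worker cannot queue it again,
		 * then drop it from the waiter list, as the r13p0 path does. */
		atomic_store(&dep->dep_count, -1);
		dep->on_waiter_list = false;
		return true;
	}

	return false;			/* callbacks still outstanding */
}
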
@@ -661,40 +665,6 @@ static void jd_check_force_failure(struct kbase_jd_atom *katom)
 }
 #endif
 
-/**
- * is_dep_valid - Validate that a dependency is valid for early dependency
- *                submission
- * @katom: Dependency atom to validate
- *
- * A dependency is valid if any of the following are true :
- * - It does not exist (a non-existent dependency does not block submission)
- * - It is in the job scheduler
- * - It has completed, does not have a failure event code, and has not been
- *   marked to fail in the future
- *
- * Return: true if valid, false otherwise
- */
-static bool is_dep_valid(struct kbase_jd_atom *katom)
-{
-	/* If there's no dependency then this is 'valid' from the perspective of
-	 * early dependency submission */
-	if (!katom)
-		return true;
-
-	/* Dependency must have reached the job scheduler */
-	if (katom->status < KBASE_JD_ATOM_STATE_IN_JS)
-		return false;
-
-	/* If dependency has completed and has failed or will fail then it is
-	 * not valid */
-	if (katom->status >= KBASE_JD_ATOM_STATE_HW_COMPLETED &&
-			(katom->event_code != BASE_JD_EVENT_DONE ||
-			katom->will_fail_event_code))
-		return false;
-
-	return true;
-}
-
 static void jd_try_submitting_deps(struct list_head *out_list,
 		struct kbase_jd_atom *node)
 {
@@ -709,41 +679,14 @@ static void jd_try_submitting_deps(struct list_head *out_list,
 
 			if (IS_GPU_ATOM(dep_atom) && !dep_atom->in_jd_list) {
 				/*Check if atom deps look sane*/
-				bool dep0_valid = is_dep_valid(
-						dep_atom->dep[0].atom);
-				bool dep1_valid = is_dep_valid(
-						dep_atom->dep[1].atom);
-				bool dep_satisfied = true;
-#ifdef CONFIG_MALI_DMA_FENCE
-				int dep_count;
-
-				dep_count = atomic_read(
-						&dep_atom->dma_fence.dep_count);
-				if (likely(dep_count == -1)) {
-					dep_satisfied = true;
-				} else {
-				/*
-				 * There are either still active callbacks, or
-				 * all fences for this @dep_atom has signaled,
-				 * but the worker that will queue the atom has
-				 * not yet run.
-				 *
-				 * Wait for the fences to signal and the fence
-				 * worker to run and handle @dep_atom. If
-				 * @dep_atom was completed due to error on
-				 * @katom, then the fence worker will pick up
-				 * the complete status and error code set on
-				 * @dep_atom above.
-				 */
-					dep_satisfied = false;
-				}
-#endif /* CONFIG_MALI_DMA_FENCE */
-#ifdef CONFIG_KDS
-				dep_satisfied = dep_satisfied &&
-						dep_atom->kds_dep_satisfied;
-#endif
-
-				if (dep0_valid && dep1_valid && dep_satisfied) {
+				bool dep0_valid = !dep_atom->dep[0].atom ||
+						(dep_atom->dep[0].atom->status
+						>= KBASE_JD_ATOM_STATE_IN_JS);
+				bool dep1_valid = !dep_atom->dep[1].atom ||
+						(dep_atom->dep[1].atom->status
+						>= KBASE_JD_ATOM_STATE_IN_JS);
+
+				if (dep0_valid && dep1_valid) {
 					dep_atom->in_jd_list = true;
 					list_add(&dep_atom->jd_item, out_list);
 				}
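
For early dependency submission, the restored code above accepts a dependency as "sane" when it either does not exist or has already reached the job scheduler; the reverted r14p0 helper additionally rejected completed-but-failed dependencies. A minimal sketch of the restored rule, with illustrative model_* names and an atom state enum ordered as in the driver's lifecycle:

#include <stdbool.h>
#include <stddef.h>

enum model_atom_state {
	MODEL_ATOM_UNUSED,
	MODEL_ATOM_QUEUED,
	MODEL_ATOM_IN_JS,		/* atom has reached the job scheduler */
	MODEL_ATOM_HW_COMPLETED,
	MODEL_ATOM_COMPLETED,
};

struct model_atom {
	enum model_atom_state status;
};

/* A missing dependency never blocks early submission; an existing one must
 * at least have reached the job scheduler. */
static bool model_dep_ok(const struct model_atom *dep)
{
	return dep == NULL || dep->status >= MODEL_ATOM_IN_JS;
}

static bool model_can_submit_early(const struct model_atom *dep0,
				   const struct model_atom *dep1)
{
	return model_dep_ok(dep0) && model_dep_ok(dep1);
}
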
@@ -815,7 +758,7 @@ bool jd_done_nolock(struct kbase_jd_atom *katom,
 
 		for (i = 0; i < 2; i++)
 			jd_resolve_dep(&runnable_jobs, katom, i,
-					kbase_ctx_flag(kctx, KCTX_DYING));
+					kctx->jctx.sched_info.ctx.is_dying);
 
 		if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES)
 			kbase_jd_post_external_resources(katom);
@@ -831,7 +774,7 @@ bool jd_done_nolock(struct kbase_jd_atom *katom,
 			KBASE_DEBUG_ASSERT(node->status != KBASE_JD_ATOM_STATE_UNUSED);
 
 			if (node->status != KBASE_JD_ATOM_STATE_COMPLETED &&
-					!kbase_ctx_flag(kctx, KCTX_DYING)) {
+					!kctx->jctx.sched_info.ctx.is_dying) {
 				need_to_try_schedule_context |= jd_run_atom(node);
 			} else {
 				node->event_code = katom->event_code;
@@ -976,10 +919,7 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us
 	katom->x_pre_dep = NULL;
 	katom->x_post_dep = NULL;
 	katom->will_fail_event_code = BASE_JD_EVENT_NOT_STARTED;
-
-	/* Implicitly sets katom->protected_state.enter as well. */
-	katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK;
-
+	katom->exit_protected_state = KBASE_ATOM_EXIT_PROTECTED_CHECK;
 	katom->age = kctx->age_count++;
 
 	INIT_LIST_HEAD(&katom->jd_item);
@@ -993,8 +933,6 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us
 	atomic_set(&katom->dma_fence.dep_count, -1);
 #endif
 
-	kbase_tlstream_tl_attrib_atom_state(katom, TL_ATOM_STATE_IDLE);
-
 	/* Don't do anything if there is a mess up with dependencies.
 	   This is done in a separate cycle to check both the dependencies at ones, otherwise
 	   it will be extra complexity to deal with 1st dependency ( just added to the list )
@@ -1099,17 +1037,10 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us
 		katom->status = KBASE_JD_ATOM_STATE_QUEUED;
 	}
 
-	/* For invalid priority, be most lenient and choose the default */
-	sched_prio = kbasep_js_atom_prio_to_sched_prio(user_atom->prio);
-	if (sched_prio == KBASE_JS_ATOM_SCHED_PRIO_INVALID)
-		sched_prio = KBASE_JS_ATOM_SCHED_PRIO_DEFAULT;
-	katom->sched_priority = sched_prio;
-
 	/* Create a new atom recording all dependencies it was set up with. */
 	kbase_tlstream_tl_new_atom(
 			katom,
 			kbase_jd_atom_id(kctx, katom));
-	kbase_tlstream_tl_attrib_atom_priority(katom, katom->sched_priority);
 	kbase_tlstream_tl_ret_atom_ctx(katom, kctx);
 	for (i = 0; i < 2; i++)
 		if (BASE_JD_DEP_TYPE_INVALID != kbase_jd_katom_dep_type(
@@ -1161,6 +1092,12 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us
 		goto out;
 	}
 
+	/* For invalid priority, be most lenient and choose the default */
+	sched_prio = kbasep_js_atom_prio_to_sched_prio(user_atom->prio);
+	if (sched_prio == KBASE_JS_ATOM_SCHED_PRIO_INVALID)
+		sched_prio = KBASE_JS_ATOM_SCHED_PRIO_DEFAULT;
+	katom->sched_priority = sched_prio;
+
 	if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) {
 		/* handle what we need to do to access the external resources */
 		if (kbase_jd_pre_external_resources(katom, user_atom) != 0) {
@@ -1275,7 +1212,7 @@ int kbase_jd_submit(struct kbase_context *kctx,
 
 	beenthere(kctx, "%s", "Enter");
 
-	if (kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) {
+	if ((kctx->jctx.sched_info.ctx.flags & KBASE_CTX_FLAG_SUBMIT_DISABLED) != 0) {
 		dev_err(kbdev->dev, "Attempt to submit to a context that has SUBMIT_DISABLED set on it");
 		return -EINVAL;
 	}
@@ -1437,6 +1374,7 @@ void kbase_jd_done_worker(struct work_struct *data)
 	struct kbasep_js_device_data *js_devdata;
 	u64 cache_jc = katom->jc;
 	struct kbasep_js_atom_retained_state katom_retained_state;
+	bool schedule = false;
 	bool context_idle;
 	base_jd_core_req core_req = katom->core_req;
 	u64 affinity = katom->affinity;
@@ -1459,7 +1397,6 @@ void kbase_jd_done_worker(struct work_struct *data)
 	 * Begin transaction on JD context and JS context
 	 */
 	mutex_lock(&jctx->lock);
-	kbase_tlstream_tl_attrib_atom_state(katom, TL_ATOM_STATE_DONE);
 	mutex_lock(&js_devdata->queue_mutex);
 	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
 
@@ -1467,7 +1404,7 @@ void kbase_jd_done_worker(struct work_struct *data)
 	 * because it only happens in response to an IRQ from a job that was
 	 * running.
 	 */
-	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled);
 
 	if (katom->event_code == BASE_JD_EVENT_STOPPED) {
 		/* Atom has been promoted to stopped */
@@ -1476,12 +1413,12 @@ void kbase_jd_done_worker(struct work_struct *data)
 		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
 		mutex_unlock(&js_devdata->queue_mutex);
 
-		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
 
 		katom->status = KBASE_JD_ATOM_STATE_IN_JS;
 		kbase_js_unpull(kctx, katom);
 
-		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
 		mutex_unlock(&jctx->lock);
 
 		return;
@@ -1499,6 +1436,19 @@ void kbase_jd_done_worker(struct work_struct *data)
 	/* Retain state before the katom disappears */
 	kbasep_js_atom_retained_state_copy(&katom_retained_state, katom);
 
+	if (!kbasep_js_has_atom_finished(&katom_retained_state)) {
+		mutex_lock(&js_devdata->runpool_mutex);
+		kbasep_js_clear_job_retry_submit(katom);
+		/* An atom that has been hard-stopped might have previously
+		 * been soft-stopped and has just finished before the hard-stop
+		 * occurred. For this reason, clear the hard-stopped flag */
+		katom->atom_flags &= ~(KBASE_KATOM_FLAG_BEEN_HARD_STOPPED);
+		mutex_unlock(&js_devdata->runpool_mutex);
+	}
+
+	if (kbasep_js_has_atom_finished(&katom_retained_state))
+		schedule = true;
+
 	context_idle = kbase_js_complete_atom_wq(kctx, katom);
 
 	KBASE_DEBUG_ASSERT(kbasep_js_has_atom_finished(&katom_retained_state));
@@ -1508,53 +1458,54 @@ void kbase_jd_done_worker(struct work_struct *data)
 	mutex_unlock(&js_devdata->queue_mutex);
 	katom->atom_flags &= ~KBASE_KATOM_FLAG_HOLDING_CTX_REF;
 	/* jd_done_nolock() requires the jsctx_mutex lock to be dropped */
-	jd_done_nolock(katom, &kctx->completed_jobs);
+	schedule |= jd_done_nolock(katom, &kctx->completed_jobs);
 
 	/* katom may have been freed now, do not use! */
 
 	if (context_idle) {
 		unsigned long flags;
 
-		context_idle = false;
 		mutex_lock(&js_devdata->queue_mutex);
-		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
 
 		/* If kbase_sched() has scheduled this context back in then
-		 * KCTX_ACTIVE will have been set after we marked it as
-		 * inactive, and another pm reference will have been taken, so
-		 * drop our reference. But do not call kbase_jm_idle_ctx(), as
-		 * the context is active and fast-starting is allowed.
+		 * ctx_active will have been set after we marked it as inactive,
+		 * and another pm reference will have been taken, so drop our
+		 * reference. But do not call kbase_jm_idle_ctx(), as the
+		 * context is active and fast-starting is allowed.
 		 *
 		 * If an atom has been fast-started then kctx->atoms_pulled will
-		 * be non-zero but KCTX_ACTIVE will still be false (as the
+		 * be non-zero but ctx_active will still be false (as the
 		 * previous pm reference has been inherited). Do NOT drop our
 		 * reference, as it has been re-used, and leave the context as
 		 * active.
 		 *
-		 * If no new atoms have been started then KCTX_ACTIVE will still
+		 * If no new atoms have been started then ctx_active will still
 		 * be false and atoms_pulled will be zero, so drop the reference
 		 * and call kbase_jm_idle_ctx().
 		 *
 		 * As the checks are done under both the queue_mutex and
-		 * hwaccess_lock is should be impossible for this to race
+		 * runpool_irq.lock it should be impossible for this to race
 		 * with the scheduler code.
 		 */
-		if (kbase_ctx_flag(kctx, KCTX_ACTIVE) ||
-		    !atomic_read(&kctx->atoms_pulled)) {
+		if (kctx->ctx_active || !atomic_read(&kctx->atoms_pulled)) {
 			/* Calling kbase_jm_idle_ctx() here will ensure that
 			 * atoms are not fast-started when we drop the
-			 * hwaccess_lock. This is not performed if
-			 * KCTX_ACTIVE is set as in that case another pm
-			 * reference has been taken and a fast-start would be
-			 * valid.
+			 * runpool_irq.lock. This is not performed if ctx_active
+			 * is set as in that case another pm reference has been
+			 * taken and a fast-start would be valid.
 			 */
-			if (!kbase_ctx_flag(kctx, KCTX_ACTIVE))
+			if (!kctx->ctx_active)
 				kbase_jm_idle_ctx(kbdev, kctx);
-			context_idle = true;
+			spin_unlock_irqrestore(&js_devdata->runpool_irq.lock,
+					flags);
+
+			kbase_pm_context_idle(kbdev);
 		} else {
-			kbase_ctx_flag_set(kctx, KCTX_ACTIVE);
+			kctx->ctx_active = true;
+			spin_unlock_irqrestore(&js_devdata->runpool_irq.lock,
+					flags);
 		}
-		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 		mutex_unlock(&js_devdata->queue_mutex);
 	}
 
@@ -1568,7 +1519,8 @@ void kbase_jd_done_worker(struct work_struct *data)
 
 	kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx, &katom_retained_state);
 
-	kbase_js_sched_all(kbdev);
+	if (schedule)
+		kbase_js_sched_all(kbdev);
 
 	if (!atomic_dec_return(&kctx->work_count)) {
 		/* If worker now idle then post all events that jd_done_nolock()
@@ -1588,9 +1540,6 @@ void kbase_jd_done_worker(struct work_struct *data)
 	kbase_backend_complete_wq_post_sched(kbdev, core_req, affinity,
 			coreref_state);
 
-	if (context_idle)
-		kbase_pm_context_idle(kbdev);
-
 	KBASE_TRACE_ADD(kbdev, JD_DONE_WORKER_END, kctx, NULL, cache_jc, 0);
 }
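
The context_idle handling restored above boils down to one decision made under the scheduler locks: drop the inherited power-management reference when the context is already active again or nothing was fast-started, otherwise keep the reference and mark the context active. A standalone sketch of that decision with hypothetical model_* names (locking omitted):

#include <stdbool.h>

struct model_ctx {
	bool ctx_active;	/* context holds its own PM reference */
	int atoms_pulled;	/* atoms currently pulled by the scheduler */
};

/* Returns true when the worker should release the PM reference it inherited
 * from the now-idle context. */
static bool model_should_drop_pm_ref(struct model_ctx *ctx)
{
	if (ctx->ctx_active || ctx->atoms_pulled == 0) {
		/* Either the scheduler took a fresh reference (ctx_active) or
		 * no atom was fast-started, so the inherited reference can be
		 * released; the real code also idles the context in the job
		 * manager when it is not active. */
		return true;
	}

	/* An atom was fast-started and re-used the reference: keep it and
	 * record that the context is active again. */
	ctx->ctx_active = true;
	return false;
}
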
 
@@ -1633,7 +1582,7 @@ static void jd_cancel_worker(struct work_struct *data)
 	 * any), nor must we try to schedule out the context (it's already
 	 * scheduled out).
 	 */
-	KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+	KBASE_DEBUG_ASSERT(!js_kctx_info->ctx.is_scheduled);
 
 	/* Scheduler: Remove the job from the system */
 	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
@@ -1671,7 +1620,7 @@ static void jd_cancel_worker(struct work_struct *data)
  *
  * Context:
  *   This can be called safely from atomic context.
- *   The caller must hold kbdev->hwaccess_lock
+ *   The caller must hold kbasep_js_device_data.runpool_irq.lock
  */
 void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr,
 		ktime_t *end_timestamp, kbasep_js_atom_done_code done_code)
@@ -1726,7 +1675,7 @@ void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom)
 	KBASE_TRACE_ADD(kbdev, JD_CANCEL, kctx, katom, katom->jc, 0);
 
 	/* This should only be done from a context that is not scheduled */
-	KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+	KBASE_DEBUG_ASSERT(!js_kctx_info->ctx.is_scheduled);
 
 	WARN_ON(work_pending(&katom->work));
 
diff --git a/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c b/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c
index 6437e4212a29..0cf75f59c282 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -57,7 +57,7 @@ static int kbasep_jd_debugfs_atoms_show(struct seq_file *sfile, void *data)
 	/* General atom states */
 	mutex_lock(&kctx->jctx.lock);
 	/* JS-related states */
-	spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, irq_flags);
+	spin_lock_irqsave(&kctx->kbdev->js_data.runpool_irq.lock, irq_flags);
 	for (i = 0; i != BASE_JD_ATOM_COUNT; ++i) {
 		struct kbase_jd_atom *atom = &atoms[i];
 		s64 start_timestamp = 0;
@@ -84,7 +84,7 @@ static int kbasep_jd_debugfs_atoms_show(struct seq_file *sfile, void *data)
 					atom->time_spent_us * 1000 : start_timestamp)
 				);
 	}
-	spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, irq_flags);
+	spin_unlock_irqrestore(&kctx->kbdev->js_data.runpool_irq.lock, irq_flags);
 	mutex_unlock(&kctx->jctx.lock);
 
 	return 0;
@@ -110,7 +110,7 @@ static const struct file_operations kbasep_jd_debugfs_atoms_fops = {
 	.release = single_release,
 };
 
-void kbasep_jd_debugfs_ctx_init(struct kbase_context *kctx)
+void kbasep_jd_debugfs_ctx_add(struct kbase_context *kctx)
 {
 	KBASE_DEBUG_ASSERT(kctx != NULL);
 
diff --git a/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h b/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h
index 090f81651eb5..bc1878f60e8e 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h
@@ -30,10 +30,10 @@
 #define MALI_JD_DEBUGFS_VERSION 1
 
 /**
- * kbasep_jd_debugfs_ctx_init() - Add debugfs entries for JD system
+ * kbasep_jd_debugfs_ctx_add() - Add debugfs entries for JD system
  *
  * @kctx Pointer to kbase_context
  */
-void kbasep_jd_debugfs_ctx_init(struct kbase_context *kctx);
+void kbasep_jd_debugfs_ctx_add(struct kbase_context *kctx);
 
 #endif  /*_KBASE_JD_DEBUGFS_H*/
diff --git a/drivers/gpu/arm/midgard/mali_kbase_jm.c b/drivers/gpu/arm/midgard/mali_kbase_jm.c
index 0c5c6a6f78cb..63425322452b 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_jm.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_jm.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -60,7 +60,7 @@ u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask)
 {
 	u32 ret_mask = 0;
 
-	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
 
 	while (js_mask) {
 		int js = ffs(js_mask) - 1;
@@ -79,7 +79,7 @@ void kbase_jm_try_kick(struct kbase_device *kbdev, u32 js_mask)
 {
 	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
 
-	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&js_devdata->runpool_irq.lock);
 
 	if (!down_trylock(&js_devdata->schedule_sem)) {
 		kbase_jm_kick(kbdev, js_mask);
@@ -91,7 +91,7 @@ void kbase_jm_try_kick_all(struct kbase_device *kbdev)
 {
 	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
 
-	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&js_devdata->runpool_irq.lock);
 
 	if (!down_trylock(&js_devdata->schedule_sem)) {
 		kbase_jm_kick_all(kbdev);
@@ -101,31 +101,30 @@ void kbase_jm_try_kick_all(struct kbase_device *kbdev)
 
 void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
 {
-	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
 
 	if (kbdev->hwaccess.active_kctx == kctx)
 		kbdev->hwaccess.active_kctx = NULL;
 }
 
-struct kbase_jd_atom *kbase_jm_return_atom_to_js(struct kbase_device *kbdev,
+void kbase_jm_return_atom_to_js(struct kbase_device *kbdev,
 				struct kbase_jd_atom *katom)
 {
-	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
 
 	if (katom->event_code != BASE_JD_EVENT_STOPPED &&
 			katom->event_code != BASE_JD_EVENT_REMOVED_FROM_NEXT) {
-		return kbase_js_complete_atom(katom, NULL);
+		kbase_js_complete_atom(katom, NULL);
 	} else {
 		kbase_js_unpull(katom->kctx, katom);
-		return NULL;
 	}
 }
 
-struct kbase_jd_atom *kbase_jm_complete(struct kbase_device *kbdev,
-		struct kbase_jd_atom *katom, ktime_t *end_timestamp)
+void kbase_jm_complete(struct kbase_device *kbdev, struct kbase_jd_atom *katom,
+			ktime_t *end_timestamp)
 {
-	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
 
-	return kbase_js_complete_atom(katom, end_timestamp);
+	kbase_js_complete_atom(katom, end_timestamp);
 }
 
diff --git a/drivers/gpu/arm/midgard/mali_kbase_jm.h b/drivers/gpu/arm/midgard/mali_kbase_jm.h
index a74ee24c8058..27aca3a699f4 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_jm.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_jm.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014, 2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -28,7 +28,7 @@
  * @kbdev:	Device pointer
  * @js_mask:	Mask of the job slots that can be pulled from.
  *
- * Caller must hold the hwaccess_lock and schedule_sem semaphore
+ * Caller must hold the runpool_irq lock and schedule_sem semaphore
  *
  * Return: Mask of the job slots that can still be submitted to.
  */
@@ -39,7 +39,7 @@ u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask);
  *			 slots.
  * @kbdev:	Device pointer
  *
- * Caller must hold the hwaccess_lock and schedule_sem semaphore
+ * Caller must hold the runpool_irq lock and schedule_sem semaphore
  *
  * Return: Mask of the job slots that can still be submitted to.
  */
@@ -52,7 +52,7 @@ static inline u32 kbase_jm_kick_all(struct kbase_device *kbdev)
  * kbase_jm_try_kick - Attempt to call kbase_jm_kick
  * @kbdev:   Device pointer
  * @js_mask: Mask of the job slots that can be pulled from
- * Context: Caller must hold hwaccess_lock
+ * Context: Caller must hold runpool_irq lock
  *
  * If schedule_sem can be immediately obtained then this function will call
  * kbase_jm_kick() otherwise it will do nothing.
@@ -62,7 +62,7 @@ void kbase_jm_try_kick(struct kbase_device *kbdev, u32 js_mask);
 /**
  * kbase_jm_try_kick_all() - Attempt to call kbase_jm_kick_all
  * @kbdev:  Device pointer
- * Context: Caller must hold hwaccess_lock
+ * Context: Caller must hold runpool_irq lock
  *
  * If schedule_sem can be immediately obtained then this function will call
  * kbase_jm_kick_all() otherwise it will do nothing.
@@ -80,7 +80,7 @@ void kbase_jm_try_kick_all(struct kbase_device *kbdev);
  * The context should have no atoms currently pulled from it
  * (kctx->atoms_pulled == 0).
  *
- * Caller must hold the hwaccess_lock
+ * Caller must hold the runpool_irq lock
  */
 void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
 
@@ -90,21 +90,17 @@ void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
  *				  dependency
  * @kbdev:	Device pointer
  * @katom:	Atom that has been stopped or will be failed
- *
- * Return: Atom that has now been unblocked and can now be run, or NULL if none
  */
-struct kbase_jd_atom *kbase_jm_return_atom_to_js(struct kbase_device *kbdev,
-			struct kbase_jd_atom *katom);
+void kbase_jm_return_atom_to_js(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom);
 
 /**
  * kbase_jm_complete() - Complete an atom
  * @kbdev:		Device pointer
  * @katom:		Atom that has completed
  * @end_timestamp:	Timestamp of atom completion
- *
- * Return: Atom that has now been unblocked and can now be run, or NULL if none
  */
-struct kbase_jd_atom *kbase_jm_complete(struct kbase_device *kbdev,
-		struct kbase_jd_atom *katom, ktime_t *end_timestamp);
+void kbase_jm_complete(struct kbase_device *kbdev, struct kbase_jd_atom *katom,
+			ktime_t *end_timestamp);
 
 #endif /* _KBASE_JM_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_js.c b/drivers/gpu/arm/midgard/mali_kbase_js.c
index 60a7373601c0..c591ebbcd861 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_js.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_js.c
@@ -89,7 +89,7 @@ static int kbasep_js_trace_get_refcnt(struct kbase_device *kbdev,
 
 	js_devdata = &kbdev->js_data;
 
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
 	as_nr = kctx->as_nr;
 	if (as_nr != KBASEP_AS_NR_INVALID) {
 		struct kbasep_js_per_as_data *js_per_as_data;
@@ -98,7 +98,7 @@ static int kbasep_js_trace_get_refcnt(struct kbase_device *kbdev,
 
 		refcnt = js_per_as_data->as_busy_refcount;
 	}
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
 
 	return refcnt;
 }
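
Throughout mali_kbase_js.c the revert swaps kbdev->hwaccess_lock back to js_data.runpool_irq.lock, but the usage pattern is unchanged: the per-address-space busy refcount may only be read under that IRQ spinlock, and helpers assert the lock with lockdep. A userspace model of the same pattern, with a POSIX spinlock standing in for the kernel spinlock and hypothetical model_* names:

#include <pthread.h>

struct model_runpool {
	pthread_spinlock_t lock;	/* stands in for runpool_irq.lock */
	int as_busy_refcount;		/* per-address-space busy count */
};

/* Reads of the refcount are only meaningful while the scheduler's IRQ lock
 * is held, exactly as in kbasep_js_trace_get_refcnt() above. */
static int model_get_refcnt(struct model_runpool *rp)
{
	int refcnt;

	pthread_spin_lock(&rp->lock);
	refcnt = rp->as_busy_refcount;
	pthread_spin_unlock(&rp->lock);

	return refcnt;
}
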
@@ -198,7 +198,7 @@ static void kbase_js_sync_timers(struct kbase_device *kbdev)
 	mutex_unlock(&kbdev->js_data.runpool_mutex);
 }
 
-/* Hold the hwaccess_lock for this */
+/* Hold the kbasep_js_device_data::runpool_irq::lock for this */
 bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev,
 		struct kbase_context *kctx)
 {
@@ -248,7 +248,7 @@ jsctx_rb_none_to_pull_prio(struct kbase_context *kctx, int js, int prio)
 {
 	struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
 
-	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
 
 	return RB_EMPTY_ROOT(&rb->runnable_tree);
 }
@@ -259,7 +259,7 @@ jsctx_rb_none_to_pull_prio(struct kbase_context *kctx, int js, int prio)
  * @kctx: Pointer to kbase context with ring buffer.
  * @js:   Job slot id to check.
  *
- * Caller must hold hwaccess_lock
+ * Caller must hold runpool_irq.lock
  *
  * Return: true if the ring buffers for all priorities have no pullable atoms,
  *	   false otherwise.
@@ -269,7 +269,7 @@ jsctx_rb_none_to_pull(struct kbase_context *kctx, int js)
 {
 	int prio;
 
-	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
 
 	for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) {
 		if (!jsctx_rb_none_to_pull_prio(kctx, js, prio))
@@ -294,7 +294,8 @@ jsctx_rb_none_to_pull(struct kbase_context *kctx, int js)
  * enumerated when this function returns jsctx->lock must be held when calling
  * this function.
  *
- * The HW access lock must always be held when calling this function.
+ * The HW access lock, js_data.runpool_irq.lock, must always be held when
+ * calling this function.
  */
 static void
 jsctx_queue_foreach_prio(struct kbase_context *kctx, int js, int prio,
@@ -302,7 +303,7 @@ jsctx_queue_foreach_prio(struct kbase_context *kctx, int js, int prio,
 {
 	struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js];
 
-	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
 
 	while (!RB_EMPTY_ROOT(&queue->runnable_tree)) {
 		struct rb_node *node = rb_first(&queue->runnable_tree);
@@ -360,7 +361,7 @@ jsctx_rb_peek_prio(struct kbase_context *kctx, int js, int prio)
 	struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
 	struct rb_node *node;
 
-	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
 
 	node = rb_first(&rb->runnable_tree);
 	if (!node)
@@ -378,7 +379,7 @@ jsctx_rb_peek_prio(struct kbase_context *kctx, int js, int prio)
  * KBASE_JS_ATOM_SCHED_PRIO_HIGH, for the specified @js and @prio and return a
  * pointer to the next atom, unless all the priority's ring buffers are empty.
  *
- * Caller must hold the hwaccess_lock.
+ * Caller must hold the runpool_irq.lock.
  *
  * Return: Pointer to next atom in buffer, or NULL if there is no atom.
  */
@@ -387,7 +388,7 @@ jsctx_rb_peek(struct kbase_context *kctx, int js)
 {
 	int prio;
 
-	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
 
 	for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) {
 		struct kbase_jd_atom *katom;
@@ -416,7 +417,7 @@ jsctx_rb_pull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
 	int js = katom->slot_nr;
 	struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
 
-	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
 
 	/* Atoms must be pulled in the correct order. */
 	WARN_ON(katom != jsctx_rb_peek_prio(kctx, js, prio));
@@ -434,7 +435,7 @@ jsctx_tree_add(struct kbase_context *kctx, struct kbase_jd_atom *katom)
 	struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js];
 	struct rb_node **new = &(queue->runnable_tree.rb_node), *parent = NULL;
 
-	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
 
 	while (*new) {
 		struct kbase_jd_atom *entry = container_of(*new,
@@ -465,7 +466,7 @@ jsctx_tree_add(struct kbase_context *kctx, struct kbase_jd_atom *katom)
 static inline void
 jsctx_rb_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
 {
-	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
 
 	jsctx_tree_add(kctx, katom);
 }
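
The jsctx helpers above keep runnable atoms per (priority, slot) in a tree ordered by atom age, so the peek helper's rb_first() returns the oldest runnable atom and unpulled atoms simply re-enter the same ordering. A small sketch of that ordering using a sorted singly linked list in place of the kernel rbtree, with illustrative model_* names:

#include <stddef.h>

struct model_rt_atom {
	unsigned int age;		/* increases with each submitted atom */
	struct model_rt_atom *next;
};

/* Insert while keeping the list sorted by ascending age, so the head is the
 * oldest runnable atom, mirroring what rb_first() returns in the driver. */
static void model_runnable_insert(struct model_rt_atom **head,
				  struct model_rt_atom *katom)
{
	while (*head && (*head)->age < katom->age)
		head = &(*head)->next;

	katom->next = *head;
	*head = katom;
}

static struct model_rt_atom *model_runnable_peek(struct model_rt_atom *head)
{
	return head;			/* oldest atom, or NULL if empty */
}
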
@@ -629,7 +630,7 @@ int kbasep_js_devdata_init(struct kbase_device * const kbdev)
 
 	mutex_init(&jsdd->runpool_mutex);
 	mutex_init(&jsdd->queue_mutex);
-	spin_lock_init(&kbdev->hwaccess_lock);
+	spin_lock_init(&jsdd->runpool_irq.lock);
 	sema_init(&jsdd->schedule_sem, 1);
 
 	err = kbasep_js_policy_init(kbdev);
@@ -698,14 +699,14 @@ int kbasep_js_kctx_init(struct kbase_context * const kctx)
 	KBASE_DEBUG_ASSERT(js_kctx_info->init_status == JS_KCTX_INIT_NONE);
 
 	js_kctx_info->ctx.nr_jobs = 0;
-	kbase_ctx_flag_clear(kctx, KCTX_SCHEDULED);
-	kbase_ctx_flag_clear(kctx, KCTX_DYING);
+	js_kctx_info->ctx.is_scheduled = false;
+	js_kctx_info->ctx.is_dying = false;
 	memset(js_kctx_info->ctx.ctx_attr_ref_count, 0,
 			sizeof(js_kctx_info->ctx.ctx_attr_ref_count));
 
 	/* Initially, the context is disabled from submission until the create
 	 * flags are set */
-	kbase_ctx_flag_set(kctx, KCTX_SUBMIT_DISABLED);
+	js_kctx_info->ctx.flags = KBASE_CTX_FLAG_SUBMIT_DISABLED;
 
 	js_kctx_info->init_status |= JS_KCTX_INIT_CONSTANTS;
 
@@ -752,7 +753,7 @@ void kbasep_js_kctx_term(struct kbase_context *kctx)
 
 	if ((js_kctx_info->init_status & JS_KCTX_INIT_CONSTANTS)) {
 		/* The caller must de-register all jobs before calling this */
-		KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+		KBASE_DEBUG_ASSERT(!js_kctx_info->ctx.is_scheduled);
 		KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs == 0);
 	}
 
@@ -762,11 +763,11 @@ void kbasep_js_kctx_term(struct kbase_context *kctx)
 	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++)
 		list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
 
-	if (kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)) {
+	if (kctx->ctx_runnable_ref) {
 		WARN_ON(atomic_read(&kbdev->js_data.nr_contexts_runnable) <= 0);
 		atomic_dec(&kbdev->js_data.nr_contexts_runnable);
 		update_ctx_count = true;
-		kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF);
+		kctx->ctx_runnable_ref = false;
 	}
 
 	mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
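
The context bookkeeping restored in this file keeps scheduler state in plain booleans (is_scheduled, is_dying, ctx_runnable_ref) and a flags bitmask on the per-context JS info, rather than the r14p0 KCTX_* flag accessors. A minimal sketch of that representation with hypothetical MODEL_/model_* names:

#include <stdbool.h>

#define MODEL_CTX_FLAG_SUBMIT_DISABLED	(1u << 0)
#define MODEL_CTX_FLAG_PRIVILEGED	(1u << 1)

struct model_js_kctx {
	unsigned int flags;		/* bitmask, e.g. SUBMIT_DISABLED */
	bool is_scheduled;
	bool is_dying;
	bool ctx_runnable_ref;		/* counted in nr_contexts_runnable */
};

/* New contexts start unscheduled and with submission disabled until their
 * create flags are applied, as in kbasep_js_kctx_init() above. */
static void model_js_kctx_init(struct model_js_kctx *ctx)
{
	ctx->is_scheduled = false;
	ctx->is_dying = false;
	ctx->ctx_runnable_ref = false;
	ctx->flags = MODEL_CTX_FLAG_SUBMIT_DISABLED;
}

static bool model_submit_allowed(const struct model_js_kctx *ctx)
{
	return !(ctx->flags & MODEL_CTX_FLAG_SUBMIT_DISABLED);
}
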
@@ -788,12 +789,12 @@ void kbasep_js_kctx_term(struct kbase_context *kctx)
  * kbase_js_ctx_list_add_pullable_nolock - Variant of
  *                                         kbase_jd_ctx_list_add_pullable()
  *                                         where the caller must hold
- *                                         hwaccess_lock
+ *                                         runpool_irq.lock
  * @kbdev:  Device pointer
  * @kctx:   Context to add to queue
  * @js:     Job slot to use
  *
- * Caller must hold hwaccess_lock
+ * Caller must hold runpool_irq.lock
  *
  * Return: true if caller should call kbase_backend_ctx_count_changed()
  */
@@ -803,7 +804,7 @@ static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev,
 {
 	bool ret = false;
 
-	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
 
 	if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]))
 		list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
@@ -815,8 +816,8 @@ static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev,
 		kbdev->js_data.nr_contexts_pullable++;
 		ret = true;
 		if (!atomic_read(&kctx->atoms_pulled)) {
-			WARN_ON(kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
-			kbase_ctx_flag_set(kctx, KCTX_RUNNABLE_REF);
+			WARN_ON(kctx->ctx_runnable_ref);
+			kctx->ctx_runnable_ref = true;
 			atomic_inc(&kbdev->js_data.nr_contexts_runnable);
 		}
 	}
@@ -829,12 +830,12 @@ static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev,
  * kbase_js_ctx_list_add_pullable_head_nolock - Variant of
  *                                              kbase_js_ctx_list_add_pullable_head()
  *                                              where the caller must hold
- *                                              hwaccess_lock
+ *                                              runpool_irq.lock
  * @kbdev:  Device pointer
  * @kctx:   Context to add to queue
  * @js:     Job slot to use
  *
- * Caller must hold hwaccess_lock
+ * Caller must hold runpool_irq.lock
  *
  * Return:  true if caller should call kbase_backend_ctx_count_changed()
  */
@@ -843,7 +844,7 @@ static bool kbase_js_ctx_list_add_pullable_head_nolock(
 {
 	bool ret = false;
 
-	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
 
 	if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]))
 		list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
@@ -855,8 +856,8 @@ static bool kbase_js_ctx_list_add_pullable_head_nolock(
 		kbdev->js_data.nr_contexts_pullable++;
 		ret = true;
 		if (!atomic_read(&kctx->atoms_pulled)) {
-			WARN_ON(kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
-			kbase_ctx_flag_set(kctx, KCTX_RUNNABLE_REF);
+			WARN_ON(kctx->ctx_runnable_ref);
+			kctx->ctx_runnable_ref = true;
 			atomic_inc(&kbdev->js_data.nr_contexts_runnable);
 		}
 	}
@@ -887,9 +888,9 @@ static bool kbase_js_ctx_list_add_pullable_head(struct kbase_device *kbdev,
 	bool ret;
 	unsigned long flags;
 
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags);
 	ret = kbase_js_ctx_list_add_pullable_head_nolock(kbdev, kctx, js);
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags);
 
 	return ret;
 }
@@ -907,7 +908,7 @@ static bool kbase_js_ctx_list_add_pullable_head(struct kbase_device *kbdev,
  * This function should be used when a context has been pulled from, and there
  * are no jobs remaining on the specified slot.
  *
- * Caller must hold hwaccess_lock
+ * Caller must hold runpool_irq.lock
  *
  * Return:  true if caller should call kbase_backend_ctx_count_changed()
  */
@@ -917,7 +918,7 @@ static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev,
 {
 	bool ret = false;
 
-	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
 
 	list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js],
 				&kbdev->js_data.ctx_list_unpullable[js]);
@@ -926,8 +927,8 @@ static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev,
 		kbdev->js_data.nr_contexts_pullable--;
 		ret = true;
 		if (!atomic_read(&kctx->atoms_pulled)) {
-			WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
-			kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF);
+			WARN_ON(!kctx->ctx_runnable_ref);
+			kctx->ctx_runnable_ref = false;
 			atomic_dec(&kbdev->js_data.nr_contexts_runnable);
 		}
 	}
@@ -948,7 +949,7 @@ static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev,
  * This function should be used when a context has no jobs on the GPU, and no
  * jobs remaining for the specified slot.
  *
- * Caller must hold hwaccess_lock
+ * Caller must hold runpool_irq.lock
  *
  * Return:  true if caller should call kbase_backend_ctx_count_changed()
  */
@@ -958,7 +959,7 @@ static bool kbase_js_ctx_list_remove_nolock(struct kbase_device *kbdev,
 {
 	bool ret = false;
 
-	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
 
 	WARN_ON(list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]));
 
@@ -968,8 +969,8 @@ static bool kbase_js_ctx_list_remove_nolock(struct kbase_device *kbdev,
 		kbdev->js_data.nr_contexts_pullable--;
 		ret = true;
 		if (!atomic_read(&kctx->atoms_pulled)) {
-			WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
-			kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF);
+			WARN_ON(!kctx->ctx_runnable_ref);
+			kctx->ctx_runnable_ref = false;
 			atomic_dec(&kbdev->js_data.nr_contexts_runnable);
 		}
 	}
@@ -981,11 +982,11 @@ static bool kbase_js_ctx_list_remove_nolock(struct kbase_device *kbdev,
 /**
  * kbase_js_ctx_list_pop_head_nolock - Variant of kbase_js_ctx_list_pop_head()
  *                                     where the caller must hold
- *                                     hwaccess_lock
+ *                                     runpool_irq.lock
  * @kbdev:  Device pointer
  * @js:     Job slot to use
  *
- * Caller must hold hwaccess_lock
+ * Caller must hold runpool_irq.lock
  *
  * Return:  Context to use for specified slot.
  *          NULL if no contexts present for specified slot
@@ -996,7 +997,7 @@ static struct kbase_context *kbase_js_ctx_list_pop_head_nolock(
 {
 	struct kbase_context *kctx;
 
-	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
 
 	if (list_empty(&kbdev->js_data.ctx_list_pullable[js]))
 		return NULL;
@@ -1025,9 +1026,9 @@ static struct kbase_context *kbase_js_ctx_list_pop_head(
 	struct kbase_context *kctx;
 	unsigned long flags;
 
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags);
 	kctx = kbase_js_ctx_list_pop_head_nolock(kbdev, js);
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags);
 
 	return kctx;
 }
@@ -1039,7 +1040,7 @@ static struct kbase_context *kbase_js_ctx_list_pop_head(
  * @js:            Job slot to use
  * @is_scheduled:  true if the context is currently scheduled
  *
- * Caller must hold hwaccess_lock
+ * Caller must hold runpool_irq.lock
  *
  * Return:         true if context can be pulled from on specified slot
  *                 false otherwise
@@ -1050,7 +1051,7 @@ static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js,
 	struct kbasep_js_device_data *js_devdata;
 	struct kbase_jd_atom *katom;
 
-	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
 
 	js_devdata = &kctx->kbdev->js_data;
 
@@ -1243,7 +1244,7 @@ bool kbasep_js_add_job(struct kbase_context *kctx,
 	kbasep_js_clear_job_retry_submit(atom);
 
 	/* Lock for state available during IRQ */
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
 
 	if (!kbase_js_dep_validate(kctx, atom)) {
 		/* Dependencies could not be represented */
@@ -1253,13 +1254,12 @@ bool kbasep_js_add_job(struct kbase_context *kctx,
 		 * dependencies */
 		atom->status = KBASE_JD_ATOM_STATE_QUEUED;
 
-		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
 		mutex_unlock(&js_devdata->runpool_mutex);
 
 		goto out_unlock;
 	}
 
-	kbase_tlstream_tl_attrib_atom_state(atom, TL_ATOM_STATE_READY);
 	KBASE_TIMELINE_ATOM_READY(kctx, kbase_jd_atom_id(kctx, atom));
 
 	enqueue_required = kbase_js_dep_resolved_submit(kctx, atom);
@@ -1283,14 +1283,14 @@ bool kbasep_js_add_job(struct kbase_context *kctx,
 	if (enqueue_required && kctx == kbdev->hwaccess.active_kctx)
 		kbase_jm_try_kick(kbdev, 1 << atom->slot_nr);
 
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
 	if (timer_sync)
 		kbase_backend_ctx_count_changed(kbdev);
 	mutex_unlock(&js_devdata->runpool_mutex);
 	/* End runpool transaction */
 
-	if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED)) {
-		if (kbase_ctx_flag(kctx, KCTX_DYING)) {
+	if (!js_kctx_info->ctx.is_scheduled) {
+		if (js_kctx_info->ctx.is_dying) {
 			/* A job got added while/after kbase_job_zap_context()
 			 * was called on a non-scheduled context (e.g. KDS
 			 * dependency resolved). Kill that job by killing the
@@ -1300,7 +1300,7 @@ bool kbasep_js_add_job(struct kbase_context *kctx,
 		} else if (js_kctx_info->ctx.nr_jobs == 1) {
 			/* Handle Refcount going from 0 to 1: schedule the
 			 * context on the Policy Queue */
-			KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+			KBASE_DEBUG_ASSERT(!js_kctx_info->ctx.is_scheduled);
 			dev_dbg(kbdev->dev, "JS: Enqueue Context %p", kctx);
 
 			/* Policy Queue was updated - caller must try to
@@ -1356,7 +1356,7 @@ bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev,
 	kbasep_js_atom_retained_state_copy(&katom_retained_state, katom);
 	kbasep_js_remove_job(kbdev, kctx, katom);
 
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
 
 	/* The atom has 'finished' (will not be re-run), so no need to call
 	 * kbasep_js_has_atom_finished().
@@ -1366,7 +1366,8 @@ bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev,
 	 * whether it was soft-stopped or not */
 	attr_state_changed = kbasep_js_ctx_attr_ctx_release_atom(kbdev, kctx,
 			&katom_retained_state);
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
 
 	return attr_state_changed;
 }
@@ -1383,9 +1384,9 @@ bool kbasep_js_runpool_retain_ctx(struct kbase_device *kbdev,
 
 	/* KBASE_TRACE_ADD_REFCOUNT( kbdev, JS_RETAIN_CTX, kctx, NULL, 0,
 	   kbasep_js_trace_get_refcnt(kbdev, kctx)); */
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
 	result = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
 
 	return result;
 }
@@ -1403,14 +1404,14 @@ struct kbase_context *kbasep_js_runpool_lookup_ctx(struct kbase_device *kbdev,
 	js_devdata = &kbdev->js_data;
 	js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
 
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
 
 	found_kctx = js_per_as_data->kctx;
 
 	if (found_kctx != NULL)
 		++(js_per_as_data->as_busy_refcount);
 
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
 
 	return found_kctx;
 }
@@ -1425,7 +1426,7 @@ struct kbase_context *kbasep_js_runpool_lookup_ctx_nolock(
 	KBASE_DEBUG_ASSERT(kbdev != NULL);
 	KBASE_DEBUG_ASSERT(0 <= as_nr && as_nr < BASE_MAX_NR_AS);
 
-	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
 
 	js_devdata = &kbdev->js_data;
 	js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
@@ -1447,7 +1448,8 @@ struct kbase_context *kbasep_js_runpool_lookup_ctx_nolock(
  * @katom_retained_state:    Retained state from the atom
  * @runpool_ctx_attr_change: True if the runpool context attributes have changed
  *
- * This collates a set of actions that must happen whilst hwaccess_lock is held.
+ * This collates a set of actions that must happen whilst
+ * kbasep_js_device_data.runpool_irq.lock is held.
  *
  * This includes running more jobs when:
  * - The previously released kctx caused a ctx attribute change,
@@ -1474,7 +1476,7 @@ static kbasep_js_release_result kbasep_js_run_jobs_after_ctx_and_atom_release(
 
 	lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
 	lockdep_assert_held(&js_devdata->runpool_mutex);
-	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&js_devdata->runpool_irq.lock);
 
 	if (js_devdata->nr_user_contexts_running != 0) {
 		bool retry_submit = false;
@@ -1539,7 +1541,7 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal(
 	js_policy = &kbdev->js_data.policy;
 
 	/* Ensure context really is scheduled in */
-	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled);
 
 	/* kctx->as_nr and js_per_as_data are only read from here. The caller's
 	 * js_ctx_mutex provides a barrier that ensures they are up-to-date.
@@ -1559,9 +1561,8 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal(
 	 */
 	current_as = &kbdev->as[kctx_as_nr];
 	mutex_lock(&kbdev->pm.lock);
-	mutex_lock(&kbdev->mmu_hw_mutex);
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-
+	mutex_lock(&current_as->transaction_mutex);
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
 	KBASE_DEBUG_ASSERT(kctx_as_nr == kctx->as_nr);
 	KBASE_DEBUG_ASSERT(js_per_as_data->as_busy_refcount > 0);
 
@@ -1576,7 +1577,8 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal(
 	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_RELEASE_CTX, kctx, NULL, 0u,
 			new_ref_count);
 
-	if (new_ref_count == 1 && kbase_ctx_flag(kctx, KCTX_PRIVILEGED) &&
+	if (new_ref_count == 1 && kctx->jctx.sched_info.ctx.flags &
+			KBASE_CTX_FLAG_PRIVILEGED &&
 			!kbase_pm_is_suspending(kbdev)) {
 		/* Context is kept scheduled into an address space even when
 		 * there are no jobs, in this case we have to handle the
@@ -1615,9 +1617,8 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal(
 		/* Ctx Attribute handling
 		 *
 		 * Releasing atoms attributes must either happen before this, or
-		 * after the KCTX_SHEDULED flag is changed, otherwise we
-		 * double-decount the attributes
-		 */
+		 * after 'is_scheduled' is changed, otherwise we double-decount
+		 * the attributes */
 		runpool_ctx_attr_change |=
 			kbasep_js_ctx_attr_runpool_release_ctx(kbdev, kctx);
 
@@ -1647,11 +1648,11 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal(
 						kctx, slot);
 		}
 
-		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
 
 		kbase_backend_release_ctx_noirq(kbdev, kctx);
 
-		mutex_unlock(&kbdev->mmu_hw_mutex);
+		mutex_unlock(&current_as->transaction_mutex);
 		mutex_unlock(&kbdev->pm.lock);
 
 		/* Note: Don't reuse kctx_as_nr now */
@@ -1660,7 +1661,7 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal(
 		kbase_backend_ctx_count_changed(kbdev);
 
 		/* update book-keeping info */
-		kbase_ctx_flag_clear(kctx, KCTX_SCHEDULED);
+		js_kctx_info->ctx.is_scheduled = false;
 		/* Signal any waiter that the context is not scheduled, so is
 		 * safe for termination - once the jsctx_mutex is also dropped,
 		 * and jobs have finished. */
@@ -1673,8 +1674,8 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal(
 		kbasep_js_run_jobs_after_ctx_and_atom_release(kbdev, kctx,
 				katom_retained_state, runpool_ctx_attr_change);
 
-		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-		mutex_unlock(&kbdev->mmu_hw_mutex);
+		spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+		mutex_unlock(&current_as->transaction_mutex);
 		mutex_unlock(&kbdev->pm.lock);
 	}
 
@@ -1709,9 +1710,9 @@ void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev,
 	/* This is called if and only if you've you've detached the context from
 	 * the Runpool or the Policy Queue, and not added it back to the Runpool
 	 */
-	KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+	KBASE_DEBUG_ASSERT(!js_kctx_info->ctx.is_scheduled);
 
-	if (kbase_ctx_flag(kctx, KCTX_DYING)) {
+	if (js_kctx_info->ctx.is_dying) {
 		/* Dying: don't requeue, but kill all jobs on the context. This
 		 * happens asynchronously */
 		dev_dbg(kbdev->dev,
@@ -1805,7 +1806,7 @@ static void kbasep_js_runpool_release_ctx_no_schedule(
 
 void kbase_js_set_timeouts(struct kbase_device *kbdev)
 {
-	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
 
 	kbase_backend_timeouts_changed(kbdev);
 }
@@ -1840,7 +1841,7 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev,
 	mutex_lock(&js_devdata->runpool_mutex);
 
 	/* Check to see if context is dying due to kbase_job_zap_context() */
-	if (kbase_ctx_flag(kctx, KCTX_DYING)) {
+	if (js_kctx_info->ctx.is_dying) {
 		/* Roll back the transaction so far and return */
 		kbase_backend_release_free_address_space(kbdev, as_nr);
 
@@ -1854,17 +1855,17 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev,
 				0u,
 				kbasep_js_trace_get_refcnt(kbdev, kctx));
 
-	kbase_ctx_flag_set(kctx, KCTX_SCHEDULED);
+	js_kctx_info->ctx.is_scheduled = true;
 
-	mutex_lock(&kbdev->mmu_hw_mutex);
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	mutex_lock(&new_address_space->transaction_mutex);
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
 
 	/* Assign context to previously chosen address space */
 	if (!kbase_backend_use_ctx(kbdev, kctx, as_nr)) {
-		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-		mutex_unlock(&kbdev->mmu_hw_mutex);
+		spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+		mutex_unlock(&new_address_space->transaction_mutex);
 		/* Roll back the transaction so far and return */
-		kbase_ctx_flag_clear(kctx, KCTX_SCHEDULED);
+		js_kctx_info->ctx.is_scheduled = false;
 
 		kbase_backend_release_free_address_space(kbdev, as_nr);
 
@@ -1891,8 +1892,8 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev,
 	 * we just want it out immediately.
 	 *
 	 * The DMB required to read the suspend flag was issued recently as part
-	 * of the hwaccess_lock locking. If a suspend occurs *after* that lock
-	 * was taken (i.e. this condition doesn't execute), then the
+	 * of the runpool_irq locking. If a suspend occurs *after* that lock was
+	 * taken (i.e. this condition doesn't execute), then the
 	 * kbasep_js_suspend() code will cleanup this context instead (by virtue
 	 * of it being called strictly after the suspend flag is set, and will
 	 * wait for this lock to drop) */
@@ -1908,8 +1909,8 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev,
 	}
 
 	/* Transaction complete */
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-	mutex_unlock(&kbdev->mmu_hw_mutex);
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+	mutex_unlock(&new_address_space->transaction_mutex);
 
 	/* Synchronize with any policy timers */
 	kbase_backend_ctx_count_changed(kbdev);
@@ -1933,18 +1934,18 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev,
 static bool kbase_js_use_ctx(struct kbase_device *kbdev,
 				struct kbase_context *kctx)
 {
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
 	unsigned long flags;
 
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
 	if (kbase_backend_use_ctx_sched(kbdev, kctx)) {
 		/* Context already has ASID - mark as active */
 		kbdev->hwaccess.active_kctx = kctx;
-		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+		spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
 		return true; /* Context already scheduled */
 	}
-
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
 
 	return kbasep_js_schedule_ctx(kbdev, kctx);
 }
@@ -1970,9 +1971,9 @@ void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev,
 	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
 
 	/* Mark the context as privileged */
-	kbase_ctx_flag_set(kctx, KCTX_PRIVILEGED);
+	js_kctx_info->ctx.flags |= KBASE_CTX_FLAG_PRIVILEGED;
 
-	is_scheduled = kbase_ctx_flag(kctx, KCTX_SCHEDULED);
+	is_scheduled = js_kctx_info->ctx.is_scheduled;
 	if (!is_scheduled) {
 		/* Add the context to the pullable list */
 		if (kbase_js_ctx_list_add_pullable_head(kbdev, kctx, 0))
@@ -1988,7 +1989,7 @@ void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev,
 
 		/* Wait for the context to be scheduled in */
 		wait_event(kctx->jctx.sched_info.ctx.is_scheduled_wait,
-			   kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+			kctx->jctx.sched_info.ctx.is_scheduled);
 	} else {
 		/* Already scheduled in - We need to retain it to keep the
 		 * corresponding address space */
@@ -2009,7 +2010,7 @@ void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev,
 
 	/* We don't need to use the address space anymore */
 	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
-	kbase_ctx_flag_clear(kctx, KCTX_PRIVILEGED);
+	js_kctx_info->ctx.flags &= (~KBASE_CTX_FLAG_PRIVILEGED);
 	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
 
 	/* Release the context - it will be scheduled out */
@@ -2031,7 +2032,7 @@ void kbasep_js_suspend(struct kbase_device *kbdev)
 	KBASE_DEBUG_ASSERT(kbase_pm_is_suspending(kbdev));
 	js_devdata = &kbdev->js_data;
 
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
 
 	/* Prevent all contexts from submitting */
 	js_devdata->runpool_irq.submit_allowed = 0;
@@ -2051,15 +2052,15 @@ void kbasep_js_suspend(struct kbase_device *kbdev)
 			/* We can only cope with up to 1 privileged context -
 			 * the instrumented context. It'll be suspended by
 			 * disabling instrumentation */
-			if (kbase_ctx_flag(kctx, KCTX_PRIVILEGED)) {
+			if (kctx->jctx.sched_info.ctx.flags &
+					KBASE_CTX_FLAG_PRIVILEGED) {
 				++nr_privileged_ctx;
 				WARN_ON(nr_privileged_ctx != 1);
 			}
 		}
 	}
 	CSTD_UNUSED(nr_privileged_ctx);
-
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
 
 	/* De-ref the previous retain to ensure each context gets pulled out
 	 * sometime later. */
@@ -2102,14 +2103,16 @@ void kbasep_js_resume(struct kbase_device *kbdev)
 
 			mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
 			mutex_lock(&js_devdata->runpool_mutex);
-			spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+			spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
 
-			if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED) &&
+			if (!js_kctx_info->ctx.is_scheduled &&
 				kbase_js_ctx_pullable(kctx, js, false))
 				timer_sync =
 					kbase_js_ctx_list_add_pullable_nolock(
 							kbdev, kctx, js);
-			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+			spin_unlock_irqrestore(&js_devdata->runpool_irq.lock,
+									flags);
 			if (timer_sync)
 				kbase_backend_ctx_count_changed(kbdev);
 			mutex_unlock(&js_devdata->runpool_mutex);
@@ -2164,7 +2167,7 @@ bool kbase_js_dep_resolved_submit(struct kbase_context *kctx,
 
 	katom->slot_nr = kbase_js_get_slot(kctx->kbdev, katom);
 
-	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
 	lockdep_assert_held(&kctx->jctx.lock);
 
 	/* If slot will transition from unpullable to pullable then add to
@@ -2174,6 +2177,9 @@ bool kbase_js_dep_resolved_submit(struct kbase_context *kctx,
 	} else {
 		enqueue_required = false;
 	}
+	/* Check if there are lower priority jobs to soft stop */
+	kbase_job_slot_ctx_priority_check_locked(kctx, katom);
+
 	if ((katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) ||
 			(katom->pre_dep && (katom->pre_dep->atom_flags &
 			KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST))) {
@@ -2185,9 +2191,6 @@ bool kbase_js_dep_resolved_submit(struct kbase_context *kctx,
 		katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST;
 		enqueue_required = false;
 	} else {
-		/* Check if there are lower priority jobs to soft stop */
-		kbase_job_slot_ctx_priority_check_locked(kctx, katom);
-
 		/* Add atom to ring buffer. */
 		jsctx_tree_add(kctx, katom);
 		katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_TREE;
@@ -2207,7 +2210,7 @@ bool kbase_js_dep_resolved_submit(struct kbase_context *kctx,
  */
 static void kbase_js_move_to_tree(struct kbase_jd_atom *katom)
 {
-	lockdep_assert_held(&katom->kctx->kbdev->hwaccess_lock);
+	lockdep_assert_held(&katom->kctx->kbdev->js_data.runpool_irq.lock);
 
 	while (katom) {
 		WARN_ON(!(katom->atom_flags &
@@ -2247,7 +2250,7 @@ static void kbase_js_evict_deps(struct kbase_context *kctx,
 	struct kbase_jd_atom *x_dep = katom->x_post_dep;
 	struct kbase_jd_atom *next_katom = katom->post_dep;
 
-	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
 
 	if (next_katom) {
 		KBASE_DEBUG_ASSERT(next_katom->status !=
@@ -2280,7 +2283,7 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js)
 	KBASE_DEBUG_ASSERT(kctx);
 
 	js_devdata = &kctx->kbdev->js_data;
-	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+	lockdep_assert_held(&js_devdata->runpool_irq.lock);
 
 	if (!kbasep_js_is_submit_allowed(js_devdata, kctx))
 		return NULL;
@@ -2315,12 +2318,11 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js)
 			return NULL;
 	}
 
-	kbase_ctx_flag_set(kctx, KCTX_PULLED);
-
+	kctx->pulled = true;
 	pulled = atomic_inc_return(&kctx->atoms_pulled);
 	if (pulled == 1 && !kctx->slots_pullable) {
-		WARN_ON(kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
-		kbase_ctx_flag_set(kctx, KCTX_RUNNABLE_REF);
+		WARN_ON(kctx->ctx_runnable_ref);
+		kctx->ctx_runnable_ref = true;
 		atomic_inc(&kctx->kbdev->js_data.nr_contexts_runnable);
 	}
 	atomic_inc(&kctx->atoms_pulled_slot[katom->slot_nr]);
@@ -2352,7 +2354,7 @@ static void js_return_worker(struct work_struct *data)
 	u64 affinity = katom->affinity;
 	enum kbase_atom_coreref_state coreref_state = katom->coreref_state;
 
-	kbase_tlstream_tl_event_atom_softstop_ex(katom);
+	kbase_tlstream_aux_job_softstop_ex(katom);
 
 	kbase_backend_complete_wq(kbdev, katom);
 
@@ -2369,7 +2371,7 @@ static void js_return_worker(struct work_struct *data)
 
 	atomic_dec(&katom->blocked);
 
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
 
 	if (!atomic_read(&kctx->atoms_pulled_slot[js]) &&
 			jsctx_rb_none_to_pull(kctx, js))
@@ -2377,14 +2379,14 @@ static void js_return_worker(struct work_struct *data)
 
 	if (!atomic_read(&kctx->atoms_pulled)) {
 		if (!kctx->slots_pullable) {
-			WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
-			kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF);
+			WARN_ON(!kctx->ctx_runnable_ref);
+			kctx->ctx_runnable_ref = false;
 			atomic_dec(&kbdev->js_data.nr_contexts_runnable);
 			timer_sync = true;
 		}
 
 		if (kctx->as_nr != KBASEP_AS_NR_INVALID &&
-				!kbase_ctx_flag(kctx, KCTX_DYING)) {
+				!js_kctx_info->ctx.is_dying) {
 			int num_slots = kbdev->gpu_props.num_job_slots;
 			int slot;
 
@@ -2404,11 +2406,11 @@ static void js_return_worker(struct work_struct *data)
 		context_idle = true;
 	}
 
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
 
 	if (context_idle) {
-		WARN_ON(!kbase_ctx_flag(kctx, KCTX_ACTIVE));
-		kbase_ctx_flag_clear(kctx, KCTX_ACTIVE);
+		WARN_ON(!kctx->ctx_active);
+		kctx->ctx_active = false;
 		kbase_pm_context_idle(kbdev);
 	}
 
@@ -2430,7 +2432,7 @@ static void js_return_worker(struct work_struct *data)
 
 void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
 {
-	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
 
 	jsctx_rb_unpull(kctx, katom);
 
@@ -2466,7 +2468,7 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx,
 	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
 
 	mutex_lock(&js_devdata->runpool_mutex);
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
 
 	if (katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE) {
 		context_idle = !atomic_dec_return(&kctx->atoms_pulled);
@@ -2474,8 +2476,8 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx,
 
 		if (!atomic_read(&kctx->atoms_pulled) &&
 				!kctx->slots_pullable) {
-			WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF));
-			kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF);
+			WARN_ON(!kctx->ctx_runnable_ref);
+			kctx->ctx_runnable_ref = false;
 			atomic_dec(&kbdev->js_data.nr_contexts_runnable);
 			timer_sync = true;
 		}
@@ -2497,7 +2499,7 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx,
 	 */
 	if (!kbasep_js_is_submit_allowed(js_devdata, kctx) &&
 					!atomic_read(&kctx->atoms_pulled) &&
-					!kbase_ctx_flag(kctx, KCTX_DYING)) {
+					!js_kctx_info->ctx.is_dying) {
 		int js;
 
 		kbasep_js_set_submit_allowed(js_devdata, kctx);
@@ -2524,9 +2526,9 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx,
 	 * jd_done_worker().
 	 */
 	if (context_idle)
-		kbase_ctx_flag_clear(kctx, KCTX_ACTIVE);
+		kctx->ctx_active = false;
 
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
 	if (timer_sync)
 		kbase_backend_ctx_count_changed(kbdev);
 	mutex_unlock(&js_devdata->runpool_mutex);
@@ -2534,8 +2536,7 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx,
 	return context_idle;
 }
 
-struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom,
-		ktime_t *end_timestamp)
+void kbase_js_complete_atom(struct kbase_jd_atom *katom, ktime_t *end_timestamp)
 {
 	u64 microseconds_spent = 0;
 	struct kbase_device *kbdev;
@@ -2547,7 +2548,7 @@ struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom,
 
 	js_policy = &kbdev->js_data.policy;
 
-	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
 
 	if (katom->will_fail_event_code)
 		katom->event_code = katom->will_fail_event_code;
@@ -2598,12 +2599,7 @@ struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom,
 				false))
 			kbase_js_ctx_list_add_pullable_nolock(kbdev, kctx,
 					x_dep->slot_nr);
-
-		if (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE)
-			return x_dep;
 	}
-
-	return NULL;
 }
 
 void kbase_js_sched(struct kbase_device *kbdev, int js_mask)
@@ -2633,7 +2629,7 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask)
 				break; /* No contexts on pullable list */
 			}
 
-			if (!kbase_ctx_flag(kctx, KCTX_ACTIVE)) {
+			if (!kctx->ctx_active) {
 				context_idle = true;
 
 				if (kbase_pm_context_active_handle_suspend(
@@ -2652,16 +2648,18 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask)
 					up(&js_devdata->schedule_sem);
 					return;
 				}
-				kbase_ctx_flag_set(kctx, KCTX_ACTIVE);
+				kctx->ctx_active = true;
 			}
 
 			if (!kbase_js_use_ctx(kbdev, kctx)) {
 				mutex_lock(
 					&kctx->jctx.sched_info.ctx.jsctx_mutex);
 				/* Context can not be used at this time */
-				spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+				spin_lock_irqsave(&js_devdata->runpool_irq.lock,
+									flags);
 				if (kbase_js_ctx_pullable(kctx, js, false)
-				    || kbase_ctx_flag(kctx, KCTX_PRIVILEGED))
+					|| (kctx->jctx.sched_info.ctx.flags &
+						KBASE_CTX_FLAG_PRIVILEGED))
 					timer_sync |=
 					kbase_js_ctx_list_add_pullable_head_nolock(
 							kctx->kbdev, kctx, js);
@@ -2669,13 +2667,13 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask)
 					timer_sync |=
 					kbase_js_ctx_list_add_unpullable_nolock(
 							kctx->kbdev, kctx, js);
-				spin_unlock_irqrestore(&kbdev->hwaccess_lock,
-						flags);
+				spin_unlock_irqrestore(
+					&js_devdata->runpool_irq.lock, flags);
 				mutex_unlock(
 					&kctx->jctx.sched_info.ctx.jsctx_mutex);
 				if (context_idle) {
-					WARN_ON(!kbase_ctx_flag(kctx, KCTX_ACTIVE));
-					kbase_ctx_flag_clear(kctx, KCTX_ACTIVE);
+					WARN_ON(!kctx->ctx_active);
+					kctx->ctx_active = false;
 					kbase_pm_context_idle(kbdev);
 				}
 
@@ -2684,15 +2682,15 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask)
 				break;
 			}
 			mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
-			spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+			spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
 
-			kbase_ctx_flag_clear(kctx, KCTX_PULLED);
+			kctx->pulled = false;
 
 			if (!kbase_jm_kick(kbdev, 1 << js))
 				/* No more jobs can be submitted on this slot */
 				js_mask &= ~(1 << js);
 
-			if (!kbase_ctx_flag(kctx, KCTX_PULLED)) {
+			if (!kctx->pulled) {
 				/* Failed to pull jobs - push to head of list */
 				if (kbase_js_ctx_pullable(kctx, js, true))
 					timer_sync |=
@@ -2708,15 +2706,15 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask)
 				if (context_idle) {
 					kbase_jm_idle_ctx(kbdev, kctx);
 					spin_unlock_irqrestore(
-							&kbdev->hwaccess_lock,
-							flags);
-					WARN_ON(!kbase_ctx_flag(kctx, KCTX_ACTIVE));
-					kbase_ctx_flag_clear(kctx, KCTX_ACTIVE);
+						&js_devdata->runpool_irq.lock,
+									flags);
+					WARN_ON(!kctx->ctx_active);
+					kctx->ctx_active = false;
 					kbase_pm_context_idle(kbdev);
 				} else {
 					spin_unlock_irqrestore(
-							&kbdev->hwaccess_lock,
-							flags);
+						&js_devdata->runpool_irq.lock,
+									flags);
 				}
 				mutex_unlock(
 					&kctx->jctx.sched_info.ctx.jsctx_mutex);
@@ -2734,8 +2732,8 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask)
 				timer_sync |=
 					kbase_js_ctx_list_add_unpullable_nolock(
 							kctx->kbdev, kctx, js);
-
-			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+			spin_unlock_irqrestore(&js_devdata->runpool_irq.lock,
+									flags);
 			mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
 		}
 	}
@@ -2766,7 +2764,7 @@ void kbase_js_zap_context(struct kbase_context *kctx)
 	mutex_lock(&kctx->jctx.lock);
 	mutex_lock(&js_devdata->queue_mutex);
 	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
-	kbase_ctx_flag_set(kctx, KCTX_DYING);
+	js_kctx_info->ctx.is_dying = true;
 
 	dev_dbg(kbdev->dev, "Zap: Try Evict Ctx %p", kctx);
 
@@ -2806,7 +2804,7 @@ void kbase_js_zap_context(struct kbase_context *kctx)
 	 *    jobs and releasing the Power manager active reference will be
 	 *    handled when it leaves the runpool.
 	 */
-	if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED)) {
+	if (!js_kctx_info->ctx.is_scheduled) {
 		for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
 			if (!list_empty(
 				&kctx->jctx.sched_info.ctx.ctx_list_entry[js]))
@@ -2826,7 +2824,7 @@ void kbase_js_zap_context(struct kbase_context *kctx)
 		 */
 
 		KBASE_TRACE_ADD(kbdev, JM_ZAP_NON_SCHEDULED, kctx, NULL, 0u,
-						kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+						js_kctx_info->ctx.is_scheduled);
 
 		dev_dbg(kbdev->dev, "Zap: Ctx %p scheduled=0", kctx);
 
@@ -2847,11 +2845,11 @@ void kbase_js_zap_context(struct kbase_context *kctx)
 		/* Case c: didn't evict, but it is scheduled - it's in the Run
 		 * Pool */
 		KBASE_TRACE_ADD(kbdev, JM_ZAP_SCHEDULED, kctx, NULL, 0u,
-						kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+						js_kctx_info->ctx.is_scheduled);
 		dev_dbg(kbdev->dev, "Zap: Ctx %p is in RunPool", kctx);
 
 		/* Disable the ctx from submitting any more jobs */
-		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
 
 		kbasep_js_clear_submit_allowed(js_devdata, kctx);
 
@@ -2872,7 +2870,7 @@ void kbase_js_zap_context(struct kbase_context *kctx)
 		for (js = 0; js < kbdev->gpu_props.num_job_slots; js++)
 			kbase_job_slot_hardstop(kctx, js, NULL);
 
-		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
 		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
 		mutex_unlock(&js_devdata->queue_mutex);
 		mutex_unlock(&kctx->jctx.lock);
@@ -2944,7 +2942,7 @@ static void kbase_js_foreach_ctx_job(struct kbase_context *kctx,
 	kbdev = kctx->kbdev;
 	js_devdata = &kbdev->js_data;
 
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
 
 	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_POLICY_FOREACH_CTX_JOBS, kctx, NULL,
 					0u, trace_get_refcnt(kbdev, kctx));
@@ -2953,5 +2951,5 @@ static void kbase_js_foreach_ctx_job(struct kbase_context *kctx,
 	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++)
 		jsctx_queue_foreach(kctx, js, callback);
 
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
 }
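
For reference, a minimal sketch (not part of the patch) of the address-space transaction locking these hunks restore; the helper name and the struct kbase_as parameter are assumptions based on the transaction_mutex references elsewhere in this patch.

#include <mali_kbase.h>	/* assumed top-level kbase driver header */

/*
 * Sketch only: the per-AS transaction_mutex is taken around address-space
 * programming with the runpool_irq spinlock nested inside it, and both are
 * dropped in reverse order (see the "Transaction complete" hunk above).
 */
static void example_as_transaction(struct kbase_device *kbdev,
				   struct kbase_as *new_address_space)
{
	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
	unsigned long flags;

	mutex_lock(&new_address_space->transaction_mutex);
	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);

	/* ... program the address space / update runpool bookkeeping ... */

	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
	mutex_unlock(&new_address_space->transaction_mutex);
}
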
diff --git a/drivers/gpu/arm/midgard/mali_kbase_js.h b/drivers/gpu/arm/midgard/mali_kbase_js.h
index 8969222c3389..66b213293016 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_js.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_js.h
@@ -27,7 +27,6 @@
 
 #include "mali_kbase_js_defs.h"
 #include "mali_kbase_js_policy.h"
-#include "mali_kbase_context.h"
 #include "mali_kbase_defs.h"
 #include "mali_kbase_debug.h"
 
@@ -152,7 +151,8 @@ void kbasep_js_kctx_term(struct kbase_context *kctx);
  *
  * The following locking conditions are made on the caller:
  * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex.
- * - it must \em not hold hwaccess_lock (as this will be obtained internally)
+ * - it must \em not hold kbasep_js_device_data::runpool_irq::lock (as this will be
+ * obtained internally)
  * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
  * obtained internally)
  * - it must \em not hold kbasep_jd_device_data::queue_mutex (again, it's used internally).
@@ -213,8 +213,8 @@ void kbasep_js_remove_job(struct kbase_device *kbdev, struct kbase_context *kctx
  *
  * The following locking conditions are made on the caller:
  * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex.
- * - it must \em not hold the hwaccess_lock, (as this will be obtained
- *   internally)
+ * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, (as this will be
+ * obtained internally)
  * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this could be
  * obtained internally)
  *
@@ -233,7 +233,8 @@ bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev,
  * @note This function can safely be called from IRQ context.
  *
  * The following locking conditions are made on the caller:
- * - it must \em not hold the hwaccess_lock, because it will be used internally.
+ * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, because
+ * it will be used internally.
  *
  * @return value != false if the retain succeeded, and the context will not be scheduled out.
  * @return false if the retain failed (because the context is being/has been scheduled out).
@@ -247,7 +248,7 @@ bool kbasep_js_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_conte
  * @note This function can safely be called from IRQ context.
  *
  * The following locks must be held by the caller:
- * - hwaccess_lock
+ * - kbasep_js_device_data::runpool_irq::lock
  *
  * @return value != false if the retain succeeded, and the context will not be scheduled out.
  * @return false if the retain failed (because the context is being/has been scheduled out).
@@ -265,9 +266,9 @@ bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev, struct kbas
  * @note This function can safely be called from IRQ context.
  *
  * The following locking conditions are made on the caller:
- * - it must \em not hold the hwaccess_lock, because it will be used internally.
- *   If the hwaccess_lock is already held, then the caller should use
- *   kbasep_js_runpool_lookup_ctx_nolock() instead.
+ * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, because
+ * it will be used internally. If the runpool_irq::lock is already held, then
+ * the caller should use kbasep_js_runpool_lookup_ctx_nolock() instead.
  *
  * @return a valid struct kbase_context on success, which has been refcounted as being busy.
  * @return NULL on failure, indicating that no context was found in \a as_nr
@@ -287,7 +288,7 @@ struct kbase_context *kbasep_js_runpool_lookup_ctx(struct kbase_device *kbdev, i
  * Note: This function can safely be called from IRQ context.
  *
  * The following locking conditions are made on the caller:
- * - it must the hold the hwaccess_lock
+ * - it must hold the kbasep_js_device_data::runpool_irq::lock.
  *
  * Return: a valid struct kbase_context on success, which has been refcounted as
  *         being busy.
@@ -361,12 +362,12 @@ void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev, struct kb
  * scheduled, or that already has a zero refcount.
  *
  * The following locking conditions are made on the caller:
- * - it must \em not hold the hwaccess_lock, because it will be used internally.
+ * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, because
+ * it will be used internally.
  * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex.
  * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
  * obtained internally)
- * - it must \em not hold the kbase_device::mmu_hw_mutex (as this will be
- * obtained internally)
+ * - it must \em not hold the kbase_device::as[n].transaction_mutex (as this
+ * will be obtained internally)
  * - it must \em not hold kbasep_jd_device_data::queue_mutex (as this will be
  * obtained internally)
  *
@@ -410,11 +411,11 @@ void kbasep_js_runpool_release_ctx_nolock(struct kbase_device *kbdev,
  * kbasep_js_release_privileged_ctx is called).
  *
  * The following locking conditions are made on the caller:
- * - it must \em not hold the hwaccess_lock, because it will be used internally.
+ * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, because
+ * it will be used internally.
  * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
  * obtained internally)
- * - it must \em not hold the kbase_device::mmu_hw_mutex (as this will be
- * obtained internally)
+ * - it must \em not hold the kbase_device::as[n].transaction_mutex (as this
+ * will be obtained internally)
  * - it must \em not hold kbasep_jd_device_data::queue_mutex (again, it's used internally).
  * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex, because it will
  * be used internally.
@@ -428,12 +429,12 @@ void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev, struct kbase_
  * See kbasep_js_runpool_release_ctx for potential side effects.
  *
  * The following locking conditions are made on the caller:
- * - it must \em not hold the hwaccess_lock, because it will be used internally.
+ * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, because
+ * it will be used internally.
  * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex.
  * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
  * obtained internally)
- * - it must \em not hold the kbase_device::mmu_hw_mutex (as this will be
- * obtained internally)
+ * - it must \em not hold the kbase_device::as[n].transaction_mutex (as this
+ * will be obtained internally)
  *
  */
 void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
@@ -443,7 +444,7 @@ void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev, struct kbase_c
  *
  * The following locks may be used:
  * - kbasep_js_device_data::runpool_mutex
- * - hwaccess_lock
+ * - kbasep_js_device_data::runpool_irq::lock
  */
 void kbase_js_try_run_jobs(struct kbase_device *kbdev);
 
@@ -563,10 +564,8 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx,
  *
  * @param[in] katom         Pointer to the atom to complete
  * @param[in] end_timestamp The time that the atom completed (may be NULL)
- *
- * Return: Atom that has now been unblocked and can now be run, or NULL if none
  */
-struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom,
+void kbase_js_complete_atom(struct kbase_jd_atom *katom,
 		ktime_t *end_timestamp);
 
 /**
@@ -631,7 +630,7 @@ void kbase_js_set_timeouts(struct kbase_device *kbdev);
  *
  * As with any bool, never test the return value with true.
  *
- * The caller must hold hwaccess_lock.
+ * The caller must hold kbasep_js_device_data::runpool_irq::lock.
  */
 static inline bool kbasep_js_is_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx)
 {
@@ -639,7 +638,7 @@ static inline bool kbasep_js_is_submit_allowed(struct kbasep_js_device_data *js_
 
 	/* Ensure context really is scheduled in */
 	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
-	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+	KBASE_DEBUG_ASSERT(kctx->jctx.sched_info.ctx.is_scheduled);
 
 	test_bit = (u16) (1u << kctx->as_nr);
 
@@ -652,7 +651,7 @@ static inline bool kbasep_js_is_submit_allowed(struct kbasep_js_device_data *js_
  * The purpose of this abstraction is to hide the underlying data size, and wrap up
  * the long repeated line of code.
  *
- * The caller must hold hwaccess_lock.
+ * The caller must hold kbasep_js_device_data::runpool_irq::lock.
  */
 static inline void kbasep_js_set_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx)
 {
@@ -660,7 +659,7 @@ static inline void kbasep_js_set_submit_allowed(struct kbasep_js_device_data *js
 
 	/* Ensure context really is scheduled in */
 	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
-	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+	KBASE_DEBUG_ASSERT(kctx->jctx.sched_info.ctx.is_scheduled);
 
 	set_bit = (u16) (1u << kctx->as_nr);
 
@@ -675,7 +674,7 @@ static inline void kbasep_js_set_submit_allowed(struct kbasep_js_device_data *js
  * The purpose of this abstraction is to hide the underlying data size, and wrap up
  * the long repeated line of code.
  *
- * The caller must hold hwaccess_lock.
+ * The caller must hold kbasep_js_device_data::runpool_irq::lock.
  */
 static inline void kbasep_js_clear_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx)
 {
@@ -684,7 +683,7 @@ static inline void kbasep_js_clear_submit_allowed(struct kbasep_js_device_data *
 
 	/* Ensure context really is scheduled in */
 	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
-	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+	KBASE_DEBUG_ASSERT(kctx->jctx.sched_info.ctx.is_scheduled);
 
 	clear_bit = (u16) (1u << kctx->as_nr);
 	clear_mask = ~clear_bit;
@@ -793,7 +792,7 @@ static inline bool kbasep_js_get_atom_retry_submit_slot(const struct kbasep_js_a
 /**
  * Debug Check the refcount of a context. Only use within ASSERTs
  *
- * Obtains hwaccess_lock
+ * Obtains kbasep_js_device_data::runpool_irq::lock
  *
  * @return negative value if the context is not scheduled in
  * @return current refcount of the context if it is scheduled in. The refcount
@@ -810,12 +809,12 @@ static inline int kbasep_js_debug_check_ctx_refcount(struct kbase_device *kbdev,
 	KBASE_DEBUG_ASSERT(kctx != NULL);
 	js_devdata = &kbdev->js_data;
 
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
 	as_nr = kctx->as_nr;
 	if (as_nr != KBASEP_AS_NR_INVALID)
 		result = js_devdata->runpool_irq.per_as_data[as_nr].as_busy_refcount;
 
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
 
 	return result;
 }
@@ -830,7 +829,8 @@ static inline int kbasep_js_debug_check_ctx_refcount(struct kbase_device *kbdev,
  * when there is no ctx in \a as_nr (NULL returned).
  *
  * The following locking conditions are made on the caller:
- * - it must \em not hold the hwaccess_lock, because it will be used internally.
+ * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, because
+ * it will be used internally.
  *
  * @return a valid struct kbase_context on success, with a refcount that is guarenteed
  * to be non-zero and unmodified by this function.
@@ -848,12 +848,12 @@ static inline struct kbase_context *kbasep_js_runpool_lookup_ctx_noretain(struct
 	js_devdata = &kbdev->js_data;
 	js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
 
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
 
 	found_kctx = js_per_as_data->kctx;
 	KBASE_DEBUG_ASSERT(found_kctx == NULL || js_per_as_data->as_busy_refcount > 0);
 
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
 
 	return found_kctx;
 }
@@ -948,7 +948,7 @@ static inline void kbase_js_runpool_inc_context_count(
 	KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running < S8_MAX);
 	++(js_devdata->nr_all_contexts_running);
 
-	if (!kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) {
+	if ((js_kctx_info->ctx.flags & KBASE_CTX_FLAG_SUBMIT_DISABLED) == 0) {
 		/* Track contexts that can submit jobs */
 		KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running <
 									S8_MAX);
@@ -981,7 +981,7 @@ static inline void kbase_js_runpool_dec_context_count(
 	--(js_devdata->nr_all_contexts_running);
 	KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running >= 0);
 
-	if (!kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) {
+	if ((js_kctx_info->ctx.flags & KBASE_CTX_FLAG_SUBMIT_DISABLED) == 0) {
 		/* Track contexts that can submit jobs */
 		--(js_devdata->nr_user_contexts_running);
 		KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running >= 0);
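
A minimal sketch of the submit_allowed bitvector the inline helpers above operate on; the function and values are illustrative only.

#include <linux/types.h>

/*
 * Sketch: bit N of the u16 gates job submission for the context bound to
 * address space N, mirroring kbasep_js_{is,set,clear}_submit_allowed().
 */
static bool example_submit_allowed_bits(void)
{
	u16 submit_allowed = 0;
	int as_nr = 3;				/* hypothetical address space */

	submit_allowed |= (u16)(1u << as_nr);	/* allow submission on AS 3 */
	submit_allowed &= (u16)~(1u << as_nr);	/* disallow it again */

	return (submit_allowed & (u16)(1u << as_nr)) != 0;	/* false here */
}
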
diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c b/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c
index 455b661a1176..e6e611b9f415 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -51,9 +51,9 @@ static bool kbasep_js_ctx_attr_runpool_retain_attr(struct kbase_device *kbdev, s
 	js_kctx_info = &kctx->jctx.sched_info;
 
 	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
-	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
 
-	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled != false);
 
 	if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, attribute) != false) {
 		KBASE_DEBUG_ASSERT(js_devdata->runpool_irq.ctx_attr_ref_count[attribute] < S8_MAX);
@@ -97,8 +97,8 @@ static bool kbasep_js_ctx_attr_runpool_release_attr(struct kbase_device *kbdev,
 	js_kctx_info = &kctx->jctx.sched_info;
 
 	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
-	lockdep_assert_held(&kbdev->hwaccess_lock);
-	KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED));
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled != false);
 
 	if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, attribute) != false) {
 		KBASE_DEBUG_ASSERT(js_devdata->runpool_irq.ctx_attr_ref_count[attribute] > 0);
@@ -136,13 +136,13 @@ static bool kbasep_js_ctx_attr_ctx_retain_attr(struct kbase_device *kbdev, struc
 	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
 	js_kctx_info = &kctx->jctx.sched_info;
 
-	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
 	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
 	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.ctx_attr_ref_count[attribute] < U32_MAX);
 
 	++(js_kctx_info->ctx.ctx_attr_ref_count[attribute]);
 
-	if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) && js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) {
+	if (js_kctx_info->ctx.is_scheduled != false && js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) {
 		/* Only ref-count the attribute on the runpool for the first time this contexts sees this attribute */
 		KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_ON_CTX, kctx, NULL, 0u, attribute);
 		runpool_state_changed = kbasep_js_ctx_attr_runpool_retain_attr(kbdev, kctx, attribute);
@@ -176,8 +176,8 @@ static bool kbasep_js_ctx_attr_ctx_release_attr(struct kbase_device *kbdev, stru
 	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
 	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.ctx_attr_ref_count[attribute] > 0);
 
-	if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) && js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) {
-		lockdep_assert_held(&kbdev->hwaccess_lock);
+	if (js_kctx_info->ctx.is_scheduled != false && js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) {
+		lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
 		/* Only de-ref-count the attribute on the runpool when this is the last ctx-reference to it */
 		runpool_state_changed = kbasep_js_ctx_attr_runpool_release_attr(kbdev, kctx, attribute);
 		KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_OFF_CTX, kctx, NULL, 0u, attribute);
@@ -202,7 +202,7 @@ void kbasep_js_ctx_attr_set_initial_attrs(struct kbase_device *kbdev, struct kba
 	KBASE_DEBUG_ASSERT(kctx != NULL);
 	js_kctx_info = &kctx->jctx.sched_info;
 
-	if (kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) {
+	if ((js_kctx_info->ctx.flags & KBASE_CTX_FLAG_SUBMIT_DISABLED) != false) {
 		/* This context never submits, so don't track any scheduling attributes */
 		return;
 	}
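
A minimal sketch of the caller-side lock ordering these attribute hunks assert via lockdep; the helper name is hypothetical.

#include <mali_kbase.h>	/* assumed top-level kbase driver header */

/*
 * Sketch: jsctx_mutex is taken outermost, with the runpool_irq spinlock
 * nested inside it, matching the lockdep_assert_held() pairs restored above.
 */
static void example_attr_locking(struct kbase_device *kbdev,
				 struct kbase_context *kctx)
{
	struct kbasep_js_kctx_info *js_kctx_info = &kctx->jctx.sched_info;
	unsigned long flags;

	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
	spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags);

	/* ... retain or release context attributes here ... */

	spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags);
	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
}
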
diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_defs.h b/drivers/gpu/arm/midgard/mali_kbase_js_defs.h
index e6a9d41b6a08..e1342045b394 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_js_defs.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_js_defs.h
@@ -44,6 +44,17 @@ struct kbase_device;
 struct kbase_jd_atom;
 
 
+/* Types used by the policies must go here */
+enum {
+	/** Context will not submit any jobs */
+	KBASE_CTX_FLAG_SUBMIT_DISABLED = (1u << 0),
+
+	/** Set if the context uses an address space and should be kept scheduled in */
+	KBASE_CTX_FLAG_PRIVILEGED = (1u << 1)
+
+	    /* NOTE: Add flags for other things, such as 'is scheduled', and 'is dying' */
+};
+
 typedef u32 kbase_context_flags;
 
 struct kbasep_atom_req {
@@ -174,8 +185,9 @@ typedef u32 kbasep_js_atom_done_code;
 /**
  * Data used by the scheduler that is unique for each Address Space.
  *
- * This is used in IRQ context and hwaccess_lock must be held whilst accessing
- * this data (inculding reads and atomic decisions based on the read).
+ * This is used in IRQ context and kbasep_js_device_data::runpool_irq::lock
+ * must be held whilst accessing this data (including reads and atomic
+ * decisions based on the read).
  */
 struct kbasep_js_per_as_data {
 	/**
@@ -206,9 +218,27 @@ struct kbasep_js_per_as_data {
  * to remove masking).
  */
 struct kbasep_js_device_data {
-	/* Sub-structure to collect together Job Scheduling data used in IRQ
-	 * context. The hwaccess_lock must be held when accessing. */
+	/** Sub-structure to collect together Job Scheduling data used in IRQ context */
 	struct runpool_irq {
+		/**
+		 * Lock for accessing Job Scheduling data used in IRQ context
+		 *
+		 * This lock must be held whenever this data is accessed (read, or
+		 * write). Even for read-only access, memory barriers would be needed.
+		 * In any case, it is likely that decisions based on only reading must
+		 * also be atomic with respect to data held here and elsewhere in the
+		 * Job Scheduler.
+		 *
+		 * This lock must also be held for accessing:
+		 * - kbase_context::as_nr
+		 * - kbase_device::jm_slots
+		 * - Parts of the kbasep_js_policy, dependent on the policy (refer to
+		 * the policy in question for more information)
+		 * - Parts of kbasep_js_policy_ctx_info, dependent on the policy (refer to
+		 * the policy in question for more information)
+		 */
+		spinlock_t lock;
+
 		/** Bitvector indicating whether a currently scheduled context is allowed to submit jobs.
 		 * When bit 'N' is set in this, it indicates whether the context bound to address space
 		 * 'N' (per_as_data[N].kctx) is allowed to submit jobs.
@@ -353,8 +383,8 @@ struct kbasep_js_kctx_info {
 	 * Runpool substructure. This must only be accessed whilst the Run Pool
 	 * mutex ( kbasep_js_device_data::runpool_mutex ) is held.
 	 *
-	 * In addition, the hwaccess_lock may need to be held for certain
-	 * sub-members.
+	 * In addition, the kbasep_js_device_data::runpool_irq::lock may need to be
+	 * held for certain sub-members.
 	 *
 	 * @note some of the members could be moved into struct kbasep_js_device_data for
 	 * improved d-cache/tlb efficiency.
@@ -388,11 +418,21 @@ struct kbasep_js_kctx_info {
 		 * the context. **/
 		u32 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT];
 
+		kbase_context_flags flags;
+		/* NOTE: Unify the following flags into kbase_context_flags */
 		/**
-		 * Wait queue to wait for KCTX_SHEDULED flag state changes.
+		 * Is the context scheduled on the Run Pool?
+		 *
+		 * This is only ever updated whilst the jsctx_mutex is held.
+		 */
+		bool is_scheduled;
+		/**
+		 * Wait queue to wait for is_scheduled state changes.
 		 * */
 		wait_queue_head_t is_scheduled_wait;
 
+		bool is_dying;			/**< Is the context in the process of being evicted? */
+
 		/** Link implementing JS queues. Context can be present on one
 		 * list per job slot
 		 */
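
A minimal sketch of how the restored flag bits and booleans above are reached from a context; the helper name is hypothetical and the locking comment is an assumption drawn from the field documentation.

#include <mali_kbase.h>	/* assumed top-level kbase driver header */

/*
 * Sketch: privileged and submit-disabled state are bits in the
 * kbase_context_flags bitmask, while is_scheduled/is_dying are separate
 * booleans, all reached through kctx->jctx.sched_info.ctx.
 */
static bool example_ctx_is_privileged(struct kbase_context *kctx)
{
	struct kbasep_js_kctx_info *js_kctx_info = &kctx->jctx.sched_info;

	/* caller is assumed to hold jsctx_mutex, per the field docs above */
	return (js_kctx_info->ctx.flags & KBASE_CTX_FLAG_PRIVILEGED) != 0;
}
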
diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_policy.h b/drivers/gpu/arm/midgard/mali_kbase_js_policy.h
index d1f3a0abea1f..debd0117d45a 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_js_policy.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_js_policy.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -531,7 +531,7 @@ void kbasep_js_policy_foreach_ctx_job(union kbasep_js_policy *js_policy, struct
  * The locking conditions on the caller are as follows:
  * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
  * - it will be holding kbasep_js_device_data::runpool_mutex.
- * - it will be holding hwaccess_lock (a spinlock)
+ * - it will be holding kbasep_js_device_data::runpool_irq::lock (a spinlock)
  *
  * Due to a spinlock being held, this function must not call any APIs that sleep.
  */
@@ -548,7 +548,7 @@ void kbasep_js_policy_runpool_add_ctx(union kbasep_js_policy *js_policy, struct
  * The locking conditions on the caller are as follows:
  * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
  * - it will be holding kbasep_js_device_data::runpool_mutex.
- * - it will be holding hwaccess_lock (a spinlock)
+ * - it will be holding kbasep_js_device_data::runpool_irq::lock (a spinlock)
  *
  * Due to a spinlock being held, this function must not call any APIs that sleep.
  */
@@ -558,7 +558,7 @@ void kbasep_js_policy_runpool_remove_ctx(union kbasep_js_policy *js_policy, stru
  * @brief Indicate whether a context should be removed from the Run Pool
  * (should be scheduled out).
  *
- * The hwaccess_lock will be held by the caller.
+ * The kbasep_js_device_data::runpool_irq::lock will be held by the caller.
  *
  * @note This API is called from IRQ context.
  */
@@ -715,7 +715,7 @@ bool kbasep_js_policy_dequeue_job(struct kbase_device *kbdev, int job_slot_idx,
  * the policy that the job should be run again at some point later.
  *
  * The caller has the following conditions on locking:
- * - hwaccess_lock (a spinlock) will be held.
+ * - kbasep_js_device_data::runpool_irq::lock (a spinlock) will be held.
  * - kbasep_js_device_data::runpool_mutex will be held.
  * - kbasep_js_kctx_info::ctx::jsctx_mutex will be held.
  */
@@ -746,7 +746,7 @@ void kbasep_js_policy_enqueue_job(union kbasep_js_policy *js_policy, struct kbas
  * @note This API is called from IRQ context.
  *
  * The caller has the following conditions on locking:
- * - hwaccess_lock will be held.
+ * - kbasep_js_device_data::runpool_irq::lock will be held.
  *
  * @param js_policy     job scheduler policy
  * @param katom         job dispatch atom
diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.c b/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.c
index 1ac05693b62c..90c13458ec7c 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.c
@@ -149,9 +149,9 @@ static inline int kbasep_js_policy_trace_get_refcnt(struct kbase_device *kbdev,
 
 	js_devdata = &kbdev->js_data;
 
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
 	refcnt = kbasep_js_policy_trace_get_refcnt_nolock(kbdev, kctx);
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
 
 	return refcnt;
 }
@@ -223,7 +223,7 @@ int kbasep_js_policy_init_ctx(struct kbase_device *kbdev, struct kbase_context *
 	 * head_runtime_us occur strictly after this context is initialized */
 	mutex_lock(&js_devdata->queue_mutex);
 
-	/* No need to hold the the hwaccess_lock here, because we're initializing
+	/* No need to hold the runpool_irq.lock here, because we're initializing
 	 * the value, and the context is definitely not being updated in the
 	 * runpool at this point. The queue_mutex ensures the memory barrier. */
 	ctx_info->runtime_us = policy_info->head_runtime_us + priority_weight(ctx_info, (u64) js_devdata->cfs_ctx_runtime_init_slices * (u64) (js_devdata->ctx_timeslice_ns / 1000u));
diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.h b/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.h
index 0a8454c033d8..b457d8215abe 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -54,10 +54,10 @@ struct kbasep_js_policy_cfs {
  *                     after ctx init
  * @process_priority:  calling process NICE priority, in the range -20..19
  *
- * hwaccess_lock must be held when updating @runtime_us. Initializing will occur
- * on context init and context enqueue (which can only occur in one thread at a
- * time), but multi-thread access only occurs while the context is in the
- * runpool.
+ * &kbasep_js_device_data.runpool_irq.lock must be held when updating
+ * @runtime_us. Initializing will occur on context init and context enqueue
+ * (which can only occur in one thread at a time), but multi-thread access only
+ * occurs while the context is in the runpool.
  *
  * Reads are possible without the spinlock, but an older value might be read if
  * no memory barries are issued beforehand.
@@ -72,7 +72,7 @@ struct kbasep_js_policy_cfs_ctx {
  * struct kbasep_js_policy_cfs_job - per job information for CFS
  * @ticks: number of ticks that this job has been executing for
  *
- * hwaccess_lock must be held when accessing @ticks.
+ * &kbasep_js_device_data.runpool_irq.lock must be held when accessing @ticks.
  */
 struct kbasep_js_policy_cfs_job {
 	u32 ticks;
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem.c b/drivers/gpu/arm/midgard/mali_kbase_mem.c
index 4824b31673d3..c1851caa95a0 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_mem.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem.c
@@ -485,7 +485,7 @@ int kbase_region_tracker_init(struct kbase_context *kctx)
 #endif
 
 #ifdef CONFIG_64BIT
-	if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+	if (kctx->is_compat)
 		same_va_bits = 32;
 	else if (kbase_hw_has_feature(kctx->kbdev, BASE_HW_FEATURE_33BIT_VA))
 		same_va_bits = 33;
@@ -509,7 +509,7 @@ int kbase_region_tracker_init(struct kbase_context *kctx)
 
 #ifdef CONFIG_64BIT
 	/* 32-bit clients have exec and custom VA zones */
-	if (kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+	if (kctx->is_compat) {
 #endif
 		if (gpu_va_limit <= KBASE_REG_ZONE_CUSTOM_VA_BASE) {
 			err = -EINVAL;
@@ -573,7 +573,7 @@ int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages)
 	 * Nothing to do for 32-bit clients, JIT uses the existing
 	 * custom VA zone.
 	 */
-	if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+	if (kctx->is_compat)
 		return 0;
 
 #if defined(CONFIG_ARM64)
@@ -1730,7 +1730,7 @@ static int kbase_jit_debugfs_phys_get(struct kbase_jit_debugfs_data *data)
 KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_phys_fops,
 		kbase_jit_debugfs_phys_get);
 
-void kbase_jit_debugfs_init(struct kbase_context *kctx)
+void kbase_jit_debugfs_add(struct kbase_context *kctx)
 {
 	/* Debugfs entry for getting the number of JIT allocations. */
 	debugfs_create_file("mem_jit_count", S_IRUGO, kctx->kctx_dentry,
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem.h b/drivers/gpu/arm/midgard/mali_kbase_mem.h
index 8953c852e765..7b2433e868bd 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_mem.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem.h
@@ -400,8 +400,7 @@ static inline int kbase_reg_prepare_native(struct kbase_va_region *reg,
 		return -ENOMEM;
 	reg->cpu_alloc->imported.kctx = kctx;
 	INIT_LIST_HEAD(&reg->cpu_alloc->evict_node);
-	if (kbase_ctx_flag(kctx, KCTX_INFINITE_CACHE)
-	    && (reg->flags & KBASE_REG_CPU_CACHED)) {
+	if (kctx->infinite_cache_active && (reg->flags & KBASE_REG_CPU_CACHED)) {
 		reg->gpu_alloc = kbase_alloc_create(reg->nr_pages,
 				KBASE_MEM_TYPE_NATIVE);
 		reg->gpu_alloc->imported.kctx = kctx;
@@ -488,7 +487,7 @@ void kbase_mem_pool_term(struct kbase_mem_pool *pool);
  * 1. If there are free pages in the pool, allocate a page from @pool.
  * 2. Otherwise, if @next_pool is not NULL and has free pages, allocate a page
  *    from @next_pool.
- * 3. Return NULL if no memory in the pool
+ * 3. Finally, allocate a page from the kernel.
  *
  * Return: Pointer to allocated page, or NULL if allocation failed.
  */
@@ -573,39 +572,19 @@ static inline size_t kbase_mem_pool_max_size(struct kbase_mem_pool *pool)
  */
 void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size);
 
-/**
- * kbase_mem_pool_grow - Grow the pool
- * @pool:       Memory pool to grow
- * @nr_to_grow: Number of pages to add to the pool
- *
- * Adds @nr_to_grow pages to the pool. Note that this may cause the pool to
- * become larger than the maximum size specified.
- *
- * Returns: 0 on success, -ENOMEM if unable to allocate sufficent pages
- */
-int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow);
-
 /**
  * kbase_mem_pool_trim - Grow or shrink the pool to a new size
  * @pool:     Memory pool to trim
  * @new_size: New number of pages in the pool
  *
  * If @new_size > @cur_size, fill the pool with new pages from the kernel, but
- * not above the max_size for the pool.
+ * not above @max_size.
  * If @new_size < @cur_size, shrink the pool by freeing pages to the kernel.
- */
-void kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size);
-
-/*
- * kbase_mem_alloc_page - Allocate a new page for a device
- * @kbdev: The kbase device
  *
- * Most uses should use kbase_mem_pool_alloc to allocate a page. However that
- * function can fail in the event the pool is empty.
- *
- * Return: A new page or NULL if no memory
+ * Return: The new size of the pool
  */
-struct page *kbase_mem_alloc_page(struct kbase_device *kbdev);
+size_t kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size);
+
 
 int kbase_region_tracker_init(struct kbase_context *kctx);
 int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages);
@@ -668,8 +647,8 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg);
 
 /**
  * The caller has the following locking conditions:
- * - It must hold kbase_device->mmu_hw_mutex
- * - It must hold the hwaccess_lock
+ * - It must hold kbase_as::transaction_mutex on kctx's address space
+ * - It must hold the kbasep_js_device_data::runpool_irq::lock
  */
 void kbase_mmu_update(struct kbase_context *kctx);
 
@@ -681,8 +660,8 @@ void kbase_mmu_update(struct kbase_context *kctx);
  * data from provided kbase context from the GPU caches.
  *
  * The caller has the following locking conditions:
- * - It must hold kbase_device->mmu_hw_mutex
- * - It must hold the hwaccess_lock
+ * - It must hold kbase_as::transaction_mutex on kctx's address space
+ * - It must hold the kbasep_js_device_data::runpool_irq::lock
  */
 void kbase_mmu_disable(struct kbase_context *kctx);
 
@@ -695,7 +674,7 @@ void kbase_mmu_disable(struct kbase_context *kctx);
  * This function must only be called during reset/power-up and it used to
  * ensure the registers are in a known state.
  *
- * The caller must hold kbdev->mmu_hw_mutex.
+ * The caller must hold kbdev->as[as_nr].transaction_mutex.
  */
 void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr);
 
@@ -915,10 +894,10 @@ void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle,
 
 #ifdef CONFIG_DEBUG_FS
 /**
- * kbase_jit_debugfs_init - Add per context debugfs entry for JIT.
+ * kbase_jit_debugfs_add - Add per context debugfs entry for JIT.
  * @kctx: kbase context
  */
-void kbase_jit_debugfs_init(struct kbase_context *kctx);
+void kbase_jit_debugfs_add(struct kbase_context *kctx);
 #endif /* CONFIG_DEBUG_FS */
 
 /**
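
A minimal caller sketch against the restored pool API above, assuming the documented trim semantics; the helper name is hypothetical.

#include <mali_kbase_mem.h>	/* declarations shown above */

/*
 * Sketch: kbase_mem_pool_trim() reports the size the pool actually reached;
 * growth stops at the pool's maximum size or on allocation failure.
 */
static void example_resize_pool(struct kbase_mem_pool *pool, size_t target)
{
	size_t reached = kbase_mem_pool_trim(pool, target);

	if (reached < target)
		pr_debug("mem pool stopped at %zu of %zu pages\n",
			 reached, target);
}
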
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c
index b6dac5586a7a..f91d3c916355 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c
@@ -30,15 +30,13 @@
 #include <linux/fs.h>
 #include <linux/version.h>
 #include <linux/dma-mapping.h>
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) && \
-	(LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0))
-#include <linux/dma-attrs.h>
-#endif /* LINUX_VERSION_CODE >= 3.5.0 && < 4.8.0 */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+	#include <linux/dma-attrs.h>
+#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)  */
 #ifdef CONFIG_DMA_SHARED_BUFFER
 #include <linux/dma-buf.h>
 #endif				/* defined(CONFIG_DMA_SHARED_BUFFER) */
 #include <linux/shrinker.h>
-#include <linux/cache.h>
 
 #include <mali_kbase.h>
 #include <mali_kbase_mem_linux.h>
@@ -114,7 +112,7 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages
 		goto bad_size;
 
 #if defined(CONFIG_64BIT)
-	if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+	if (kctx->is_compat)
 		cpu_va_bits = 32;
 #endif
 
@@ -210,19 +208,18 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages
 		}
 
 		/*
-		 * 10.1-10.4 UKU userland relies on the kernel to call mmap.
-		 * For all other versions we can just return the cookie
+		 * Pre-10.1 UKU userland calls mmap for us so return the
+		 * unaligned address and skip the map.
 		 */
-		if (kctx->api_version < KBASE_API_VERSION(10, 1) ||
-		    kctx->api_version > KBASE_API_VERSION(10, 4)) {
+		if (kctx->api_version < KBASE_API_VERSION(10, 1)) {
 			*gpu_va = (u64) cookie;
 			return reg;
 		}
 
 		/*
-		 * To achieve alignment and avoid allocating on large alignment
-		 * (to work around a GPU hardware issue) we must allocate 3
-		 * times the required size.
+		 * GPUCORE-2190:
+		 *
+		 * We still need to return alignment for old userspace.
 		 */
 		if (*va_alignment)
 			va_map += 3 * (1UL << *va_alignment);
@@ -236,10 +233,8 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages
 				MAP_SHARED, cookie);
 
 		if (IS_ERR_VALUE(cpu_addr)) {
-			kbase_gpu_vm_lock(kctx);
 			kctx->pending_regions[cookie_nr] = NULL;
 			kctx->cookies |= (1UL << cookie_nr);
-			kbase_gpu_vm_unlock(kctx);
 			goto no_mmap;
 		}
 
@@ -1041,7 +1036,7 @@ static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx, in
 		shared_zone = true;
 
 #ifdef CONFIG_64BIT
-	if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+	if (!kctx->is_compat) {
 		/*
 		 * 64-bit tasks require us to reserve VA on the CPU that we use
 		 * on the GPU.
@@ -1138,7 +1133,7 @@ static struct kbase_va_region *kbase_mem_from_user_buffer(
 		shared_zone = true;
 
 #ifdef CONFIG_64BIT
-	if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+	if (!kctx->is_compat) {
 		/*
 		 * 64-bit tasks require us to reserve VA on the CPU that we use
 		 * on the GPU.
@@ -1271,7 +1266,7 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride,
 	*num_pages = nents * stride;
 
 #ifdef CONFIG_64BIT
-	if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+	if (!kctx->is_compat) {
 		/* 64-bit tasks must MMAP anyway, but not expose this address to
 		 * clients */
 		*flags |= BASE_MEM_NEED_MMAP;
@@ -1363,7 +1358,7 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride,
 	}
 
 #ifdef CONFIG_64BIT
-	if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+	if (!kctx->is_compat) {
 		/* Bind to a cookie */
 		if (!kctx->cookies) {
 			dev_err(kctx->kbdev->dev, "No cookies available for allocation!");
@@ -1416,32 +1411,6 @@ bad_flags:
 	return 0;
 }
 
-static u32 kbase_get_cache_line_alignment(struct kbase_context *kctx)
-{
-	u32 cpu_cache_line_size = cache_line_size();
-	u32 gpu_cache_line_size =
-		(1UL << kctx->kbdev->gpu_props.props.l2_props.log2_line_size);
-
-	return ((cpu_cache_line_size > gpu_cache_line_size) ?
-				cpu_cache_line_size :
-				gpu_cache_line_size);
-}
-
-static int kbase_check_buffer_size(struct kbase_context *kctx, u64 size)
-{
-	u32 cache_line_align = kbase_get_cache_line_alignment(kctx);
-
-	return (size & (cache_line_align - 1)) == 0 ? 0 : -EINVAL;
-}
-
-static int kbase_check_buffer_cache_alignment(struct kbase_context *kctx,
-					void __user *ptr)
-{
-	u32 cache_line_align = kbase_get_cache_line_alignment(kctx);
-
-	return ((uintptr_t)ptr & (cache_line_align - 1)) == 0 ? 0 : -EINVAL;
-}
-
 int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type,
 		void __user *phandle, u64 *gpu_va, u64 *va_pages,
 		u64 *flags)
@@ -1454,7 +1423,7 @@ int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type,
 	KBASE_DEBUG_ASSERT(flags);
 
 #ifdef CONFIG_64BIT
-	if (!kbase_ctx_flag(kctx, KCTX_COMPAT))
+	if (!kctx->is_compat)
 		*flags |= BASE_MEM_SAME_VA;
 #endif
 
@@ -1497,26 +1466,12 @@ int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type,
 			reg = NULL;
 		} else {
 #ifdef CONFIG_COMPAT
-			if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+			if (kctx->is_compat)
 				uptr = compat_ptr(user_buffer.ptr.compat_value);
 			else
 #endif
 				uptr = user_buffer.ptr.value;
 
-			if (0 != kbase_check_buffer_cache_alignment(kctx,
-									uptr)) {
-				dev_warn(kctx->kbdev->dev,
-				"User buffer is not cache line aligned!\n");
-				goto no_reg;
-			}
-
-			if (0 != kbase_check_buffer_size(kctx,
-					user_buffer.length)) {
-				dev_warn(kctx->kbdev->dev,
-				"User buffer size is not multiple of cache line size!\n");
-				goto no_reg;
-			}
-
 			reg = kbase_mem_from_user_buffer(kctx,
 					(unsigned long)uptr, user_buffer.length,
 					va_pages, flags);
@@ -2191,6 +2146,44 @@ void kbase_os_mem_map_unlock(struct kbase_context *kctx)
 	up_read(&mm->mmap_sem);
 }
 
+#if defined(CONFIG_DMA_SHARED_BUFFER) && defined(CONFIG_MALI_TRACE_TIMELINE)
+/* This section is required only for instrumentation. */
+
+static void kbase_dma_buf_vm_open(struct vm_area_struct *vma)
+{
+	struct kbase_cpu_mapping *map = vma->vm_private_data;
+
+	KBASE_DEBUG_ASSERT(map);
+	KBASE_DEBUG_ASSERT(map->count > 0);
+	/* Non-atomic as we're under Linux's mm lock. */
+	map->count++;
+}
+
+static void kbase_dma_buf_vm_close(struct vm_area_struct *vma)
+{
+	struct kbase_cpu_mapping *map = vma->vm_private_data;
+
+	KBASE_DEBUG_ASSERT(map);
+	KBASE_DEBUG_ASSERT(map->count > 0);
+
+	/* Non-atomic as we're under Linux's mm lock. */
+	if (--map->count)
+		return;
+
+	KBASE_DEBUG_ASSERT(map->kctx);
+
+	kbase_gpu_vm_lock(map->kctx);
+	list_del(&map->mappings_list);
+	kbase_gpu_vm_unlock(map->kctx);
+	kfree(map);
+}
+
+static const struct vm_operations_struct kbase_dma_mmap_ops = {
+	.open  = kbase_dma_buf_vm_open,
+	.close = kbase_dma_buf_vm_close,
+};
+#endif /* CONFIG_DMA_SHARED_BUFFER && CONFIG_MALI_TRACE_TIMELINE */
+
 int kbase_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct kbase_context *kctx = file->private_data;
@@ -2409,6 +2402,35 @@ map:
 #ifdef CONFIG_DMA_SHARED_BUFFER
 dma_map:
 	err = dma_buf_mmap(reg->cpu_alloc->imported.umm.dma_buf, vma, vma->vm_pgoff - reg->start_pfn);
+#if defined(CONFIG_MALI_TRACE_TIMELINE)
+	/* This section is required only for instrumentation. */
+	/* Add created mapping to imported region mapping list.
+	 * It is important to make it visible to dumping infrastructure.
+	 * Add mapping only if vm_ops structure is not used by memory owner. */
+	WARN_ON(vma->vm_ops);
+	WARN_ON(vma->vm_private_data);
+	if (!err && !vma->vm_ops && !vma->vm_private_data) {
+		struct kbase_cpu_mapping *map = kzalloc(
+			sizeof(*map),
+			GFP_KERNEL);
+
+		if (map) {
+			map->kctx     = reg->kctx;
+			map->region   = NULL;
+			map->page_off = vma->vm_pgoff;
+			map->vm_start = vma->vm_start;
+			map->vm_end   = vma->vm_end;
+			map->count    = 1; /* start with one ref */
+
+			vma->vm_ops          = &kbase_dma_mmap_ops;
+			vma->vm_private_data = map;
+
+			list_add(
+				&map->mappings_list,
+				&reg->cpu_alloc->mappings);
+		}
+	}
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
 #endif /* CONFIG_DMA_SHARED_BUFFER */
 out_unlock:
 	kbase_gpu_vm_unlock(kctx);
@@ -2697,9 +2719,7 @@ void *kbase_va_alloc(struct kbase_context *kctx, u32 size, struct kbase_hwc_dma_
 	dma_addr_t  dma_pa;
 	struct kbase_va_region *reg;
 	phys_addr_t *page_array;
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0))
-	unsigned long attrs = DMA_ATTR_WRITE_COMBINE;
-#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
 	DEFINE_DMA_ATTRS(attrs);
 #endif
 
@@ -2715,13 +2735,9 @@ void *kbase_va_alloc(struct kbase_context *kctx, u32 size, struct kbase_hwc_dma_
 		goto err;
 
 	/* All the alloc calls return zeroed memory */
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0))
-	va = dma_alloc_attrs(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL,
-			     attrs);
-#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
 	dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
-	va = dma_alloc_attrs(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL,
-			     &attrs);
+	va = dma_alloc_attrs(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL, &attrs);
 #else
 	va = dma_alloc_writecombine(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL);
 #endif
@@ -2768,9 +2784,7 @@ no_mmap:
 no_alloc:
 	kfree(reg);
 no_reg:
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0))
-	dma_free_attrs(kctx->kbdev->dev, size, va, dma_pa, attrs);
-#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
 	dma_free_attrs(kctx->kbdev->dev, size, va, dma_pa, &attrs);
 #else
 	dma_free_writecombine(kctx->kbdev->dev, size, va, dma_pa);
@@ -2784,8 +2798,7 @@ void kbase_va_free(struct kbase_context *kctx, struct kbase_hwc_dma_mapping *han
 {
 	struct kbase_va_region *reg;
 	int err;
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) && \
-	(LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0))
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
 	DEFINE_DMA_ATTRS(attrs);
 #endif
 
@@ -2803,10 +2816,7 @@ void kbase_va_free(struct kbase_context *kctx, struct kbase_hwc_dma_mapping *han
 	kbase_mem_phy_alloc_put(reg->gpu_alloc);
 	kfree(reg);
 
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0))
-	dma_free_attrs(kctx->kbdev->dev, handle->size,
-		       handle->cpu_va, handle->dma_pa, DMA_ATTR_WRITE_COMBINE);
-#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
 	dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
 	dma_free_attrs(kctx->kbdev->dev, handle->size,
 			handle->cpu_va, handle->dma_pa, &attrs);
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c b/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c
index 9a3f9b571d96..957061893b00 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c
@@ -175,11 +175,11 @@ static void kbase_mem_pool_spill(struct kbase_mem_pool *next_pool,
 	kbase_mem_pool_add(next_pool, p);
 }
 
-struct page *kbase_mem_alloc_page(struct kbase_device *kbdev)
+static struct page *kbase_mem_pool_alloc_page(struct kbase_mem_pool *pool)
 {
 	struct page *p;
 	gfp_t gfp;
-	struct device *dev = kbdev->dev;
+	struct device *dev = pool->kbdev->dev;
 	dma_addr_t dma_addr;
 
 #if defined(CONFIG_ARM) && !defined(CONFIG_HAVE_DMA_ATTRS) && \
@@ -210,6 +210,8 @@ struct page *kbase_mem_alloc_page(struct kbase_device *kbdev)
 
 	kbase_set_dma_addr(p, dma_addr);
 
+	pool_dbg(pool, "alloced page from kernel\n");
+
 	return p;
 }
 
@@ -254,35 +256,36 @@ static size_t kbase_mem_pool_shrink(struct kbase_mem_pool *pool,
 	return nr_freed;
 }
 
-int kbase_mem_pool_grow(struct kbase_mem_pool *pool,
+static size_t kbase_mem_pool_grow(struct kbase_mem_pool *pool,
 		size_t nr_to_grow)
 {
 	struct page *p;
 	size_t i;
 
-	for (i = 0; i < nr_to_grow; i++) {
-		p = kbase_mem_alloc_page(pool->kbdev);
+	for (i = 0; i < nr_to_grow && !kbase_mem_pool_is_full(pool); i++) {
+		p = kbase_mem_pool_alloc_page(pool);
 		if (!p)
-			return -ENOMEM;
+			break;
 		kbase_mem_pool_add(pool, p);
 	}
 
-	return 0;
+	return i;
 }
 
-void kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size)
+size_t kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size)
 {
 	size_t cur_size;
 
 	cur_size = kbase_mem_pool_size(pool);
 
-	if (new_size > pool->max_size)
-		new_size = pool->max_size;
-
 	if (new_size < cur_size)
 		kbase_mem_pool_shrink(pool, cur_size - new_size);
 	else if (new_size > cur_size)
 		kbase_mem_pool_grow(pool, new_size - cur_size);
+
+	cur_size = kbase_mem_pool_size(pool);
+
+	return cur_size;
 }
 
 void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size)
@@ -425,17 +428,21 @@ struct page *kbase_mem_pool_alloc(struct kbase_mem_pool *pool)
 {
 	struct page *p;
 
-	do {
-		pool_dbg(pool, "alloc()\n");
-		p = kbase_mem_pool_remove(pool);
+	pool_dbg(pool, "alloc()\n");
+
+	p = kbase_mem_pool_remove(pool);
 
-		if (p)
-			return p;
+	if (!p && pool->next_pool) {
+		/* Allocate via next pool */
+		return kbase_mem_pool_alloc(pool->next_pool);
+	}
 
-		pool = pool->next_pool;
-	} while (pool);
+	if (!p) {
+		/* Get page from kernel */
+		p = kbase_mem_pool_alloc_page(pool);
+	}
 
-	return NULL;
+	return p;
 }
 
 void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *p,
@@ -492,7 +499,7 @@ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_pages,
 
 	/* Get any remaining pages from kernel */
 	for (; i < nr_pages; i++) {
-		p = kbase_mem_alloc_page(pool->kbdev);
+		p = kbase_mem_pool_alloc_page(pool);
 		if (!p)
 			goto err_rollback;
 		pages[i] = page_to_phys(p);
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c b/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c
index 585fba036c9e..493665b7e607 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -68,7 +68,7 @@ DEFINE_SIMPLE_ATTRIBUTE(kbase_mem_pool_debugfs_max_size_fops,
 		kbase_mem_pool_debugfs_max_size_set,
 		"%llu\n");
 
-void kbase_mem_pool_debugfs_init(struct dentry *parent,
+void kbase_mem_pool_debugfs_add(struct dentry *parent,
 		struct kbase_mem_pool *pool)
 {
 	debugfs_create_file("mem_pool_size", S_IRUGO | S_IWUSR, parent,
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h b/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h
index 1442854e8956..458f3f09e697 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -21,7 +21,7 @@
 #include <mali_kbase.h>
 
 /**
- * kbase_mem_pool_debugfs_init - add debugfs knobs for @pool
+ * kbase_mem_pool_debugfs_add - add debugfs knobs for @pool
  * @parent: Parent debugfs dentry
  * @pool:   Memory pool to control
  *
@@ -29,7 +29,7 @@
  * - mem_pool_size: get/set the current size of @pool
  * - mem_pool_max_size: get/set the max size of @pool
  */
-void kbase_mem_pool_debugfs_init(struct dentry *parent,
+void kbase_mem_pool_debugfs_add(struct dentry *parent,
 		struct kbase_mem_pool *pool);
 
 #endif  /*_KBASE_MEM_POOL_DEBUGFS_H*/
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c
index 092da9a96ea5..03594102f7ef 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c
@@ -67,27 +67,26 @@ int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data,
 	mutex_lock(&kctx->mem_profile_lock);
 
 	dev_dbg(kctx->kbdev->dev, "initialised: %d",
-		kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED));
+				kctx->mem_profile_initialized);
 
-	if (!kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)) {
+	if (!kctx->mem_profile_initialized) {
 		if (!debugfs_create_file("mem_profile", S_IRUGO,
 					kctx->kctx_dentry, kctx,
 					&kbasep_mem_profile_debugfs_fops)) {
 			err = -EAGAIN;
 		} else {
-			kbase_ctx_flag_set(kctx,
-					   KCTX_MEM_PROFILE_INITIALIZED);
+			kctx->mem_profile_initialized = true;
 		}
 	}
 
-	if (kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)) {
+	if (kctx->mem_profile_initialized) {
 		kfree(kctx->mem_profile_data);
 		kctx->mem_profile_data = data;
 		kctx->mem_profile_size = size;
 	}
 
 	dev_dbg(kctx->kbdev->dev, "returning: %d, initialised: %d",
-		err, kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED));
+				err, kctx->mem_profile_initialized);
 
 	mutex_unlock(&kctx->mem_profile_lock);
 
@@ -99,7 +98,7 @@ void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx)
 	mutex_lock(&kctx->mem_profile_lock);
 
 	dev_dbg(kctx->kbdev->dev, "initialised: %d",
-				kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED));
+				kctx->mem_profile_initialized);
 
 	kfree(kctx->mem_profile_data);
 	kctx->mem_profile_data = NULL;
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mmu.c b/drivers/gpu/arm/midgard/mali_kbase_mmu.c
index 5c1b9c3b84c4..48d53723a9b4 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_mmu.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_mmu.c
@@ -41,7 +41,6 @@
 #include <mali_kbase_mmu_hw.h>
 #include <mali_kbase_hwaccess_jm.h>
 #include <mali_kbase_time.h>
-#include <mali_kbase_mem.h>
 
 #define KBASE_MMU_PAGE_ENTRIES 512
 
@@ -140,16 +139,6 @@ void page_fault_worker(struct work_struct *data)
 
 	KBASE_DEBUG_ASSERT(kctx->kbdev == kbdev);
 
-	if (unlikely(faulting_as->protected_mode))
-	{
-		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
-				"Protected mode fault");
-		kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
-				KBASE_MMU_FAULT_TYPE_PAGE);
-
-		goto fault_done;
-	}
-
 	fault_status = faulting_as->fault_status;
 	switch (fault_status & AS_FAULTSTATUS_EXCEPTION_CODE_MASK) {
 
@@ -164,7 +153,7 @@ void page_fault_worker(struct work_struct *data)
 
 	case AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT:
 		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
-				"Translation table bus fault");
+				"Translation table bus fault");
 		goto fault_done;
 
 	case AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG:
@@ -230,8 +219,6 @@ void page_fault_worker(struct work_struct *data)
 				region->start_pfn +
 				kbase_reg_current_backed_size(region));
 
-		mutex_lock(&kbdev->mmu_hw_mutex);
-
 		kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
 				KBASE_MMU_FAULT_TYPE_PAGE);
 		/* [1] in case another page fault occurred while we were
@@ -244,9 +231,6 @@ void page_fault_worker(struct work_struct *data)
 		 */
 		kbase_mmu_hw_do_operation(kbdev, faulting_as, NULL, 0, 0,
 				AS_COMMAND_UNLOCK, 1);
-
-		mutex_unlock(&kbdev->mmu_hw_mutex);
-
 		kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
 				KBASE_MMU_FAULT_TYPE_PAGE);
 		kbase_gpu_vm_unlock(kctx);
@@ -265,17 +249,12 @@ void page_fault_worker(struct work_struct *data)
 				kbase_reg_current_backed_size(region);
 
 	if (0 == new_pages) {
-		mutex_lock(&kbdev->mmu_hw_mutex);
-
 		/* Duplicate of a fault we've already handled, nothing to do */
 		kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
 				KBASE_MMU_FAULT_TYPE_PAGE);
 		/* See comment [1] about UNLOCK usage */
 		kbase_mmu_hw_do_operation(kbdev, faulting_as, NULL, 0, 0,
 				AS_COMMAND_UNLOCK, 1);
-
-		mutex_unlock(&kbdev->mmu_hw_mutex);
-
 		kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
 				KBASE_MMU_FAULT_TYPE_PAGE);
 		kbase_gpu_vm_unlock(kctx);
@@ -335,7 +314,7 @@ void page_fault_worker(struct work_struct *data)
 		kbase_tlstream_aux_pagefault(kctx->id, (u64)new_pages);
 
 		/* AS transaction begin */
-		mutex_lock(&kbdev->mmu_hw_mutex);
+		mutex_lock(&faulting_as->transaction_mutex);
 
 		/* flush L2 and unlock the VA (resumes the MMU) */
 		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6367))
@@ -359,7 +338,7 @@ void page_fault_worker(struct work_struct *data)
 					  new_pages,
 					  op, 1);
 
-		mutex_unlock(&kbdev->mmu_hw_mutex);
+		mutex_unlock(&faulting_as->transaction_mutex);
 		/* AS transaction end */
 
 		/* reenable this in the mask */
@@ -427,17 +406,14 @@ sub_pages:
 
 KBASE_EXPORT_TEST_API(kbase_mmu_alloc_pgd);
 
-/* Given PGD PFN for level N, return PGD PFN for level N+1, allocating the
- * new table from the pool if needed and possible
- */
-static int mmu_get_next_pgd(struct kbase_context *kctx,
-		phys_addr_t *pgd, u64 vpfn, int level)
+/* Given PGD PFN for level N, return PGD PFN for level N+1 */
+static phys_addr_t mmu_get_next_pgd(struct kbase_context *kctx, phys_addr_t pgd, u64 vpfn, int level)
 {
 	u64 *page;
 	phys_addr_t target_pgd;
 	struct page *p;
 
-	KBASE_DEBUG_ASSERT(*pgd);
+	KBASE_DEBUG_ASSERT(pgd);
 	KBASE_DEBUG_ASSERT(NULL != kctx);
 
 	lockdep_assert_held(&kctx->mmu_lock);
@@ -449,11 +425,11 @@ static int mmu_get_next_pgd(struct kbase_context *kctx,
 	vpfn >>= (3 - level) * 9;
 	vpfn &= 0x1FF;
 
-	p = pfn_to_page(PFN_DOWN(*pgd));
+	p = pfn_to_page(PFN_DOWN(pgd));
 	page = kmap(p);
 	if (NULL == page) {
 		dev_warn(kctx->kbdev->dev, "mmu_get_next_pgd: kmap failure\n");
-		return -EINVAL;
+		return 0;
 	}
 
 	target_pgd = kctx->kbdev->mmu_mode->pte_to_phy_addr(page[vpfn]);
@@ -461,9 +437,9 @@ static int mmu_get_next_pgd(struct kbase_context *kctx,
 	if (!target_pgd) {
 		target_pgd = kbase_mmu_alloc_pgd(kctx);
 		if (!target_pgd) {
-			dev_dbg(kctx->kbdev->dev, "mmu_get_next_pgd: kbase_mmu_alloc_pgd failure\n");
+			dev_warn(kctx->kbdev->dev, "mmu_get_next_pgd: kbase_mmu_alloc_pgd failure\n");
 			kunmap(p);
-			return -ENOMEM;
+			return 0;
 		}
 
 		kctx->kbdev->mmu_mode->entry_set_pte(&page[vpfn], target_pgd);
@@ -473,13 +449,10 @@ static int mmu_get_next_pgd(struct kbase_context *kctx,
 	}
 
 	kunmap(p);
-	*pgd = target_pgd;
-
-	return 0;
+	return target_pgd;
 }
 
-static int mmu_get_bottom_pgd(struct kbase_context *kctx,
-		u64 vpfn, phys_addr_t *out_pgd)
+static phys_addr_t mmu_get_bottom_pgd(struct kbase_context *kctx, u64 vpfn)
 {
 	phys_addr_t pgd;
 	int l;
@@ -488,17 +461,15 @@ static int mmu_get_bottom_pgd(struct kbase_context *kctx,
 
 	pgd = kctx->pgd;
 	for (l = MIDGARD_MMU_TOPLEVEL; l < MIDGARD_MMU_BOTTOMLEVEL; l++) {
-		int err = mmu_get_next_pgd(kctx, &pgd, vpfn, l);
+		pgd = mmu_get_next_pgd(kctx, pgd, vpfn, l);
 		/* Handle failure condition */
-		if (err) {
-			dev_dbg(kctx->kbdev->dev, "mmu_get_bottom_pgd: mmu_get_next_pgd failure\n");
-			return err;
+		if (!pgd) {
+			dev_warn(kctx->kbdev->dev, "mmu_get_bottom_pgd: mmu_get_next_pgd failure\n");
+			return 0;
 		}
 	}
 
-	*out_pgd = pgd;
-
-	return 0;
+	return pgd;
 }
 
 static phys_addr_t mmu_insert_pages_recover_get_next_pgd(struct kbase_context *kctx, phys_addr_t pgd, u64 vpfn, int level)
@@ -637,19 +608,8 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
 		 * On the other hand, it's only a gain when we map more than
 		 * 256 pages at once (on average). Do we really care?
 		 */
-		do {
-			err = mmu_get_bottom_pgd(kctx, vpfn, &pgd);
-			if (err != -ENOMEM)
-				break;
-			/* Fill the memory pool with enough pages for
-			 * the page walk to succeed
-			 */
-			mutex_unlock(&kctx->mmu_lock);
-			err = kbase_mem_pool_grow(&kctx->mem_pool,
-					MIDGARD_MMU_BOTTOMLEVEL);
-			mutex_lock(&kctx->mmu_lock);
-		} while (!err);
-		if (err) {
+		pgd = mmu_get_bottom_pgd(kctx, vpfn);
+		if (!pgd) {
 			dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: mmu_get_bottom_pgd failure\n");
 			if (recover_required) {
 				/* Invalidate the pages we have partially
@@ -658,6 +618,7 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
 								  recover_vpfn,
 								  recover_count);
 			}
+			err = -EINVAL;
 			goto fail_unlock;
 		}
 
@@ -749,19 +710,8 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
 		 * On the other hand, it's only a gain when we map more than
 		 * 256 pages at once (on average). Do we really care?
 		 */
-		do {
-			err = mmu_get_bottom_pgd(kctx, vpfn, &pgd);
-			if (err != -ENOMEM)
-				break;
-			/* Fill the memory pool with enough pages for
-			 * the page walk to succeed
-			 */
-			mutex_unlock(&kctx->mmu_lock);
-			err = kbase_mem_pool_grow(&kctx->mem_pool,
-					MIDGARD_MMU_BOTTOMLEVEL);
-			mutex_lock(&kctx->mmu_lock);
-		} while (!err);
-		if (err) {
+		pgd = mmu_get_bottom_pgd(kctx, vpfn);
+		if (!pgd) {
 			dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: mmu_get_bottom_pgd failure\n");
 			if (recover_required) {
 				/* Invalidate the pages we have partially
@@ -770,6 +720,7 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
 								  recover_vpfn,
 								  recover_count);
 			}
+			err = -EINVAL;
 			goto fail_unlock;
 		}
 
@@ -918,7 +869,8 @@ static void kbase_mmu_flush_invalidate(struct kbase_context *kctx,
 			u32 op;
 
 			/* AS transaction begin */
-			mutex_lock(&kbdev->mmu_hw_mutex);
+			mutex_lock(&kbdev->as[
+					kctx->as_nr].transaction_mutex);
 
 			if (sync)
 				op = AS_COMMAND_FLUSH_MEM;
@@ -941,7 +893,8 @@ static void kbase_mmu_flush_invalidate(struct kbase_context *kctx,
 			}
 #endif /* KBASE_GPU_RESET_EN */
 
-			mutex_unlock(&kbdev->mmu_hw_mutex);
+			mutex_unlock(&kbdev->as[
+					kctx->as_nr].transaction_mutex);
 			/* AS transaction end */
 
 #ifndef CONFIG_MALI_NO_MALI
@@ -968,13 +921,13 @@ static void kbase_mmu_flush_invalidate(struct kbase_context *kctx,
 
 void kbase_mmu_update(struct kbase_context *kctx)
 {
-	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
-	lockdep_assert_held(&kctx->kbdev->mmu_hw_mutex);
+	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
 	/* ASSERT that the context has a valid as_nr, which is only the case
 	 * when it's scheduled in.
 	 *
-	 * as_nr won't change because the caller has the hwaccess_lock */
+	 * as_nr won't change because the caller has the runpool_irq lock */
 	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+	lockdep_assert_held(&kctx->kbdev->as[kctx->as_nr].transaction_mutex);
 
 	kctx->kbdev->mmu_mode->update(kctx);
 }
@@ -982,8 +935,8 @@ KBASE_EXPORT_TEST_API(kbase_mmu_update);
 
 void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr)
 {
-	lockdep_assert_held(&kbdev->hwaccess_lock);
-	lockdep_assert_held(&kbdev->mmu_hw_mutex);
+	lockdep_assert_held(&kbdev->as[as_nr].transaction_mutex);
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
 
 	kbdev->mmu_mode->disable_as(kbdev, as_nr);
 }
@@ -993,10 +946,11 @@ void kbase_mmu_disable(struct kbase_context *kctx)
 	/* ASSERT that the context has a valid as_nr, which is only the case
 	 * when it's scheduled in.
 	 *
-	 * as_nr won't change because the caller has the hwaccess_lock */
+	 * as_nr won't change because the caller has the runpool_irq lock */
 	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
 
-	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+	lockdep_assert_held(&kctx->kbdev->as[kctx->as_nr].transaction_mutex);
+	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
 
 	/*
 	 * The address space is being disabled, drain all knowledge of it out
@@ -1054,8 +1008,8 @@ int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr)
 		if (count > nr)
 			count = nr;
 
-		err = mmu_get_bottom_pgd(kctx, vpfn, &pgd);
-		if (err) {
+		pgd = mmu_get_bottom_pgd(kctx, vpfn);
+		if (!pgd) {
 			dev_warn(kbdev->dev, "kbase_mmu_teardown_pages: mmu_get_bottom_pgd failure\n");
 			err = -EINVAL;
 			goto fail_unlock;
@@ -1138,20 +1092,10 @@ int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, phys_addr_t *ph
 		if (count > nr)
 			count = nr;
 
-		do {
-			err = mmu_get_bottom_pgd(kctx, vpfn, &pgd);
-			if (err != -ENOMEM)
-				break;
-			/* Fill the memory pool with enough pages for
-			 * the page walk to succeed
-			 */
-			mutex_unlock(&kctx->mmu_lock);
-			err = kbase_mem_pool_grow(&kctx->mem_pool,
-					MIDGARD_MMU_BOTTOMLEVEL);
-			mutex_lock(&kctx->mmu_lock);
-		} while (!err);
-		if (err) {
+		pgd = mmu_get_bottom_pgd(kctx, vpfn);
+		if (!pgd) {
 			dev_warn(kctx->kbdev->dev, "mmu_get_bottom_pgd failure\n");
+			err = -EINVAL;
 			goto fail_unlock;
 		}
 
@@ -1372,13 +1316,13 @@ void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages)
 		return NULL;
 	}
 
+	mutex_lock(&kctx->mmu_lock);
+
 	size_left = nr_pages * PAGE_SIZE;
 
 	KBASE_DEBUG_ASSERT(0 != size_left);
 	kaddr = vmalloc_user(size_left);
 
-	mutex_lock(&kctx->mmu_lock);
-
 	if (kaddr) {
 		u64 end_marker = 0xFFULL;
 		char *buffer;
@@ -1463,18 +1407,6 @@ void bus_fault_worker(struct work_struct *data)
 		return;
 	}
 
-	if (unlikely(faulting_as->protected_mode))
-	{
-		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
-				"Permission failure");
-		kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
-				KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
-		kbasep_js_runpool_release_ctx(kbdev, kctx);
-		atomic_dec(&kbdev->faults_pending);
-		return;
-
-	}
-
 #if KBASE_GPU_RESET_EN
 	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) {
 		/* Due to H/W issue 8245 we need to reset the GPU after using UNMAPPED mode.
@@ -1491,14 +1423,15 @@ void bus_fault_worker(struct work_struct *data)
 
 		/* switch to UNMAPPED mode, will abort all jobs and stop any hw counter dumping */
 		/* AS transaction begin */
-		mutex_lock(&kbdev->mmu_hw_mutex);
+		mutex_lock(&kbdev->as[as_no].transaction_mutex);
 
 		/* Set the MMU into unmapped mode */
-		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags);
 		kbase_mmu_disable(kctx);
-		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock,
+				flags);
 
-		mutex_unlock(&kbdev->mmu_hw_mutex);
+		mutex_unlock(&kbdev->as[as_no].transaction_mutex);
 		/* AS transaction end */
 
 		kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
@@ -1733,7 +1666,7 @@ static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx,
 	dev_err(kbdev->dev,
 		"Unhandled Page fault in AS%d at VA 0x%016llX\n"
 		"Reason: %s\n"
-		"raw fault status: 0x%X\n"
+		"raw fault status 0x%X\n"
 		"decoded fault status: %s\n"
 		"exception type 0x%X: %s\n"
 		"access type 0x%X: %s\n"
@@ -1762,15 +1695,15 @@ static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx,
 
 	/* Stop the kctx from submitting more jobs and cause it to be scheduled
 	 * out/rescheduled - this will occur on releasing the context's refcount */
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
 	kbasep_js_clear_submit_allowed(js_devdata, kctx);
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
 
 	/* Kill any running jobs from the context. Submit is disallowed, so no more jobs from this
 	 * context can appear in the job slots from this point on */
 	kbase_backend_jm_kill_jobs_from_kctx(kctx);
 	/* AS transaction begin */
-	mutex_lock(&kbdev->mmu_hw_mutex);
+	mutex_lock(&as->transaction_mutex);
 #if KBASE_GPU_RESET_EN
 	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) {
 		/* Due to H/W issue 8245 we need to reset the GPU after using UNMAPPED mode.
@@ -1782,11 +1715,11 @@ static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx,
 	}
 #endif /* KBASE_GPU_RESET_EN */
 	/* switch to UNMAPPED mode, will abort all jobs and stop any hw counter dumping */
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
 	kbase_mmu_disable(kctx);
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
 
-	mutex_unlock(&kbdev->mmu_hw_mutex);
+	mutex_unlock(&as->transaction_mutex);
 	/* AS transaction end */
 	/* Clear down the fault */
 	kbase_mmu_hw_clear_fault(kbdev, as, kctx,
@@ -1823,20 +1756,20 @@ void kbasep_as_do_poke(struct work_struct *work)
 	kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as->number);
 
 	/* AS transaction begin */
-	mutex_lock(&kbdev->mmu_hw_mutex);
+	mutex_lock(&as->transaction_mutex);
 	/* Force a uTLB invalidate */
 	kbase_mmu_hw_do_operation(kbdev, as, kctx, 0, 0,
 				  AS_COMMAND_UNLOCK, 0);
-	mutex_unlock(&kbdev->mmu_hw_mutex);
+	mutex_unlock(&as->transaction_mutex);
 	/* AS transaction end */
 
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags);
 	if (as->poke_refcount &&
 		!(as->poke_state & KBASE_AS_POKE_STATE_KILLING_POKE)) {
 		/* Only queue up the timer if we need it, and we're not trying to kill it */
 		hrtimer_start(&as->poke_timer, HR_TIMER_DELAY_MSEC(5), HRTIMER_MODE_REL);
 	}
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags);
 }
 
 enum hrtimer_restart kbasep_as_poke_timer_callback(struct hrtimer *timer)
@@ -1860,7 +1793,7 @@ enum hrtimer_restart kbasep_as_poke_timer_callback(struct hrtimer *timer)
  * This must only be called on a context that's scheduled in, and an atom
  * that's running on the GPU.
  *
- * The caller must hold hwaccess_lock
+ * The caller must hold kbasep_js_device_data::runpool_irq::lock
  *
  * This can be called safely from atomic context
  */
@@ -1872,7 +1805,7 @@ void kbase_as_poking_timer_retain_atom(struct kbase_device *kbdev, struct kbase_
 	KBASE_DEBUG_ASSERT(kctx);
 	KBASE_DEBUG_ASSERT(katom);
 	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
-	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
 
 	if (katom->poking)
 		return;
@@ -1880,7 +1813,7 @@ void kbase_as_poking_timer_retain_atom(struct kbase_device *kbdev, struct kbase_
 	katom->poking = 1;
 
 	/* It's safe to work on the as/as_nr without an explicit reference,
-	 * because the caller holds the hwaccess_lock, and the atom itself
+	 * because the caller holds the runpool_irq lock, and the atom itself
 	 * was also running and had already taken a reference  */
 	as = &kbdev->as[kctx->as_nr];
 
@@ -1917,18 +1850,18 @@ void kbase_as_poking_timer_release_atom(struct kbase_device *kbdev, struct kbase
 
 	as = &kbdev->as[kctx->as_nr];
 
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags);
 	KBASE_DEBUG_ASSERT(as->poke_refcount > 0);
 	KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT);
 
 	if (--(as->poke_refcount) == 0) {
 		as->poke_state |= KBASE_AS_POKE_STATE_KILLING_POKE;
-		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags);
 
 		hrtimer_cancel(&as->poke_timer);
 		flush_workqueue(as->poke_wq);
 
-		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags);
 
 		/* Re-check whether it's still needed */
 		if (as->poke_refcount) {
@@ -1953,7 +1886,7 @@ void kbase_as_poking_timer_release_atom(struct kbase_device *kbdev, struct kbase
 			 * the context until new atoms are run */
 		}
 	}
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags);
 
 	katom->poking = 0;
 }
@@ -1962,7 +1895,7 @@ void kbase_mmu_interrupt_process(struct kbase_device *kbdev, struct kbase_contex
 {
 	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
 
-	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
 
 	if (!kctx) {
 		dev_warn(kbdev->dev, "%s in AS%d at 0x%016llx with no context present! Suprious IRQ or SW Design Error?\n",
diff --git a/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.c b/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.c
deleted file mode 100644
index c970650069cd..000000000000
--- a/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.c
+++ /dev/null
@@ -1,130 +0,0 @@
-/*
- *
- * (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
- *
- * A copy of the licence is included with the program, and can also be obtained
- * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
- * Boston, MA  02110-1301, USA.
- *
- */
-
-
-
-#include "mali_kbase.h"
-
-#include "mali_kbase_regs_history_debugfs.h"
-
-#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI)
-
-#include <linux/debugfs.h>
-
-
-static int regs_history_size_get(void *data, u64 *val)
-{
-	struct kbase_io_history *const h = data;
-
-	*val = h->size;
-
-	return 0;
-}
-
-static int regs_history_size_set(void *data, u64 val)
-{
-	struct kbase_io_history *const h = data;
-
-	return kbase_io_history_resize(h, (u16)val);
-}
-
-
-DEFINE_SIMPLE_ATTRIBUTE(regs_history_size_fops,
-		regs_history_size_get,
-		regs_history_size_set,
-		"%llu\n");
-
-
-/**
- * regs_history_show - show callback for the register access history file.
- *
- * @sfile: The debugfs entry
- * @data: Data associated with the entry
- *
- * This function is called to dump all recent accesses to the GPU registers.
- *
- * @return 0 if successfully prints data in debugfs entry file, failure
- * otherwise
- */
-static int regs_history_show(struct seq_file *sfile, void *data)
-{
-	struct kbase_io_history *const h = sfile->private;
-	u16 i;
-	size_t iters;
-	unsigned long flags;
-
-	if (!h->enabled) {
-		seq_puts(sfile, "The register access history is disabled\n");
-		goto out;
-	}
-
-	spin_lock_irqsave(&h->lock, flags);
-
-	iters = (h->size > h->count) ? h->count : h->size;
-	seq_printf(sfile, "Last %zu register accesses of %zu total:\n", iters,
-			h->count);
-	for (i = 0; i < iters; ++i) {
-		struct kbase_io_access *io =
-			&h->buf[(h->count - iters + i) % h->size];
-		char const access = (io->addr & 1) ? 'w' : 'r';
-
-		seq_printf(sfile, "%6i: %c: reg 0x%p val %08x\n", i, access,
-				(void *)(io->addr & ~0x1), io->value);
-	}
-
-	spin_unlock_irqrestore(&h->lock, flags);
-
-out:
-	return 0;
-}
-
-
-/**
- * regs_history_open - open operation for regs_history debugfs file
- *
- * @in: &struct inode pointer
- * @file: &struct file pointer
- *
- * @return file descriptor
- */
-static int regs_history_open(struct inode *in, struct file *file)
-{
-	return single_open(file, &regs_history_show, in->i_private);
-}
-
-
-static const struct file_operations regs_history_fops = {
-	.open = &regs_history_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = single_release,
-};
-
-
-void kbasep_regs_history_debugfs_init(struct kbase_device *kbdev)
-{
-	debugfs_create_bool("regs_history_enabled", S_IRUGO | S_IWUSR,
-			kbdev->mali_debugfs_directory,
-			&kbdev->io_history.enabled);
-	debugfs_create_file("regs_history_size", S_IRUGO | S_IWUSR,
-			kbdev->mali_debugfs_directory,
-			&kbdev->io_history, &regs_history_size_fops);
-	debugfs_create_file("regs_history", S_IRUGO,
-			kbdev->mali_debugfs_directory, &kbdev->io_history,
-			&regs_history_fops);
-}
-
-
-#endif /* CONFIG_DEBUG_FS */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.h b/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.h
deleted file mode 100644
index f10837002330..000000000000
--- a/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- *
- * (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
- *
- * A copy of the licence is included with the program, and can also be obtained
- * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
- * Boston, MA  02110-1301, USA.
- *
- */
-
-
-
-/**
- * Header file for register access history support via debugfs
- *
- * This interface is made available via /sys/kernel/debug/mali#/regs_history*.
- *
- * Usage:
- * - regs_history_enabled: whether recording of register accesses is enabled.
- *   Write 'y' to enable, 'n' to disable.
- * - regs_history_size: size of the register history buffer, must be > 0
- * - regs_history: return the information about last accesses to the registers.
- */
-
-#ifndef _KBASE_REGS_HISTORY_DEBUGFS_H
-#define _KBASE_REGS_HISTORY_DEBUGFS_H
-
-struct kbase_device;
-
-#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI)
-
-/**
- * kbasep_regs_history_debugfs_init - add debugfs entries for register history
- *
- * @kbdev: Pointer to kbase_device containing the register history
- */
-void kbasep_regs_history_debugfs_init(struct kbase_device *kbdev);
-
-#else /* CONFIG_DEBUG_FS */
-
-#define kbasep_regs_history_debugfs_init CSTD_NOP
-
-#endif /* CONFIG_DEBUG_FS */
-
-#endif  /*_KBASE_REGS_HISTORY_DEBUGFS_H*/
diff --git a/drivers/gpu/arm/midgard/mali_kbase_replay.c b/drivers/gpu/arm/midgard/mali_kbase_replay.c
index 84aa3316e435..d3a3dbfa5241 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_replay.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_replay.c
@@ -1098,6 +1098,7 @@ static bool kbase_replay_fault_check(struct kbase_jd_atom *katom)
 bool kbase_replay_process(struct kbase_jd_atom *katom)
 {
 	struct kbase_context *kctx = katom->kctx;
+	struct kbase_jd_context *jctx = &kctx->jctx;
 	struct kbase_device *kbdev = kctx->kbdev;
 
 	/* Don't replay this atom if these issues are not present in the
@@ -1121,7 +1122,7 @@ bool kbase_replay_process(struct kbase_jd_atom *katom)
 		return false;
 	}
 
-	if (kbase_ctx_flag(kctx, KCTX_DYING)) {
+	if (jctx->sched_info.ctx.is_dying) {
 		dev_dbg(kbdev->dev, "Not replaying; context is dying\n");
 
 		if (katom->retry_count)
diff --git a/drivers/gpu/arm/midgard/mali_kbase_softjobs.c b/drivers/gpu/arm/midgard/mali_kbase_softjobs.c
index 88b91a2efba2..07b862546f80 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_softjobs.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_softjobs.c
@@ -639,7 +639,8 @@ struct kbase_debug_copy_buffer {
 	struct page **pages;
 	int nr_pages;
 	size_t offset;
-	struct kbase_mem_phy_alloc *gpu_alloc;
+	/* To find memory region */
+	u64 gpu_addr;
 
 	struct page **extres_pages;
 	int nr_extres_pages;
@@ -676,7 +677,10 @@ static void kbase_debug_copy_finish(struct kbase_jd_atom *katom)
 	kbase_gpu_vm_lock(katom->kctx);
 	for (i = 0; i < nr; i++) {
 		int p;
-		struct kbase_mem_phy_alloc *gpu_alloc = buffers[i].gpu_alloc;
+		struct kbase_va_region *reg;
+
+		reg = kbase_region_tracker_find_region_enclosing_address(
+				katom->kctx, buffers[i].gpu_addr);
 
 		if (!buffers[i].pages)
 			break;
@@ -687,8 +691,8 @@ static void kbase_debug_copy_finish(struct kbase_jd_atom *katom)
 				put_page(pg);
 		}
 		kfree(buffers[i].pages);
-		if (gpu_alloc) {
-			switch (gpu_alloc->type) {
+		if (reg && reg->gpu_alloc) {
+			switch (reg->gpu_alloc->type) {
 			case KBASE_MEM_TYPE_IMPORTED_USER_BUF:
 			{
 				free_user_buffer(&buffers[i]);
@@ -698,7 +702,7 @@ static void kbase_debug_copy_finish(struct kbase_jd_atom *katom)
 				/* Nothing to be done. */
 				break;
 			}
-			kbase_mem_phy_alloc_put(gpu_alloc);
+			kbase_mem_phy_alloc_put(reg->gpu_alloc);
 		}
 	}
 	kbase_gpu_vm_unlock(katom->kctx);
@@ -786,20 +790,20 @@ static int kbase_debug_copy_prepare(struct kbase_jd_atom *katom)
 			goto out_cleanup;
 		}
 
+		buffers[i].gpu_addr = user_extres.ext_resource &
+			~BASE_EXT_RES_ACCESS_EXCLUSIVE;
 		kbase_gpu_vm_lock(katom->kctx);
 		reg = kbase_region_tracker_find_region_enclosing_address(
-				katom->kctx, user_extres.ext_resource &
-				~BASE_EXT_RES_ACCESS_EXCLUSIVE);
+				katom->kctx, buffers[i].gpu_addr);
 
-		if (NULL == reg || NULL == reg->gpu_alloc ||
+		if (NULL == reg || NULL == reg->cpu_alloc ||
 				(reg->flags & KBASE_REG_FREE)) {
 			ret = -EINVAL;
 			goto out_unlock;
 		}
+		kbase_mem_phy_alloc_get(reg->gpu_alloc);
 
-		buffers[i].gpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
 		buffers[i].nr_extres_pages = reg->nr_pages;
-
 		if (reg->nr_pages*PAGE_SIZE != buffers[i].size)
+			dev_warn(katom->kctx->kbdev->dev, "Copy buffer is not of the same size as the external resource to copy.\n");
 
@@ -905,22 +909,25 @@ static int kbase_mem_copy_from_extres(struct kbase_context *kctx,
 {
 	unsigned int i;
 	unsigned int target_page_nr = 0;
+	struct kbase_va_region *reg;
 	struct page **pages = buf_data->pages;
 	u64 offset = buf_data->offset;
 	size_t extres_size = buf_data->nr_extres_pages*PAGE_SIZE;
 	size_t to_copy = min(extres_size, buf_data->size);
-	struct kbase_mem_phy_alloc *gpu_alloc = buf_data->gpu_alloc;
 	int ret = 0;
 
 	KBASE_DEBUG_ASSERT(pages != NULL);
 
 	kbase_gpu_vm_lock(kctx);
-	if (!gpu_alloc) {
+	reg = kbase_region_tracker_find_region_enclosing_address(
+			kctx, buf_data->gpu_addr);
+
+	if (!reg) {
 		ret = -EINVAL;
 		goto out_unlock;
 	}
 
-	switch (gpu_alloc->type) {
+	switch (reg->gpu_alloc->type) {
 	case KBASE_MEM_TYPE_IMPORTED_USER_BUF:
 	{
 		for (i = 0; i < buf_data->nr_extres_pages; i++) {
@@ -943,14 +950,14 @@ static int kbase_mem_copy_from_extres(struct kbase_context *kctx,
 	break;
 #ifdef CONFIG_DMA_SHARED_BUFFER
 	case KBASE_MEM_TYPE_IMPORTED_UMM: {
-		struct dma_buf *dma_buf = gpu_alloc->imported.umm.dma_buf;
+		struct dma_buf *dma_buf = reg->gpu_alloc->imported.umm.dma_buf;
 
 		KBASE_DEBUG_ASSERT(dma_buf != NULL);
 		KBASE_DEBUG_ASSERT(dma_buf->size ==
 				   buf_data->nr_extres_pages * PAGE_SIZE);
 
 		ret = dma_buf_begin_cpu_access(dma_buf,
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) && !defined(CONFIG_CHROMEOS)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
 				0, buf_data->nr_extres_pages*PAGE_SIZE,
 #endif
 				DMA_FROM_DEVICE);
@@ -973,7 +980,7 @@ static int kbase_mem_copy_from_extres(struct kbase_context *kctx,
 				break;
 		}
 		dma_buf_end_cpu_access(dma_buf,
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) && !defined(CONFIG_CHROMEOS)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
 				0, buf_data->nr_extres_pages*PAGE_SIZE,
 #endif
 				DMA_FROM_DEVICE);
@@ -1322,11 +1329,9 @@ int kbase_process_soft_job(struct kbase_jd_atom *katom)
 		break;
 	}
 	case BASE_JD_REQ_SOFT_JIT_ALLOC:
-		return -EINVAL; /* Temporarily disabled */
 		kbase_jit_allocate_process(katom);
 		break;
 	case BASE_JD_REQ_SOFT_JIT_FREE:
-		return -EINVAL; /* Temporarily disabled */
 		kbase_jit_free_process(katom);
 		break;
 	case BASE_JD_REQ_SOFT_EXT_RES_MAP:
diff --git a/drivers/gpu/arm/midgard/mali_kbase_tlstream.c b/drivers/gpu/arm/midgard/mali_kbase_tlstream.c
index 4c1535fc0139..ad88b7b69018 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_tlstream.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_tlstream.c
@@ -142,13 +142,7 @@ enum tl_msg_id_obj {
 	KBASE_TL_NDEP_ATOM_ATOM,
 	KBASE_TL_RDEP_ATOM_ATOM,
 	KBASE_TL_ATTRIB_ATOM_CONFIG,
-	KBASE_TL_ATTRIB_ATOM_PRIORITY,
-	KBASE_TL_ATTRIB_ATOM_STATE,
-	KBASE_TL_ATTRIB_ATOM_PRIORITY_CHANGE,
 	KBASE_TL_ATTRIB_AS_CONFIG,
-	KBASE_TL_EVENT_LPU_SOFTSTOP,
-	KBASE_TL_EVENT_ATOM_SOFTSTOP_EX,
-	KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE,
 
 	/* Job dump specific events. */
 	KBASE_JD_GPU_SOFT_RESET
@@ -157,9 +151,11 @@ enum tl_msg_id_obj {
 /* Message ids of trace events that are recorded in the auxiliary stream. */
 enum tl_msg_id_aux {
 	KBASE_AUX_PM_STATE,
+	KBASE_AUX_ISSUE_JOB_SOFTSTOP,
+	KBASE_AUX_JOB_SOFTSTOP,
+	KBASE_AUX_JOB_SOFTSTOP_EX,
 	KBASE_AUX_PAGEFAULT,
-	KBASE_AUX_PAGESALLOC,
-	KBASE_AUX_DEVFREQ_TARGET
+	KBASE_AUX_PAGESALLOC
 };
 
 /*****************************************************************************/
@@ -433,27 +429,6 @@ static const struct tp_desc tp_desc_obj[] = {
 		"@pLLI",
 		"atom,descriptor,affinity,config"
 	},
-	{
-		KBASE_TL_ATTRIB_ATOM_PRIORITY,
-		__stringify(KBASE_TL_ATTRIB_ATOM_PRIORITY),
-		"atom priority",
-		"@pI",
-		"atom,prio"
-	},
-	{
-		KBASE_TL_ATTRIB_ATOM_STATE,
-		__stringify(KBASE_TL_ATTRIB_ATOM_STATE),
-		"atom state",
-		"@pI",
-		"atom,state"
-	},
-	{
-		KBASE_TL_ATTRIB_ATOM_PRIORITY_CHANGE,
-		__stringify(KBASE_TL_ATTRIB_ATOM_PRIORITY_CHANGE),
-		"atom caused priority change",
-		"@p",
-		"atom"
-	},
 	{
 		KBASE_TL_ATTRIB_AS_CONFIG,
 		__stringify(KBASE_TL_ATTRIB_AS_CONFIG),
@@ -461,27 +436,6 @@ static const struct tp_desc tp_desc_obj[] = {
 		"@pLLL",
 		"address_space,transtab,memattr,transcfg"
 	},
-	{
-		KBASE_TL_EVENT_LPU_SOFTSTOP,
-		__stringify(KBASE_TL_EVENT_LPU_SOFTSTOP),
-		"softstop event on given lpu",
-		"@p",
-		"lpu"
-	},
-	{
-		KBASE_TL_EVENT_ATOM_SOFTSTOP_EX,
-		__stringify(KBASE_TL_EVENT_ATOM_SOFTSTOP_EX),
-		"atom softstopped",
-		"@p",
-		"atom"
-	},
-	{
-		KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE,
-		__stringify(KBASE_TL_EVENT_SOFTSTOP_ISSUE),
-		"atom softstop issued",
-		"@p",
-		"atom"
-	},
 	{
 		KBASE_JD_GPU_SOFT_RESET,
 		__stringify(KBASE_JD_GPU_SOFT_RESET),
@@ -500,6 +454,27 @@ static const struct tp_desc tp_desc_aux[] = {
 		"@IL",
 		"core_type,core_state_bitset"
 	},
+	{
+		KBASE_AUX_ISSUE_JOB_SOFTSTOP,
+		__stringify(KBASE_AUX_ISSUE_JOB_SOFTSTOP),
+		"Issuing job soft stop",
+		"@p",
+		"atom"
+	},
+	{
+		KBASE_AUX_JOB_SOFTSTOP,
+		__stringify(KBASE_AUX_JOB_SOFTSTOP),
+		"Job soft stop",
+		"@I",
+		"tag_id"
+	},
+	{
+		KBASE_AUX_JOB_SOFTSTOP_EX,
+		__stringify(KBASE_AUX_JOB_SOFTSTOP_EX),
+		"Job soft stop, more details",
+		"@pI",
+		"atom,job_type"
+	},
 	{
 		KBASE_AUX_PAGEFAULT,
 		__stringify(KBASE_AUX_PAGEFAULT),
@@ -513,13 +488,6 @@ static const struct tp_desc tp_desc_aux[] = {
 		"Total alloc pages change",
 		"@IL",
 		"ctx_nr,page_cnt"
-	},
-	{
-		KBASE_AUX_DEVFREQ_TARGET,
-		__stringify(KBASE_AUX_DEVFREQ_TARGET),
-		"New device frequency target",
-		"@L",
-		"target_freq"
 	}
 };
 
@@ -1117,10 +1085,9 @@ static ssize_t kbasep_tlstream_read(
 	ssize_t copy_len = 0;
 
 	KBASE_DEBUG_ASSERT(filp);
+	KBASE_DEBUG_ASSERT(buffer);
 	KBASE_DEBUG_ASSERT(f_pos);
-
-	if (!buffer)
-		return -EINVAL;
+	CSTD_UNUSED(filp);
 
 	if ((0 > *f_pos) || (PACKET_SIZE > size))
 		return -EINVAL;
@@ -1348,11 +1315,9 @@ void kbase_tlstream_term(void)
 	}
 }
 
-int kbase_tlstream_acquire(struct kbase_context *kctx, int *fd, u32 flags)
+int kbase_tlstream_acquire(struct kbase_context *kctx, int *fd)
 {
-	u32 tlstream_enabled = TLSTREAM_ENABLED | flags;
-
-	if (0 == atomic_cmpxchg(&kbase_tlstream_enabled, 0, tlstream_enabled)) {
+	if (0 == atomic_cmpxchg(&kbase_tlstream_enabled, 0, 1)) {
 		int rcode;
 
 		*fd = anon_inode_getfd(
@@ -1616,8 +1581,8 @@ void __kbase_tlstream_tl_new_ctx(void *context, u32 nr, u32 tgid)
 void __kbase_tlstream_tl_new_atom(void *atom, u32 nr)
 {
 	const u32     msg_id = KBASE_TL_NEW_ATOM;
-	const size_t  msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(atom) +
-			sizeof(nr);
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(nr);
 	unsigned long flags;
 	char          *buffer;
 	size_t        pos = 0;
@@ -2046,79 +2011,6 @@ void __kbase_tlstream_tl_attrib_atom_config(
 	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
 }
 
-void __kbase_tlstream_tl_attrib_atom_priority(void *atom, u32 prio)
-{
-	const u32     msg_id = KBASE_TL_ATTRIB_ATOM_PRIORITY;
-	const size_t  msg_size =
-		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(prio);
-	unsigned long flags;
-	char          *buffer;
-	size_t        pos = 0;
-
-	buffer = kbasep_tlstream_msgbuf_acquire(
-			TL_STREAM_TYPE_OBJ,
-			msg_size, &flags);
-	KBASE_DEBUG_ASSERT(buffer);
-
-	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
-	pos = kbasep_tlstream_write_timestamp(buffer, pos);
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &atom, sizeof(atom));
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &prio, sizeof(prio));
-	KBASE_DEBUG_ASSERT(msg_size == pos);
-
-	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
-}
-
-void __kbase_tlstream_tl_attrib_atom_state(void *atom, u32 state)
-{
-	const u32     msg_id = KBASE_TL_ATTRIB_ATOM_STATE;
-	const size_t  msg_size =
-		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(state);
-	unsigned long flags;
-	char          *buffer;
-	size_t        pos = 0;
-
-	buffer = kbasep_tlstream_msgbuf_acquire(
-			TL_STREAM_TYPE_OBJ,
-			msg_size, &flags);
-	KBASE_DEBUG_ASSERT(buffer);
-
-	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
-	pos = kbasep_tlstream_write_timestamp(buffer, pos);
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &atom, sizeof(atom));
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &state, sizeof(state));
-	KBASE_DEBUG_ASSERT(msg_size == pos);
-
-	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
-}
-
-void __kbase_tlstream_tl_attrib_atom_priority_change(void *atom)
-{
-	const u32     msg_id = KBASE_TL_ATTRIB_ATOM_PRIORITY_CHANGE;
-	const size_t  msg_size =
-		sizeof(msg_id) + sizeof(u64) + sizeof(atom);
-	unsigned long flags;
-	char          *buffer;
-	size_t        pos = 0;
-
-	buffer = kbasep_tlstream_msgbuf_acquire(
-			TL_STREAM_TYPE_OBJ,
-			msg_size, &flags);
-	KBASE_DEBUG_ASSERT(buffer);
-
-	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
-	pos = kbasep_tlstream_write_timestamp(buffer, pos);
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &atom, sizeof(atom));
-	KBASE_DEBUG_ASSERT(msg_size == pos);
-
-	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
-}
-
 void __kbase_tlstream_tl_attrib_as_config(
 		void *as, u64 transtab, u64 memattr, u64 transcfg)
 {
@@ -2150,11 +2042,11 @@ void __kbase_tlstream_tl_attrib_as_config(
 	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
 }
 
-void __kbase_tlstream_tl_event_lpu_softstop(void *lpu)
+void __kbase_tlstream_jd_gpu_soft_reset(void *gpu)
 {
-	const u32     msg_id = KBASE_TL_EVENT_LPU_SOFTSTOP;
+	const u32     msg_id = KBASE_JD_GPU_SOFT_RESET;
 	const size_t  msg_size =
-		sizeof(msg_id) + sizeof(u64) + sizeof(lpu);
+		sizeof(msg_id) + sizeof(u64) + sizeof(gpu);
 	unsigned long flags;
 	char          *buffer;
 	size_t        pos = 0;
@@ -2167,108 +2059,141 @@ void __kbase_tlstream_tl_event_lpu_softstop(void *lpu)
 	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
 	pos = kbasep_tlstream_write_timestamp(buffer, pos);
 	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &lpu, sizeof(lpu));
+			buffer, pos, &gpu, sizeof(gpu));
 	KBASE_DEBUG_ASSERT(msg_size == pos);
 
 	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
 }
 
-void __kbase_tlstream_tl_event_atom_softstop_ex(void *atom)
+/*****************************************************************************/
+
+void __kbase_tlstream_aux_pm_state(u32 core_type, u64 state)
 {
-	const u32     msg_id = KBASE_TL_EVENT_ATOM_SOFTSTOP_EX;
+	const u32     msg_id = KBASE_AUX_PM_STATE;
 	const size_t  msg_size =
-		sizeof(msg_id) + sizeof(u64) + sizeof(atom);
+		sizeof(msg_id) + sizeof(u64) + sizeof(core_type) +
+		sizeof(state);
 	unsigned long flags;
 	char          *buffer;
 	size_t        pos = 0;
 
 	buffer = kbasep_tlstream_msgbuf_acquire(
-			TL_STREAM_TYPE_OBJ,
+			TL_STREAM_TYPE_AUX,
 			msg_size, &flags);
 	KBASE_DEBUG_ASSERT(buffer);
 
 	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
 	pos = kbasep_tlstream_write_timestamp(buffer, pos);
 	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &atom, sizeof(atom));
+			buffer, pos, &core_type, sizeof(core_type));
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &state, sizeof(state));
 	KBASE_DEBUG_ASSERT(msg_size == pos);
 
-	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
 }
 
-void __kbase_tlstream_tl_event_atom_softstop_issue(void *atom)
+void __kbase_tlstream_aux_issue_job_softstop(void *katom)
 {
-	const u32     msg_id = KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE;
+	const u32     msg_id = KBASE_AUX_ISSUE_JOB_SOFTSTOP;
 	const size_t  msg_size =
-		sizeof(msg_id) + sizeof(u64) + sizeof(atom);
+		sizeof(msg_id) + sizeof(u64) + sizeof(katom);
 	unsigned long flags;
 	char          *buffer;
 	size_t        pos = 0;
 
 	buffer = kbasep_tlstream_msgbuf_acquire(
-			TL_STREAM_TYPE_OBJ,
-			msg_size, &flags);
+			TL_STREAM_TYPE_AUX, msg_size, &flags);
 	KBASE_DEBUG_ASSERT(buffer);
 
 	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
 	pos = kbasep_tlstream_write_timestamp(buffer, pos);
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &atom, sizeof(atom));
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &katom, sizeof(katom));
 	KBASE_DEBUG_ASSERT(msg_size == pos);
 
-	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
 }
 
-void __kbase_tlstream_jd_gpu_soft_reset(void *gpu)
+void __kbase_tlstream_aux_job_softstop(u32 js_id)
 {
-	const u32     msg_id = KBASE_JD_GPU_SOFT_RESET;
+	const u32     msg_id = KBASE_AUX_JOB_SOFTSTOP;
 	const size_t  msg_size =
-		sizeof(msg_id) + sizeof(u64) + sizeof(gpu);
+		sizeof(msg_id) + sizeof(u64) + sizeof(js_id);
 	unsigned long flags;
 	char          *buffer;
 	size_t        pos = 0;
 
 	buffer = kbasep_tlstream_msgbuf_acquire(
-			TL_STREAM_TYPE_OBJ,
+			TL_STREAM_TYPE_AUX,
 			msg_size, &flags);
 	KBASE_DEBUG_ASSERT(buffer);
 
 	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
 	pos = kbasep_tlstream_write_timestamp(buffer, pos);
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &gpu, sizeof(gpu));
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &js_id, sizeof(js_id));
 	KBASE_DEBUG_ASSERT(msg_size == pos);
 
-	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
 }
 
-/*****************************************************************************/
-
-void __kbase_tlstream_aux_pm_state(u32 core_type, u64 state)
+/**
+ * __kbase_tlstream_aux_job_softstop_ex_record - record the trace point
+ * @katom: the atom that has been soft-stopped
+ * @job_type: the job type
+ */
+static void __kbase_tlstream_aux_job_softstop_ex_record(
+		void *katom, u32 job_type)
 {
-	const u32     msg_id = KBASE_AUX_PM_STATE;
+	const u32     msg_id = KBASE_AUX_JOB_SOFTSTOP_EX;
 	const size_t  msg_size =
-		sizeof(msg_id) + sizeof(u64) + sizeof(core_type) +
-		sizeof(state);
+		sizeof(msg_id) + sizeof(u64) + sizeof(katom) + sizeof(job_type);
 	unsigned long flags;
 	char          *buffer;
 	size_t        pos = 0;
 
 	buffer = kbasep_tlstream_msgbuf_acquire(
-			TL_STREAM_TYPE_AUX,
-			msg_size, &flags);
+			TL_STREAM_TYPE_AUX, msg_size, &flags);
 	KBASE_DEBUG_ASSERT(buffer);
 
 	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
 	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &katom, sizeof(katom));
 	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &core_type, sizeof(core_type));
-	pos = kbasep_tlstream_write_bytes(buffer, pos, &state, sizeof(state));
+			buffer, pos, &job_type, sizeof(job_type));
 	KBASE_DEBUG_ASSERT(msg_size == pos);
 
 	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
 }
 
+void __kbase_tlstream_aux_job_softstop_ex(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	u64 jd = katom->jc;
+
+	while (jd != 0) {
+		struct job_descriptor_header *job;
+		struct kbase_vmap_struct map;
+
+		job = kbase_vmap(kctx, jd, sizeof(*job), &map);
+		if (!job) {
+			dev_err(kctx->kbdev->dev,
+				"__kbase_tlstream_aux_job_softstop_ex: failed to map job descriptor 0x%llx for atom 0x%p\n",
+				jd, (void *)katom);
+			break;
+		}
+		if (job->exception_status != BASE_JD_EVENT_STOPPED) {
+			kbase_vunmap(kctx, &map);
+			break;
+		}
+
+		__kbase_tlstream_aux_job_softstop_ex_record(
+				katom, job->job_type);
+
+		jd = job->job_descriptor_size ?
+			job->next_job._64 : job->next_job._32;
+		kbase_vunmap(kctx, &map);
+	}
+}
+
 void __kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change)
 {
 	const u32     msg_id = KBASE_AUX_PAGEFAULT;
@@ -2318,25 +2243,3 @@ void __kbase_tlstream_aux_pagesalloc(u32 ctx_nr, u64 page_count)
 	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
 }
 
-void __kbase_tlstream_aux_devfreq_target(u64 target_freq)
-{
-	const u32       msg_id = KBASE_AUX_DEVFREQ_TARGET;
-	const size_t    msg_size =
-		sizeof(msg_id) + sizeof(u64) + sizeof(target_freq);
-	unsigned long   flags;
-	char            *buffer;
-	size_t          pos = 0;
-
-	buffer = kbasep_tlstream_msgbuf_acquire(
-			TL_STREAM_TYPE_AUX, msg_size, &flags);
-	KBASE_DEBUG_ASSERT(buffer);
-
-	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
-	pos = kbasep_tlstream_write_timestamp(buffer, pos);
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &target_freq, sizeof(target_freq));
-	KBASE_DEBUG_ASSERT(msg_size == pos);
-
-	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
-}
-
diff --git a/drivers/gpu/arm/midgard/mali_kbase_tlstream.h b/drivers/gpu/arm/midgard/mali_kbase_tlstream.h
index e29be71c142f..22a0d96f9a72 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_tlstream.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_tlstream.h
@@ -37,9 +37,8 @@ void kbase_tlstream_term(void);
 
 /**
  * kbase_tlstream_acquire - acquire timeline stream file descriptor
- * @kctx:  kernel common context
- * @fd:    timeline stream file descriptor
- * @flags: timeline stream flags
+ * @kctx: kernel common context
+ * @fd:   timeline stream file descriptor
  *
  * This descriptor is meant to be used by userspace timeline to gain access to
  * kernel timeline stream. This stream is later broadcasted by user space to the
@@ -51,7 +50,7 @@ void kbase_tlstream_term(void);
  * Return: zero on success (this does not necessarily mean that stream
  *         descriptor could be returned), negative number on error
  */
-int kbase_tlstream_acquire(struct kbase_context *kctx, int *fd, u32 flags);
+int kbase_tlstream_acquire(struct kbase_context *kctx, int *fd);
 
 /**
  * kbase_tlstream_flush_streams - flush timeline streams.
@@ -103,11 +102,6 @@ void kbase_tlstream_stats(u32 *bytes_collected, u32 *bytes_generated);
 
 /*****************************************************************************/
 
-#define TL_ATOM_STATE_IDLE 0
-#define TL_ATOM_STATE_READY 1
-#define TL_ATOM_STATE_DONE 2
-#define TL_ATOM_STATE_POSTED 3
-
 void __kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr, u32 tgid);
 void __kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count);
 void __kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn);
@@ -134,38 +128,25 @@ void __kbase_tlstream_tl_ndep_atom_atom(void *atom1, void *atom2);
 void __kbase_tlstream_tl_rdep_atom_atom(void *atom1, void *atom2);
 void __kbase_tlstream_tl_attrib_atom_config(
 		void *atom, u64 jd, u64 affinity, u32 config);
-void __kbase_tlstream_tl_attrib_atom_priority(void *atom, u32 prio);
-void __kbase_tlstream_tl_attrib_atom_state(void *atom, u32 state);
-void __kbase_tlstream_tl_attrib_atom_priority_change(void *atom);
 void __kbase_tlstream_tl_attrib_as_config(
 		void *as, u64 transtab, u64 memattr, u64 transcfg);
-void __kbase_tlstream_tl_event_atom_softstop_ex(void *atom);
-void __kbase_tlstream_tl_event_lpu_softstop(void *lpu);
-void __kbase_tlstream_tl_event_atom_softstop_issue(void *atom);
 void __kbase_tlstream_jd_gpu_soft_reset(void *gpu);
 void __kbase_tlstream_aux_pm_state(u32 core_type, u64 state);
+void __kbase_tlstream_aux_issue_job_softstop(void *katom);
+void __kbase_tlstream_aux_job_softstop(u32 js_id);
+void __kbase_tlstream_aux_job_softstop_ex(struct kbase_jd_atom *katom);
 void __kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change);
 void __kbase_tlstream_aux_pagesalloc(u32 ctx_nr, u64 page_count);
-void __kbase_tlstream_aux_devfreq_target(u64 target_freq);
-
-#define TLSTREAM_ENABLED (1 << 31)
 
 extern atomic_t kbase_tlstream_enabled;
 
 #define __TRACE_IF_ENABLED(trace_name, ...)                         \
 	do {                                                        \
 		int enabled = atomic_read(&kbase_tlstream_enabled); \
-		if (enabled & TLSTREAM_ENABLED)                     \
+		if (enabled)                                        \
 			__kbase_tlstream_##trace_name(__VA_ARGS__); \
 	} while (0)
 
-#define __TRACE_IF_ENABLED_LATENCY(trace_name, ...)                     \
-	do {                                                            \
-		int enabled = atomic_read(&kbase_tlstream_enabled);     \
-		if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \
-			__kbase_tlstream_##trace_name(__VA_ARGS__);     \
-	} while (0)
-
 /*****************************************************************************/
 
 /**
@@ -448,35 +429,6 @@ extern atomic_t kbase_tlstream_enabled;
 #define kbase_tlstream_tl_attrib_atom_config(atom, jd, affinity, config) \
 	__TRACE_IF_ENABLED(tl_attrib_atom_config, atom, jd, affinity, config)
 
-/**
- * kbase_tlstream_tl_attrib_atom_priority - atom priority
- * @atom: name of the atom object
- * @prio: atom priority
- *
- * Function emits a timeline message containing atom priority.
- */
-#define kbase_tlstream_tl_attrib_atom_priority(atom, prio) \
-	__TRACE_IF_ENABLED_LATENCY(tl_attrib_atom_priority, atom, prio)
-
-/**
- * kbase_tlstream_tl_attrib_atom_state - atom state
- * @atom:  name of the atom object
- * @state: atom state
- *
- * Function emits a timeline message containing atom state.
- */
-#define kbase_tlstream_tl_attrib_atom_state(atom, state) \
-	__TRACE_IF_ENABLED_LATENCY(tl_attrib_atom_state, atom, state)
-
-/**
- * kbase_tlstream_tl_attrib_atom_priority_change - atom caused priority change
- * @atom:  name of the atom object
- *
- * Function emits a timeline message signalling priority change
- */
-#define kbase_tlstream_tl_attrib_atom_priority_change(atom) \
-	__TRACE_IF_ENABLED_LATENCY(tl_attrib_atom_priority_change, atom)
-
 /**
  * kbase_tlstream_tl_attrib_as_config - address space attributes
  * @as:       assigned address space
@@ -489,27 +441,6 @@ extern atomic_t kbase_tlstream_enabled;
 #define kbase_tlstream_tl_attrib_as_config(as, transtab, memattr, transcfg) \
 	__TRACE_IF_ENABLED(tl_attrib_as_config, as, transtab, memattr, transcfg)
 
-/**
- * kbase_tlstream_tl_event_atom_softstop_ex
- * @atom:       atom identifier
- */
-#define kbase_tlstream_tl_event_atom_softstop_ex(atom) \
-	__TRACE_IF_ENABLED(tl_event_atom_softstop_ex, atom)
-
-/**
- * kbase_tlstream_tl_event_lpu_softstop
- * @lpu:        name of the LPU object
- */
-#define kbase_tlstream_tl_event_lpu_softstop(lpu) \
-	__TRACE_IF_ENABLED(tl_event_lpu_softstop, lpu)
-
-/**
- * kbase_tlstream_tl_event_atom_softstop_issue
- * @atom:       atom identifier
- */
-#define kbase_tlstream_tl_event_atom_softstop_issue(atom) \
-	__TRACE_IF_ENABLED(tl_event_atom_softstop_issue, atom)
-
 /**
  * kbase_tlstream_jd_gpu_soft_reset - The GPU is being soft reset
  * @gpu:        name of the GPU object
@@ -528,6 +459,34 @@ extern atomic_t kbase_tlstream_enabled;
 #define kbase_tlstream_aux_pm_state(core_type, state) \
 	__TRACE_IF_ENABLED(aux_pm_state, core_type, state)
 
+/**
+ * kbase_tlstream_aux_issue_job_softstop - a soft-stop command is being issued
+ * @katom: the atom that is being soft-stopped
+ */
+#define kbase_tlstream_aux_issue_job_softstop(katom) \
+	__TRACE_IF_ENABLED(aux_issue_job_softstop, katom)
+
+/**
+ * kbase_tlstream_aux_job_softstop - soft job stop occurred
+ * @js_id: job slot id
+ */
+#define kbase_tlstream_aux_job_softstop(js_id) \
+	__TRACE_IF_ENABLED(aux_job_softstop, js_id)
+
+/**
+ * kbase_tlstream_aux_job_softstop_ex - extra info about soft-stopped atom
+ * @katom: the atom that has been soft-stopped
+ *
+ * This trace point adds more details about the soft-stopped atom. These details
+ * can't be safely collected inside the interrupt handler so we're doing it
+ * inside a worker.
+ *
+ * Note: this is not the same information that is recorded in the trace point;
+ * refer to __kbase_tlstream_aux_job_softstop_ex() for more details.
+ */
+#define kbase_tlstream_aux_job_softstop_ex(katom) \
+	__TRACE_IF_ENABLED(aux_job_softstop_ex, katom)
+
 /**
  * kbase_tlstream_aux_pagefault - timeline message: MMU page fault event
  *                                resulting in new pages being mapped
@@ -546,13 +505,5 @@ extern atomic_t kbase_tlstream_enabled;
 #define kbase_tlstream_aux_pagesalloc(ctx_nr, page_count) \
 	__TRACE_IF_ENABLED(aux_pagesalloc, ctx_nr, page_count)
 
-/**
- * kbase_tlstream_aux_devfreq_target - timeline message: new target DVFS
- *                                     frequency
- * @target_freq: new target frequency
- */
-#define kbase_tlstream_aux_devfreq_target(target_freq) \
-	__TRACE_IF_ENABLED(aux_devfreq_target, target_freq)
-
 #endif /* _KBASE_TLSTREAM_H */
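
For reference, the hunks above revert the tracepoint gating from the r14p0 bitmask test (enabled & TLSTREAM_ENABLED) back to the r13p0 plain non-zero check of kbase_tlstream_enabled, and drop the latency-tracepoint variant. The following is a minimal, standalone userspace sketch of that restored gating pattern only; it is not driver code, and every identifier ending in _demo is invented for the example.

/*
 * Sketch of the restored __TRACE_IF_ENABLED pattern: emit the tracepoint
 * body only while the shared atomic flag is non-zero. All *_demo names
 * are invented; build with any C11 compiler.
 */
#include <stdatomic.h>
#include <stdio.h>

static atomic_int tlstream_enabled_demo;	/* zero-initialised: tracing off */

static void __tl_attrib_as_config_demo(void *as, unsigned long long transtab)
{
	printf("attrib_as_config: as=%p transtab=0x%llx\n", as, transtab);
}

#define TRACE_IF_ENABLED_DEMO(trace_name, ...)                          \
	do {                                                            \
		int enabled = atomic_load(&tlstream_enabled_demo);      \
		if (enabled)	/* r13p0 style: any non-zero value */   \
			__tl_##trace_name##_demo(__VA_ARGS__);          \
	} while (0)

int main(void)
{
	int as_dummy;

	/* Dropped: the stream has not been enabled yet. */
	TRACE_IF_ENABLED_DEMO(attrib_as_config, (void *)&as_dummy, 0x1000ULL);

	atomic_store(&tlstream_enabled_demo, 1);

	/* Emitted now that the flag is non-zero. */
	TRACE_IF_ENABLED_DEMO(attrib_as_config, (void *)&as_dummy, 0x1000ULL);
	return 0;
}
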
 
diff --git a/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c
index 5830e87f0818..a606ae810656 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c
@@ -105,8 +105,6 @@ static const struct file_operations kbasep_trace_timeline_debugfs_fops = {
 	.release = seq_release,
 };
 
-#ifdef CONFIG_DEBUG_FS
-
 void kbasep_trace_timeline_debugfs_init(struct kbase_device *kbdev)
 {
 	debugfs_create_file("mali_timeline_defs",
@@ -114,12 +112,10 @@ void kbasep_trace_timeline_debugfs_init(struct kbase_device *kbdev)
 			&kbasep_trace_timeline_debugfs_fops);
 }
 
-#endif /* CONFIG_DEBUG_FS */
-
 void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx,
 		struct kbase_jd_atom *katom, int js)
 {
-	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
 
 	if (kbdev->timeline.slot_atoms_submitted[js] > 0) {
 		KBASE_TIMELINE_JOB_START_NEXT(kctx, js, 1);
@@ -138,7 +134,7 @@ void kbase_timeline_job_slot_done(struct kbase_device *kbdev, struct kbase_conte
 		struct kbase_jd_atom *katom, int js,
 		kbasep_js_atom_done_code done_code)
 {
-	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
 
 	if (done_code & KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT) {
 		KBASE_TIMELINE_JOB_START_NEXT(kctx, js, 0);
@@ -217,7 +213,7 @@ void kbase_timeline_pm_handle_event(struct kbase_device *kbdev, enum kbase_timel
 
 void kbase_timeline_pm_l2_transition_start(struct kbase_device *kbdev)
 {
-	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&kbdev->pm.power_change_lock);
 	/* Simply log the start of the transition */
 	kbdev->timeline.l2_transitioning = true;
 	KBASE_TIMELINE_POWERING_L2(kbdev);
@@ -225,7 +221,7 @@ void kbase_timeline_pm_l2_transition_start(struct kbase_device *kbdev)
 
 void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev)
 {
-	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&kbdev->pm.power_change_lock);
 	/* Simply log the end of the transition */
 	if (kbdev->timeline.l2_transitioning) {
 		kbdev->timeline.l2_transitioning = false;
diff --git a/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h
index 619072f3215c..22a36494e72e 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h
@@ -28,17 +28,9 @@ enum kbase_trace_timeline_code {
 	#undef KBASE_TIMELINE_TRACE_CODE
 };
 
-#ifdef CONFIG_DEBUG_FS
-
 /** Initialize Timeline DebugFS entries */
 void kbasep_trace_timeline_debugfs_init(struct kbase_device *kbdev);
 
-#else /* CONFIG_DEBUG_FS */
-
-#define kbasep_trace_timeline_debugfs_init CSTD_NOP
-
-#endif /* CONFIG_DEBUG_FS */
-
 /* mali_timeline.h defines kernel tracepoints used by the KBASE_TIMELINE
  * functions.
  * Output is timestamped by either sched_clock() (default), local_clock(), or
@@ -240,12 +232,13 @@ void kbasep_trace_timeline_debugfs_init(struct kbase_device *kbdev);
 				count);                                      \
 	} while (0)
 
+
 /* NOTE: kbase_timeline_pm_cores_func() is in mali_kbase_pm_policy.c */
 
 /**
  * Trace that an atom is starting on a job slot
  *
- * The caller must be holding hwaccess_lock
+ * The caller must be holding kbasep_js_device_data::runpool_irq::lock
  */
 void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx,
 		struct kbase_jd_atom *katom, int js);
@@ -264,7 +257,7 @@ void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_con
  * - kbasep_jm_dequeue_submit_slot()
  * - kbasep_jm_dequeue_tail_submit_slot()
  *
- * The caller must be holding hwaccess_lock
+ * The caller must be holding kbasep_js_device_data::runpool_irq::lock
  */
 void kbase_timeline_job_slot_done(struct kbase_device *kbdev, struct kbase_context *kctx,
 		struct kbase_jd_atom *katom, int js,
@@ -328,14 +321,14 @@ void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev);
 static inline void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx,
 		struct kbase_jd_atom *katom, int js)
 {
-	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
 }
 
 static inline void kbase_timeline_job_slot_done(struct kbase_device *kbdev, struct kbase_context *kctx,
 		struct kbase_jd_atom *katom, int js,
 		kbasep_js_atom_done_code done_code)
 {
-	lockdep_assert_held(&kbdev->hwaccess_lock);
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
 }
 
 static inline void kbase_timeline_pm_send_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event_sent)
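
The reverted assertions above restore the r13p0 locking contract documented in this header: the timeline helpers must only run with kbasep_js_device_data::runpool_irq::lock (or pm.power_change_lock for the L2 helpers) already held by the caller. Below is a small userspace mock of that "caller must already hold the lock" contract; it is not kernel lockdep, and all *_demo names are invented.

#include <assert.h>
#include <pthread.h>
#include <stdbool.h>

struct irq_lock_demo {
	pthread_mutex_t mutex;
	bool held;
};

static void lock_demo(struct irq_lock_demo *l)
{
	pthread_mutex_lock(&l->mutex);
	l->held = true;
}

static void unlock_demo(struct irq_lock_demo *l)
{
	l->held = false;
	pthread_mutex_unlock(&l->mutex);
}

/* Stand-in for lockdep_assert_held(): the caller must already own the lock. */
static void assert_held_demo(struct irq_lock_demo *l)
{
	assert(l->held);
}

static void timeline_job_slot_submit_demo(struct irq_lock_demo *runpool_irq_lock)
{
	assert_held_demo(runpool_irq_lock);
	/* ... per-slot timeline bookkeeping would go here ... */
}

int main(void)
{
	struct irq_lock_demo l = { PTHREAD_MUTEX_INITIALIZER, false };

	lock_demo(&l);
	timeline_job_slot_submit_demo(&l);	/* OK: lock is held */
	unlock_demo(&l);
	return 0;
}
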
diff --git a/drivers/gpu/arm/midgard/mali_kbase_uku.h b/drivers/gpu/arm/midgard/mali_kbase_uku.h
index 711b09138db3..e880d9663d0e 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_uku.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_uku.h
@@ -56,15 +56,9 @@
  *
  * 10.4:
  * - Removed KBASE_FUNC_EXT_BUFFER_LOCK used only in internal tests
- *
- * 10.5:
- * - Reverted to performing mmap in user space so that tools like valgrind work.
- *
- * 10.6:
- * - Add flags input variable to KBASE_FUNC_TLSTREAM_ACQUIRE
  */
 #define BASE_UK_VERSION_MAJOR 10
-#define BASE_UK_VERSION_MINOR 6
+#define BASE_UK_VERSION_MINOR 4
 
 struct kbase_uk_mem_alloc {
 	union uk_header header;
@@ -325,7 +319,6 @@ struct kbase_uk_profiling_controls {
 struct kbase_uk_debugfs_mem_profile_add {
 	union uk_header header;
 	u32 len;
-	u32 padding;
 	union kbase_pointer buf;
 };
 
@@ -340,30 +333,12 @@ struct kbase_uk_context_id {
 /**
  * struct kbase_uk_tlstream_acquire - User/Kernel space data exchange structure
  * @header: UK structure header
- * @flags:  timeline stream flags
  * @fd:     timeline stream file descriptor
  *
- * This structure is used when performing a call to acquire kernel side timeline
- * stream file descriptor.
+ * This structure is used when performing a call to acquire kernel side
+ * timeline stream file descriptor.
  */
 struct kbase_uk_tlstream_acquire {
-	union uk_header header;
-	/* IN */
-	u32 flags;
-	/* OUT */
-	s32  fd;
-};
-
-/**
- * struct kbase_uk_tlstream_acquire_v10_4 - User/Kernel space data exchange
- *                                          structure
- * @header: UK structure header
- * @fd:     timeline stream file descriptor
- *
- * This structure is used when performing a call to acquire kernel side timeline
- * stream file descriptor.
- */
-struct kbase_uk_tlstream_acquire_v10_4 {
 	union uk_header header;
 	/* IN */
 	/* OUT */
@@ -522,7 +497,7 @@ enum kbase_uk_function_id {
 
 #if (defined(MALI_MIPE_ENABLED) && MALI_MIPE_ENABLED) || \
 	!defined(MALI_MIPE_ENABLED)
-	KBASE_FUNC_TLSTREAM_ACQUIRE_V10_4 = (UK_FUNC_ID + 32),
+	KBASE_FUNC_TLSTREAM_ACQUIRE = (UK_FUNC_ID + 32),
 #if MALI_UNIT_TEST
 	KBASE_FUNC_TLSTREAM_TEST = (UK_FUNC_ID + 33),
 	KBASE_FUNC_TLSTREAM_STATS = (UK_FUNC_ID + 34),
@@ -540,11 +515,6 @@ enum kbase_uk_function_id {
 
 	KBASE_FUNC_MEM_JIT_INIT = (UK_FUNC_ID + 39),
 
-#if (defined(MALI_MIPE_ENABLED) && MALI_MIPE_ENABLED) || \
-	!defined(MALI_MIPE_ENABLED)
-	KBASE_FUNC_TLSTREAM_ACQUIRE = (UK_FUNC_ID + 40),
-#endif /* MALI_MIPE_ENABLED */
-
 	KBASE_FUNC_MAX
 };
 
diff --git a/drivers/gpu/arm/midgard/mali_kbase_vinstr.c b/drivers/gpu/arm/midgard/mali_kbase_vinstr.c
index 3adb06ddb266..bd6095f77480 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_vinstr.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_vinstr.c
@@ -29,7 +29,6 @@
 
 #include <mali_kbase.h>
 #include <mali_kbase_hwaccess_instr.h>
-#include <mali_kbase_hwaccess_jm.h>
 #include <mali_kbase_hwcnt_reader.h>
 #include <mali_kbase_mem_linux.h>
 #include <mali_kbase_tlstream.h>
@@ -1637,9 +1636,11 @@ static int kbasep_vinstr_hwcnt_reader_mmap(struct file *filp,
 
 	if (vma->vm_pgoff > (size >> PAGE_SHIFT))
 		return -EINVAL;
+	if (vm_size > size)
+		return -EINVAL;
 
 	offset = vma->vm_pgoff << PAGE_SHIFT;
-	if (vm_size > size - offset)
+	if ((vm_size + offset) > size)
 		return -EINVAL;
 
 	addr = __pa((unsigned long)cli->dump_buffers + offset);
@@ -1686,9 +1687,9 @@ static void kbasep_vinstr_kick_scheduler(struct kbase_device *kbdev)
 	unsigned long flags;
 
 	down(&js_devdata->schedule_sem);
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-	kbase_backend_slot_update(kbdev);
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+	kbase_jm_kick_all(kbdev);
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
 	up(&js_devdata->schedule_sem);
 }
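
The mmap hunk above restores the r13p0 bounds checks for the hwcnt reader dump buffers: the requested window [offset, offset + vm_size) must fit inside the allocated buffer. A standalone sketch of that restored check follows; the *_demo names, PAGE_SHIFT value, and sample sizes are invented for illustration.

#include <errno.h>
#include <stdio.h>

#define PAGE_SHIFT_DEMO 12UL

static int check_mmap_window_demo(unsigned long vm_pgoff,
				  unsigned long vm_size,
				  unsigned long size)
{
	unsigned long offset;

	/* Reject offsets past the end of the dump buffers. */
	if (vm_pgoff > (size >> PAGE_SHIFT_DEMO))
		return -EINVAL;
	/* Reject mappings larger than the whole buffer. */
	if (vm_size > size)
		return -EINVAL;

	offset = vm_pgoff << PAGE_SHIFT_DEMO;
	/* Reject windows that run past the end of the buffer. */
	if ((vm_size + offset) > size)
		return -EINVAL;

	return 0;
}

int main(void)
{
	/* 16 pages of dump buffers, map 4 pages at page 14: rejected (-EINVAL). */
	printf("%d\n", check_mmap_window_demo(14, 4UL << PAGE_SHIFT_DEMO,
					      16UL << PAGE_SHIFT_DEMO));
	/* Map 2 pages at page 14: fits exactly, accepted (0). */
	printf("%d\n", check_mmap_window_demo(14, 2UL << PAGE_SHIFT_DEMO,
					      16UL << PAGE_SHIFT_DEMO));
	return 0;
}
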
 
diff --git a/drivers/gpu/arm/midgard/mali_midg_regmap.h b/drivers/gpu/arm/midgard/mali_midg_regmap.h
index de3053b88da8..de6c206f3c71 100644
--- a/drivers/gpu/arm/midgard/mali_midg_regmap.h
+++ b/drivers/gpu/arm/midgard/mali_midg_regmap.h
@@ -61,7 +61,6 @@
 #define LATEST_FLUSH            0x038	/* (RO) */
 
 #define GROUPS_L2_COHERENT      (1 << 0)	/* Cores groups are l2 coherent */
-#define GPU_DBGEN               (1 << 8)	/* DBGEN wire status */
 
 #define GPU_FAULTSTATUS         0x03C	/* (RO) GPU exception type and fault status */
 #define GPU_FAULTADDRESS_LO     0x040	/* (RO) GPU exception fault address, low word */
diff --git a/drivers/gpu/arm/midgard/sconscript b/drivers/gpu/arm/midgard/sconscript
index 7b7ec772ecf3..c36e92dd0d7a 100644
--- a/drivers/gpu/arm/midgard/sconscript
+++ b/drivers/gpu/arm/midgard/sconscript
@@ -13,9 +13,13 @@
 #
 
 
+
+import os
+import re
 import sys
 Import('env')
 
+
 if Glob('tests/sconscript'):
 	SConscript( 'tests/sconscript' )
 
@@ -29,12 +33,18 @@ else:
 	fake_platform_device = 1
 
 # Source files required for kbase.
-kbase_src = [
-	Glob('*.c'),
-	Glob('backend/*/*.c'),
-	Glob('internal/*/*.c'),
-	Glob('platform/%s/*.c' % env['platform_config']),
-]
+kbase_src = [Glob('#kernel/drivers/gpu/arm/midgard/*.c'),
+             Glob('#kernel/drivers/gpu/arm/midgard/*.c'),
+             Glob('#kernel/drivers/gpu/arm/midgard/backend/*/*.c'),
+             Glob('#kernel/drivers/gpu/arm/midgard/backend/*/*.h'),
+             Glob('#kernel/drivers/gpu/arm/midgard/platform/%s/*.c' % (env['platform_config'])),
+             Glob('#kernel/drivers/gpu/arm/midgard/*.h'),
+             Glob('#kernel/drivers/gpu/arm/midgard/*.h'),
+             Glob('#kernel/drivers/gpu/arm/midgard/Makefile'),
+             Glob('#kernel/drivers/gpu/arm/midgard/K*')
+             ]
+
+kbase_src += [Glob('#kernel/drivers/gpu/arm/midgard/internal/*/*.c')]
 
 if Glob('#kernel/drivers/gpu/arm/midgard/tests/internal/src/mock') and env['unit'] == '1':
 	kbase_src += [Glob('#kernel/drivers/gpu/arm/midgard/tests/internal/src/mock/*.c')]
@@ -43,43 +53,75 @@ if Glob('#kernel/drivers/gpu/arm/midgard/tests/internal/src/mock') and env['unit
 # we need platform config for GPL version using fake platform
 if fake_platform_device==1:
 	# Check if we are compiling for PBX
-	if env.KernelConfigEnabled("CONFIG_MACH_REALVIEW_PBX") and \
-	   env["platform_config"] in {"vexpress", "vexpress_6xvirtex7_10mhz"}:
+	linux_config_file = os.path.normpath(os.environ['KDIR']) + '/.config'
+	search_term = '^[\ ]*CONFIG_MACH_REALVIEW_PBX[\ ]*=[\ ]*y'
+	REALVIEW_PBX = 0
+	for line in open(linux_config_file, 'r'):
+		if re.search(search_term, line):
+			REALVIEW_PBX = 1
+			break
+	if REALVIEW_PBX == 1 and (env['platform_config'] == 'vexpress' or env['platform_config'] == 'vexpress_6xvirtex7_10mhz'):
 		sys.stderr.write("WARNING: Building for a PBX kernel but with platform_config=vexpress*\n")
 	# if the file platform config file is in the tpip directory then use that, otherwise use the default config directory
 	if Glob('#kernel/drivers/gpu/arm/midgard/config/tpip/*%s.c' % (env['platform_config'])):
 		kbase_src += Glob('#kernel/drivers/gpu/arm/midgard/config/tpip/*%s.c' % (env['platform_config']))
 	else:
 		kbase_src += Glob('#kernel/drivers/gpu/arm/midgard/config/*%s.c' % (env['platform_config']))
+
+# Note: cleaning via the Linux kernel build system does not yet work
+if env.GetOption('clean') :
+	env.Execute(Action("make clean", '[clean] kbase'))
+	cmd = env.Command(['$STATIC_LIB_PATH/mali_kbase.ko', '$STATIC_LIB_PATH/mali_platform_fake.ko'], kbase_src, [])
+else:
+	if env['os'] == 'android':
+		env['android'] = 1
+	else:
+		env['android'] = 0
+
+	if env['unit'] == '1':
+		env['kernel_test'] = 1
+	else:
+		env['kernel_test'] = 0
+
+	#Extract environment options, note the trailing spaces are important
+	env_options = \
+	"PLATFORM=${platform} " +\
+	"MALI_ERROR_INJECT_ON=${error_inject} " +\
+	"MALI_ANDROID=${android} " +\
+	"MALI_KERNEL_TEST_API=${kernel_test} " +\
+	"MALI_UNIT_TEST=${unit} " +\
+	"MALI_RELEASE_NAME=\"${mali_release_name}\" "+\
+	"MALI_MOCK_TEST=%s " % mock_test +\
+	"MALI_CUSTOMER_RELEASE=${release} " +\
+	"MALI_INSTRUMENTATION_LEVEL=${instr} " +\
+	"MALI_COVERAGE=${coverage} " +\
+	"MALI_BUS_LOG=${buslog} "
+
+	make_action_start = "cd ${SOURCE.dir} && make -j%d " % GetOption('num_jobs')
+	make_action_end = "%s && cp mali_kbase.ko $STATIC_LIB_PATH/mali_kbase.ko" % env.kernel_get_config_defines(fake_platform_device)
+	make_action = make_action_start + env_options + make_action_end
 
-make_args = env.kernel_get_config_defines(ret_list = True,
-                                          fake = fake_platform_device) + [
-	'PLATFORM=%s' % env['platform'],
-	'MALI_ERROR_INJECT_ON=%s' % env['error_inject'],
-	'MALI_KERNEL_TEST_API=%s' % env['unit'],
-	'MALI_UNIT_TEST=%s' % env['unit'],
-	'MALI_RELEASE_NAME=%s' % env['mali_release_name'],
-	'MALI_MOCK_TEST=%s' % mock_test,
-	'MALI_CUSTOMER_RELEASE=%s' % env['release'],
-	'MALI_INSTRUMENTATION_LEVEL=%s' % env['instr'],
-	'MALI_COVERAGE=%s' % env['coverage'],
-	'MALI_BUS_LOG=%s' % env['buslog'],
-]
-
-kbase = env.BuildKernelModule('$STATIC_LIB_PATH/mali_kbase.ko', kbase_src,
-                              make_args = make_args)
+	makeAction=Action(make_action, '$MAKECOMSTR')
+	cmd = env.Command('$STATIC_LIB_PATH/mali_kbase.ko', kbase_src, [makeAction])
 
 # Add a dependency on kds.ko.
 # Only necessary when KDS is not built into the kernel.
 #
 if env['os'] != 'android':
-	if not env.KernelConfigEnabled("CONFIG_KDS"):
-		env.Depends(kbase, '$STATIC_LIB_PATH/kds.ko')
+	linux_config_file = os.path.normpath(os.environ['KDIR']) + '/.config'
+	search_term = '^[\ ]*CONFIG_KDS[\ ]*=[\ ]*y'
+	kds_in_kernel = 0
+	for line in open(linux_config_file, 'r'):
+		if re.search(search_term, line):
+			# KDS in kernel.
+			kds_in_kernel = 1
+	if not kds_in_kernel:
+		env.Depends('$STATIC_LIB_PATH/mali_kbase.ko', '$STATIC_LIB_PATH/kds.ko')
 
 # need Module.symvers from ump.ko build
 if int(env['ump']) == 1:
-	env.Depends(kbase, '$STATIC_LIB_PATH/ump.ko')
+	env.Depends('$STATIC_LIB_PATH/mali_kbase.ko', '$STATIC_LIB_PATH/ump.ko')
 
-env.KernelObjTarget('kbase', kbase)
+env.KernelObjTarget('kbase', cmd)
 
 env.AppendUnique(BASE=['cutils_linked_list'])