From 69db1b530898a8bfc559bd8035a95c20d54dcdb9 Mon Sep 17 00:00:00 2001
From: Jacob Chen <jacob2.chen@rock-chips.com>
Date: Thu, 12 Jan 2017 11:09:48 +0800
Subject: [PATCH] MALI: rockchip: linux: upgrade to DDK r13p0-00rel0

Since r9p0 cannot recover from the "DATA_INVALID_FAULT" error,
we have to upgrade to r13p0.

Change-Id: Iac820870159def15dd4c214d0d98f81f81480340
Signed-off-by: Jacob Chen <jacob2.chen@rock-chips.com>
---
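Note (below the "---" line, so not part of the commit message): the r13p0
DDK introduces new Kconfig symbols, visible in the Kconfig hunk below. As a
rough, platform-dependent sketch only (not a recommendation, and subject to
the stated dependencies such as !KDS and SYNC), a board defconfig exercising
the new options could look like:

    CONFIG_MALI_MIDGARD=y
    # New in r13p0: DMA_BUF fence support, only when KDS is not present
    CONFIG_MALI_DMA_FENCE=y
    # The options below additionally require CONFIG_MALI_EXPERT=y
    CONFIG_MALI_EXPERT=y
    # New in r13p0 (needs SYNC): report sync fences that block Mali work too long
    CONFIG_MALI_FENCE_DEBUG=y
    # New in r13p0: AArch64 page tables are Bifrost-only; keep LPAE on Midgard
    # CONFIG_MALI_GPU_MMU_AARCH64 is not set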
 drivers/gpu/arm/midgard_for_linux/Kbuild      |   25 +-
 drivers/gpu/arm/midgard_for_linux/Kconfig     |   87 +-
 drivers/gpu/arm/midgard_for_linux/Makefile    |    6 +-
 .../gpu/mali_kbase_cache_policy_backend.c     |    9 +-
 .../gpu/mali_kbase_cache_policy_backend.h     |   10 +-
 .../backend/gpu/mali_kbase_devfreq.c          |   13 +-
 .../backend/gpu/mali_kbase_device_hw.c        |    6 +-
 .../backend/gpu/mali_kbase_gpu.c              |    7 +-
 .../backend/gpu/mali_kbase_gpuprops_backend.c |   14 +-
 .../backend/gpu/mali_kbase_instr_backend.c    |   88 +-
 .../backend/gpu/mali_kbase_instr_defs.h       |    6 +-
 .../backend/gpu/mali_kbase_irq_linux.c        |    8 +-
 .../backend/gpu/mali_kbase_jm_defs.h          |    9 +-
 .../backend/gpu/mali_kbase_jm_hw.c            |  380 ++--
 .../backend/gpu/mali_kbase_jm_internal.h      |    4 +-
 .../backend/gpu/mali_kbase_jm_rb.c            |  289 ++-
 .../backend/gpu/mali_kbase_js_affinity.c      |   23 +-
 .../backend/gpu/mali_kbase_js_affinity.h      |   10 +-
 .../backend/gpu/mali_kbase_js_backend.c       |   22 +-
 .../backend/gpu/mali_kbase_mmu_hw_direct.c    |   50 +-
 .../backend/gpu/mali_kbase_pm_backend.c       |    3 +-
 .../backend/gpu/mali_kbase_pm_coarse_demand.c |    5 +-
 .../backend/gpu/mali_kbase_pm_defs.h          |   11 +-
 .../backend/gpu/mali_kbase_pm_demand.c        |    5 +-
 .../backend/gpu/mali_kbase_pm_driver.c        |  247 ++-
 .../backend/gpu/mali_kbase_pm_internal.h      |   20 +-
 .../backend/gpu/mali_kbase_pm_policy.c        |   63 +-
 .../gpu/mali_kbase_power_model_simple.c       |   15 +-
 .../gpu/mali_kbase_power_model_simple.h       |    6 +-
 .../backend/gpu/mali_kbase_time.c             |    9 +-
 .../mali_base_hwconfig_features.h             |   36 +-
 .../mali_base_hwconfig_issues.h               |  198 +-
 .../arm/midgard_for_linux/mali_base_kernel.h  |  314 ++-
 .../gpu/arm/midgard_for_linux/mali_kbase.h    |   16 +-
 .../mali_kbase_cache_policy.c                 |   12 +-
 .../arm/midgard_for_linux/mali_kbase_config.h |   19 +-
 .../mali_kbase_config_defaults.h              |   13 +-
 .../midgard_for_linux/mali_kbase_context.c    |   58 +-
 .../midgard_for_linux/mali_kbase_context.h    |   90 +
 .../midgard_for_linux/mali_kbase_core_linux.c | 1750 ++++++++---------
 .../mali_kbase_debug_job_fault.c              |   68 +-
 .../mali_kbase_debug_job_fault.h              |    3 +-
 .../mali_kbase_debug_mem_view.c               |   32 +-
 .../arm/midgard_for_linux/mali_kbase_defs.h   |  384 +++-
 .../arm/midgard_for_linux/mali_kbase_device.c |   31 +-
 .../arm/midgard_for_linux/mali_kbase_event.c  |   59 +-
 .../midgard_for_linux/mali_kbase_gator_api.c  |  239 +--
 .../mali_kbase_gator_hwcnt_names.h            |    6 +-
 .../mali_kbase_gator_hwcnt_names_thex.h       |  291 +++
 .../arm/midgard_for_linux/mali_kbase_gpu_id.h |    8 +-
 .../mali_kbase_gpu_memory_debugfs.c           |    4 +-
 .../mali_kbase_gpu_memory_debugfs.h           |    9 +-
 .../mali_kbase_gpuprops_types.h               |    2 +-
 .../gpu/arm/midgard_for_linux/mali_kbase_hw.c |   49 +-
 .../mali_kbase_hwaccess_jm.h                  |   37 +-
 .../arm/midgard_for_linux/mali_kbase_ipa.c    |    6 +-
 .../gpu/arm/midgard_for_linux/mali_kbase_jd.c |  939 ++++-----
 .../gpu/arm/midgard_for_linux/mali_kbase_js.c |  978 ++++-----
 .../gpu/arm/midgard_for_linux/mali_kbase_js.h |   35 +-
 .../midgard_for_linux/mali_kbase_js_defs.h    |    7 +-
 .../mali_kbase_js_policy_cfs.c                |    7 +-
 .../arm/midgard_for_linux/mali_kbase_mem.c    | 1286 +++++++++++-
 .../arm/midgard_for_linux/mali_kbase_mem.h    |  216 +-
 .../midgard_for_linux/mali_kbase_mem_linux.c  |  753 ++++++-
 .../midgard_for_linux/mali_kbase_mem_linux.h  |  143 +-
 .../midgard_for_linux/mali_kbase_mem_pool.c   |   41 +-
 .../mali_kbase_mem_profile_debugfs.c          |    4 +-
 .../mali_kbase_mem_profile_debugfs.h          |    3 +-
 .../arm/midgard_for_linux/mali_kbase_mmu.c    |  503 +++--
 .../mali_kbase_mmu_mode_lpae.c                |   27 +-
 .../mali_kbase_platform_fake.c                |    6 +-
 .../gpu/arm/midgard_for_linux/mali_kbase_pm.c |   19 +-
 .../mali_kbase_regs_history_debugfs.c         |  130 ++
 .../mali_kbase_regs_history_debugfs.h         |   50 +
 .../arm/midgard_for_linux/mali_kbase_replay.c |  232 ++-
 .../midgard_for_linux/mali_kbase_softjobs.c   | 1227 +++++++++++-
 .../arm/midgard_for_linux/mali_kbase_sync.h   |   11 +-
 .../midgard_for_linux/mali_kbase_sync_user.c  |    3 +-
 .../midgard_for_linux/mali_kbase_tlstream.c   |  330 +++-
 .../midgard_for_linux/mali_kbase_tlstream.h   |  181 +-
 .../mali_kbase_trace_timeline.c               |    4 +-
 .../mali_kbase_trace_timeline.h               |    4 +-
 .../arm/midgard_for_linux/mali_kbase_uku.h    |   74 +-
 .../arm/midgard_for_linux/mali_kbase_vinstr.c |  401 +++-
 .../arm/midgard_for_linux/mali_kbase_vinstr.h |   37 +-
 .../arm/midgard_for_linux/mali_linux_trace.h  |   28 +-
 .../arm/midgard_for_linux/mali_midg_regmap.h  |   78 +-
 .../gpu/arm/midgard_for_linux/mali_timeline.h |    3 +-
 .../devicetree/mali_kbase_config_platform.h   |    8 +-
 .../juno_soc/mali_kbase_config_juno_soc.c     |   86 +-
 .../juno_soc/mali_kbase_config_platform.h     |   10 +-
 .../vexpress/mali_kbase_config_platform.h     |   21 +-
 .../vexpress/mali_kbase_config_vexpress.c     |    2 +-
 .../vexpress/mali_kbase_cpu_vexpress.c        |   77 +-
 .../vexpress/mali_kbase_cpu_vexpress.h        |   26 +-
 .../mali_kbase_config_platform.h              |    8 +-
 .../mali_kbase_config_platform.h              |    8 +-
 drivers/gpu/arm/midgard_for_linux/sconscript  |   15 +-
 98 files changed, 9331 insertions(+), 3874 deletions(-)
 create mode 100644 drivers/gpu/arm/midgard_for_linux/mali_kbase_context.h
 create mode 100644 drivers/gpu/arm/midgard_for_linux/mali_kbase_gator_hwcnt_names_thex.h
 create mode 100644 drivers/gpu/arm/midgard_for_linux/mali_kbase_regs_history_debugfs.c
 create mode 100644 drivers/gpu/arm/midgard_for_linux/mali_kbase_regs_history_debugfs.h

diff --git a/drivers/gpu/arm/midgard_for_linux/Kbuild b/drivers/gpu/arm/midgard_for_linux/Kbuild
index aa22f35cc1e2..3c6d8ef78d74 100644
--- a/drivers/gpu/arm/midgard_for_linux/Kbuild
+++ b/drivers/gpu/arm/midgard_for_linux/Kbuild
@@ -16,7 +16,7 @@
 include drivers/gpu/arm/midgard_for_linux/default_config.mk
 
 # Driver version string which is returned to userspace via an ioctl
-MALI_RELEASE_NAME ?= "r9p0-05rel0"
+MALI_RELEASE_NAME ?= "r13p0-00rel0"
 
 # Paths required for build
 KBASE_PATH = $(src)
@@ -89,7 +89,6 @@ SRC := \
 	mali_kbase_context.c \
 	mali_kbase_pm.c \
 	mali_kbase_config.c \
-	mali_kbase_instr.c \
 	mali_kbase_vinstr.c \
 	mali_kbase_softjobs.c \
 	mali_kbase_10969_workaround.c \
@@ -105,25 +104,27 @@ SRC := \
 	mali_kbase_replay.c \
 	mali_kbase_mem_profile_debugfs.c \
 	mali_kbase_mmu_mode_lpae.c \
+	mali_kbase_mmu_mode_aarch64.c \
 	mali_kbase_disjoint_events.c \
 	mali_kbase_gator_api.c \
 	mali_kbase_debug_mem_view.c \
 	mali_kbase_debug_job_fault.c \
 	mali_kbase_smc.c \
 	mali_kbase_mem_pool.c \
-	mali_kbase_mem_pool_debugfs.c
+	mali_kbase_mem_pool_debugfs.c \
+	mali_kbase_tlstream.c \
+	mali_kbase_strings.c \
+	mali_kbase_as_fault_debugfs.c
 
-ifeq ($(CONFIG_MALI_MIPE_ENABLED),y)
-	SRC += mali_kbase_tlstream.c
-	ifeq ($(MALI_UNIT_TEST),1)
-		SRC += mali_kbase_tlstream_test.c
-	endif
+ifeq ($(MALI_UNIT_TEST),1)
+	SRC += mali_kbase_tlstream_test.c
 endif
 
 ifeq ($(MALI_CUSTOMER_RELEASE),0)
 	SRC += mali_kbase_regs_dump_debugfs.c
 endif
 
+
 # Job Scheduler Policy: Completely Fair Scheduler
 SRC += mali_kbase_js_policy_cfs.c
 
@@ -201,13 +202,7 @@ obj-$(CONFIG_MALI_MIDGARD) += mali_kbase.o
 # Tell the Linux build system to enable building of our .c files
 mali_kbase-y := $(SRC:.c=.o)
 
-ifneq ($(wildcard $(src)/internal/Kbuild),)
-ifeq ($(MALI_CUSTOMER_RELEASE),0)
-# This include may set MALI_BACKEND_PATH and CONFIG_MALI_BACKEND_REAL
-include  $(src)/internal/Kbuild
-mali_kbase-y += $(INTERNAL:.c=.o)
-endif
-endif
+mali_kbase-$(CONFIG_MALI_DMA_FENCE) += mali_kbase_dma_fence.o
 
 MALI_BACKEND_PATH ?= backend
 CONFIG_MALI_BACKEND ?= gpu
diff --git a/drivers/gpu/arm/midgard_for_linux/Kconfig b/drivers/gpu/arm/midgard_for_linux/Kconfig
index 4f70e31d7b25..201832b262d4 100644
--- a/drivers/gpu/arm/midgard_for_linux/Kconfig
+++ b/drivers/gpu/arm/midgard_for_linux/Kconfig
@@ -16,6 +16,7 @@
 
 menuconfig MALI_MIDGARD
 	tristate "Mali Midgard series support"
+	select GPU_TRACEPOINTS if ANDROID
 	default n
 	help
 	  Enable this option to build support for a ARM Mali Midgard GPU.
@@ -23,38 +24,15 @@ menuconfig MALI_MIDGARD
 	  To compile this driver as a module, choose M here:
 	  this will generate a single module, called mali_kbase.
 
-choice
-	prompt "Streamline support"
-	depends on MALI_MIDGARD
-	default MALI_TIMELINE_DISABLED
-	help
-	  Select streamline support configuration.
-
-config MALI_TIMELINE_DISABLED
-	bool "Streamline support disabled"
-	help
-	  Disable support for ARM Streamline Performance Analyzer.
-
-	  Timeline support will not be included in
-	  kernel code.
-	  Debug stream will not be generated.
-
 config MALI_GATOR_SUPPORT
 	bool "Streamline support via Gator"
+	depends on MALI_MIDGARD
+	default n
 	help
 	  Adds diagnostic support for use with the ARM Streamline Performance Analyzer.
 	  You will need the Gator device driver already loaded before loading this driver when enabling
 	  Streamline debug support.
-
-config MALI_MIPE_ENABLED
-	bool "Streamline support via MIPE"
-	help
-	  Adds diagnostic support for use with the ARM Streamline Performance Analyzer.
-
-	  Stream will be transmitted directly to Mali GPU library.
-	  Compatible version of the library is required to read debug stream generated by kernel.
-
-endchoice
+	  This is a legacy interface required by older versions of Streamline.
 
 config MALI_MIDGARD_DVFS
 	bool "Enable legacy DVFS"
@@ -81,6 +59,15 @@ config MALI_DEVFREQ
 	  governor, the frequency of Mali will be dynamically selected from the
 	  available OPPs.
 
+config MALI_DMA_FENCE
+	bool "DMA_BUF fence support for Mali"
+	depends on MALI_MIDGARD && !KDS
+	default n
+	help
+	  Support DMA_BUF fences for Mali.
+
+	  This option should only be enabled if KDS is not present and
+	  the Linux Kernel has built in support for DMA_BUF fences.
 
 # MALI_EXPERT configuration options
 
@@ -92,13 +79,18 @@ menuconfig MALI_EXPERT
 	  Enabling this option and modifying the default settings may produce a driver with performance or
 	  other limitations.
 
-config MALI_DEBUG_SHADER_SPLIT_FS
-	bool "Allow mapping of shader cores via sysfs"
+config MALI_PRFCNT_SET_SECONDARY
+	bool "Use secondary set of performance counters"
 	depends on MALI_MIDGARD && MALI_EXPERT
 	default n
 	help
-	  Select this option to provide a sysfs entry for runtime configuration of shader
-	  core affinity masks.
+	  Select this option to use secondary set of performance counters. Kernel
+	  features that depend on an access to the primary set of counters may
+	  become unavailable. Enabling this option will prevent power management
+	  from working optimally and may cause instrumentation tools to return
+	  bogus results.
+
+	  If unsure, say N.
 
 config MALI_PLATFORM_FAKE
 	bool "Enable fake platform device support"
@@ -162,6 +154,23 @@ config MALI_DEBUG
 	help
 	  Select this option for increased checking and reporting of errors.
 
+config MALI_FENCE_DEBUG
+	bool "Debug sync fence usage"
+	depends on MALI_MIDGARD && MALI_EXPERT && SYNC
+	default y if MALI_DEBUG
+	help
+	  Select this option to enable additional checking and reporting on the
+	  use of sync fences in the Mali driver.
+
+	  This will add a 3s timeout to all sync fence waits in the Mali
+	  driver, so that when work for Mali has been waiting on a sync fence
+	  for a long time a debug message will be printed, detailing what fence
+	  is causing the block, and which dependent Mali atoms are blocked as a
+	  result of this.
+
+	  The timeout can be changed at runtime through the js_soft_timeout
+	  device attribute, where the timeout is specified in milliseconds.
+
 config MALI_NO_MALI
 	bool "No Mali"
 	depends on MALI_MIDGARD && MALI_EXPERT
@@ -198,11 +207,19 @@ config MALI_SYSTEM_TRACE
 	  minimal overhead when not in use. Enable only if you know what
 	  you are doing.
 
-config MALI_GPU_TRACEPOINTS
-	bool "Enable GPU tracepoints"
-	depends on MALI_MIDGARD && ANDROID
-	select GPU_TRACEPOINTS
+config MALI_GPU_MMU_AARCH64
+	bool "Use AArch64 page tables"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
 	help
-	  Enables GPU tracepoints using Android trace event definitions.
+	  Use AArch64 format page tables for the GPU instead of LPAE-style.
+	  The two formats have the same functionality and performance but a
+	  future GPU may deprecate or remove the legacy LPAE-style format.
+
+	  The LPAE-style format is supported on all Midgard and current Bifrost
+	  GPUs. Enabling AArch64 format restricts the driver to only supporting
+	  Bifrost GPUs.
+
+	  If in doubt, say N.
 
 source "drivers/gpu/arm/midgard/platform/Kconfig"
diff --git a/drivers/gpu/arm/midgard_for_linux/Makefile b/drivers/gpu/arm/midgard_for_linux/Makefile
index d4d5de4cd512..e1625e6dba79 100644
--- a/drivers/gpu/arm/midgard_for_linux/Makefile
+++ b/drivers/gpu/arm/midgard_for_linux/Makefile
@@ -1,5 +1,5 @@
 #
-# (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
@@ -26,10 +26,6 @@ ifeq ($(MALI_UNIT_TEST), 1)
 	EXTRA_SYMBOLS += $(KBASE_PATH_RELATIVE)/tests/internal/src/kernel_assert_module/linux/Module.symvers
 endif
 
-ifneq ($(wildcard $(CURDIR)/internal/Makefile.in),)
-include $(CURDIR)/internal/Makefile.in
-endif
-
 ifeq ($(MALI_BUS_LOG), 1)
 #Add bus logger symbols
 EXTRA_SYMBOLS += $(BUSLOG_PATH_RELATIVE)/drivers/base/bus_logger/Module.symvers
diff --git a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_cache_policy_backend.c b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_cache_policy_backend.c
index 92a14fa1bae1..c6862539c8dd 100644
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_cache_policy_backend.c
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_cache_policy_backend.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -16,7 +16,12 @@
 
 
 #include "backend/gpu/mali_kbase_cache_policy_backend.h"
-#include <backend/gpu/mali_kbase_pm_internal.h>
 #include <backend/gpu/mali_kbase_device_internal.h>
 
+void kbase_cache_set_coherency_mode(struct kbase_device *kbdev,
+		u32 mode)
+{
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG))
+		kbase_reg_write(kbdev, COHERENCY_ENABLE, mode, NULL);
+}
 
diff --git a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_cache_policy_backend.h b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_cache_policy_backend.h
index 42069fc88a1f..fe9869109a82 100644
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_cache_policy_backend.h
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_cache_policy_backend.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -22,5 +22,13 @@
 #include "mali_kbase.h"
 #include "mali_base_kernel.h"
 
+/**
+  * kbase_cache_set_coherency_mode() - Sets the system coherency mode
+  *			in the GPU.
+  * @kbdev:	Device pointer
+  * @mode:	Coherency mode. COHERENCY_ACE/ACE_LITE
+  */
+void kbase_cache_set_coherency_mode(struct kbase_device *kbdev,
+		u32 mode);
 
 #endif				/* _KBASE_CACHE_POLICY_H_ */
diff --git a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_devfreq.c b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_devfreq.c
index 86227d996257..ad05fe5bea8d 100644
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_devfreq.c
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_devfreq.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -135,6 +135,14 @@ kbase_devfreq_status(struct device *dev, struct devfreq_dev_status *stat)
 
 	stat->private_data = NULL;
 
+#ifdef CONFIG_DEVFREQ_THERMAL
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0)
+	if (kbdev->devfreq_cooling)
+		memcpy(&kbdev->devfreq_cooling->last_status, stat,
+				sizeof(*stat));
+#endif
+#endif
+
 	return 0;
 }
 
@@ -205,7 +213,8 @@ int kbase_devfreq_init(struct kbase_device *kbdev)
 	dp = &kbdev->devfreq_profile;
 
 	dp->initial_freq = kbdev->current_freq;
-	dp->polling_ms = 100;
+	/* .KP : set devfreq_dvfs_interval_in_ms */
+	dp->polling_ms = 20;
 	dp->target = kbase_devfreq_target;
 	dp->get_dev_status = kbase_devfreq_status;
 	dp->get_cur_freq = kbase_devfreq_cur_freq;
diff --git a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_device_hw.c b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_device_hw.c
index 83d5ec9f7a93..b9238a305177 100644
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_device_hw.c
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_device_hw.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -33,7 +33,9 @@ void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value,
 	KBASE_DEBUG_ASSERT(kctx == NULL || kctx->as_nr != KBASEP_AS_NR_INVALID);
 	KBASE_DEBUG_ASSERT(kbdev->dev != NULL);
 	dev_dbg(kbdev->dev, "w: reg %04x val %08x", offset, value);
+
 	writel(value, kbdev->reg + offset);
+
 	if (kctx && kctx->jctx.tb)
 		kbase_device_trace_register_access(kctx, REG_WRITE, offset,
 									value);
@@ -48,7 +50,9 @@ u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset,
 	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
 	KBASE_DEBUG_ASSERT(kctx == NULL || kctx->as_nr != KBASEP_AS_NR_INVALID);
 	KBASE_DEBUG_ASSERT(kbdev->dev != NULL);
+
 	val = readl(kbdev->reg + offset);
+
 	dev_dbg(kbdev->dev, "r: reg %04x val %08x", offset, val);
 	if (kctx && kctx->jctx.tb)
 		kbase_device_trace_register_access(kctx, REG_READ, offset, val);
diff --git a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_gpu.c b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_gpu.c
index 72a98d0f7952..d578fd78e825 100644
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_gpu.c
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_gpu.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -20,7 +20,6 @@
  * Register-based HW access backend APIs
  */
 #include <mali_kbase.h>
-#include <mali_kbase_hwaccess_jm.h>
 #include <mali_kbase_hwaccess_backend.h>
 #include <backend/gpu/mali_kbase_irq_internal.h>
 #include <backend/gpu/mali_kbase_jm_internal.h>
@@ -81,7 +80,6 @@ int kbase_backend_late_init(struct kbase_device *kbdev)
 	if (err)
 		goto fail_timer;
 
-/* Currently disabled on the prototype */
 #ifdef CONFIG_MALI_DEBUG
 #ifndef CONFIG_MALI_NO_MALI
 	if (kbasep_common_test_interrupt_handlers(kbdev) != 0) {
@@ -101,12 +99,13 @@ int kbase_backend_late_init(struct kbase_device *kbdev)
 	return 0;
 
 fail_job_slot:
-/* Currently disabled on the prototype */
+
 #ifdef CONFIG_MALI_DEBUG
 #ifndef CONFIG_MALI_NO_MALI
 fail_interrupt_test:
 #endif /* !CONFIG_MALI_NO_MALI */
 #endif /* CONFIG_MALI_DEBUG */
+
 	kbase_backend_timer_term(kbdev);
 fail_timer:
 	kbase_hwaccess_pm_halt(kbdev);
diff --git a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_gpuprops_backend.c b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_gpuprops_backend.c
index 705b1ebfa87f..d410cd297889 100644
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_gpuprops_backend.c
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_gpuprops_backend.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -86,8 +86,20 @@ void kbase_backend_gpuprops_get(struct kbase_device *kbdev,
 void kbase_backend_gpuprops_get_features(struct kbase_device *kbdev,
 					struct kbase_gpuprops_regdump *regdump)
 {
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG)) {
+		/* Ensure we can access the GPU registers */
+		kbase_pm_register_access_enable(kbdev);
+
+		regdump->coherency_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(COHERENCY_FEATURES), NULL);
+
+		/* We're done accessing the GPU registers for now. */
+		kbase_pm_register_access_disable(kbdev);
+	} else {
+		/* Pre COHERENCY_FEATURES we only supported ACE_LITE */
 		regdump->coherency_features =
 				COHERENCY_FEATURE_BIT(COHERENCY_NONE) |
 				COHERENCY_FEATURE_BIT(COHERENCY_ACE_LITE);
+	}
 }
 
diff --git a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_instr_backend.c b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_instr_backend.c
index 2c987071a77c..3f06a10f7fed 100644
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_instr_backend.c
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_instr_backend.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -23,6 +23,7 @@
 
 #include <mali_kbase.h>
 #include <mali_midg_regmap.h>
+#include <mali_kbase_hwaccess_instr.h>
 #include <backend/gpu/mali_kbase_device_internal.h>
 #include <backend/gpu/mali_kbase_pm_internal.h>
 #include <backend/gpu/mali_kbase_instr_internal.h>
@@ -40,14 +41,6 @@ static void kbasep_instr_hwcnt_cacheclean(struct kbase_device *kbdev)
 	u32 irq_mask;
 
 	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
-	/* Wait for any reset to complete */
-	while (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_RESETTING) {
-		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
-		wait_event(kbdev->hwcnt.backend.cache_clean_wait,
-				kbdev->hwcnt.backend.state !=
-						KBASE_INSTR_STATE_RESETTING);
-		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
-	}
 	KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
 					KBASE_INSTR_STATE_REQUEST_CLEAN);
 
@@ -74,18 +67,14 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
 {
 	unsigned long flags, pm_flags;
 	int err = -EINVAL;
-	struct kbasep_js_device_data *js_devdata;
 	u32 irq_mask;
 	int ret;
 	u64 shader_cores_needed;
-
-	KBASE_DEBUG_ASSERT(NULL == kbdev->hwcnt.suspended_kctx);
+	u32 prfcnt_config;
 
 	shader_cores_needed = kbase_pm_get_present_cores(kbdev,
 							KBASE_PM_CORE_SHADER);
 
-	js_devdata = &kbdev->js_data;
-
 	/* alignment failure */
 	if ((setup->dump_buffer == 0ULL) || (setup->dump_buffer & (2048 - 1)))
 		goto out_err;
@@ -100,14 +89,6 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
 
 	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
 
-	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_RESETTING) {
-		/* GPU is being reset */
-		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
-		wait_event(kbdev->hwcnt.backend.wait,
-					kbdev->hwcnt.backend.triggered != 0);
-		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
-	}
-
 	if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) {
 		/* Instrumentation is already enabled */
 		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
@@ -125,10 +106,6 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
 	kbdev->hwcnt.kctx = kctx;
 	/* Remember the dump address so we can reprogram it later */
 	kbdev->hwcnt.addr = setup->dump_buffer;
-	/* Remember all the settings for suspend/resume */
-	if (&kbdev->hwcnt.suspended_state != setup)
-		memcpy(&kbdev->hwcnt.suspended_state, setup,
-					sizeof(kbdev->hwcnt.suspended_state));
 
 	/* Request the clean */
 	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
@@ -151,9 +128,22 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
 	kbase_pm_request_l2_caches(kbdev);
 
 	/* Configure */
+	prfcnt_config = kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT;
+#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY
+	{
+		u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+		u32 product_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID)
+			>> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+		int arch_v6 = GPU_ID_IS_NEW_FORMAT(product_id);
+
+		if (arch_v6)
+			prfcnt_config |= 1 << PRFCNT_CONFIG_SETSELECT_SHIFT;
+	}
+#endif
+
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
-					(kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT)
-					| PRFCNT_CONFIG_MODE_OFF, kctx);
+			prfcnt_config | PRFCNT_CONFIG_MODE_OFF, kctx);
+
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
 					setup->dump_buffer & 0xFFFFFFFF, kctx);
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
@@ -174,8 +164,7 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
 							setup->tiler_bm, kctx);
 
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
-				(kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT) |
-				PRFCNT_CONFIG_MODE_MANUAL, kctx);
+			prfcnt_config | PRFCNT_CONFIG_MODE_MANUAL, kctx);
 
 	/* If HW has PRLAM-8186 we can now re-enable the tiler HW counters dump
 	 */
@@ -185,14 +174,6 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
 
 	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
 
-	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_RESETTING) {
-		/* GPU is being reset */
-		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
-		wait_event(kbdev->hwcnt.backend.wait,
-					kbdev->hwcnt.backend.triggered != 0);
-		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
-	}
-
 	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
 	kbdev->hwcnt.backend.triggered = 1;
 	wake_up(&kbdev->hwcnt.backend.wait);
@@ -359,15 +340,11 @@ void kbasep_cache_clean_worker(struct work_struct *data)
 
 	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
 	/* Wait for our condition, and any reset to complete */
-	while (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_RESETTING ||
-			kbdev->hwcnt.backend.state ==
-						KBASE_INSTR_STATE_CLEANING) {
+	while (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) {
 		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
 		wait_event(kbdev->hwcnt.backend.cache_clean_wait,
-				(kbdev->hwcnt.backend.state !=
-						KBASE_INSTR_STATE_RESETTING &&
 				kbdev->hwcnt.backend.state !=
-						KBASE_INSTR_STATE_CLEANING));
+						KBASE_INSTR_STATE_CLEANING);
 		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
 	}
 	KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
@@ -400,9 +377,6 @@ void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev)
 					&kbdev->hwcnt.backend.cache_clean_work);
 		KBASE_DEBUG_ASSERT(ret);
 	}
-	/* NOTE: In the state KBASE_INSTR_STATE_RESETTING, We're in a reset,
-	 * and the instrumentation state hasn't been restored yet -
-	 * kbasep_reset_timeout_worker() will do the rest of the work */
 
 	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
 }
@@ -430,10 +404,6 @@ void kbase_clean_caches_done(struct kbase_device *kbdev)
 			kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANED;
 			wake_up(&kbdev->hwcnt.backend.cache_clean_wait);
 		}
-		/* NOTE: In the state KBASE_INSTR_STATE_RESETTING, We're in a
-		 * reset, and the instrumentation state hasn't been restored yet
-		 * - kbasep_reset_timeout_worker() will do the rest of the work
-		 */
 
 		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
 	}
@@ -451,14 +421,6 @@ int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx)
 
 	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
 
-	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_RESETTING) {
-		/* GPU is being reset */
-		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
-		wait_event(kbdev->hwcnt.backend.wait,
-					kbdev->hwcnt.backend.triggered != 0);
-		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
-	}
-
 	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
 		err = -EINVAL;
 		kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
@@ -482,14 +444,6 @@ int kbase_instr_hwcnt_clear(struct kbase_context *kctx)
 
 	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
 
-	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_RESETTING) {
-		/* GPU is being reset */
-		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
-		wait_event(kbdev->hwcnt.backend.wait,
-					kbdev->hwcnt.backend.triggered != 0);
-		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
-	}
-
 	/* Check it's the context previously set up and we're not already
 	 * dumping */
 	if (kbdev->hwcnt.kctx != kctx || kbdev->hwcnt.backend.state !=
diff --git a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_instr_defs.h b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_instr_defs.h
index 23bd80a5a150..4794672da8f0 100644
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_instr_defs.h
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_instr_defs.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014, 2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -39,10 +39,6 @@ enum kbase_instr_state {
 	/* Cache clean completed, and either a) a dump is complete, or
 	 * b) instrumentation can now be setup. */
 	KBASE_INSTR_STATE_CLEANED,
-	/* kbasep_reset_timeout_worker() has started (but not compelted) a
-	 * reset. This generally indicates the current action should be aborted,
-	 * and kbasep_reset_timeout_worker() will handle the cleanup */
-	KBASE_INSTR_STATE_RESETTING,
 	/* An error has occured during DUMPING (page fault). */
 	KBASE_INSTR_STATE_FAULT
 };
diff --git a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_irq_linux.c b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_irq_linux.c
index 49c72f90aac6..b891b12a3299 100644
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_irq_linux.c
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_irq_linux.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -28,7 +28,6 @@
 #define MMU_IRQ_TAG	1
 #define GPU_IRQ_TAG	2
 
-
 static void *kbase_tag(void *ptr, u32 tag)
 {
 	return (void *)(((uintptr_t) ptr) | tag);
@@ -39,9 +38,6 @@ static void *kbase_untag(void *ptr)
 	return (void *)(((uintptr_t) ptr) & ~3);
 }
 
-
-
-
 static irqreturn_t kbase_job_irq_handler(int irq, void *data)
 {
 	unsigned long flags;
@@ -151,13 +147,13 @@ static irqreturn_t kbase_gpu_irq_handler(int irq, void *data)
 
 	return IRQ_HANDLED;
 }
+
 static irq_handler_t kbase_handler_table[] = {
 	[JOB_IRQ_TAG] = kbase_job_irq_handler,
 	[MMU_IRQ_TAG] = kbase_mmu_irq_handler,
 	[GPU_IRQ_TAG] = kbase_gpu_irq_handler,
 };
 
-
 #ifdef CONFIG_MALI_DEBUG
 #define  JOB_IRQ_HANDLER JOB_IRQ_TAG
 #define  MMU_IRQ_HANDLER MMU_IRQ_TAG
diff --git a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_jm_defs.h b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_jm_defs.h
index 8ccc440171a2..83d477898c5e 100644
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_jm_defs.h
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_jm_defs.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -71,6 +71,7 @@ struct slot_rb {
  * @reset_work:			Work item for performing the reset
  * @reset_wait:			Wait event signalled when the reset is complete
  * @reset_timer:		Timeout for soft-stops before the reset
+ * @timeouts_updated:           Have timeout values just been updated?
  *
  * The kbasep_js_device_data::runpool_irq::lock (a spinlock) must be held when
  * accessing this structure
@@ -97,11 +98,15 @@ struct kbase_backend_data {
 /* The GPU reset process is currently occuring (timeout has expired or
  * kbasep_try_reset_gpu_early was called) */
 #define KBASE_RESET_GPU_HAPPENING       3
-
+/* Reset the GPU silently, used when resetting the GPU as part of normal
+ * behavior (e.g. when exiting protected mode). */
+#define KBASE_RESET_GPU_SILENT          4
 	struct workqueue_struct *reset_workq;
 	struct work_struct reset_work;
 	wait_queue_head_t reset_wait;
 	struct hrtimer reset_timer;
+
+	bool timeouts_updated;
 };
 
 /**
diff --git a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_jm_hw.c b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_jm_hw.c
index 33d6aef0ec72..00900a99a898 100644
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_jm_hw.c
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_jm_hw.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -15,8 +15,6 @@
 
 
 
-
-
 /*
  * Base kernel job manager APIs
  */
@@ -27,11 +25,9 @@
 #if defined(CONFIG_MALI_GATOR_SUPPORT)
 #include <mali_kbase_gator.h>
 #endif
-#if defined(CONFIG_MALI_MIPE_ENABLED)
 #include <mali_kbase_tlstream.h>
-#endif
+#include <mali_kbase_vinstr.h>
 #include <mali_kbase_hw.h>
-#include <mali_kbase_config_defaults.h>
 #include <mali_kbase_hwaccess_jm.h>
 #include <backend/gpu/mali_kbase_device_internal.h>
 #include <backend/gpu/mali_kbase_irq_internal.h>
@@ -87,14 +83,31 @@ void kbase_job_hw_submit(struct kbase_device *kbdev,
 	 * start */
 	cfg = kctx->as_nr;
 
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION))
+		cfg |= JS_CONFIG_ENABLE_FLUSH_REDUCTION;
+
 #ifndef CONFIG_MALI_COH_GPU
-	cfg |= JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE;
-	cfg |= JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE;
-#endif
+	if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_START))
+		cfg |= JS_CONFIG_START_FLUSH_NO_ACTION;
+	else
+		cfg |= JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE;
+
+	if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_END))
+		cfg |= JS_CONFIG_END_FLUSH_NO_ACTION;
+	else
+		cfg |= JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE;
+#endif /* CONFIG_MALI_COH_GPU */
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10649) ||
+		!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3982))
+		cfg |= JS_CONFIG_START_MMU;
 
-	cfg |= JS_CONFIG_START_MMU;
 	cfg |= JS_CONFIG_THREAD_PRI(8);
 
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE) &&
+		(katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED))
+		cfg |= JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK;
+
 	if (kbase_hw_has_feature(kbdev,
 				BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) {
 		if (!kbdev->hwaccess.backend.slot_rb[js].job_chain_flag) {
@@ -111,6 +124,9 @@ void kbase_job_hw_submit(struct kbase_device *kbdev,
 
 	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_CONFIG_NEXT), cfg, kctx);
 
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION))
+		kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_FLUSH_ID_NEXT),
+				katom->flush_id, kctx);
 
 	/* Write an approximate start timestamp.
 	 * It's approximate because there might be a job in the HEAD register.
@@ -130,7 +146,6 @@ void kbase_job_hw_submit(struct kbase_device *kbdev,
 				GATOR_MAKE_EVENT(GATOR_JOB_SLOT_START, js),
 				kctx, kbase_jd_atom_id(kctx, katom));
 #endif
-#if defined(CONFIG_MALI_MIPE_ENABLED)
 	kbase_tlstream_tl_attrib_atom_config(katom, jc_head,
 			katom->affinity, cfg);
 	kbase_tlstream_tl_ret_ctx_lpu(
@@ -142,7 +157,6 @@ void kbase_job_hw_submit(struct kbase_device *kbdev,
 			katom,
 			&kbdev->gpu_props.props.raw_props.js_features[js],
 			"ctx_nr,atom_nr");
-#endif
 #ifdef CONFIG_GPU_TRACEPOINTS
 	if (!kbase_backend_nr_atoms_submitted(kbdev, js)) {
 		/* If this is the only job on the slot, trace it as starting */
@@ -204,6 +218,24 @@ static void kbasep_job_slot_update_head_start_timestamp(
 	}
 }
 
+/**
+ * kbasep_trace_tl_nret_atom_lpu - Call nret_atom_lpu timeline tracepoint
+ * @kbdev: kbase device
+ * @i: job slot
+ *
+ * Get kbase atom by calling kbase_gpu_inspect for given job slot.
+ * Then use obtained katom and name of slot associated with the given
+ * job slot number in tracepoint call to the instrumentation module
+ * informing that given atom is no longer executed on given lpu (job slot).
+ */
+static void kbasep_trace_tl_nret_atom_lpu(struct kbase_device *kbdev, int i)
+{
+	struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, i, 0);
+
+	kbase_tlstream_tl_nret_atom_lpu(katom,
+		&kbdev->gpu_props.props.raw_props.js_features[i]);
+}
+
 void kbase_job_done(struct kbase_device *kbdev, u32 done)
 {
 	unsigned long flags;
@@ -264,9 +296,12 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done)
 						GATOR_JOB_SLOT_SOFT_STOPPED, i),
 								NULL, 0);
 #endif
-#if defined(CONFIG_MALI_MIPE_ENABLED)
+
 					kbase_tlstream_aux_job_softstop(i);
-#endif
+
+					kbasep_trace_tl_nret_atom_lpu(
+						kbdev, i);
+
 					/* Soft-stopped job - read the value of
 					 * JS<n>_TAIL so that the job chain can
 					 * be resumed */
@@ -437,19 +472,21 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done)
 KBASE_EXPORT_TEST_API(kbase_job_done);
 
 static bool kbasep_soft_stop_allowed(struct kbase_device *kbdev,
-								u16 core_reqs)
+					struct kbase_jd_atom *katom)
 {
 	bool soft_stops_allowed = true;
 
-	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408)) {
-		if ((core_reqs & BASE_JD_REQ_T) != 0)
+	if (kbase_jd_katom_is_protected(katom)) {
+		soft_stops_allowed = false;
+	} else if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408)) {
+		if ((katom->core_req & BASE_JD_REQ_T) != 0)
 			soft_stops_allowed = false;
 	}
 	return soft_stops_allowed;
 }
 
 static bool kbasep_hard_stop_allowed(struct kbase_device *kbdev,
-								u16 core_reqs)
+						base_jd_core_req core_reqs)
 {
 	bool hard_stops_allowed = true;
 
@@ -463,7 +500,7 @@ static bool kbasep_hard_stop_allowed(struct kbase_device *kbdev,
 void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
 					int js,
 					u32 action,
-					u16 core_reqs,
+					base_jd_core_req core_reqs,
 					struct kbase_jd_atom *target_katom)
 {
 	struct kbase_context *kctx = target_katom->kctx;
@@ -486,12 +523,13 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
 
 	if (action == JS_COMMAND_SOFT_STOP) {
 		bool soft_stop_allowed = kbasep_soft_stop_allowed(kbdev,
-								core_reqs);
+								target_katom);
 
 		if (!soft_stop_allowed) {
 #ifdef CONFIG_MALI_DEBUG
-			dev_dbg(kbdev->dev, "Attempt made to soft-stop a job that cannot be soft-stopped. core_reqs = 0x%X",
-						(unsigned int)core_reqs);
+			dev_dbg(kbdev->dev,
+					"Attempt made to soft-stop a job that cannot be soft-stopped. core_reqs = 0x%X",
+					(unsigned int)core_reqs);
 #endif				/* CONFIG_MALI_DEBUG */
 			return;
 		}
@@ -499,9 +537,51 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
 		/* We are about to issue a soft stop, so mark the atom as having
 		 * been soft stopped */
 		target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED;
-	}
 
-	if (action == JS_COMMAND_HARD_STOP) {
+		/* Mark the point where we issue the soft-stop command */
+		kbase_tlstream_aux_issue_job_softstop(target_katom);
+
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) {
+			int i;
+
+			for (i = 0;
+			     i < kbase_backend_nr_atoms_submitted(kbdev, js);
+			     i++) {
+				struct kbase_jd_atom *katom;
+
+				katom = kbase_gpu_inspect(kbdev, js, i);
+
+				KBASE_DEBUG_ASSERT(katom);
+
+				/* For HW_ISSUE_8316, only 'bad' jobs attacking
+				 * the system can cause this issue: normally,
+				 * all memory should be allocated in multiples
+				 * of 4 pages, and growable memory should be
+				 * changed size in multiples of 4 pages.
+				 *
+				 * Whilst such 'bad' jobs can be cleared by a
+				 * GPU reset, the locking up of a uTLB entry
+				 * caused by the bad job could also stall other
+				 * ASs, meaning that other ASs' jobs don't
+				 * complete in the 'grace' period before the
+				 * reset. We don't want to lose other ASs' jobs
+				 * when they would normally complete fine, so we
+				 * must 'poke' the MMU regularly to help other
+				 * ASs complete */
+				kbase_as_poking_timer_retain_atom(
+						kbdev, katom->kctx, katom);
+			}
+		}
+
+		if (kbase_hw_has_feature(
+				kbdev,
+				BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) {
+			action = (target_katom->atom_flags &
+					KBASE_KATOM_FLAGS_JOBCHAIN) ?
+				JS_COMMAND_SOFT_STOP_1 :
+				JS_COMMAND_SOFT_STOP_0;
+		}
+	} else if (action == JS_COMMAND_HARD_STOP) {
 		bool hard_stop_allowed = kbasep_hard_stop_allowed(kbdev,
 								core_reqs);
 
@@ -525,55 +605,21 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
 			 * hard-stop fails, so it is safe to just return and
 			 * ignore the hard-stop request.
 			 */
-			dev_warn(kbdev->dev, "Attempt made to hard-stop a job that cannot be hard-stopped. core_reqs = 0x%X",
-						(unsigned int)core_reqs);
+			dev_warn(kbdev->dev,
+					"Attempt made to hard-stop a job that cannot be hard-stopped. core_reqs = 0x%X",
+					(unsigned int)core_reqs);
 			return;
 		}
 		target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_HARD_STOPPED;
-	}
-
-	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316) &&
-					action == JS_COMMAND_SOFT_STOP) {
-		int i;
 
-		for (i = 0; i < kbase_backend_nr_atoms_submitted(kbdev, js);
-									i++) {
-			struct kbase_jd_atom *katom;
-
-			katom = kbase_gpu_inspect(kbdev, js, i);
-
-			KBASE_DEBUG_ASSERT(katom);
-
-			/* For HW_ISSUE_8316, only 'bad' jobs attacking the
-			 * system can cause this issue: normally, all memory
-			 * should be allocated in multiples of 4 pages, and
-			 * growable memory should be changed size in multiples
-			 * of 4 pages.
-			 *
-			 * Whilst such 'bad' jobs can be cleared by a GPU reset,
-			 * the locking up of a uTLB entry caused by the bad job
-			 * could also stall other ASs, meaning that other ASs'
-			 * jobs don't complete in the 'grace' period before the
-			 * reset. We don't want to lose other ASs' jobs when
-			 * they would normally complete fine, so we must 'poke'
-			 * the MMU regularly to help other ASs complete */
-			kbase_as_poking_timer_retain_atom(kbdev, katom->kctx,
-									katom);
-		}
-	}
-
-	if (kbase_hw_has_feature(kbdev,
+		if (kbase_hw_has_feature(
+				kbdev,
 				BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) {
-		if (action == JS_COMMAND_SOFT_STOP)
-			action = (target_katom->atom_flags &
-						KBASE_KATOM_FLAGS_JOBCHAIN) ?
-					JS_COMMAND_SOFT_STOP_1 :
-					JS_COMMAND_SOFT_STOP_0;
-		else
 			action = (target_katom->atom_flags &
-						KBASE_KATOM_FLAGS_JOBCHAIN) ?
-					JS_COMMAND_HARD_STOP_1 :
-					JS_COMMAND_HARD_STOP_0;
+					KBASE_KATOM_FLAGS_JOBCHAIN) ?
+				JS_COMMAND_HARD_STOP_1 :
+				JS_COMMAND_HARD_STOP_0;
+		}
 	}
 
 	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND), action, kctx);
@@ -699,7 +745,6 @@ void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
 				struct kbase_jd_atom *target_katom)
 {
 	struct kbase_device *kbdev;
-	struct kbasep_js_device_data *js_devdata;
 	int js = target_katom->slot_nr;
 	int priority = target_katom->sched_priority;
 	int i;
@@ -707,7 +752,6 @@ void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
 	KBASE_DEBUG_ASSERT(kctx != NULL);
 	kbdev = kctx->kbdev;
 	KBASE_DEBUG_ASSERT(kbdev != NULL);
-	js_devdata = &kbdev->js_data;
 
 	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
 
@@ -825,6 +869,13 @@ u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev)
 {
 	u32 flush_id = 0;
 
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION)) {
+		mutex_lock(&kbdev->pm.lock);
+		if (kbdev->pm.backend.gpu_powered)
+			flush_id = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(LATEST_FLUSH), NULL);
+		mutex_unlock(&kbdev->pm.lock);
+	}
 
 	return flush_id;
 }
@@ -1032,7 +1083,7 @@ void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
  * state when the soft/hard-stop action is complete
  */
 void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action,
-		u16 core_reqs, struct kbase_jd_atom *target_katom)
+		base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom)
 {
 	u32 hw_action = action & JS_COMMAND_MASK;
 
@@ -1044,7 +1095,7 @@ void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action,
 	/* For soft-stop, don't enter if soft-stop not allowed, or isn't
 	 * causing disjoint */
 	if (hw_action == JS_COMMAND_SOFT_STOP &&
-			!(kbasep_soft_stop_allowed(kbdev, core_reqs) &&
+			!(kbasep_soft_stop_allowed(kbdev, target_katom) &&
 			  (action & JS_COMMAND_SW_CAUSES_DISJOINT)))
 		return;
 
@@ -1109,26 +1160,6 @@ static void kbase_debug_dump_registers(struct kbase_device *kbdev)
 		kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG), NULL));
 }
 
-static void kbasep_save_hwcnt_setup(struct kbase_device *kbdev,
-				struct kbase_context *kctx,
-				struct kbase_uk_hwcnt_setup *hwcnt_setup)
-{
-	hwcnt_setup->dump_buffer =
-		kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), kctx) &
-								0xffffffff;
-	hwcnt_setup->dump_buffer |= (u64)
-		kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI), kctx) <<
-									32;
-	hwcnt_setup->jm_bm =
-		kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN), kctx);
-	hwcnt_setup->shader_bm =
-		kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN), kctx);
-	hwcnt_setup->tiler_bm =
-		kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), kctx);
-	hwcnt_setup->mmu_l2_bm =
-		kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN), kctx);
-}
-
 static void kbasep_reset_timeout_worker(struct work_struct *data)
 {
 	unsigned long flags, mmu_flags;
@@ -1136,10 +1167,8 @@ static void kbasep_reset_timeout_worker(struct work_struct *data)
 	int i;
 	ktime_t end_timestamp = ktime_get();
 	struct kbasep_js_device_data *js_devdata;
-	struct kbase_uk_hwcnt_setup hwcnt_setup = { {0} };
-	enum kbase_instr_state bckp_state;
 	bool try_schedule = false;
-	bool restore_hwc = false;
+	bool silent = false;
 
 	KBASE_DEBUG_ASSERT(data);
 
@@ -1149,8 +1178,16 @@ static void kbasep_reset_timeout_worker(struct work_struct *data)
 	KBASE_DEBUG_ASSERT(kbdev);
 	js_devdata = &kbdev->js_data;
 
+	if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+			KBASE_RESET_GPU_SILENT)
+		silent = true;
+
 	KBASE_TRACE_ADD(kbdev, JM_BEGIN_RESET_WORKER, NULL, NULL, 0u, 0);
 
+	/* Suspend vinstr.
+	 * This call will block until vinstr is suspended. */
+	kbase_vinstr_suspend(kbdev->vinstr_ctx);
+
 	/* Make sure the timer has completed - this cannot be done from
 	 * interrupt context, so this cannot be done within
 	 * kbasep_try_reset_gpu_early. */
@@ -1200,39 +1237,14 @@ static void kbasep_reset_timeout_worker(struct work_struct *data)
 	 * assume that anything that is still left on the GPU is stuck there and
 	 * we'll kill it when we reset the GPU */
 
-	dev_err(kbdev->dev, "Resetting GPU (allowing up to %d ms)",
+	if (!silent)
+		dev_err(kbdev->dev, "Resetting GPU (allowing up to %d ms)",
 								RESET_TIMEOUT);
 
-	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
-
-	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_RESETTING) {
-		/* the same interrupt handler preempted itself */
-		/* GPU is being reset */
-		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
-		wait_event(kbdev->hwcnt.backend.wait,
-					kbdev->hwcnt.backend.triggered != 0);
-		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
-	}
-	/* Save the HW counters setup */
-	if (kbdev->hwcnt.kctx != NULL) {
-		struct kbase_context *kctx = kbdev->hwcnt.kctx;
-
-		if (kctx->jctx.sched_info.ctx.is_scheduled) {
-			kbasep_save_hwcnt_setup(kbdev, kctx, &hwcnt_setup);
-
-			restore_hwc = true;
-		}
-	}
-
 	/* Output the state of some interesting registers to help in the
 	 * debugging of GPU resets */
-	kbase_debug_dump_registers(kbdev);
-
-	bckp_state = kbdev->hwcnt.backend.state;
-	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_RESETTING;
-	kbdev->hwcnt.backend.triggered = 0;
-
-	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+	if (!silent)
+		kbase_debug_dump_registers(kbdev);
 
 	/* Reset the GPU */
 	kbase_pm_init_hw(kbdev, 0);
@@ -1272,101 +1284,14 @@ static void kbasep_reset_timeout_worker(struct work_struct *data)
 	kbase_disjoint_state_down(kbdev);
 
 	wake_up(&kbdev->hwaccess.backend.reset_wait);
-	dev_err(kbdev->dev, "Reset complete");
+	if (!silent)
+		dev_err(kbdev->dev, "Reset complete");
 
 	if (js_devdata->nr_contexts_pullable > 0 && !kbdev->poweroff_pending)
 		try_schedule = true;
 
 	mutex_unlock(&js_devdata->runpool_mutex);
 
-	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
-	/* Restore the HW counters setup */
-	if (restore_hwc) {
-		struct kbase_context *kctx = kbdev->hwcnt.kctx;
-
-		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
-				(kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT) |
-				PRFCNT_CONFIG_MODE_OFF, kctx);
-		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
-				hwcnt_setup.dump_buffer & 0xFFFFFFFF, kctx);
-		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
-				hwcnt_setup.dump_buffer >> 32,        kctx);
-		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN),
-				hwcnt_setup.jm_bm,                    kctx);
-		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN),
-				hwcnt_setup.shader_bm,                kctx);
-		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN),
-				hwcnt_setup.mmu_l2_bm,                kctx);
-
-		/* Due to PRLAM-8186 we need to disable the Tiler before we
-		 * enable the HW counter dump. */
-		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
-			kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
-						0, kctx);
-		else
-			kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
-						hwcnt_setup.tiler_bm, kctx);
-
-		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
-				(kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT) |
-				PRFCNT_CONFIG_MODE_MANUAL, kctx);
-
-		/* If HW has PRLAM-8186 we can now re-enable the tiler HW
-		 * counters dump */
-		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
-			kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
-						hwcnt_setup.tiler_bm, kctx);
-	}
-	kbdev->hwcnt.backend.state = bckp_state;
-	switch (kbdev->hwcnt.backend.state) {
-	/* Cases for waking kbasep_cache_clean_worker worker */
-	case KBASE_INSTR_STATE_CLEANED:
-		/* Cache-clean IRQ occurred, but we reset:
-		 * Wakeup incase the waiter saw RESETTING */
-	case KBASE_INSTR_STATE_REQUEST_CLEAN:
-		/* After a clean was requested, but before the regs were
-		 * written:
-		 * Wakeup incase the waiter saw RESETTING */
-		wake_up(&kbdev->hwcnt.backend.cache_clean_wait);
-		break;
-	case KBASE_INSTR_STATE_CLEANING:
-		/* Either:
-		 * 1) We've not got the Cache-clean IRQ yet: it was lost, or:
-		 * 2) We got it whilst resetting: it was voluntarily lost
-		 *
-		 * So, move to the next state and wakeup: */
-		kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANED;
-		wake_up(&kbdev->hwcnt.backend.cache_clean_wait);
-		break;
-
-	/* Cases for waking anyone else */
-	case KBASE_INSTR_STATE_DUMPING:
-		/* If dumping, abort the dump, because we may've lost the IRQ */
-		kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
-		kbdev->hwcnt.backend.triggered = 1;
-		wake_up(&kbdev->hwcnt.backend.wait);
-		break;
-	case KBASE_INSTR_STATE_DISABLED:
-	case KBASE_INSTR_STATE_IDLE:
-	case KBASE_INSTR_STATE_FAULT:
-		/* Every other reason: wakeup in that state */
-		kbdev->hwcnt.backend.triggered = 1;
-		wake_up(&kbdev->hwcnt.backend.wait);
-		break;
-
-	/* Unhandled cases */
-	case KBASE_INSTR_STATE_RESETTING:
-	default:
-		BUG();
-		break;
-	}
-	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
-
-	/* Resume the vinstr core */
-	kbase_vinstr_hwc_resume(kbdev->vinstr_ctx);
-
-	/* Note: counter dumping may now resume */
-
 	mutex_lock(&kbdev->pm.lock);
 
 	/* Find out what cores are required now */
@@ -1386,6 +1311,10 @@ static void kbasep_reset_timeout_worker(struct work_struct *data)
 	}
 
 	kbase_pm_context_idle(kbdev);
+
+	/* Release vinstr */
+	kbase_vinstr_resume(kbdev->vinstr_ctx);
+
 	KBASE_TRACE_ADD(kbdev, JM_END_RESET_WORKER, NULL, NULL, 0u, 0);
 }
 
@@ -1467,7 +1396,7 @@ static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev)
  *
  * Return:
  *   The function returns a boolean which should be interpreted as follows:
- *   true - Prepared for reset, kbase_reset_gpu should be called.
+ *   true - Prepared for reset, kbase_reset_gpu_locked should be called.
  *   false - Another thread is performing a reset, kbase_reset_gpu should
  *   not be called.
  */
@@ -1561,4 +1490,29 @@ void kbase_reset_gpu_locked(struct kbase_device *kbdev)
 	/* Try resetting early */
 	kbasep_try_reset_gpu_early_locked(kbdev);
 }
+
+void kbase_reset_gpu_silent(struct kbase_device *kbdev)
+{
+	if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_NOT_PENDING,
+						KBASE_RESET_GPU_SILENT) !=
+						KBASE_RESET_GPU_NOT_PENDING) {
+		/* Some other thread is already resetting the GPU */
+		return;
+	}
+
+	kbase_disjoint_state_up(kbdev);
+
+	queue_work(kbdev->hwaccess.backend.reset_workq,
+			&kbdev->hwaccess.backend.reset_work);
+}
+
+bool kbase_reset_gpu_active(struct kbase_device *kbdev)
+{
+	if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+			KBASE_RESET_GPU_NOT_PENDING)
+		return false;
+
+	return true;
+}
 #endif /* KBASE_GPU_RESET_EN */
diff --git a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_jm_internal.h b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_jm_internal.h
index eb068d40283b..8f1e5615ea43 100644
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_jm_internal.h
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_jm_internal.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -96,7 +96,7 @@ void kbase_job_hw_submit(struct kbase_device *kbdev,
 void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
 					int js,
 					u32 action,
-					u16 core_reqs,
+					base_jd_core_req core_reqs,
 					struct kbase_jd_atom *target_katom);
 
 /**
diff --git a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_jm_rb.c b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_jm_rb.c
index c0168c74f815..da7c4df7d277 100644
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_jm_rb.c
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_jm_rb.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -24,11 +24,11 @@
 #include <mali_kbase_hwaccess_jm.h>
 #include <mali_kbase_jm.h>
 #include <mali_kbase_js.h>
+#include <mali_kbase_tlstream.h>
 #include <mali_kbase_10969_workaround.h>
 #include <backend/gpu/mali_kbase_device_internal.h>
 #include <backend/gpu/mali_kbase_jm_internal.h>
 #include <backend/gpu/mali_kbase_js_affinity.h>
-#include <backend/gpu/mali_kbase_js_internal.h>
 #include <backend/gpu/mali_kbase_pm_internal.h>
 
 /* Return whether the specified ringbuffer is empty. HW access lock must be
@@ -592,7 +592,7 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev,
 	case KBASE_ATOM_GPU_RB_READY:
 		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
 
-	case KBASE_ATOM_GPU_RB_WAITING_SECURE_MODE:
+	case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_ENTRY:
 		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
 
 	case KBASE_ATOM_GPU_RB_WAITING_AFFINITY:
@@ -603,6 +603,9 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev,
 	case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE:
 		break;
 
+	case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_EXIT:
+		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
 	case KBASE_ATOM_GPU_RB_WAITING_BLOCKED:
 		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
 
@@ -654,53 +657,145 @@ static inline bool kbase_gpu_rmu_workaround(struct kbase_device *kbdev, int js)
 	return true;
 }
 
-static bool kbase_gpu_in_secure_mode(struct kbase_device *kbdev)
+static inline bool kbase_gpu_in_protected_mode(struct kbase_device *kbdev)
 {
-	return kbdev->secure_mode;
+	return kbdev->protected_mode;
 }
 
-static int kbase_gpu_secure_mode_enable(struct kbase_device *kbdev)
+static int kbase_gpu_protected_mode_enter(struct kbase_device *kbdev)
 {
 	int err = -EINVAL;
 
 	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
 
-	WARN_ONCE(!kbdev->secure_ops,
-			"Cannot enable secure mode: secure callbacks not specified.\n");
+	WARN_ONCE(!kbdev->protected_ops,
+			"Cannot enter protected mode: protected callbacks not specified.\n");
 
-	if (kbdev->secure_ops) {
-		/* Switch GPU to secure mode */
-		err = kbdev->secure_ops->secure_mode_enable(kbdev);
+	if (kbdev->protected_ops) {
+		/* Switch GPU to protected mode */
+		err = kbdev->protected_ops->protected_mode_enter(kbdev);
 
 		if (err)
-			dev_warn(kbdev->dev, "Failed to enable secure mode: %d\n", err);
+			dev_warn(kbdev->dev, "Failed to enable protected mode: %d\n",
+					err);
 		else
-			kbdev->secure_mode = true;
+			kbdev->protected_mode = true;
 	}
 
 	return err;
 }
 
-static int kbase_gpu_secure_mode_disable(struct kbase_device *kbdev)
+static int kbase_gpu_protected_mode_reset(struct kbase_device *kbdev)
 {
-	int err = -EINVAL;
-
 	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
 
-	WARN_ONCE(!kbdev->secure_ops,
-			"Cannot disable secure mode: secure callbacks not specified.\n");
+	WARN_ONCE(!kbdev->protected_ops,
+			"Cannot exit protected mode: protected callbacks not specified.\n");
 
-	if (kbdev->secure_ops) {
-		/* Switch GPU to non-secure mode */
-		err = kbdev->secure_ops->secure_mode_disable(kbdev);
+	if (!kbdev->protected_ops)
+		return -EINVAL;
 
-		if (err)
-			dev_warn(kbdev->dev, "Failed to disable secure mode: %d\n", err);
-		else
-			kbdev->secure_mode = false;
+	kbdev->protected_mode_transition = true;
+	kbase_reset_gpu_silent(kbdev);
+
+	return 0;
+}
+
+static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev,
+		struct kbase_jd_atom **katom, int idx, int js)
+{
+	int err = 0;
+
+	switch (katom[idx]->exit_protected_state) {
+	case KBASE_ATOM_EXIT_PROTECTED_CHECK:
+		/*
+		 * If the atom ahead of this one has not yet been
+		 * submitted then bail.
+		 */
+		if (idx == 1 &&
+			(katom[0]->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED &&
+			katom[0]->gpu_rb_state != KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB))
+			return -EAGAIN;
+
+		/* If we're not exiting protected mode then we're done here. */
+		if (!(kbase_gpu_in_protected_mode(kbdev) &&
+				!kbase_jd_katom_is_protected(katom[idx])))
+			return 0;
+
+		/*
+		 * If there is a transition in progress, or work still
+		 * on the GPU, try again later.
+		 */
+		if (kbdev->protected_mode_transition ||
+				kbase_gpu_atoms_submitted_any(kbdev))
+			return -EAGAIN;
+
+		/*
+		 * Exiting protected mode requires a reset, but first the L2
+		 * needs to be powered down to ensure it's not active when the
+		 * reset is issued.
+		 */
+		katom[idx]->exit_protected_state =
+				KBASE_ATOM_EXIT_PROTECTED_IDLE_L2;
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+	case KBASE_ATOM_EXIT_PROTECTED_IDLE_L2:
+		if (kbase_pm_get_active_cores(kbdev, KBASE_PM_CORE_L2) ||
+				kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_L2)) {
+			/*
+			 * The L2 is still powered, wait for all the users to
+			 * finish with it before doing the actual reset.
+			 */
+			return -EAGAIN;
+		}
+		katom[idx]->exit_protected_state =
+				KBASE_ATOM_EXIT_PROTECTED_RESET;
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+	case KBASE_ATOM_EXIT_PROTECTED_RESET:
+		/* Issue the reset to the GPU */
+		err = kbase_gpu_protected_mode_reset(kbdev);
+		if (err) {
+			/* Failed to exit protected mode, fail atom */
+			katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID;
+			kbase_gpu_mark_atom_for_return(kbdev, katom[idx]);
+			/* Only return if head atom or previous atom
+			 * already removed - as atoms must be returned
+			 * in order */
+			if (idx == 0 || katom[0]->gpu_rb_state ==
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+				kbase_gpu_dequeue_atom(kbdev, js, NULL);
+				kbase_jm_return_atom_to_js(kbdev, katom[idx]);
+			}
+
+			kbase_vinstr_resume(kbdev->vinstr_ctx);
+
+			return -EINVAL;
+		}
+
+		katom[idx]->exit_protected_state =
+				KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT;
+
+		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+	case KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT:
+		if (kbase_reset_gpu_active(kbdev))
+			return -EAGAIN;
+
+		/* protected mode sanity checks */
+		KBASE_DEBUG_ASSERT_MSG(
+			kbase_jd_katom_is_protected(katom[idx]) == kbase_gpu_in_protected_mode(kbdev),
+			"Protected mode of atom (%d) doesn't match protected mode of GPU (%d)",
+			kbase_jd_katom_is_protected(katom[idx]), kbase_gpu_in_protected_mode(kbdev));
+		KBASE_DEBUG_ASSERT_MSG(
+			(kbase_jd_katom_is_protected(katom[idx]) && js == 0) ||
+			!kbase_jd_katom_is_protected(katom[idx]),
+			"Protected atom on JS%d not supported", js);
 	}
 
-	return err;
+	return 0;
 }
 
 void kbase_gpu_slot_update(struct kbase_device *kbdev)
@@ -719,6 +814,7 @@ void kbase_gpu_slot_update(struct kbase_device *kbdev)
 
 		for (idx = 0; idx < SLOT_RB_SIZE; idx++) {
 			bool cores_ready;
+			int ret;
 
 			if (!katom[idx])
 				continue;
@@ -735,10 +831,48 @@ void kbase_gpu_slot_update(struct kbase_device *kbdev)
 					break;
 
 				katom[idx]->gpu_rb_state =
-				KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE;
+					KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_EXIT;
 
 			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+			case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_EXIT:
+				/*
+				 * Exiting protected mode must be done before
+				 * the references on the cores are taken, as
+				 * a power-down of the L2 is required, which
+				 * cannot happen after the references for this
+				 * atom are taken.
+				 */
+				ret = kbase_jm_exit_protected_mode(kbdev,
+						katom, idx, js);
+				if (ret)
+					break;
+
+				katom[idx]->gpu_rb_state =
+					KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE;
+
+			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
 			case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE:
+				if (katom[idx]->will_fail_event_code) {
+					kbase_gpu_mark_atom_for_return(kbdev,
+							katom[idx]);
+					/* Set EVENT_DONE so this atom will be
+					   completed, not unpulled. */
+					katom[idx]->event_code =
+						BASE_JD_EVENT_DONE;
+					/* Only return if head atom or previous
+					 * atom already removed - as atoms must
+					 * be returned in order. */
+					if (idx == 0 ||	katom[0]->gpu_rb_state ==
+							KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+						kbase_gpu_dequeue_atom(kbdev, js, NULL);
+						kbase_jm_return_atom_to_js(kbdev, katom[idx]);
+					}
+					break;
+				}
+
+
 				cores_ready =
 					kbasep_js_job_check_ref_cores(kbdev, js,
 								katom[idx]);
@@ -765,12 +899,28 @@ void kbase_gpu_slot_update(struct kbase_device *kbdev)
 					break;
 
 				katom[idx]->gpu_rb_state =
-					KBASE_ATOM_GPU_RB_WAITING_SECURE_MODE;
+					KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_ENTRY;
 
 			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
 
-			case KBASE_ATOM_GPU_RB_WAITING_SECURE_MODE:
-				if (kbase_gpu_in_secure_mode(kbdev) != kbase_jd_katom_is_secure(katom[idx])) {
+			case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_ENTRY:
+
+				/* Only submit if head atom or previous atom
+				 * already submitted */
+				if (idx == 1 &&
+					(katom[0]->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED &&
+					katom[0]->gpu_rb_state != KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB))
+					break;
+
+				/*
+				 * If the GPU is transitioning in or out of
+				 * protected mode then bail now; we will be
+				 * called again once the new state has settled.
+				 */
+				if (kbdev->protected_mode_transition)
+					break;
+
+				if (!kbase_gpu_in_protected_mode(kbdev) && kbase_jd_katom_is_protected(katom[idx])) {
 					int err = 0;
 
 					/* Not in correct mode, take action */
@@ -784,16 +934,26 @@ void kbase_gpu_slot_update(struct kbase_device *kbdev)
 						 */
 						break;
 					}
-
-					/* No jobs running, so we can switch GPU mode right now */
-					if (kbase_jd_katom_is_secure(katom[idx])) {
-						err = kbase_gpu_secure_mode_enable(kbdev);
-					} else {
-						err = kbase_gpu_secure_mode_disable(kbdev);
+					if (kbase_vinstr_try_suspend(kbdev->vinstr_ctx) < 0) {
+						/*
+						 * We can't switch now because
+						 * the vinstr core state switch
+						 * is not done yet.
+						 */
+						break;
 					}
+					/* From this point on the GPU must
+					 * either be switched to protected mode
+					 * or vinstr must be re-enabled. */
 
+					/* No jobs running, so we can switch GPU mode right now */
+					err = kbase_gpu_protected_mode_enter(kbdev);
 					if (err) {
-						/* Failed to switch secure mode, fail atom */
+						/*
+						 * Failed to switch into protected mode, resume
+						 * vinstr core and fail atom.
+						 */
+						kbase_vinstr_resume(kbdev->vinstr_ctx);
 						katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID;
 						kbase_gpu_mark_atom_for_return(kbdev, katom[idx]);
 						/* Only return if head atom or previous atom
@@ -808,22 +968,18 @@ void kbase_gpu_slot_update(struct kbase_device *kbdev)
 					}
 				}
 
-				/* Secure mode sanity checks */
+				/* Protected mode sanity checks */
 				KBASE_DEBUG_ASSERT_MSG(
-					kbase_jd_katom_is_secure(katom[idx]) == kbase_gpu_in_secure_mode(kbdev),
-					"Secure mode of atom (%d) doesn't match secure mode of GPU (%d)",
-					kbase_jd_katom_is_secure(katom[idx]), kbase_gpu_in_secure_mode(kbdev));
-				KBASE_DEBUG_ASSERT_MSG(
-					(kbase_jd_katom_is_secure(katom[idx]) && js == 0) ||
-					!kbase_jd_katom_is_secure(katom[idx]),
-					"Secure atom on JS%d not supported", js);
-
+					kbase_jd_katom_is_protected(katom[idx]) == kbase_gpu_in_protected_mode(kbdev),
+					"Protected mode of atom (%d) doesn't match protected mode of GPU (%d)",
+					kbase_jd_katom_is_protected(katom[idx]), kbase_gpu_in_protected_mode(kbdev));
 				katom[idx]->gpu_rb_state =
 					KBASE_ATOM_GPU_RB_READY;
 
 			/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
 
 			case KBASE_ATOM_GPU_RB_READY:
+
 				/* Only submit if head atom or previous atom
 				 * already submitted */
 				if (idx == 1 &&
@@ -944,8 +1100,16 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
 	}
 
 	katom = kbase_gpu_dequeue_atom(kbdev, js, end_timestamp);
-
 	kbase_timeline_job_slot_done(kbdev, katom->kctx, katom, js, 0);
+	kbase_tlstream_tl_nret_atom_lpu(
+			katom,
+			&kbdev->gpu_props.props.raw_props.js_features[
+				katom->slot_nr]);
+	kbase_tlstream_tl_nret_atom_as(katom, &kbdev->as[kctx->as_nr]);
+	kbase_tlstream_tl_nret_ctx_lpu(
+			kctx,
+			&kbdev->gpu_props.props.raw_props.js_features[
+				katom->slot_nr]);
 
 	if (completion_code == BASE_JD_EVENT_STOPPED) {
 		struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js,
@@ -1098,13 +1262,34 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp)
 		for (idx = 0; idx < 2; idx++) {
 			struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev,
 									js, 0);
+			bool keep_in_jm_rb = false;
 
-			if (katom) {
-				kbase_gpu_release_atom(kbdev, katom, NULL);
-				kbase_gpu_dequeue_atom(kbdev, js, NULL);
-				katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
-				kbase_jm_complete(kbdev, katom, end_timestamp);
+			if (!katom)
+				continue;
+
+			if (katom->gpu_rb_state < KBASE_ATOM_GPU_RB_SUBMITTED)
+				keep_in_jm_rb = true;
+
+			kbase_gpu_release_atom(kbdev, katom, NULL);
+
+			/*
+			 * If the atom wasn't on HW when the reset was issued
+			 * then leave it in the RB and next time we're kicked
+			 * it will be processed again from the starting state.
+			 */
+			if (keep_in_jm_rb) {
+				katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
+				katom->exit_protected_state = KBASE_ATOM_EXIT_PROTECTED_CHECK;
+				continue;
 			}
+
+			/*
+			 * The atom was on the HW when the reset was issued,
+			 * so all we can do is fail the atom.
+			 */
+			kbase_gpu_dequeue_atom(kbdev, js, NULL);
+			katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+			kbase_jm_complete(kbdev, katom, end_timestamp);
 		}
 	}
 }
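kbase_jm_exit_protected_mode() above is written as a resumable state machine: every call advances the atom's exit_protected_state as far as the hardware currently allows and returns -EAGAIN whenever it has to wait (for the slot head, for the L2 to drain, for the silent reset to complete), so the slot-update path can simply call it again later. A compressed, self-contained sketch of that fall-through pattern is shown below; the wait flags and issue_reset() are hypothetical stand-ins for the real L2-power and reset-active queries.

/* Sketch of a resumable, fall-through state machine in the style of
 * kbase_jm_exit_protected_mode(). The two flags are hypothetical
 * stand-ins for the real L2-power and reset-active checks. */
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

enum exit_state {
	EXIT_CHECK,
	EXIT_IDLE_L2,
	EXIT_RESET,
	EXIT_RESET_WAIT,
	EXIT_DONE,
};

static bool l2_still_powered;
static bool reset_still_active;

static int issue_reset(void) { return 0; }	/* pretend it always works */

/* Advance as far as possible; return -EAGAIN to be called again later. */
static int exit_protected_step(enum exit_state *state)
{
	switch (*state) {
	case EXIT_CHECK:
		*state = EXIT_IDLE_L2;
		/* fall through */
	case EXIT_IDLE_L2:
		if (l2_still_powered)
			return -EAGAIN;	/* wait for L2 users to drain */
		*state = EXIT_RESET;
		/* fall through */
	case EXIT_RESET:
		if (issue_reset())
			return -EINVAL;	/* fail the atom */
		*state = EXIT_RESET_WAIT;
		/* fall through */
	case EXIT_RESET_WAIT:
		if (reset_still_active)
			return -EAGAIN;	/* reset worker not finished yet */
		*state = EXIT_DONE;
		/* fall through */
	case EXIT_DONE:
		return 0;
	}
	return 0;
}

int main(void)
{
	enum exit_state state = EXIT_CHECK;

	l2_still_powered = true;
	printf("step 1 -> %d\n", exit_protected_step(&state)); /* -EAGAIN */
	l2_still_powered = false;
	reset_still_active = true;
	printf("step 2 -> %d\n", exit_protected_step(&state)); /* -EAGAIN */
	reset_still_active = false;
	printf("step 3 -> %d\n", exit_protected_step(&state)); /* done: 0 */
	return 0;
}
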
diff --git a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_js_affinity.c b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_js_affinity.c
index 6a49669af630..d665420ab380 100644
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_js_affinity.c
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_js_affinity.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -23,6 +23,7 @@
 
 #include <mali_kbase.h>
 #include "mali_kbase_js_affinity.h"
+#include "mali_kbase_hw.h"
 
 #include <backend/gpu/mali_kbase_pm_internal.h>
 
@@ -114,9 +115,14 @@ bool kbase_js_choose_affinity(u64 * const affinity,
 	if ((core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) ==
 								BASE_JD_REQ_T) {
 		spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
-		/* Tiler only job, bit 0 needed to enable tiler but no shader
-		 * cores required */
-		*affinity = 1;
+		/* If the hardware supports XAFFINITY then we'll only enable
+		 * the tiler (which is the default so this is a no-op),
+		 * otherwise enable shader core 0. */
+		if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY))
+			*affinity = 1;
+		else
+			*affinity = 0;
+
 		return true;
 	}
 
@@ -172,9 +178,12 @@ bool kbase_js_choose_affinity(u64 * const affinity,
 	if (*affinity == 0)
 		return false;
 
-	/* Enable core 0 if tiler required */
-	if (core_req & BASE_JD_REQ_T)
-		*affinity = *affinity | 1;
+	/* Enable core 0 if tiler required for hardware without XAFFINITY
+	 * support (notes above) */
+	if (core_req & BASE_JD_REQ_T) {
+		if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY))
+			*affinity = *affinity | 1;
+	}
 
 	return true;
 }
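The affinity rework above rests on one hardware property: with BASE_HW_FEATURE_XAFFINITY the tiler no longer needs shader core 0 to be enabled, so a tiler-only job can run with an empty shader affinity and other jobs no longer have core 0 forced into their mask. A small sketch of that decision follows; has_xaffinity and the simplified core_req bit stand in for kbase_hw_has_feature() and the real BASE_JD_REQ_* flags.

/* Sketch of the tiler-affinity decision from kbase_js_choose_affinity().
 * has_xaffinity stands in for kbase_hw_has_feature(kbdev,
 * BASE_HW_FEATURE_XAFFINITY); core_req bits are simplified. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define REQ_TILER (1u << 0)

static uint64_t choose_affinity(uint64_t shader_mask, uint32_t core_req,
				bool has_xaffinity)
{
	/* Tiler-only job: without XAFFINITY, core 0 must be powered just to
	 * drive the tiler; with XAFFINITY the tiler runs on its own. */
	if (core_req == REQ_TILER)
		return has_xaffinity ? 0 : 1;

	/* Jobs that also use shaders keep their computed mask; only OR in
	 * core 0 for the tiler on hardware without XAFFINITY. */
	if ((core_req & REQ_TILER) && !has_xaffinity)
		shader_mask |= 1;

	return shader_mask;
}

int main(void)
{
	printf("tiler-only, no XAFFINITY: 0x%llx\n",
	       (unsigned long long)choose_affinity(0, REQ_TILER, false));
	printf("tiler-only, XAFFINITY:    0x%llx\n",
	       (unsigned long long)choose_affinity(0, REQ_TILER, true));
	printf("tiler + shaders 0xc, no XAFFINITY: 0x%llx\n",
	       (unsigned long long)choose_affinity(0xc, REQ_TILER | 2, false));
	return 0;
}
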
diff --git a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_js_affinity.h b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_js_affinity.h
index 3026e6a58303..fbffa3b40962 100644
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_js_affinity.h
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_js_affinity.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -24,14 +24,6 @@
 #ifndef _KBASE_JS_AFFINITY_H_
 #define _KBASE_JS_AFFINITY_H_
 
-#ifdef CONFIG_MALI_DEBUG_SHADER_SPLIT_FS
-/* Import the external affinity mask variables */
-extern u64 mali_js0_affinity_mask;
-extern u64 mali_js1_affinity_mask;
-extern u64 mali_js2_affinity_mask;
-#endif /* CONFIG_MALI_DEBUG_SHADER_SPLIT_FS */
-
-
 /**
  * kbase_js_can_run_job_on_slot_no_lock - Decide whether it is possible to
  * submit a job to a particular job slot in the current status
diff --git a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_js_backend.c b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_js_backend.c
index 1e9a7e4c466d..a23deb4ca20c 100644
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_js_backend.c
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_js_backend.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -138,6 +138,17 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer)
 						js_devdata->gpu_reset_ticks_ss;
 				}
 
+				/* If timeouts have been changed then ensure
+				 * that atom tick count is not greater than the
+				 * new soft_stop timeout. This ensures that
+				 * atoms do not miss any of the timeouts due to
+				 * races between this worker and the thread
+				 * changing the timeouts. */
+				if (backend->timeouts_updated &&
+						ticks > soft_stop_ticks)
+					ticks = atom->sched_info.cfs.ticks =
+							soft_stop_ticks;
+
 				/* Job is Soft-Stoppable */
 				if (ticks == soft_stop_ticks) {
 					int disjoint_threshold =
@@ -257,6 +268,8 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer)
 			HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns),
 			HRTIMER_MODE_REL);
 
+	backend->timeouts_updated = false;
+
 	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
 
 	return HRTIMER_NORESTART;
@@ -335,3 +348,10 @@ void kbase_backend_timer_resume(struct kbase_device *kbdev)
 	kbase_backend_ctx_count_changed(kbdev);
 }
 
+void kbase_backend_timeouts_changed(struct kbase_device *kbdev)
+{
+	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+
+	backend->timeouts_updated = true;
+}
+
diff --git a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_mmu_hw_direct.c b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_mmu_hw_direct.c
index 4fd13e2de63e..4a3572d971a6 100644
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_mmu_hw_direct.c
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_mmu_hw_direct.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -20,11 +20,9 @@
 #include <mali_kbase.h>
 #include <mali_kbase_mem.h>
 #include <mali_kbase_mmu_hw.h>
-#if defined(CONFIG_MALI_MIPE_ENABLED)
 #include <mali_kbase_tlstream.h>
-#endif
-#include <backend/gpu/mali_kbase_mmu_hw_direct.h>
 #include <backend/gpu/mali_kbase_device_internal.h>
+#include <mali_kbase_as_fault_debugfs.h>
 
 static inline u64 lock_region(struct kbase_device *kbdev, u64 pfn,
 		u32 num_pages)
@@ -154,6 +152,9 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat)
 							AS_FAULTADDRESS_LO),
 						kctx);
 
+		/* report the fault to debugfs */
+		kbase_as_fault_debugfs_new(kbdev, as_no);
+
 		/* record the fault status */
 		as->fault_status = kbase_reg_read(kbdev,
 						  MMU_AS_REG(as_no,
@@ -165,6 +166,15 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat)
 				KBASE_MMU_FAULT_TYPE_BUS :
 				KBASE_MMU_FAULT_TYPE_PAGE;
 
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+		as->fault_extra_addr = kbase_reg_read(kbdev,
+				MMU_AS_REG(as_no, AS_FAULTEXTRA_HI),
+				kctx);
+		as->fault_extra_addr <<= 32;
+		as->fault_extra_addr |= kbase_reg_read(kbdev,
+				MMU_AS_REG(as_no, AS_FAULTEXTRA_LO),
+				kctx);
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
 
 		if (kbase_as_has_bus_fault(as)) {
 			/* Mark bus fault as handled.
@@ -203,10 +213,36 @@ void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as,
 		struct kbase_context *kctx)
 {
 	struct kbase_mmu_setup *current_setup = &as->current_setup;
-#ifdef CONFIG_MALI_MIPE_ENABLED
 	u32 transcfg = 0;
-#endif
 
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+	transcfg = current_setup->transcfg & 0xFFFFFFFFUL;
+
+	/* Set flag AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK */
+	/* Clear PTW_MEMATTR bits */
+	transcfg &= ~AS_TRANSCFG_PTW_MEMATTR_MASK;
+	/* Enable correct PTW_MEMATTR bits */
+	transcfg |= AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK;
+
+	if (kbdev->system_coherency == COHERENCY_ACE) {
+		/* Set flag AS_TRANSCFG_PTW_SH_OS (outer shareable) */
+		/* Clear PTW_SH bits */
+		transcfg = (transcfg & ~AS_TRANSCFG_PTW_SH_MASK);
+		/* Enable correct PTW_SH bits */
+		transcfg = (transcfg | AS_TRANSCFG_PTW_SH_OS);
+	}
+
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_LO),
+			transcfg, kctx);
+	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_HI),
+			(current_setup->transcfg >> 32) & 0xFFFFFFFFUL, kctx);
+
+#else /* CONFIG_MALI_GPU_MMU_AARCH64 */
+
+	if (kbdev->system_coherency == COHERENCY_ACE)
+		current_setup->transtab |= AS_TRANSTAB_LPAE_SHARE_OUTER;
+
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
 
 	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_LO),
 			current_setup->transtab & 0xFFFFFFFFUL, kctx);
@@ -218,12 +254,10 @@ void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as,
 	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_HI),
 			(current_setup->memattr >> 32) & 0xFFFFFFFFUL, kctx);
 
-#if defined(CONFIG_MALI_MIPE_ENABLED)
 	kbase_tlstream_tl_attrib_as_config(as,
 			current_setup->transtab,
 			current_setup->memattr,
 			transcfg);
-#endif
 
 	write_cmd(kbdev, as->number, AS_COMMAND_UPDATE, kctx);
 }
diff --git a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_backend.c b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_backend.c
index 947a7ed285d6..711e44c7f80a 100644
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_backend.c
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_backend.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -168,6 +168,7 @@ bool kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend)
 
 	/* Force all cores off */
 	kbdev->pm.backend.desired_shader_state = 0;
+	kbdev->pm.backend.desired_tiler_state = 0;
 
 	/* Force all cores to be unavailable, in the situation where
 	 * transitions are in progress for some cores but not others,
diff --git a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_coarse_demand.c b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_coarse_demand.c
index 487391168e25..f891fa225a89 100644
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_coarse_demand.c
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_coarse_demand.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -35,7 +35,8 @@ static u64 coarse_demand_get_core_mask(struct kbase_device *kbdev)
 static bool coarse_demand_get_core_active(struct kbase_device *kbdev)
 {
 	if (0 == kbdev->pm.active_count && !(kbdev->shader_needed_bitmap |
-			kbdev->shader_inuse_bitmap))
+			kbdev->shader_inuse_bitmap) && !kbdev->tiler_needed_cnt
+			&& !kbdev->tiler_inuse_cnt)
 		return false;
 
 	return true;
diff --git a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_defs.h b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_defs.h
index 60e40915869c..e8f96fe6c514 100644
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_defs.h
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_defs.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -192,12 +192,14 @@ union kbase_pm_ca_policy_data {
  * @gpu_poweroff_pending: number of poweroff timer ticks until the GPU is
  *                        powered off
  * @shader_poweroff_pending_time: number of poweroff timer ticks until shaders
- *                        are powered off
+ *                        and/or tilers are powered off
  * @gpu_poweroff_timer: Timer for powering off GPU
  * @gpu_poweroff_wq:   Workqueue to power off GPU on when timer fires
  * @gpu_poweroff_work: Workitem used on @gpu_poweroff_wq
  * @shader_poweroff_pending: Bit mask of shaders to be powered off on next
  *                           timer callback
+ * @tiler_poweroff_pending: Bit mask of tilers to be powered off on next timer
+ *                          callback
  * @poweroff_timer_needed: true if the poweroff timer is currently required,
  *                         false otherwise
  * @poweroff_timer_running: true if the poweroff timer is currently running,
@@ -219,9 +221,6 @@ union kbase_pm_ca_policy_data {
  *                              &struct kbase_pm_callback_conf
  * @callback_power_runtime_idle: Optional callback when the GPU may be idle. See
  *                              &struct kbase_pm_callback_conf
- * @callback_cci_snoop_ctrl: Callback when the GPU L2 power may transition.
- *                           If enable is set then snoops should be enabled
- *                           otherwise snoops should be disabled
  *
  * Note:
  * During an IRQ, @ca_current_policy or @pm_current_policy can be NULL when the
@@ -277,6 +276,7 @@ struct kbase_pm_backend_data {
 	struct work_struct gpu_poweroff_work;
 
 	u64 shader_poweroff_pending;
+	u64 tiler_poweroff_pending;
 
 	bool poweroff_timer_needed;
 	bool poweroff_timer_running;
@@ -288,7 +288,6 @@ struct kbase_pm_backend_data {
 	int (*callback_power_runtime_on)(struct kbase_device *kbdev);
 	void (*callback_power_runtime_off)(struct kbase_device *kbdev);
 	int (*callback_power_runtime_idle)(struct kbase_device *kbdev);
-
 };
 
 
diff --git a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_demand.c b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_demand.c
index 9dac2303bd00..81322fd0dd17 100644
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_demand.c
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_demand.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -37,7 +37,8 @@ static u64 demand_get_core_mask(struct kbase_device *kbdev)
 static bool demand_get_core_active(struct kbase_device *kbdev)
 {
 	if (0 == kbdev->pm.active_count && !(kbdev->shader_needed_bitmap |
-			kbdev->shader_inuse_bitmap))
+			kbdev->shader_inuse_bitmap) && !kbdev->tiler_needed_cnt
+			&& !kbdev->tiler_inuse_cnt)
 		return false;
 
 	return true;
diff --git a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_driver.c b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_driver.c
index 73b6cff641f6..046ebcb7b508 100644
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_driver.c
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_driver.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -27,11 +27,8 @@
 #if defined(CONFIG_MALI_GATOR_SUPPORT)
 #include <mali_kbase_gator.h>
 #endif
-#if defined(CONFIG_MALI_MIPE_ENABLED)
 #include <mali_kbase_tlstream.h>
-#endif
 #include <mali_kbase_pm.h>
-#include <mali_kbase_cache_policy.h>
 #include <mali_kbase_config_defaults.h>
 #include <mali_kbase_smc.h>
 #include <mali_kbase_hwaccess_jm.h>
@@ -99,6 +96,39 @@ static u32 core_type_to_reg(enum kbase_pm_core_type core_type,
 	return (u32)core_type + (u32)action;
 }
 
+#ifdef CONFIG_ARM64
+static void mali_cci_flush_l2(struct kbase_device *kbdev)
+{
+	const u32 mask = CLEAN_CACHES_COMPLETED | RESET_COMPLETED;
+	u32 loops = KBASE_CLEAN_CACHE_MAX_LOOPS;
+	u32 raw;
+
+	/*
+	 * Note that we don't take the cache flush mutex here: we expect
+	 * to be the last user of the L2, since all other L2 users must
+	 * have dropped their references in order to initiate the L2
+	 * power-down, and L2 power-down is the only valid place to call
+	 * this from.
+	 */
+
+	kbase_reg_write(kbdev,
+			GPU_CONTROL_REG(GPU_COMMAND),
+			GPU_COMMAND_CLEAN_INV_CACHES,
+			NULL);
+
+	raw = kbase_reg_read(kbdev,
+		GPU_CONTROL_REG(GPU_IRQ_RAWSTAT),
+		NULL);
+
+	/* Wait for the cache flush to complete before continuing; exit on
+	 * GPU reset or loop expiry. */
+	while (((raw & mask) == 0) && --loops) {
+		raw = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(GPU_IRQ_RAWSTAT),
+					NULL);
+	}
+}
+#endif
 
 /**
  * kbase_pm_invoke - Invokes an action on a core set
@@ -134,7 +164,7 @@ static void kbase_pm_invoke(struct kbase_device *kbdev,
 			kbase_trace_mali_pm_power_off(core_type, cores);
 	}
 #endif
-#if defined(CONFIG_MALI_MIPE_ENABLED)
+
 	if (cores) {
 		u64 state = kbase_pm_get_state(kbdev, core_type, ACTION_READY);
 
@@ -144,7 +174,7 @@ static void kbase_pm_invoke(struct kbase_device *kbdev,
 			state &= ~cores;
 		kbase_tlstream_aux_pm_state(core_type, state);
 	}
-#endif
+
 	/* Tracing */
 	if (cores) {
 		if (action == ACTION_PWRON)
@@ -177,6 +207,8 @@ static void kbase_pm_invoke(struct kbase_device *kbdev,
 			case KBASE_PM_CORE_L2:
 				KBASE_TRACE_ADD(kbdev, PM_PWROFF_L2, NULL, NULL,
 									0u, lo);
+				/* disable snoops before L2 is turned off */
+				kbase_pm_cache_snoop_disable(kbdev);
 				break;
 			default:
 				break;
@@ -404,6 +436,12 @@ static bool kbase_pm_transition_core_type(struct kbase_device *kbdev,
 			/* All are ready, none will be turned off, and none are
 			 * transitioning */
 			kbdev->pm.backend.l2_powered = 1;
+			/*
+			 * Ensure snoops are enabled after the L2 is powered
+			 * up; kbase keeps track of the snoop state, so it is
+			 * safe to call this repeatedly.
+			 */
+			kbase_pm_cache_snoop_enable(kbdev);
 			if (kbdev->l2_users_count > 0) {
 				/* Notify any registered l2 cache users
 				 * (optimized out when no users waiting) */
@@ -471,10 +509,12 @@ KBASE_EXPORT_TEST_API(kbase_pm_transition_core_type);
  * @present:       The bit mask of present caches
  * @cores_powered: A bit mask of cores (or L2 caches) that are desired to
  *                 be powered
+ * @tilers_powered: The bit mask of tilers that are desired to be powered
  *
  * Return: A bit mask of the caches that should be turned on
  */
-static u64 get_desired_cache_status(u64 present, u64 cores_powered)
+static u64 get_desired_cache_status(u64 present, u64 cores_powered,
+		u64 tilers_powered)
 {
 	u64 desired = 0;
 
@@ -497,6 +537,10 @@ static u64 get_desired_cache_status(u64 present, u64 cores_powered)
 		present &= ~bit_mask;
 	}
 
+	/* Power up the required L2(s) for the tiler */
+	if (tilers_powered)
+		desired |= 1;
+
 	return desired;
 }
 
@@ -509,6 +553,7 @@ MOCKABLE(kbase_pm_check_transitions_nolock) (struct kbase_device *kbdev)
 	bool in_desired_state = true;
 	u64 desired_l2_state;
 	u64 cores_powered;
+	u64 tilers_powered;
 	u64 tiler_available_bitmap;
 	u64 shader_available_bitmap;
 	u64 shader_ready_bitmap;
@@ -542,6 +587,10 @@ MOCKABLE(kbase_pm_check_transitions_nolock) (struct kbase_device *kbdev)
 
 	cores_powered |= kbdev->pm.backend.desired_shader_state;
 
+	/* Work out which tilers want to be powered */
+	tilers_powered = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_TILER);
+	tilers_powered |= kbdev->pm.backend.desired_tiler_state;
+
 	/* If there are l2 cache users registered, keep all l2s powered even if
 	 * all other cores are off. */
 	if (kbdev->l2_users_count > 0)
@@ -549,17 +598,11 @@ MOCKABLE(kbase_pm_check_transitions_nolock) (struct kbase_device *kbdev)
 
 	desired_l2_state = get_desired_cache_status(
 			kbdev->gpu_props.props.raw_props.l2_present,
-			cores_powered);
+			cores_powered, tilers_powered);
 
 	/* If any l2 cache is on, then enable l2 #0, for use by job manager */
-	if (0 != desired_l2_state) {
+	if (0 != desired_l2_state)
 		desired_l2_state |= 1;
-		/* Also enable tiler if l2 cache is powered */
-		kbdev->pm.backend.desired_tiler_state =
-			kbdev->gpu_props.props.raw_props.tiler_present;
-	} else {
-		kbdev->pm.backend.desired_tiler_state = 0;
-	}
 
 	prev_l2_available_bitmap = kbdev->l2_available_bitmap;
 	in_desired_state &= kbase_pm_transition_core_type(kbdev,
@@ -665,7 +708,7 @@ MOCKABLE(kbase_pm_check_transitions_nolock) (struct kbase_device *kbdev)
 						kbase_pm_get_ready_cores(kbdev,
 							KBASE_PM_CORE_TILER));
 #endif
-#if defined(CONFIG_MALI_MIPE_ENABLED)
+
 		kbase_tlstream_aux_pm_state(
 				KBASE_PM_CORE_L2,
 				kbase_pm_get_ready_cores(
@@ -679,7 +722,6 @@ MOCKABLE(kbase_pm_check_transitions_nolock) (struct kbase_device *kbdev)
 				kbase_pm_get_ready_cores(
 					kbdev,
 					KBASE_PM_CORE_TILER));
-#endif
 
 		KBASE_TRACE_ADD(kbdev, PM_DESIRED_REACHED, NULL, NULL,
 				kbdev->pm.backend.gpu_in_desired_state,
@@ -976,6 +1018,7 @@ bool kbase_pm_clock_off(struct kbase_device *kbdev, bool is_suspend)
 		return false;
 	}
 
+	kbase_pm_cache_snoop_disable(kbdev);
 
 	/* The GPU power may be turned off from this point */
 	kbdev->pm.backend.gpu_powered = false;
@@ -1058,18 +1101,20 @@ static void kbase_pm_hw_issues_detect(struct kbase_device *kbdev)
 	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10327))
 		kbdev->hw_quirks_sc |= SC_SDC_DISABLE_OQ_DISCARD;
 
+#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY
 	/* Enable alternative hardware counter selection if configured. */
-	if (DEFAULT_ALTERNATIVE_HWC)
+	if (!GPU_ID_IS_NEW_FORMAT(prod_id))
 		kbdev->hw_quirks_sc |= SC_ALT_COUNTERS;
+#endif
 
 	/* Needed due to MIDBASE-2795. ENABLE_TEXGRD_FLAGS. See PRLAM-10797. */
 	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10797))
 		kbdev->hw_quirks_sc |= SC_ENABLE_TEXGRD_FLAGS;
 
 	if (!kbase_hw_has_issue(kbdev, GPUCORE_1619)) {
-		if (prod_id < 0x760 || prod_id == 0x6956) /* T60x, T62x, T72x */
+		if (prod_id < 0x750 || prod_id == 0x6956) /* T60x, T62x, T72x */
 			kbdev->hw_quirks_sc |= SC_LS_ATTR_CHECK_DISABLE;
-		else if (prod_id >= 0x760 && prod_id <= 0x880) /* T76x, T8xx */
+		else if (prod_id >= 0x750 && prod_id <= 0x880) /* T76x, T8xx */
 			kbdev->hw_quirks_sc |= SC_LS_ALLOW_ATTR_TYPES;
 	}
 
@@ -1094,6 +1139,12 @@ static void kbase_pm_hw_issues_detect(struct kbase_device *kbdev)
 	kbdev->hw_quirks_mmu |= (DEFAULT_AWID_LIMIT & 0x3) <<
 				L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT;
 
+	if (kbdev->system_coherency == COHERENCY_ACE) {
+		/* Allow memory configuration disparity to be ignored: we
+		 * optimize the use of shared memory and thus expect some
+		 * disparity in the memory configuration. */
+		kbdev->hw_quirks_mmu |= L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY;
+	}
 
 	/* Only for T86x/T88x-based products after r2p0 */
 	if (prod_id >= 0x860 && prod_id <= 0x880 && major >= 2) {
@@ -1158,51 +1209,42 @@ static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev)
 
 }
 
-
-int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags)
+void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev)
 {
-	unsigned long irq_flags;
-	struct kbasep_reset_timeout_data rtdata;
-
-	KBASE_DEBUG_ASSERT(NULL != kbdev);
-	lockdep_assert_held(&kbdev->pm.lock);
-
-	/* Ensure the clock is on before attempting to access the hardware */
-	if (!kbdev->pm.backend.gpu_powered) {
-		if (kbdev->pm.backend.callback_power_on)
-			kbdev->pm.backend.callback_power_on(kbdev);
-
-		spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock,
-								irq_flags);
-		kbdev->pm.backend.gpu_powered = true;
-		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
-								irq_flags);
+	if ((kbdev->system_coherency == COHERENCY_ACE) &&
+		!kbdev->cci_snoop_enabled) {
+#ifdef CONFIG_ARM64
+		if (kbdev->snoop_enable_smc != 0)
+			kbase_invoke_smc_fid(kbdev->snoop_enable_smc, 0, 0, 0);
+#endif /* CONFIG_ARM64 */
+		dev_dbg(kbdev->dev, "MALI - CCI Snoops - Enabled\n");
+		kbdev->cci_snoop_enabled = true;
 	}
+}
 
-	/* Ensure interrupts are off to begin with, this also clears any
-	 * outstanding interrupts */
-	kbase_pm_disable_interrupts(kbdev);
-	/* Prepare for the soft-reset */
-	kbdev->pm.backend.reset_done = false;
+void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev)
+{
+	if ((kbdev->system_coherency == COHERENCY_ACE) &&
+		kbdev->cci_snoop_enabled) {
+#ifdef CONFIG_ARM64
+		if (kbdev->snoop_disable_smc != 0) {
+			mali_cci_flush_l2(kbdev);
+			kbase_invoke_smc_fid(kbdev->snoop_disable_smc, 0, 0, 0);
+		}
+#endif /* CONFIG_ARM64 */
+		dev_dbg(kbdev->dev, "MALI - CCI Snoops Disabled\n");
+		kbdev->cci_snoop_enabled = false;
+	}
+}
 
-	/* The cores should be made unavailable due to the reset */
-	spin_lock_irqsave(&kbdev->pm.power_change_lock, irq_flags);
-	if (kbdev->shader_available_bitmap != 0u)
-			KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL,
-						NULL, 0u, (u32)0u);
-	if (kbdev->tiler_available_bitmap != 0u)
-			KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER,
-						NULL, NULL, 0u, (u32)0u);
-	kbdev->shader_available_bitmap = 0u;
-	kbdev->tiler_available_bitmap = 0u;
-	kbdev->l2_available_bitmap = 0u;
-	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, irq_flags);
+static int kbase_pm_reset_do_normal(struct kbase_device *kbdev)
+{
+	struct kbasep_reset_timeout_data rtdata;
 
-	/* Soft reset the GPU */
 	KBASE_TRACE_ADD(kbdev, CORE_GPU_SOFT_RESET, NULL, NULL, 0u, 0);
-#if defined(CONFIG_MALI_MIPE_ENABLED)
+
 	kbase_tlstream_jd_gpu_soft_reset(kbdev);
-#endif
+
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
 						GPU_COMMAND_SOFT_RESET, NULL);
 
@@ -1228,7 +1270,7 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags)
 		/* GPU has been reset */
 		hrtimer_cancel(&rtdata.timer);
 		destroy_hrtimer_on_stack(&rtdata.timer);
-		goto out;
+		return 0;
 	}
 
 	/* No interrupt has been received - check if the RAWSTAT register says
@@ -1264,7 +1306,7 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags)
 		/* GPU has been reset */
 		hrtimer_cancel(&rtdata.timer);
 		destroy_hrtimer_on_stack(&rtdata.timer);
-		goto out;
+		return 0;
 	}
 
 	destroy_hrtimer_on_stack(&rtdata.timer);
@@ -1272,16 +1314,90 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags)
 	dev_err(kbdev->dev, "Failed to hard-reset the GPU (timed out after %d ms)\n",
 								RESET_TIMEOUT);
 
-	/* The GPU still hasn't reset, give up */
 	return -EINVAL;
+}
+
+static int kbase_pm_reset_do_protected(struct kbase_device *kbdev)
+{
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_SOFT_RESET, NULL, NULL, 0u, 0);
+	kbase_tlstream_jd_gpu_soft_reset(kbdev);
+
+	return kbdev->protected_ops->protected_mode_reset(kbdev);
+}
+
+int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags)
+{
+	unsigned long irq_flags;
+	int err;
+	bool resume_vinstr = false;
+
+	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	/* Ensure the clock is on before attempting to access the hardware */
+	if (!kbdev->pm.backend.gpu_powered) {
+		if (kbdev->pm.backend.callback_power_on)
+			kbdev->pm.backend.callback_power_on(kbdev);
+
+		spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock,
+								irq_flags);
+		kbdev->pm.backend.gpu_powered = true;
+		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+								irq_flags);
+	}
+
+	/* Ensure interrupts are off to begin with, this also clears any
+	 * outstanding interrupts */
+	kbase_pm_disable_interrupts(kbdev);
+	/* Ensure cache snoops are disabled before reset. */
+	kbase_pm_cache_snoop_disable(kbdev);
+	/* Prepare for the soft-reset */
+	kbdev->pm.backend.reset_done = false;
+
+	/* The cores should be made unavailable due to the reset */
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, irq_flags);
+	if (kbdev->shader_available_bitmap != 0u)
+			KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL,
+						NULL, 0u, (u32)0u);
+	if (kbdev->tiler_available_bitmap != 0u)
+			KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER,
+						NULL, NULL, 0u, (u32)0u);
+	kbdev->shader_available_bitmap = 0u;
+	kbdev->tiler_available_bitmap = 0u;
+	kbdev->l2_available_bitmap = 0u;
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, irq_flags);
 
-out:
+	/* Soft reset the GPU */
+	if (kbdev->protected_mode_support &&
+			kbdev->protected_ops->protected_mode_reset)
+		err = kbase_pm_reset_do_protected(kbdev);
+	else
+		err = kbase_pm_reset_do_normal(kbdev);
+
+	spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, irq_flags);
+	if (kbdev->protected_mode)
+		resume_vinstr = true;
+	kbdev->protected_mode_transition = false;
+	kbdev->protected_mode = false;
+	spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, irq_flags);
+
+	if (err)
+		goto exit;
 
 	if (flags & PM_HW_ISSUES_DETECT)
 		kbase_pm_hw_issues_detect(kbdev);
 
 	kbase_pm_hw_issues_apply(kbdev);
 
+	kbase_cache_set_coherency_mode(kbdev, kbdev->system_coherency);
+
+	/* Sanity check protected mode was left after reset */
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) {
+		u32 gpu_status = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(GPU_STATUS), NULL);
+
+		WARN_ON(gpu_status & GPU_STATUS_PROTECTED_MODE_ACTIVE);
+	}
 
 	/* If cycle counter was in use re-enable it, enable_irqs will only be
 	 * false when called from kbase_pm_powerup */
@@ -1309,7 +1425,12 @@ out:
 	if (flags & PM_ENABLE_IRQS)
 		kbase_pm_enable_interrupts(kbdev);
 
-	return 0;
+exit:
+	/* If GPU is leaving protected mode resume vinstr operation. */
+	if (kbdev->vinstr_ctx && resume_vinstr)
+		kbase_vinstr_resume(kbdev->vinstr_ctx);
+
+	return err;
 }
 
 /**
diff --git a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_internal.h b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_internal.h
index 943eda567cb5..aa51b8cdef8f 100644
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_internal.h
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_internal.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -501,5 +501,23 @@ void kbase_pm_power_changed(struct kbase_device *kbdev);
 void kbase_pm_metrics_update(struct kbase_device *kbdev,
 				ktime_t *now);
 
+/**
+ * kbase_pm_cache_snoop_enable - Allow CPU snoops on the GPU
+ * @kbdev:	Device pointer
+ *
+ * If the GPU does not have coherency this is a no-op.
+ * This function should be called after L2 power up.
+ */
+
+void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_cache_snoop_disable - Prevent CPU snoops on the GPU
+ * @kbdev:	Device pointer
+ *
+ * If the GPU does not have coherency this is a no-op.
+ * This function should be called before L2 power off.
+ */
+void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev);
 
 #endif /* _KBASE_BACKEND_PM_INTERNAL_H_ */
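The two declarations above are intended to be idempotent: the driver tracks the snoop state in cci_snoop_enabled, so the enable call can be made after every L2 power-up and the disable call before every L2 power-down or GPU power-off without issuing redundant SMCs. A trimmed-down sketch of that pairing is shown below, with do_smc() standing in for kbase_invoke_smc_fid() and a plain bool for the COHERENCY_ACE check.

/* Sketch of the idempotent snoop enable/disable pairing around L2 power
 * transitions. do_smc() is a stand-in for kbase_invoke_smc_fid(). */
#include <stdbool.h>
#include <stdio.h>

static bool coherent = true;		/* models a COHERENCY_ACE system */
static bool snoops_enabled;

static void do_smc(const char *what)
{
	printf("SMC: %s snoops\n", what);
}

static void cache_snoop_enable(void)
{
	if (coherent && !snoops_enabled) {
		do_smc("enable");
		snoops_enabled = true;
	}
}

static void cache_snoop_disable(void)
{
	if (coherent && snoops_enabled) {
		/* The real driver flushes the L2 first so no dirty lines
		 * are lost once snoops stop. */
		do_smc("disable");
		snoops_enabled = false;
	}
}

int main(void)
{
	cache_snoop_enable();	/* L2 powered up */
	cache_snoop_enable();	/* repeated call: no extra SMC */
	cache_snoop_disable();	/* before L2 power-down */
	cache_snoop_disable();	/* repeated call: no extra SMC */
	return 0;
}
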
diff --git a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_policy.c b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_policy.c
index 343436fc353d..4d006028089a 100644
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_policy.c
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_pm_policy.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -21,7 +21,6 @@
 
 #include <mali_kbase.h>
 #include <mali_midg_regmap.h>
-#include <mali_kbase_gator.h>
 #include <mali_kbase_pm.h>
 #include <mali_kbase_config_defaults.h>
 #include <backend/gpu/mali_kbase_pm_internal.h>
@@ -155,16 +154,22 @@ static inline void kbase_timeline_pm_cores_func(struct kbase_device *kbdev,
 static void kbasep_pm_do_poweroff_cores(struct kbase_device *kbdev)
 {
 	u64 prev_shader_state = kbdev->pm.backend.desired_shader_state;
+	u64 prev_tiler_state = kbdev->pm.backend.desired_tiler_state;
 
 	lockdep_assert_held(&kbdev->pm.power_change_lock);
 
 	kbdev->pm.backend.desired_shader_state &=
 			~kbdev->pm.backend.shader_poweroff_pending;
+	kbdev->pm.backend.desired_tiler_state &=
+			~kbdev->pm.backend.tiler_poweroff_pending;
 
 	kbdev->pm.backend.shader_poweroff_pending = 0;
+	kbdev->pm.backend.tiler_poweroff_pending = 0;
 
-	if (prev_shader_state != kbdev->pm.backend.desired_shader_state
-			|| kbdev->pm.backend.ca_in_transition) {
+	if (prev_shader_state != kbdev->pm.backend.desired_shader_state ||
+			prev_tiler_state !=
+				kbdev->pm.backend.desired_tiler_state ||
+			kbdev->pm.backend.ca_in_transition) {
 		bool cores_are_available;
 
 		KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
@@ -202,7 +207,8 @@ kbasep_pm_do_gpu_poweroff_callback(struct hrtimer *timer)
 		queue_work(kbdev->pm.backend.gpu_poweroff_wq,
 					&kbdev->pm.backend.gpu_poweroff_work);
 
-	if (kbdev->pm.backend.shader_poweroff_pending) {
+	if (kbdev->pm.backend.shader_poweroff_pending ||
+			kbdev->pm.backend.tiler_poweroff_pending) {
 		kbdev->pm.backend.shader_poweroff_pending_time--;
 
 		KBASE_DEBUG_ASSERT(
@@ -327,6 +333,7 @@ void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev)
 	kbdev->pm.backend.gpu_poweroff_pending = 0;
 
 	kbdev->pm.backend.shader_poweroff_pending = 0;
+	kbdev->pm.backend.tiler_poweroff_pending = 0;
 	kbdev->pm.backend.shader_poweroff_pending_time = 0;
 
 	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
@@ -381,8 +388,10 @@ void kbase_pm_update_active(struct kbase_device *kbdev)
 		 * when there are contexts active */
 		KBASE_DEBUG_ASSERT(pm->active_count == 0);
 
-		if (backend->shader_poweroff_pending) {
+		if (backend->shader_poweroff_pending ||
+				backend->tiler_poweroff_pending) {
 			backend->shader_poweroff_pending = 0;
+			backend->tiler_poweroff_pending = 0;
 			backend->shader_poweroff_pending_time = 0;
 		}
 
@@ -441,6 +450,7 @@ void kbase_pm_update_active(struct kbase_device *kbdev)
 void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev)
 {
 	u64 desired_bitmap;
+	u64 desired_tiler_bitmap;
 	bool cores_are_available;
 	bool do_poweroff = false;
 
@@ -453,23 +463,37 @@ void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev)
 		kbdev->pm.backend.pm_current_policy->get_core_mask(kbdev);
 	desired_bitmap &= kbase_pm_ca_get_core_mask(kbdev);
 
-	/* Enable core 0 if tiler required, regardless of core availability */
 	if (kbdev->tiler_needed_cnt > 0 || kbdev->tiler_inuse_cnt > 0)
-		desired_bitmap |= 1;
+		desired_tiler_bitmap = 1;
+	else
+		desired_tiler_bitmap = 0;
+
+	if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY)) {
+		/* Unless XAFFINITY is supported, enable core 0 if tiler
+		 * required, regardless of core availability */
+		if (kbdev->tiler_needed_cnt > 0 || kbdev->tiler_inuse_cnt > 0)
+			desired_bitmap |= 1;
+	}
 
 	if (kbdev->pm.backend.desired_shader_state != desired_bitmap)
 		KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_DESIRED, NULL, NULL, 0u,
 							(u32)desired_bitmap);
 	/* Are any cores being powered on? */
 	if (~kbdev->pm.backend.desired_shader_state & desired_bitmap ||
+	    ~kbdev->pm.backend.desired_tiler_state & desired_tiler_bitmap ||
 	    kbdev->pm.backend.ca_in_transition) {
 		/* Check if we are powering off any cores before updating shader
 		 * state */
-		if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap) {
+		if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap ||
+				kbdev->pm.backend.desired_tiler_state &
+				~desired_tiler_bitmap) {
 			/* Start timer to power off cores */
 			kbdev->pm.backend.shader_poweroff_pending |=
 				(kbdev->pm.backend.desired_shader_state &
 							~desired_bitmap);
+			kbdev->pm.backend.tiler_poweroff_pending |=
+				(kbdev->pm.backend.desired_tiler_state &
+							~desired_tiler_bitmap);
 
 			if (kbdev->pm.poweroff_shader_ticks)
 				kbdev->pm.backend.shader_poweroff_pending_time =
@@ -479,21 +503,28 @@ void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev)
 		}
 
 		kbdev->pm.backend.desired_shader_state = desired_bitmap;
+		kbdev->pm.backend.desired_tiler_state = desired_tiler_bitmap;
 
 		/* If any cores are being powered on, transition immediately */
 		cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
-	} else if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap) {
+	} else if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap ||
+				kbdev->pm.backend.desired_tiler_state &
+				~desired_tiler_bitmap) {
 		/* Start timer to power off cores */
 		kbdev->pm.backend.shader_poweroff_pending |=
 				(kbdev->pm.backend.desired_shader_state &
 							~desired_bitmap);
+		kbdev->pm.backend.tiler_poweroff_pending |=
+				(kbdev->pm.backend.desired_tiler_state &
+							~desired_tiler_bitmap);
 		if (kbdev->pm.poweroff_shader_ticks)
 			kbdev->pm.backend.shader_poweroff_pending_time =
 					kbdev->pm.poweroff_shader_ticks;
 		else
 			kbasep_pm_do_poweroff_cores(kbdev);
 	} else if (kbdev->pm.active_count == 0 && desired_bitmap != 0 &&
-				kbdev->pm.backend.poweroff_timer_needed) {
+			desired_tiler_bitmap != 0 &&
+			kbdev->pm.backend.poweroff_timer_needed) {
 		/* If power policy is keeping cores on despite there being no
 		 * active contexts then disable poweroff timer as it isn't
 		 * required.
@@ -504,11 +535,17 @@ void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev)
 
 	/* Ensure timer does not power off wanted cores and make sure to power
 	 * off unwanted cores */
-	if (kbdev->pm.backend.shader_poweroff_pending != 0) {
+	if (kbdev->pm.backend.shader_poweroff_pending ||
+			kbdev->pm.backend.tiler_poweroff_pending) {
 		kbdev->pm.backend.shader_poweroff_pending &=
 				~(kbdev->pm.backend.desired_shader_state &
 								desired_bitmap);
-		if (kbdev->pm.backend.shader_poweroff_pending == 0)
+		kbdev->pm.backend.tiler_poweroff_pending &=
+				~(kbdev->pm.backend.desired_tiler_state &
+				desired_tiler_bitmap);
+
+		if (!kbdev->pm.backend.shader_poweroff_pending &&
+				!kbdev->pm.backend.tiler_poweroff_pending)
 			kbdev->pm.backend.shader_poweroff_pending_time = 0;
 	}
 
diff --git a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_power_model_simple.c b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_power_model_simple.c
index 9d3eb10bd3c9..d965033905ca 100644
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_power_model_simple.c
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_power_model_simple.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -36,7 +36,12 @@ static struct thermal_zone_device *gpu_tz;
 
 static unsigned long model_static_power(unsigned long voltage)
 {
-	int temperature, temp;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0)
+	unsigned long temperature;
+#else
+	int temperature;
+#endif
+	unsigned long temp;
 	unsigned long temp_squared, temp_cubed, temp_scaling_factor;
 	const unsigned long voltage_cubed = (voltage * voltage * voltage) >> 10;
 
@@ -85,7 +90,11 @@ static unsigned long model_dynamic_power(unsigned long freq,
 	return (dynamic_coefficient * v2 * f_mhz) / 1000000; /* mW */
 }
 
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)
+struct devfreq_cooling_ops power_model_simple_ops = {
+#else
 struct devfreq_cooling_power power_model_simple_ops = {
+#endif
 	.get_static_power = model_static_power,
 	.get_dynamic_power = model_dynamic_power,
 };
@@ -150,7 +159,7 @@ int kbase_power_model_simple_init(struct kbase_device *kbdev)
 	dynamic_coefficient = (((dynamic_power * 1000) / voltage_squared)
 			* 1000) / frequency;
 
-	if (of_property_read_u32_array(power_model_node, "ts", ts, 4)) {
+	if (of_property_read_u32_array(power_model_node, "ts", (u32 *)ts, 4)) {
 		dev_err(kbdev->dev, "ts in power_model not available\n");
 		return -EINVAL;
 	}
diff --git a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_power_model_simple.h b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_power_model_simple.h
index 17eede4d917c..9b5e69a9323b 100644
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_power_model_simple.h
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_power_model_simple.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -38,6 +38,10 @@
  */
 int kbase_power_model_simple_init(struct kbase_device *kbdev);
 
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)
+extern struct devfreq_cooling_ops power_model_simple_ops;
+#else
 extern struct devfreq_cooling_power power_model_simple_ops;
+#endif
 
 #endif /* _BASE_POWER_MODEL_SIMPLE_H_ */
diff --git a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_time.c b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_time.c
index 4bcde85f3ee1..d992989123e8 100644
--- a/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_time.c
+++ b/drivers/gpu/arm/midgard_for_linux/backend/gpu/mali_kbase_time.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -74,9 +74,10 @@ void kbase_wait_write_flush(struct kbase_context *kctx)
 {
 	u32 base_count = 0;
 
-	/* A suspend won't happen here, because we're in a syscall from a
-	 * userspace thread */
-
+	/*
+	 * The caller must be holding onto the kctx or the call is from
+	 * userspace.
+	 */
 	kbase_pm_context_active(kctx->kbdev);
 	kbase_pm_request_gpu_cycle_counter(kctx->kbdev);
 
diff --git a/drivers/gpu/arm/midgard_for_linux/mali_base_hwconfig_features.h b/drivers/gpu/arm/midgard_for_linux/mali_base_hwconfig_features.h
index 2102f43348cb..f7c0ff674906 100644
--- a/drivers/gpu/arm/midgard_for_linux/mali_base_hwconfig_features.h
+++ b/drivers/gpu/arm/midgard_for_linux/mali_base_hwconfig_features.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -27,6 +27,7 @@ enum base_hw_feature {
 	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
 	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
 	BASE_HW_FEATURE_33BIT_VA,
+	BASE_HW_FEATURE_XAFFINITY,
 	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
 	BASE_HW_FEATURE_MRT,
 	BASE_HW_FEATURE_BRNDOUT_CC,
@@ -46,6 +47,9 @@ enum base_hw_feature {
 	BASE_HW_FEATURE_BRNDOUT_KILL,
 	BASE_HW_FEATURE_WARPING,
 	BASE_HW_FEATURE_V4,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
 	BASE_HW_FEATURE_END
 };
 
@@ -84,6 +88,7 @@ static const enum base_hw_feature base_hw_features_t72x[] = {
 static const enum base_hw_feature base_hw_features_t76x[] = {
 	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
 	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
 	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
 	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
 	BASE_HW_FEATURE_BRNDOUT_CC,
@@ -101,6 +106,7 @@ static const enum base_hw_feature base_hw_features_t76x[] = {
 static const enum base_hw_feature base_hw_features_tFxx[] = {
 	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
 	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
 	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
 	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
 	BASE_HW_FEATURE_BRNDOUT_CC,
@@ -121,6 +127,7 @@ static const enum base_hw_feature base_hw_features_t83x[] = {
 	BASE_HW_FEATURE_33BIT_VA,
 	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
 	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
 	BASE_HW_FEATURE_WARPING,
 	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
 	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
@@ -142,6 +149,7 @@ static const enum base_hw_feature base_hw_features_t82x[] = {
 	BASE_HW_FEATURE_33BIT_VA,
 	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
 	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
 	BASE_HW_FEATURE_WARPING,
 	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
 	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
@@ -159,5 +167,31 @@ static const enum base_hw_feature base_hw_features_t82x[] = {
 	BASE_HW_FEATURE_END
 };
 
+static const enum base_hw_feature base_hw_features_tMIx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_END
+};
+
+
 
 #endif /* _BASE_HWCONFIG_FEATURES_H_ */
diff --git a/drivers/gpu/arm/midgard_for_linux/mali_base_hwconfig_issues.h b/drivers/gpu/arm/midgard_for_linux/mali_base_hwconfig_issues.h
index 66c2dc76fdb3..149f44cb8674 100644
--- a/drivers/gpu/arm/midgard_for_linux/mali_base_hwconfig_issues.h
+++ b/drivers/gpu/arm/midgard_for_linux/mali_base_hwconfig_issues.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -71,6 +71,7 @@ enum base_hw_issue {
 	BASE_HW_ISSUE_10487,
 	BASE_HW_ISSUE_10607,
 	BASE_HW_ISSUE_10632,
+	BASE_HW_ISSUE_10649,
 	BASE_HW_ISSUE_10676,
 	BASE_HW_ISSUE_10682,
 	BASE_HW_ISSUE_10684,
@@ -90,6 +91,7 @@ enum base_hw_issue {
 	BASE_HW_ISSUE_11035,
 	BASE_HW_ISSUE_11042,
 	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
 	BASE_HW_ISSUE_T76X_26,
 	BASE_HW_ISSUE_T76X_1909,
 	BASE_HW_ISSUE_T76X_1963,
@@ -100,7 +102,17 @@ enum base_hw_issue {
 	BASE_HW_ISSUE_T76X_3793,
 	BASE_HW_ISSUE_T76X_3953,
 	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
 	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_7940,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TMIX_8138,
+	BASE_HW_ISSUE_TMIX_8206,
+	BASE_HW_ISSUE_TMIX_8343,
 	GPUCORE_1619,
 	BASE_HW_ISSUE_END
 };
@@ -153,6 +165,7 @@ static const enum base_hw_issue base_hw_issues_t60x_r0p0_15dev0[] = {
 	BASE_HW_ISSUE_10487,
 	BASE_HW_ISSUE_10607,
 	BASE_HW_ISSUE_10632,
+	BASE_HW_ISSUE_10649,
 	BASE_HW_ISSUE_10676,
 	BASE_HW_ISSUE_10682,
 	BASE_HW_ISSUE_10684,
@@ -166,7 +179,9 @@ static const enum base_hw_issue base_hw_issues_t60x_r0p0_15dev0[] = {
 	BASE_HW_ISSUE_11020,
 	BASE_HW_ISSUE_11035,
 	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
 	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_3964,
 	GPUCORE_1619,
 	BASE_HW_ISSUE_END
 };
@@ -192,6 +207,7 @@ static const enum base_hw_issue base_hw_issues_t60x_r0p0_eac[] = {
 	BASE_HW_ISSUE_10487,
 	BASE_HW_ISSUE_10607,
 	BASE_HW_ISSUE_10632,
+	BASE_HW_ISSUE_10649,
 	BASE_HW_ISSUE_10676,
 	BASE_HW_ISSUE_10682,
 	BASE_HW_ISSUE_10684,
@@ -203,7 +219,9 @@ static const enum base_hw_issue base_hw_issues_t60x_r0p0_eac[] = {
 	BASE_HW_ISSUE_11020,
 	BASE_HW_ISSUE_11035,
 	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
 	BASE_HW_ISSUE_T76X_1909,
+	BASE_HW_ISSUE_T76X_3964,
 	BASE_HW_ISSUE_END
 };
 
@@ -226,6 +244,7 @@ static const enum base_hw_issue base_hw_issues_t60x_r0p1[] = {
 	BASE_HW_ISSUE_10487,
 	BASE_HW_ISSUE_10607,
 	BASE_HW_ISSUE_10632,
+	BASE_HW_ISSUE_10649,
 	BASE_HW_ISSUE_10676,
 	BASE_HW_ISSUE_10682,
 	BASE_HW_ISSUE_10684,
@@ -236,8 +255,10 @@ static const enum base_hw_issue base_hw_issues_t60x_r0p1[] = {
 	BASE_HW_ISSUE_11020,
 	BASE_HW_ISSUE_11035,
 	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
 	BASE_HW_ISSUE_T76X_1909,
 	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
 	BASE_HW_ISSUE_END
 };
 
@@ -252,6 +273,7 @@ static const enum base_hw_issue base_hw_issues_t62x_r0p1[] = {
 	BASE_HW_ISSUE_10487,
 	BASE_HW_ISSUE_10607,
 	BASE_HW_ISSUE_10632,
+	BASE_HW_ISSUE_10649,
 	BASE_HW_ISSUE_10676,
 	BASE_HW_ISSUE_10682,
 	BASE_HW_ISSUE_10684,
@@ -267,6 +289,7 @@ static const enum base_hw_issue base_hw_issues_t62x_r0p1[] = {
 	BASE_HW_ISSUE_11035,
 	BASE_HW_ISSUE_11042,
 	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
 	BASE_HW_ISSUE_T76X_1909,
 	BASE_HW_ISSUE_T76X_1963,
 	BASE_HW_ISSUE_END
@@ -277,6 +300,7 @@ static const enum base_hw_issue base_hw_issues_t62x_r1p0[] = {
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_10471,
 	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10649,
 	BASE_HW_ISSUE_10684,
 	BASE_HW_ISSUE_10821,
 	BASE_HW_ISSUE_10883,
@@ -288,8 +312,10 @@ static const enum base_hw_issue base_hw_issues_t62x_r1p0[] = {
 	BASE_HW_ISSUE_11024,
 	BASE_HW_ISSUE_11042,
 	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
 	BASE_HW_ISSUE_T76X_1909,
 	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
 	BASE_HW_ISSUE_END
 };
 
@@ -298,6 +324,7 @@ static const enum base_hw_issue base_hw_issues_t62x_r1p1[] = {
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_10471,
 	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10649,
 	BASE_HW_ISSUE_10684,
 	BASE_HW_ISSUE_10821,
 	BASE_HW_ISSUE_10883,
@@ -307,6 +334,7 @@ static const enum base_hw_issue base_hw_issues_t62x_r1p1[] = {
 	BASE_HW_ISSUE_11012,
 	BASE_HW_ISSUE_11042,
 	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
 	BASE_HW_ISSUE_T76X_1909,
 	BASE_HW_ISSUE_T76X_1963,
 	BASE_HW_ISSUE_END
@@ -321,6 +349,7 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p0[] = {
 	BASE_HW_ISSUE_11024,
 	BASE_HW_ISSUE_11042,
 	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
 	BASE_HW_ISSUE_T76X_26,
 	BASE_HW_ISSUE_T76X_1909,
 	BASE_HW_ISSUE_T76X_1963,
@@ -331,7 +360,11 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p0[] = {
 	BASE_HW_ISSUE_T76X_3793,
 	BASE_HW_ISSUE_T76X_3953,
 	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
 	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
 	BASE_HW_ISSUE_END
 };
 
@@ -344,6 +377,7 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p1[] = {
 	BASE_HW_ISSUE_11024,
 	BASE_HW_ISSUE_11042,
 	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
 	BASE_HW_ISSUE_T76X_26,
 	BASE_HW_ISSUE_T76X_1909,
 	BASE_HW_ISSUE_T76X_1963,
@@ -354,7 +388,11 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p1[] = {
 	BASE_HW_ISSUE_T76X_3793,
 	BASE_HW_ISSUE_T76X_3953,
 	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
 	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
 	BASE_HW_ISSUE_END
 };
 
@@ -365,6 +403,7 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p1_50rel0[] = {
 	BASE_HW_ISSUE_10946,
 	BASE_HW_ISSUE_11042,
 	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
 	BASE_HW_ISSUE_T76X_26,
 	BASE_HW_ISSUE_T76X_1909,
 	BASE_HW_ISSUE_T76X_1963,
@@ -375,7 +414,11 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p1_50rel0[] = {
 	BASE_HW_ISSUE_T76X_3793,
 	BASE_HW_ISSUE_T76X_3953,
 	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
 	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
 	BASE_HW_ISSUE_END
 };
 
@@ -388,6 +431,7 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p2[] = {
 	BASE_HW_ISSUE_11024,
 	BASE_HW_ISSUE_11042,
 	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
 	BASE_HW_ISSUE_T76X_26,
 	BASE_HW_ISSUE_T76X_1909,
 	BASE_HW_ISSUE_T76X_1963,
@@ -398,7 +442,11 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p2[] = {
 	BASE_HW_ISSUE_T76X_3793,
 	BASE_HW_ISSUE_T76X_3953,
 	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
 	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
 	BASE_HW_ISSUE_END
 };
 
@@ -409,6 +457,7 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p3[] = {
 	BASE_HW_ISSUE_10946,
 	BASE_HW_ISSUE_11042,
 	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
 	BASE_HW_ISSUE_T76X_26,
 	BASE_HW_ISSUE_T76X_1909,
 	BASE_HW_ISSUE_T76X_1963,
@@ -419,7 +468,11 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p3[] = {
 	BASE_HW_ISSUE_T76X_3793,
 	BASE_HW_ISSUE_T76X_3953,
 	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
 	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
 	BASE_HW_ISSUE_END
 };
 
@@ -430,6 +483,7 @@ static const enum base_hw_issue base_hw_issues_t76x_r1p0[] = {
 	BASE_HW_ISSUE_10946,
 	BASE_HW_ISSUE_11042,
 	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
 	BASE_HW_ISSUE_T76X_1909,
 	BASE_HW_ISSUE_T76X_1963,
 	BASE_HW_ISSUE_T76X_3086,
@@ -437,7 +491,11 @@ static const enum base_hw_issue base_hw_issues_t76x_r1p0[] = {
 	BASE_HW_ISSUE_T76X_3793,
 	BASE_HW_ISSUE_T76X_3953,
 	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
 	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
 	BASE_HW_ISSUE_END
 };
 
@@ -445,6 +503,7 @@ static const enum base_hw_issue base_hw_issues_t72x_r0p0[] = {
 	BASE_HW_ISSUE_6402,
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10649,
 	BASE_HW_ISSUE_10684,
 	BASE_HW_ISSUE_10797,
 	BASE_HW_ISSUE_10821,
@@ -452,8 +511,10 @@ static const enum base_hw_issue base_hw_issues_t72x_r0p0[] = {
 	BASE_HW_ISSUE_10946,
 	BASE_HW_ISSUE_11042,
 	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
 	BASE_HW_ISSUE_T76X_1909,
 	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
 	BASE_HW_ISSUE_END
 };
 
@@ -461,6 +522,7 @@ static const enum base_hw_issue base_hw_issues_t72x_r1p0[] = {
 	BASE_HW_ISSUE_6402,
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10649,
 	BASE_HW_ISSUE_10684,
 	BASE_HW_ISSUE_10797,
 	BASE_HW_ISSUE_10821,
@@ -468,8 +530,10 @@ static const enum base_hw_issue base_hw_issues_t72x_r1p0[] = {
 	BASE_HW_ISSUE_10946,
 	BASE_HW_ISSUE_11042,
 	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
 	BASE_HW_ISSUE_T76X_1909,
 	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
 	BASE_HW_ISSUE_END
 };
 
@@ -477,6 +541,7 @@ static const enum base_hw_issue base_hw_issues_t72x_r1p1[] = {
 	BASE_HW_ISSUE_6402,
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10649,
 	BASE_HW_ISSUE_10684,
 	BASE_HW_ISSUE_10797,
 	BASE_HW_ISSUE_10821,
@@ -484,8 +549,10 @@ static const enum base_hw_issue base_hw_issues_t72x_r1p1[] = {
 	BASE_HW_ISSUE_10946,
 	BASE_HW_ISSUE_11042,
 	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
 	BASE_HW_ISSUE_T76X_1909,
 	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
 	BASE_HW_ISSUE_END
 };
 
@@ -494,11 +561,13 @@ static const enum base_hw_issue base_hw_issues_model_t72x[] = {
 	BASE_HW_ISSUE_6402,
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_10471,
+	BASE_HW_ISSUE_10649,
 	BASE_HW_ISSUE_10797,
 	BASE_HW_ISSUE_11042,
 	BASE_HW_ISSUE_11051,
 	BASE_HW_ISSUE_T76X_1909,
 	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
 	GPUCORE_1619,
 	BASE_HW_ISSUE_END
 };
@@ -515,6 +584,10 @@ static const enum base_hw_issue base_hw_issues_model_t76x[] = {
 	BASE_HW_ISSUE_T76X_3086,
 	BASE_HW_ISSUE_T76X_3700,
 	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
 	GPUCORE_1619,
 	BASE_HW_ISSUE_END
 };
@@ -525,6 +598,7 @@ static const enum base_hw_issue base_hw_issues_model_t60x[] = {
 	BASE_HW_ISSUE_8778,
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10649,
 	BASE_HW_ISSUE_10931,
 	BASE_HW_ISSUE_11012,
 	BASE_HW_ISSUE_11020,
@@ -532,6 +606,7 @@ static const enum base_hw_issue base_hw_issues_model_t60x[] = {
 	BASE_HW_ISSUE_11051,
 	BASE_HW_ISSUE_T76X_1909,
 	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
 	GPUCORE_1619,
 	BASE_HW_ISSUE_END
 };
@@ -541,6 +616,7 @@ static const enum base_hw_issue base_hw_issues_model_t62x[] = {
 	BASE_HW_ISSUE_6402,
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_10472,
+	BASE_HW_ISSUE_10649,
 	BASE_HW_ISSUE_10931,
 	BASE_HW_ISSUE_11012,
 	BASE_HW_ISSUE_11020,
@@ -549,6 +625,7 @@ static const enum base_hw_issue base_hw_issues_model_t62x[] = {
 	BASE_HW_ISSUE_11051,
 	BASE_HW_ISSUE_T76X_1909,
 	BASE_HW_ISSUE_T76X_1963,
+	BASE_HW_ISSUE_T76X_3964,
 	GPUCORE_1619,
 	BASE_HW_ISSUE_END
 };
@@ -559,6 +636,7 @@ static const enum base_hw_issue base_hw_issues_tFRx_r0p1[] = {
 	BASE_HW_ISSUE_10883,
 	BASE_HW_ISSUE_10946,
 	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
 	BASE_HW_ISSUE_T76X_1909,
 	BASE_HW_ISSUE_T76X_1963,
 	BASE_HW_ISSUE_T76X_3086,
@@ -566,7 +644,11 @@ static const enum base_hw_issue base_hw_issues_tFRx_r0p1[] = {
 	BASE_HW_ISSUE_T76X_3793,
 	BASE_HW_ISSUE_T76X_3953,
 	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
 	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
 	BASE_HW_ISSUE_END
 };
 
@@ -576,13 +658,18 @@ static const enum base_hw_issue base_hw_issues_tFRx_r0p2[] = {
 	BASE_HW_ISSUE_10883,
 	BASE_HW_ISSUE_10946,
 	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
 	BASE_HW_ISSUE_T76X_1909,
 	BASE_HW_ISSUE_T76X_1963,
 	BASE_HW_ISSUE_T76X_3086,
 	BASE_HW_ISSUE_T76X_3700,
 	BASE_HW_ISSUE_T76X_3793,
 	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3964,
 	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
 	BASE_HW_ISSUE_END
 };
 
@@ -592,12 +679,16 @@ static const enum base_hw_issue base_hw_issues_tFRx_r1p0[] = {
 	BASE_HW_ISSUE_10883,
 	BASE_HW_ISSUE_10946,
 	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
 	BASE_HW_ISSUE_T76X_1963,
 	BASE_HW_ISSUE_T76X_3086,
 	BASE_HW_ISSUE_T76X_3700,
 	BASE_HW_ISSUE_T76X_3793,
 	BASE_HW_ISSUE_T76X_3953,
 	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
 	BASE_HW_ISSUE_END
 };
 
@@ -607,12 +698,16 @@ static const enum base_hw_issue base_hw_issues_tFRx_r2p0[] = {
 	BASE_HW_ISSUE_10883,
 	BASE_HW_ISSUE_10946,
 	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
 	BASE_HW_ISSUE_T76X_1963,
 	BASE_HW_ISSUE_T76X_3086,
 	BASE_HW_ISSUE_T76X_3700,
 	BASE_HW_ISSUE_T76X_3793,
 	BASE_HW_ISSUE_T76X_3953,
 	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
 	BASE_HW_ISSUE_END
 };
 
@@ -624,6 +719,10 @@ static const enum base_hw_issue base_hw_issues_model_tFRx[] = {
 	BASE_HW_ISSUE_T76X_3086,
 	BASE_HW_ISSUE_T76X_3700,
 	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
 	GPUCORE_1619,
 	BASE_HW_ISSUE_END
 };
@@ -634,13 +733,18 @@ static const enum base_hw_issue base_hw_issues_t86x_r0p2[] = {
 	BASE_HW_ISSUE_10883,
 	BASE_HW_ISSUE_10946,
 	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
 	BASE_HW_ISSUE_T76X_1909,
 	BASE_HW_ISSUE_T76X_1963,
 	BASE_HW_ISSUE_T76X_3086,
 	BASE_HW_ISSUE_T76X_3700,
 	BASE_HW_ISSUE_T76X_3793,
 	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3964,
 	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
 	BASE_HW_ISSUE_END
 };
 
@@ -650,12 +754,16 @@ static const enum base_hw_issue base_hw_issues_t86x_r1p0[] = {
 	BASE_HW_ISSUE_10883,
 	BASE_HW_ISSUE_10946,
 	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
 	BASE_HW_ISSUE_T76X_1963,
 	BASE_HW_ISSUE_T76X_3086,
 	BASE_HW_ISSUE_T76X_3700,
 	BASE_HW_ISSUE_T76X_3793,
 	BASE_HW_ISSUE_T76X_3953,
 	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
 	BASE_HW_ISSUE_END
 };
 
@@ -665,12 +773,16 @@ static const enum base_hw_issue base_hw_issues_t86x_r2p0[] = {
 	BASE_HW_ISSUE_10883,
 	BASE_HW_ISSUE_10946,
 	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
 	BASE_HW_ISSUE_T76X_1963,
 	BASE_HW_ISSUE_T76X_3086,
 	BASE_HW_ISSUE_T76X_3700,
 	BASE_HW_ISSUE_T76X_3793,
 	BASE_HW_ISSUE_T76X_3953,
 	BASE_HW_ISSUE_T76X_3966,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
 	BASE_HW_ISSUE_END
 };
 
@@ -682,6 +794,9 @@ static const enum base_hw_issue base_hw_issues_model_t86x[] = {
 	BASE_HW_ISSUE_T76X_3086,
 	BASE_HW_ISSUE_T76X_3700,
 	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_T76X_3982,
 	GPUCORE_1619,
 	BASE_HW_ISSUE_END
 };
@@ -692,6 +807,7 @@ static const enum base_hw_issue base_hw_issues_t83x_r0p1[] = {
 	BASE_HW_ISSUE_10883,
 	BASE_HW_ISSUE_10946,
 	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
 	BASE_HW_ISSUE_T76X_1909,
 	BASE_HW_ISSUE_T76X_1963,
 	BASE_HW_ISSUE_T76X_3086,
@@ -699,6 +815,9 @@ static const enum base_hw_issue base_hw_issues_t83x_r0p1[] = {
 	BASE_HW_ISSUE_T76X_3793,
 	BASE_HW_ISSUE_T76X_3953,
 	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
 	BASE_HW_ISSUE_END
 };
 
@@ -708,12 +827,16 @@ static const enum base_hw_issue base_hw_issues_t83x_r1p0[] = {
 	BASE_HW_ISSUE_10883,
 	BASE_HW_ISSUE_10946,
 	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
 	BASE_HW_ISSUE_T76X_1963,
 	BASE_HW_ISSUE_T76X_3086,
 	BASE_HW_ISSUE_T76X_3700,
 	BASE_HW_ISSUE_T76X_3793,
 	BASE_HW_ISSUE_T76X_3953,
 	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
 	BASE_HW_ISSUE_END
 };
 
@@ -721,11 +844,14 @@ static const enum base_hw_issue base_hw_issues_model_t83x[] = {
 	BASE_HW_ISSUE_5736,
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_11051,
-	BASE_HW_ISSUE_T76X_1909,
 	BASE_HW_ISSUE_T76X_1963,
 	BASE_HW_ISSUE_T76X_3086,
 	BASE_HW_ISSUE_T76X_3700,
 	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
 	GPUCORE_1619,
 	BASE_HW_ISSUE_END
 };
@@ -736,6 +862,7 @@ static const enum base_hw_issue base_hw_issues_t82x_r0p0[] = {
 	BASE_HW_ISSUE_10883,
 	BASE_HW_ISSUE_10946,
 	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
 	BASE_HW_ISSUE_T76X_1909,
 	BASE_HW_ISSUE_T76X_1963,
 	BASE_HW_ISSUE_T76X_3086,
@@ -743,6 +870,10 @@ static const enum base_hw_issue base_hw_issues_t82x_r0p0[] = {
 	BASE_HW_ISSUE_T76X_3793,
 	BASE_HW_ISSUE_T76X_3953,
 	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3964,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
 	BASE_HW_ISSUE_END
 };
 
@@ -752,6 +883,7 @@ static const enum base_hw_issue base_hw_issues_t82x_r0p1[] = {
 	BASE_HW_ISSUE_10883,
 	BASE_HW_ISSUE_10946,
 	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
 	BASE_HW_ISSUE_T76X_1909,
 	BASE_HW_ISSUE_T76X_1963,
 	BASE_HW_ISSUE_T76X_3086,
@@ -759,6 +891,9 @@ static const enum base_hw_issue base_hw_issues_t82x_r0p1[] = {
 	BASE_HW_ISSUE_T76X_3793,
 	BASE_HW_ISSUE_T76X_3953,
 	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
 	BASE_HW_ISSUE_END
 };
 
@@ -768,12 +903,16 @@ static const enum base_hw_issue base_hw_issues_t82x_r1p0[] = {
 	BASE_HW_ISSUE_10883,
 	BASE_HW_ISSUE_10946,
 	BASE_HW_ISSUE_11051,
+	BASE_HW_ISSUE_11054,
 	BASE_HW_ISSUE_T76X_1963,
 	BASE_HW_ISSUE_T76X_3086,
 	BASE_HW_ISSUE_T76X_3700,
 	BASE_HW_ISSUE_T76X_3793,
 	BASE_HW_ISSUE_T76X_3953,
 	BASE_HW_ISSUE_T76X_3960,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
 	BASE_HW_ISSUE_END
 };
 
@@ -781,15 +920,68 @@ static const enum base_hw_issue base_hw_issues_model_t82x[] = {
 	BASE_HW_ISSUE_5736,
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_11051,
-	BASE_HW_ISSUE_T76X_1909,
 	BASE_HW_ISSUE_T76X_1963,
 	BASE_HW_ISSUE_T76X_3086,
 	BASE_HW_ISSUE_T76X_3700,
 	BASE_HW_ISSUE_T76X_3793,
+	BASE_HW_ISSUE_T76X_3979,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
+	GPUCORE_1619,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tMIx_r0p0_05dev0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3953,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TMIX_8138,
+	BASE_HW_ISSUE_TMIX_8343,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tMIx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_10682,
+	BASE_HW_ISSUE_10821,
+	BASE_HW_ISSUE_11054,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_7940,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TMIX_8138,
+	BASE_HW_ISSUE_TMIX_8206,
+	BASE_HW_ISSUE_TMIX_8343,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tMIx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_T76X_3700,
+	BASE_HW_ISSUE_T76X_3982,
+	BASE_HW_ISSUE_TMIX_7891,
+	BASE_HW_ISSUE_TMIX_7940,
+	BASE_HW_ISSUE_TMIX_8042,
+	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TMIX_8138,
+	BASE_HW_ISSUE_TMIX_8206,
+	BASE_HW_ISSUE_TMIX_8343,
 	GPUCORE_1619,
 	BASE_HW_ISSUE_END
 };
 
 
 
+
+
 #endif /* _BASE_HWCONFIG_ISSUES_H_ */
diff --git a/drivers/gpu/arm/midgard_for_linux/mali_base_kernel.h b/drivers/gpu/arm/midgard_for_linux/mali_base_kernel.h
index 212100dbff63..749dd9a1cc9e 100644
--- a/drivers/gpu/arm/midgard_for_linux/mali_base_kernel.h
+++ b/drivers/gpu/arm/midgard_for_linux/mali_base_kernel.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -42,7 +42,14 @@
 /* Support UK9 IOCTLS */
 #define BASE_LEGACY_UK9_SUPPORT 1
 
-typedef u64 base_mem_handle;
+/* Support UK10_2 IOCTLS */
+#define BASE_LEGACY_UK10_2_SUPPORT 1
+
+typedef struct base_mem_handle {
+	struct {
+		u64 handle;
+	} basep;
+} base_mem_handle;
 
 #include "mali_base_mem_priv.h"
 #include "mali_kbase_profiling_gator_api.h"
@@ -62,6 +69,10 @@ typedef u64 base_mem_handle;
 #define BASEP_JD_SEM_MASK_IN_WORD(x)    (1 << ((x) & (BASEP_JD_SEM_PER_WORD - 1)))
 #define BASEP_JD_SEM_ARRAY_SIZE         BASEP_JD_SEM_WORD_NR(BASE_JD_ATOM_COUNT)
 
+/* Set/reset values for a software event */
+#define BASE_JD_SOFT_EVENT_SET             ((unsigned char)1)
+#define BASE_JD_SOFT_EVENT_RESET           ((unsigned char)0)
+
 #define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 3
 
 #define BASE_MAX_COHERENT_GROUPS 16
@@ -162,8 +173,13 @@ enum {
 /* IN */
 	BASE_MEM_COHERENT_SYSTEM_REQUIRED = (1U << 15), /**< Page coherence
 					     Outer shareable, required. */
-	BASE_MEM_SECURE = (1U << 16)           /**< Secure memory */
-
+	BASE_MEM_SECURE = (1U << 16),          /**< Secure memory */
+	BASE_MEM_DONT_NEED = (1U << 17),       /**< Not needed physical
+						    memory */
+	BASE_MEM_IMPORT_SHARED = (1U << 18),   /**< Must use shared CPU/GPU zone
+						    (SAME_VA zone) but doesn't
+						    require the addresses to
+						    be the same */
 };
 
 /**
@@ -171,7 +187,7 @@ enum {
  *
  * Must be kept in sync with the ::base_mem_alloc_flags flags
  */
-#define BASE_MEM_FLAGS_NR_BITS 17
+#define BASE_MEM_FLAGS_NR_BITS 19
 
 /**
   * A mask for all output bits, excluding IN/OUT bits.
@@ -184,6 +200,13 @@ enum {
 #define BASE_MEM_FLAGS_INPUT_MASK \
 	(((1 << BASE_MEM_FLAGS_NR_BITS) - 1) & ~BASE_MEM_FLAGS_OUTPUT_MASK)
 
+/**
+ * A mask for all the flags which are modifiable via the base_mem_set_flags
+ * interface.
+ */
+#define BASE_MEM_FLAGS_MODIFIABLE \
+	(BASE_MEM_DONT_NEED | BASE_MEM_COHERENT_SYSTEM | \
+	 BASE_MEM_COHERENT_LOCAL)
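
A hedged sketch of the kind of check this mask enables; the helper name is illustrative and the real validation lives in the driver's flags-change path.

static inline bool base_mem_flags_change_allowed(u32 old_flags, u32 new_flags)
{
	/* Only bits inside BASE_MEM_FLAGS_MODIFIABLE may differ between the
	 * current and the requested flag sets. */
	return ((old_flags ^ new_flags) & ~BASE_MEM_FLAGS_MODIFIABLE) == 0;
}
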
 

 /**
  * enum base_mem_import_type - Memory types supported by @a base_mem_import
@@ -224,14 +247,32 @@ struct base_mem_import_user_buffer {
 };
 
 /**
- * @brief Invalid memory handle type.
- * Return value from functions returning @a base_mem_handle on error.
+ * @brief Invalid memory handle.
+ *
+ * Return value from functions returning @ref base_mem_handle on error.
+ *
+ * @warning @ref base_mem_handle_new_invalid must be used instead of this macro
+ *          in C++ code or other situations where compound literals cannot be used.
  */
-#define BASE_MEM_INVALID_HANDLE                (0ull  << 12)
+#define BASE_MEM_INVALID_HANDLE ((base_mem_handle) { {BASEP_MEM_INVALID_HANDLE} })
+
+/**
+ * @brief Special write-alloc memory handle.
+ *
+ * A special handle is used to represent a region where a special page is mapped
+ * with a write-alloc cache setup, typically used when the write result of the
+ * GPU isn't needed, but the GPU must write anyway.
+ *
+ * @warning @ref base_mem_handle_new_write_alloc must be used instead of this macro
+ *          in C++ code or other situations where compound literals cannot be used.
+ */
+#define BASE_MEM_WRITE_ALLOC_PAGES_HANDLE ((base_mem_handle) { {BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE} })
+
+#define BASEP_MEM_INVALID_HANDLE               (0ull  << 12)
 #define BASE_MEM_MMU_DUMP_HANDLE               (1ull  << 12)
 #define BASE_MEM_TRACE_BUFFER_HANDLE           (2ull  << 12)
 #define BASE_MEM_MAP_TRACKING_HANDLE           (3ull  << 12)
-#define BASE_MEM_WRITE_ALLOC_PAGES_HANDLE      (4ull  << 12)
+#define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE     (4ull  << 12)
 /* reserved handles ..-64<<PAGE_SHIFT> for future special handles */
 #define BASE_MEM_COOKIE_BASE                   (64ul  << 12)
 #define BASE_MEM_FIRST_FREE_ADDRESS            ((BITS_PER_LONG << 12) + \
@@ -240,6 +281,7 @@ struct base_mem_import_user_buffer {
 /* Mask to detect 4GB boundary alignment */
 #define BASE_MEM_MASK_4GB  0xfffff000UL
 
+
 /* Bit mask of cookies used for for memory allocation setup */
 #define KBASE_COOKIE_MASK  ~1UL /* bit 0 is reserved */
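
The warnings above name base_mem_handle_new_invalid() and base_mem_handle_new_write_alloc() as the C++-safe alternatives to the compound-literal macros. Their bodies are not part of this hunk; the obvious shape they take, shown here as an assumption, is:

static inline base_mem_handle base_mem_handle_new_invalid(void)
{
	base_mem_handle h = { { BASEP_MEM_INVALID_HANDLE } };

	return h;
}

static inline base_mem_handle base_mem_handle_new_write_alloc(void)
{
	base_mem_handle h = { { BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE } };

	return h;
}
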
 
@@ -251,7 +293,6 @@ typedef enum base_backing_threshold_status {
 	BASE_BACKING_THRESHOLD_OK = 0,			    /**< Resize successful */
 	BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE = -1,	    /**< Not a growable tmem object */
 	BASE_BACKING_THRESHOLD_ERROR_OOM = -2,		    /**< Increase failed due to an out-of-memory condition */
-	BASE_BACKING_THRESHOLD_ERROR_MAPPED = -3,	    /**< Resize attempted on buffer while it was mapped, which is not permitted */
 	BASE_BACKING_THRESHOLD_ERROR_INVALID_ARGUMENTS = -4 /**< Invalid arguments (not tmem, illegal size request, etc.) */
 } base_backing_threshold_status;
 
@@ -355,6 +396,28 @@ struct base_mem_aliasing_info {
 	u64 length;
 };
 
+/**
+ * struct base_jit_alloc_info - Structure which describes a JIT allocation
+ *                              request.
+ * @gpu_alloc_addr:             The GPU virtual address to write the JIT
+ *                              allocated GPU virtual address to.
+ * @va_pages:                   The minimum number of virtual pages required.
+ * @commit_pages:               The minimum number of physical pages which
+ *                              should back the allocation.
+ * @extent:                     Granularity of physical pages to grow the
+ *                              allocation by during a fault.
+ * @id:                         Unique ID provided by the caller, this is used
+ *                              to pair allocation and free requests.
+ *                              Zero is not a valid value.
+ */
+struct base_jit_alloc_info {
+	u64 gpu_alloc_addr;
+	u64 va_pages;
+	u64 commit_pages;
+	u64 extent;
+	u8 id;
+};
+
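
To make the field roles concrete, here is a hypothetical client-side helper that fills the descriptor and attaches it to an atom through jc, as the BASE_JD_REQ_SOFT_JIT_ALLOC description later in this header requires. The helper, the atom wiring and all numeric values are illustrative only; the size comments assume 4 KiB pages.

static void fill_jit_alloc_atom(struct base_jd_atom_v2 *atom,
				struct base_jit_alloc_info *info,
				u64 result_gpu_va)
{
	info->gpu_alloc_addr = result_gpu_va; /* GPU VA the new address is written back to */
	info->va_pages       = 256;           /* at least 1 MiB of GPU virtual space */
	info->commit_pages   = 16;            /* physically back 64 KiB up front */
	info->extent         = 16;            /* grow in 64 KiB steps on fault */
	info->id             = 1;             /* must stay unique until freed */

	atom->core_req = BASE_JD_REQ_SOFT_JIT_ALLOC;
	atom->jc = (u64)(uintptr_t)info;
}
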
 /**
  * @brief Job dependency type.
  *
@@ -381,11 +444,20 @@ typedef u8 base_jd_dep_type;
  * Special case is ::BASE_JD_REQ_DEP, which is used to express complex
  * dependencies, and that doesn't execute anything on the hardware.
  */
-typedef u16 base_jd_core_req;
+typedef u32 base_jd_core_req;
 
 /* Requirements that come from the HW */
-#define BASE_JD_REQ_DEP 0	    /**< No requirement, dependency only */
-#define BASE_JD_REQ_FS  (1U << 0)   /**< Requires fragment shaders */
+
+/**
+ * No requirement, dependency only
+ */
+#define BASE_JD_REQ_DEP ((base_jd_core_req)0)
+
+/**
+ * Requires fragment shaders
+ */
+#define BASE_JD_REQ_FS  ((base_jd_core_req)1 << 0)
+
 /**
  * Requires compute shaders
  * This covers any of the following Midgard Job types:
@@ -397,28 +469,38 @@ typedef u16 base_jd_core_req;
  * job is specifically just the "Compute Shader" job type, and not the "Vertex
  * Shader" nor the "Geometry Shader" job type.
  */
-#define BASE_JD_REQ_CS  (1U << 1)
-#define BASE_JD_REQ_T   (1U << 2)   /**< Requires tiling */
-#define BASE_JD_REQ_CF  (1U << 3)   /**< Requires cache flushes */
-#define BASE_JD_REQ_V   (1U << 4)   /**< Requires value writeback */
+#define BASE_JD_REQ_CS  ((base_jd_core_req)1 << 1)
+#define BASE_JD_REQ_T   ((base_jd_core_req)1 << 2)   /**< Requires tiling */
+#define BASE_JD_REQ_CF  ((base_jd_core_req)1 << 3)   /**< Requires cache flushes */
+#define BASE_JD_REQ_V   ((base_jd_core_req)1 << 4)   /**< Requires value writeback */
 
 /* SW-only requirements - the HW does not expose these as part of the job slot capabilities */
 
 /* Requires fragment job with AFBC encoding */
-#define BASE_JD_REQ_FS_AFBC  (1U << 13)
+#define BASE_JD_REQ_FS_AFBC  ((base_jd_core_req)1 << 13)
+
+/**
+ * SW-only requirement: coalesce completion events.
+ * If this bit is set then completion of this atom will not cause an event to
+ * be sent to userspace, whether successful or not; completion events will be
+ * deferred until an atom completes which does not have this bit set.
+ *
+ * This bit may not be used in combination with BASE_JD_REQ_EXTERNAL_RESOURCES.
+ */
+#define BASE_JD_REQ_EVENT_COALESCE ((base_jd_core_req)1 << 5)
 
 /**
  * SW Only requirement: the job chain requires a coherent core group. We don't
  * mind which coherent core group is used.
  */
-#define BASE_JD_REQ_COHERENT_GROUP  (1U << 6)
+#define BASE_JD_REQ_COHERENT_GROUP  ((base_jd_core_req)1 << 6)
 
 /**
  * SW Only requirement: The performance counters should be enabled only when
  * they are needed, to reduce power consumption.
  */
 
-#define BASE_JD_REQ_PERMON               (1U << 7)
+#define BASE_JD_REQ_PERMON               ((base_jd_core_req)1 << 7)
 
 /**
  * SW Only requirement: External resources are referenced by this atom.
@@ -426,14 +508,16 @@ typedef u16 base_jd_core_req;
  * but should instead be part of a NULL jobs inserted into the dependency tree.
  * The first pre_dep object must be configured for the external resouces to use,
  * the second pre_dep object can be used to create other dependencies.
+ *
+ * This bit may not be used in combination with BASE_JD_REQ_EVENT_COALESCE.
  */
-#define BASE_JD_REQ_EXTERNAL_RESOURCES   (1U << 8)
+#define BASE_JD_REQ_EXTERNAL_RESOURCES   ((base_jd_core_req)1 << 8)
 
 /**
  * SW Only requirement: Software defined job. Jobs with this bit set will not be submitted
  * to the hardware but will cause some action to happen within the driver
  */
-#define BASE_JD_REQ_SOFT_JOB        (1U << 9)
+#define BASE_JD_REQ_SOFT_JOB        ((base_jd_core_req)1 << 9)
 
 #define BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME      (BASE_JD_REQ_SOFT_JOB | 0x1)
 #define BASE_JD_REQ_SOFT_FENCE_TRIGGER          (BASE_JD_REQ_SOFT_JOB | 0x2)
@@ -476,6 +560,66 @@ typedef u16 base_jd_core_req;
  * - Priority is inherited from the replay job.
  */
 #define BASE_JD_REQ_SOFT_REPLAY                 (BASE_JD_REQ_SOFT_JOB | 0x4)
+/**
+ * SW only requirement: event wait/trigger job.
+ *
+ * - BASE_JD_REQ_SOFT_EVENT_WAIT: this job will block until the event is set.
+ * - BASE_JD_REQ_SOFT_EVENT_SET: this job sets the event, thus unblocking the
+ *   other waiting jobs. It completes immediately.
+ * - BASE_JD_REQ_SOFT_EVENT_RESET: this job resets the event, making it
+ *   possible for other jobs to wait upon it. It completes immediately.
+ */
+#define BASE_JD_REQ_SOFT_EVENT_WAIT             (BASE_JD_REQ_SOFT_JOB | 0x5)
+#define BASE_JD_REQ_SOFT_EVENT_SET              (BASE_JD_REQ_SOFT_JOB | 0x6)
+#define BASE_JD_REQ_SOFT_EVENT_RESET            (BASE_JD_REQ_SOFT_JOB | 0x7)
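
A sketch of how a waiter and a setter pair up in practice. Treat the assumption that the event's address travels in the atom's jc field as exactly that, an assumption, mirroring how the JIT and external-resource soft jobs described below use jc.

static void make_event_pair(struct base_jd_atom_v2 *waiter,
			    struct base_jd_atom_v2 *setter,
			    u64 event_addr)
{
	waiter->core_req = BASE_JD_REQ_SOFT_EVENT_WAIT;
	waiter->jc = event_addr;	/* blocks until the event becomes SET */

	setter->core_req = BASE_JD_REQ_SOFT_EVENT_SET;
	setter->jc = event_addr;	/* completes immediately, releasing waiters */
}
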
+
+#define BASE_JD_REQ_SOFT_DEBUG_COPY             (BASE_JD_REQ_SOFT_JOB | 0x8)
+
+/**
+ * SW only requirement: Just In Time allocation
+ *
+ * This job requests a JIT allocation based on the request in the
+ * @base_jit_alloc_info structure which is passed via the jc element of
+ * the atom.
+ *
+ * It should be noted that the id entry in @base_jit_alloc_info must not
+ * be reused until it has been released via @BASE_JD_REQ_SOFT_JIT_FREE.
+ *
+ * Should this soft job fail, it is expected that a @BASE_JD_REQ_SOFT_JIT_FREE
+ * soft job will still be submitted to free the JIT allocation.
+ *
+ * The job will complete immediately.
+ */
+#define BASE_JD_REQ_SOFT_JIT_ALLOC              (BASE_JD_REQ_SOFT_JOB | 0x9)
+/**
+ * SW only requirement: Just In Time free
+ *
+ * This job requests a JIT allocation created by @BASE_JD_REQ_SOFT_JIT_ALLOC
+ * to be freed. The ID of the JIT allocation is passed via the jc element of
+ * the atom.
+ *
+ * The job will complete immediately.
+ */
+#define BASE_JD_REQ_SOFT_JIT_FREE               (BASE_JD_REQ_SOFT_JOB | 0xa)
+
+/**
+ * SW only requirement: Map external resource
+ *
+ * This job requests that external resource(s) be mapped once the dependencies
+ * of the job have been satisfied. The list of external resources is
+ * passed via the jc element of the atom which is a pointer to a
+ * @base_external_resource_list.
+ */
+#define BASE_JD_REQ_SOFT_EXT_RES_MAP            (BASE_JD_REQ_SOFT_JOB | 0xb)
+/**
+ * SW only requirement: Unmap external resource
+ *
+ * This job requests external resource(s) are unmapped once the dependencies
+ * of the job has been satisfied. The list of external resources are
+ * passed via the jc element of the atom which is a pointer to a
+ * @base_external_resource_list.
+ */
+#define BASE_JD_REQ_SOFT_EXT_RES_UNMAP          (BASE_JD_REQ_SOFT_JOB | 0xc)
 
 /**
  * HW Requirement: Requires Compute shaders (but not Vertex or Geometry Shaders)
@@ -485,54 +629,79 @@ typedef u16 base_jd_core_req;
  * In contrast to @ref BASE_JD_REQ_CS, this does \b not indicate that the Job
  * Chain contains 'Geometry Shader' or 'Vertex Shader' jobs.
  */
-#define BASE_JD_REQ_ONLY_COMPUTE    (1U << 10)
+#define BASE_JD_REQ_ONLY_COMPUTE    ((base_jd_core_req)1 << 10)
 
 /**
  * HW Requirement: Use the base_jd_atom::device_nr field to specify a
  * particular core group
  *
- * If both BASE_JD_REQ_COHERENT_GROUP and this flag are set, this flag takes priority
+ * If both @ref BASE_JD_REQ_COHERENT_GROUP and this flag are set, this flag takes priority
  *
- * This is only guaranteed to work for BASE_JD_REQ_ONLY_COMPUTE atoms.
+ * This is only guaranteed to work for @ref BASE_JD_REQ_ONLY_COMPUTE atoms.
  *
  * If the core availability policy is keeping the required core group turned off, then
- * the job will fail with a BASE_JD_EVENT_PM_EVENT error code.
+ * the job will fail with a @ref BASE_JD_EVENT_PM_EVENT error code.
  */
-#define BASE_JD_REQ_SPECIFIC_COHERENT_GROUP (1U << 11)
+#define BASE_JD_REQ_SPECIFIC_COHERENT_GROUP ((base_jd_core_req)1 << 11)
 
 /**
  * SW Flag: If this bit is set then the successful completion of this atom
  * will not cause an event to be sent to userspace
  */
-#define BASE_JD_REQ_EVENT_ONLY_ON_FAILURE   (1U << 12)
+#define BASE_JD_REQ_EVENT_ONLY_ON_FAILURE   ((base_jd_core_req)1 << 12)
 
 /**
  * SW Flag: If this bit is set then completion of this atom will not cause an
  * event to be sent to userspace, whether successful or not.
  */
-#define BASEP_JD_REQ_EVENT_NEVER (1U << 14)
+#define BASEP_JD_REQ_EVENT_NEVER ((base_jd_core_req)1 << 14)
 
 /**
-* These requirement bits are currently unused in base_jd_core_req (currently a u16)
-*/
-
-#define BASEP_JD_REQ_RESERVED_BIT5 (1U << 5)
-#define BASEP_JD_REQ_RESERVED_BIT15 (1U << 15)
+ * SW Flag: Skip GPU cache clean and invalidation before starting a GPU job.
+ *
+ * If this bit is set then the GPU's cache will not be cleaned and invalidated
+ * until a GPU job starts which does not have this bit set or a job completes
+ * which does not have the @ref BASE_JD_REQ_SKIP_CACHE_END bit set. Do not use if
+ * the CPU may have written to memory addressed by the job since the last job
+ * without this bit set was submitted.
+ */
+#define BASE_JD_REQ_SKIP_CACHE_START ((base_jd_core_req)1 << 15)
 
 /**
-* Mask of all the currently unused requirement bits in base_jd_core_req.
-*/
+ * SW Flag: Skip GPU cache clean and invalidation after a GPU job completes.
+ *
+ * If this bit is set then the GPU's cache will not be cleaned and invalidated
+ * until a GPU job completes which does not have this bit set or a job starts
+ * which does not have the @ref BASE_JD_REQ_SKIP_CACHE_START bit set. Do not use if
+ * the CPU may read from or partially overwrite memory addressed by the job
+ * before the next job without this bit set completes.
+ */
+#define BASE_JD_REQ_SKIP_CACHE_END ((base_jd_core_req)1 << 16)
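
As a worked example (illustrative helper, not driver code): a producer/consumer pair of GPU jobs whose intermediate buffers the CPU never touches can drop the cache maintenance between them like this.

static void chain_without_cache_flush(struct base_jd_atom_v2 *producer,
				      struct base_jd_atom_v2 *consumer)
{
	/* Safe only if the CPU neither writes the consumer's inputs nor reads
	 * the producer's outputs between the two jobs. */
	producer->core_req |= BASE_JD_REQ_SKIP_CACHE_END;
	consumer->core_req |= BASE_JD_REQ_SKIP_CACHE_START;
}
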
 
-#define BASEP_JD_REQ_RESERVED (BASEP_JD_REQ_RESERVED_BIT5 | \
-				BASEP_JD_REQ_RESERVED_BIT15)
+/**
+ * These requirement bits are currently unused in base_jd_core_req
+ */
+#define BASEP_JD_REQ_RESERVED \
+	(~(BASE_JD_REQ_ATOM_TYPE | BASE_JD_REQ_EXTERNAL_RESOURCES | \
+	BASE_JD_REQ_EVENT_ONLY_ON_FAILURE | BASEP_JD_REQ_EVENT_NEVER | \
+	BASE_JD_REQ_EVENT_COALESCE | \
+	BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP | \
+	BASE_JD_REQ_FS_AFBC | BASE_JD_REQ_PERMON | \
+	BASE_JD_REQ_SKIP_CACHE_START | BASE_JD_REQ_SKIP_CACHE_END))
 
 /**
  * Mask of all bits in base_jd_core_req that control the type of the atom.
  *
  * This allows dependency only atoms to have flags set
  */
-#define BASEP_JD_REQ_ATOM_TYPE (~(BASEP_JD_REQ_RESERVED | BASE_JD_REQ_EVENT_ONLY_ON_FAILURE |\
-				BASE_JD_REQ_EXTERNAL_RESOURCES | BASEP_JD_REQ_EVENT_NEVER))
+#define BASE_JD_REQ_ATOM_TYPE \
+	(BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T | BASE_JD_REQ_CF | \
+	BASE_JD_REQ_V | BASE_JD_REQ_SOFT_JOB | BASE_JD_REQ_ONLY_COMPUTE)
+
+/**
+ * Mask of all bits in base_jd_core_req that control the type of a soft job.
+ */
+#define BASE_JD_REQ_SOFT_JOB_TYPE (BASE_JD_REQ_SOFT_JOB | 0x1f)
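
The three masks are arranged so a submitted core_req can be checked mechanically. A minimal sketch of such a check follows; the driver's real validation sits in the job-dispatch path and is more involved, so this helper is illustrative only.

static bool core_req_is_valid(base_jd_core_req core_req)
{
	/* Reserved bits must never be set by userspace. */
	if (core_req & BASEP_JD_REQ_RESERVED)
		return false;

	/* A soft job must encode a sub-type in the low bits. */
	if ((core_req & BASE_JD_REQ_SOFT_JOB) &&
	    (core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_JOB)
		return false;

	return true;
}
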
 
 /**
  * @brief States to model state machine processed by kbasep_js_job_check_ref_cores(), which
@@ -636,18 +805,26 @@ struct base_dependency {
 	base_jd_dep_type dependency_type;    /**< Dependency type */
 };
 
+/* This structure has changed since UK 10.2, for which base_jd_core_req was a u16 value.
+ * To keep the size of the structure the same, the padding field has been adjusted
+ * accordingly and a core_req field of type u32 (as UK 10.3 defines base_jd_core_req)
+ * has been added at the end of the structure. The place previously occupied by the u16
+ * core_req is kept but renamed to compat_core_req, so it can still be used in the job
+ * submission ioctl for as long as UK 10.2 legacy support is kept. Once that support ends,
+ * the field can be left reserved for possible future use. */
 typedef struct base_jd_atom_v2 {
 	u64 jc;			    /**< job-chain GPU address */
 	struct base_jd_udata udata;		    /**< user data */
 	kbase_pointer extres_list;	    /**< list of external resources */
 	u16 nr_extres;			    /**< nr of external resources */
-	base_jd_core_req core_req;	    /**< core requirements */
+	u16 compat_core_req;	            /**< core requirements which correspond to the legacy support for UK 10.2 */
 	struct base_dependency pre_dep[2];  /**< pre-dependencies, one need to use SETTER function to assign this field,
 	this is done in order to reduce possibility of improper assigment of a dependency field */
 	base_atom_id atom_number;	    /**< unique number to identify the atom */
 	base_jd_prio prio;                  /**< Atom priority. Refer to @ref base_jd_prio for more details */
 	u8 device_nr;			    /**< coregroup when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP specified */
-	u8 padding[5];
+	u8 padding[1];
+	base_jd_core_req core_req;          /**< core requirements */
 } base_jd_atom_v2;
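
One way a 10.3-aware client could populate the reworked layout while keeping something sensible in the legacy slot is sketched below. This is an assumption about userspace behaviour, not a description of what the kernel itself does with compat_core_req.

static void set_atom_core_req(struct base_jd_atom_v2 *atom,
			      base_jd_core_req core_req)
{
	atom->core_req = core_req;		/* authoritative 32-bit field (UK 10.3) */
	atom->compat_core_req = (u16)core_req;	/* truncated copy for UK 10.2 readers */
}
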
 
 #ifdef BASE_LEGACY_UK6_SUPPORT
@@ -656,14 +833,14 @@ struct base_jd_atom_v2_uk6 {
 	struct base_jd_udata udata;		    /**< user data */
 	kbase_pointer extres_list;	    /**< list of external resources */
 	u16 nr_extres;			    /**< nr of external resources */
-	base_jd_core_req core_req;	    /**< core requirements */
+	u16 core_req;                       /**< core requirements */
 	base_atom_id pre_dep[2]; /**< pre-dependencies */
 	base_atom_id atom_number;	    /**< unique number to identify the atom */
 	base_jd_prio prio;		    /**< priority - smaller is higher priority */
 	u8 device_nr;			    /**< coregroup when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP specified */
 	u8 padding[7];
 };
-#endif
+#endif /* BASE_LEGACY_UK6_SUPPORT */
 
 typedef enum base_external_resource_access {
 	BASE_EXT_RES_ACCESS_SHARED,
@@ -674,6 +851,31 @@ typedef struct base_external_resource {
 	u64 ext_resource;
 } base_external_resource;
 
+
+/**
+ * The maximum number of external resources which can be mapped/unmapped
+ * in a single request.
+ */
+#define BASE_EXT_RES_COUNT_MAX 10
+
+/**
+ * struct base_external_resource_list - Structure which describes a list of
+ *                                      external resources.
+ * @count:                              The number of resources.
+ * @ext_res:                            Array of external resources which is
+ *                                      sized at allocation time.
+ */
+struct base_external_resource_list {
+	u64 count;
+	struct base_external_resource ext_res[1];
+};
+
+struct base_jd_debug_copy_buffer {
+	u64 address;
+	u64 size;
+	struct base_external_resource extres;
+};
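
Since ext_res is declared with a single element and sized at allocation time, a list for N resources is allocated as the struct size plus N-1 extra entries. The builder below is an illustrative client-side sketch using the header's kernel-style types with libc allocation; kernel code would use its own allocator, and the jc wiring follows the EXT_RES_MAP soft job description above.

#include <stdlib.h>
#include <string.h>

static struct base_external_resource_list *
build_ext_res_list(const struct base_external_resource *res, u64 count)
{
	struct base_external_resource_list *list;
	size_t size;

	if (count == 0 || count > BASE_EXT_RES_COUNT_MAX)
		return NULL;

	/* ext_res[1] acts as a flexible tail, so size for count entries. */
	size = sizeof(*list) + (size_t)(count - 1) * sizeof(list->ext_res[0]);
	list = malloc(size);
	if (!list)
		return NULL;

	list->count = count;
	memcpy(list->ext_res, res, (size_t)count * sizeof(list->ext_res[0]));
	return list;	/* its address goes into the EXT_RES_MAP atom's jc */
}
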
+
 /**
  * @brief Setter for a dependency structure
  *
@@ -1450,7 +1652,7 @@ typedef struct mali_base_gpu_props {
  * Flags to pass to ::base_context_init.
  * Flags can be ORed together to enable multiple things.
  *
- * These share the same space as @ref basep_context_private_flags, and so must
+ * These share the same space as BASEP_CONTEXT_FLAG_*, and so must
  * not collide with them.
  */
 enum base_context_create_flags {
@@ -1479,7 +1681,7 @@ enum base_context_create_flags {
 #define BASE_CONTEXT_CREATE_KERNEL_FLAGS \
 	((u32)BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED)
 
-/**
+/*
  * Private flags used on the base context
  *
  * These start at bit 31, and run down to zero.
@@ -1487,10 +1689,8 @@ enum base_context_create_flags {
  * They share the same space as @ref base_context_create_flags, and so must
  * not collide with them.
  */
-enum basep_context_private_flags {
-	/** Private flag tracking whether job descriptor dumping is disabled */
-	BASEP_CONTEXT_FLAG_JOB_DUMP_DISABLED = (1 << 31)
-};
+/** Private flag tracking whether job descriptor dumping is disabled */
+#define BASEP_CONTEXT_FLAG_JOB_DUMP_DISABLED ((u32)(1 << 31))
 
 /** @} end group base_user_api_core */
 
@@ -1563,9 +1763,21 @@ typedef struct base_jd_replay_payload {
 	 * Core requirements for the fragment job chain
 	 */
 	base_jd_core_req fragment_core_req;
+} base_jd_replay_payload;
 
+#ifdef BASE_LEGACY_UK10_2_SUPPORT
+typedef struct base_jd_replay_payload_uk10_2 {
+	u64 tiler_jc_list;
+	u64 fragment_jc;
+	u64 tiler_heap_free;
+	u16 fragment_hierarchy_mask;
+	u16 tiler_hierarchy_mask;
+	u32 hierarchy_default_weight;
+	u16 tiler_core_req;
+	u16 fragment_core_req;
 	u8 padding[4];
-} base_jd_replay_payload;
+} base_jd_replay_payload_uk10_2;
+#endif /* BASE_LEGACY_UK10_2_SUPPORT */
 
 /**
  * @brief An entry in the linked list of job chains to be replayed. This must
diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase.h b/drivers/gpu/arm/midgard_for_linux/mali_kbase.h
index fe58341fdef8..b6d28fea9987 100644
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase.h
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -45,6 +45,7 @@
 #include <mali_kbase_uku.h>
 #include <mali_kbase_linux.h>
 
+#include "mali_kbase_strings.h"
 #include "mali_kbase_pm.h"
 #include "mali_kbase_mem_lowlevel.h"
 #include "mali_kbase_defs.h"
@@ -139,7 +140,6 @@ void kbase_jd_done_worker(struct work_struct *data);
 void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr, ktime_t *end_timestamp,
 		kbasep_js_atom_done_code done_code);
 void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom);
-void kbase_jd_evict(struct kbase_device *kbdev, struct kbase_jd_atom *katom);
 void kbase_jd_zap_context(struct kbase_context *kctx);
 bool jd_done_nolock(struct kbase_jd_atom *katom,
 		struct list_head *completed_jobs_ctx);
@@ -147,6 +147,7 @@ void kbase_jd_free_external_resources(struct kbase_jd_atom *katom);
 bool jd_submit_atom(struct kbase_context *kctx,
 			 const struct base_jd_atom_v2 *user_atom,
 			 struct kbase_jd_atom *katom);
+void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom);
 
 void kbase_job_done(struct kbase_device *kbdev, u32 done);
 
@@ -174,7 +175,7 @@ void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js,
 void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
 		struct kbase_jd_atom *target_katom);
 void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action,
-		u16 core_reqs, struct kbase_jd_atom *target_katom);
+		base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom);
 void kbase_job_check_leave_disjoint(struct kbase_device *kbdev,
 		struct kbase_jd_atom *target_katom);
 
@@ -191,9 +192,17 @@ int kbase_prepare_soft_job(struct kbase_jd_atom *katom);
 void kbase_finish_soft_job(struct kbase_jd_atom *katom);
 void kbase_cancel_soft_job(struct kbase_jd_atom *katom);
 void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev);
+void kbasep_add_waiting_soft_job(struct kbase_jd_atom *katom);
+void kbasep_remove_waiting_soft_job(struct kbase_jd_atom *katom);
+int kbase_soft_event_update(struct kbase_context *kctx,
+			    u64 event,
+			    unsigned char new_status);
 
 bool kbase_replay_process(struct kbase_jd_atom *katom);
 
+void kbasep_soft_job_timeout_worker(unsigned long data);
+void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt);
+
 /* api used internally for register access. Contains validation and tracing */
 void kbase_device_trace_register_access(struct kbase_context *kctx, enum kbase_reg_access_type type, u16 reg_offset, u32 reg_value);
 int kbase_device_trace_buffer_install(
@@ -204,7 +213,6 @@ void kbase_device_trace_buffer_uninstall(struct kbase_context *kctx);
 void kbase_os_reg_write(struct kbase_device *kbdev, u16 offset, u32 value);
 u32 kbase_os_reg_read(struct kbase_device *kbdev, u16 offset);
 
-
 void kbasep_as_do_poke(struct work_struct *work);
 
 /** Returns the name associated with a Mali exception code
diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_cache_policy.c b/drivers/gpu/arm/midgard_for_linux/mali_kbase_cache_policy.c
index 2fb5e3edf49f..c67b3e97f1af 100644
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_cache_policy.c
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_cache_policy.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -43,6 +43,11 @@ u32 kbase_cache_enabled(u32 flags, u32 nr_pages)
 void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle,
 		size_t size, enum dma_data_direction dir)
 {
+/* Check if kernel is using coherency with GPU */
+#ifdef CONFIG_MALI_COH_KERN
+	if (kbdev->system_coherency == COHERENCY_ACE)
+		return;
+#endif /* CONFIG_MALI_COH_KERN */
 	dma_sync_single_for_device(kbdev->dev, handle, size, dir);
 }
 
@@ -50,5 +55,10 @@ void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle,
 void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle,
 		size_t size, enum dma_data_direction dir)
 {
+/* Check if kernel is using coherency with GPU */
+#ifdef CONFIG_MALI_COH_KERN
+	if (kbdev->system_coherency == COHERENCY_ACE)
+		return;
+#endif /* CONFIG_MALI_COH_KERN */
 	dma_sync_single_for_cpu(kbdev->dev, handle, size, dir);
 }
diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_config.h b/drivers/gpu/arm/midgard_for_linux/mali_kbase_config.h
index 816e45c4d02d..356d52bcd774 100644
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_config.h
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_config.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -45,13 +45,6 @@
  * @{
  */
 
-#if !MALI_CUSTOMER_RELEASE
-/* This flag is set for internal builds so we can run tests without credentials. */
-#define KBASE_HWCNT_DUMP_BYPASS_ROOT 1
-#else
-#define KBASE_HWCNT_DUMP_BYPASS_ROOT 0
-#endif
-
 #include <linux/rbtree.h>
 
 /* Forward declaration of struct kbase_device */
@@ -105,7 +98,7 @@ struct kbase_pm_callback_conf {
 	 * The system integrator can decide whether to either do nothing, just switch off
 	 * the clocks to the GPU, or to completely power down the GPU.
 	 * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the
-	 * platform \em callbacks responsiblity to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf).
+	 * platform \em callbacks responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf).
 	 */
 	void (*power_off_callback)(struct kbase_device *kbdev);
 
@@ -115,7 +108,7 @@ struct kbase_pm_callback_conf {
 	 * succeed.  The return value specifies whether the GPU was powered down since the call to power_off_callback.
 	 * If the GPU state has been lost then this function must return 1, otherwise it should return 0.
 	 * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the
-	 * platform \em callbacks responsiblity to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf).
+	 * platform \em callbacks responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf).
 	 *
 	 * The return value of the first call to this function is ignored.
 	 *
@@ -160,7 +153,7 @@ struct kbase_pm_callback_conf {
 	 * The runtime calls can be triggered by calls from @ref power_off_callback and @ref power_on_callback.
 	 * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
 	 *
-	 * @return 0 on success, else int erro code.
+	 * @return 0 on success, else int error code.
 	 */
 	 int (*power_runtime_init_callback)(struct kbase_device *kbdev);
 
@@ -203,8 +196,8 @@ struct kbase_pm_callback_conf {
 	 * Returning 0 will cause the runtime PM core to conduct a regular
 	 * autosuspend.
 	 *
-	 * This callback is optional and if not provided regular ausosuspend
-	 * will triggered.
+	 * This callback is optional and if not provided regular autosuspend
+	 * will be triggered.
 	 *
 	 * Note: The Linux kernel must have CONFIG_PM_RUNTIME enabled to use
 	 * this feature.
diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_config_defaults.h b/drivers/gpu/arm/midgard_for_linux/mali_kbase_config_defaults.h
index bd48ed96e962..9b00cce9b2b3 100644
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_config_defaults.h
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_config_defaults.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2013-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -148,11 +148,6 @@ enum {
  */
 #define DEFAULT_AWID_LIMIT KBASE_AID_32
 
-/**
- * Default setting for using alternative hardware counters.
- */
-#define DEFAULT_ALTERNATIVE_HWC false
-
 /**
  * Default UMP device mapping. A UMP_DEVICE_<device>_SHIFT value which
  * defines which UMP device this GPU should be mapped to.
@@ -220,6 +215,12 @@ enum {
  */
 #define DEFAULT_JS_HARD_STOP_TICKS_DUMPING   (15000) /* 1500s */
 
+/*
+ * Default timeout for some software jobs, after which the software event wait
+ * jobs will be cancelled.
+ */
+#define DEFAULT_JS_SOFT_JOB_TIMEOUT ((u32)3000) /* 3s */
+
 /*
  * Default minimum number of scheduling ticks before the GPU is reset to clear a
  * "stuck" job
diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_context.c b/drivers/gpu/arm/midgard_for_linux/mali_kbase_context.c
index 798979963937..344a1f16de8a 100644
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_context.c
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_context.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -23,8 +23,7 @@
 
 #include <mali_kbase.h>
 #include <mali_midg_regmap.h>
-#include <mali_kbase_instr.h>
-
+#include <mali_kbase_mem_linux.h>
 
 /**
  * kbase_create_context() - Create a kernel base context.
@@ -65,6 +64,8 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat)
 	kctx->process_mm = NULL;
 	atomic_set(&kctx->nonmapped_pages, 0);
 	kctx->slots_pullable = 0;
+	kctx->tgid = current->tgid;
+	kctx->pid = current->pid;
 
 	err = kbase_mem_pool_init(&kctx->mem_pool,
 			kbdev->mem_pool_max_size_default,
@@ -72,11 +73,15 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat)
 	if (err)
 		goto free_kctx;
 
+	err = kbase_mem_evictable_init(kctx);
+	if (err)
+		goto free_pool;
+
 	atomic_set(&kctx->used_pages, 0);
 
 	err = kbase_jd_init(kctx);
 	if (err)
-		goto free_pool;
+		goto deinit_evictable;
 
 	err = kbasep_js_kctx_init(kctx);
 	if (err)
@@ -86,16 +91,22 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat)
 	if (err)
 		goto free_jd;
 
+	atomic_set(&kctx->drain_pending, 0);
+
 	mutex_init(&kctx->reg_lock);
 
 	INIT_LIST_HEAD(&kctx->waiting_soft_jobs);
+	spin_lock_init(&kctx->waiting_soft_jobs_lock);
 #ifdef CONFIG_KDS
 	INIT_LIST_HEAD(&kctx->waiting_kds_resource);
 #endif
+	err = kbase_dma_fence_init(kctx);
+	if (err)
+		goto free_event;
 
 	err = kbase_mmu_init(kctx);
 	if (err)
-		goto free_event;
+		goto term_dma_fence;
 
 	kctx->pgd = kbase_mmu_alloc_pgd(kctx);
 	if (!kctx->pgd)
@@ -105,8 +116,6 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat)
 	if (!kctx->aliasing_sink_page)
 		goto no_sink_page;
 
-	kctx->tgid = current->tgid;
-	kctx->pid = current->pid;
 	init_waitqueue_head(&kctx->event_queue);
 
 	kctx->cookies = KBASE_COOKIE_MASK;
@@ -115,6 +124,14 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat)
 	err = kbase_region_tracker_init(kctx);
 	if (err)
 		goto no_region_tracker;
+
+	err = kbase_sticky_resource_init(kctx);
+	if (err)
+		goto no_sticky;
+
+	err = kbase_jit_init(kctx);
+	if (err)
+		goto no_jit;
 #ifdef CONFIG_GPU_TRACEPOINTS
 	atomic_set(&kctx->jctx.work_id, 0);
 #endif
@@ -126,8 +143,18 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat)
 
 	mutex_init(&kctx->vinstr_cli_lock);
 
+	setup_timer(&kctx->soft_job_timeout,
+		    kbasep_soft_job_timeout_worker,
+		    (uintptr_t)kctx);
+
 	return kctx;
 
+no_jit:
+	kbase_gpu_vm_lock(kctx);
+	kbase_sticky_resource_term(kctx);
+	kbase_gpu_vm_unlock(kctx);
+no_sticky:
+	kbase_region_tracker_term(kctx);
 no_region_tracker:
 	kbase_mem_pool_free(&kctx->mem_pool, kctx->aliasing_sink_page, false);
 no_sink_page:
@@ -137,12 +164,16 @@ no_sink_page:
 	kbase_gpu_vm_unlock(kctx);
 free_mmu:
 	kbase_mmu_term(kctx);
+term_dma_fence:
+	kbase_dma_fence_term(kctx);
 free_event:
 	kbase_event_cleanup(kctx);
 free_jd:
 	/* Safe to call this one even when didn't initialize (assuming kctx was sufficiently zeroed) */
 	kbasep_js_kctx_term(kctx);
 	kbase_jd_exit(kctx);
+deinit_evictable:
+	kbase_mem_evictable_deinit(kctx);
 free_pool:
 	kbase_mem_pool_term(&kctx->mem_pool);
 free_kctx:
@@ -188,8 +219,18 @@ void kbase_destroy_context(struct kbase_context *kctx)
 	kbase_jd_zap_context(kctx);
 	kbase_event_cleanup(kctx);
 
+	/*
+	 * JIT must be terminated before the code below, as it must be called
+	 * without the region lock being held.
+	 * The code above ensures no new JIT allocations can be made by the
+	 * time we get to this point of context tear down.
+	 */
+	kbase_jit_term(kctx);
+
 	kbase_gpu_vm_lock(kctx);
 
+	kbase_sticky_resource_term(kctx);
+
 	/* MMU is disabled as part of scheduling out the context */
 	kbase_mmu_free_pgd(kctx);
 
@@ -219,12 +260,15 @@ void kbase_destroy_context(struct kbase_context *kctx)
 
 	kbase_pm_context_idle(kbdev);
 
+	kbase_dma_fence_term(kctx);
+
 	kbase_mmu_term(kctx);
 
 	pages = atomic_read(&kctx->used_pages);
 	if (pages != 0)
 		dev_warn(kbdev->dev, "%s: %d pages in use!\n", __func__, pages);
 
+	kbase_mem_evictable_deinit(kctx);
 	kbase_mem_pool_term(&kctx->mem_pool);
 	WARN_ON(atomic_read(&kctx->nonmapped_pages) != 0);
 
diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_context.h b/drivers/gpu/arm/midgard_for_linux/mali_kbase_context.h
new file mode 100644
index 000000000000..a3f5bb0ce0da
--- /dev/null
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_context.h
@@ -0,0 +1,90 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_CONTEXT_H_
+#define _KBASE_CONTEXT_H_
+
+#include <linux/atomic.h>
+
+
+int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags);
+
+/**
+ * kbase_ctx_flag - Check if @flag is set on @kctx
+ * @kctx: Pointer to kbase context to check
+ * @flag: Flag to check
+ *
+ * Return: true if @flag is set on @kctx, false if not.
+ */
+static inline bool kbase_ctx_flag(struct kbase_context *kctx,
+				      enum kbase_context_flags flag)
+{
+	return atomic_read(&kctx->flags) & flag;
+}
+
+/**
+ * kbase_ctx_flag_clear - Clear @flag on @kctx
+ * @kctx: Pointer to kbase context
+ * @flag: Flag to clear
+ *
+ * Clear the @flag on @kctx. This is done atomically, so other flags being
+ * cleared or set at the same time will be safe.
+ *
+ * Some flags have locking requirements, check the documentation for the
+ * respective flags.
+ */
+static inline void kbase_ctx_flag_clear(struct kbase_context *kctx,
+					enum kbase_context_flags flag)
+{
+#if KERNEL_VERSION(4, 3, 0) > LINUX_VERSION_CODE
+	/*
+	 * Earlier kernel versions don't have atomic_andnot() or
+	 * atomic_and(). atomic_clear_mask() was only available on some
+	 * architectures and was removed for arm and arm64 in v3.13.
+	 *
+	 * Use a compare-exchange loop to clear the flag on pre-4.3 kernels;
+	 * from 4.3 onwards atomic_andnot() is available.
+	 */
+	int old, new;
+
+	do {
+		old = atomic_read(&kctx->flags);
+		new = old & ~flag;
+
+	} while (atomic_cmpxchg(&kctx->flags, old, new) != old);
+#else
+	atomic_andnot(flag, &kctx->flags);
+#endif
+}
+
+/**
+ * kbase_ctx_flag_set - Set @flag on @kctx
+ * @kctx: Pointer to kbase context
+ * @flag: Flag to set
+ *
+ * Set the @flag on @kctx. This is done atomically, so other flags being
+ * cleared or set at the same time will be safe.
+ *
+ * Some flags have locking requirements, check the documentation for the
+ * respective flags.
+ */
+static inline void kbase_ctx_flag_set(struct kbase_context *kctx,
+				      enum kbase_context_flags flag)
+{
+	atomic_or(flag, &kctx->flags);
+}
+#endif /* _KBASE_CONTEXT_H_ */
diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_core_linux.c b/drivers/gpu/arm/midgard_for_linux/mali_kbase_core_linux.c
index ab6f5e45eacc..ee59504cd4e8 100644
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_core_linux.c
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_core_linux.c
@@ -1,7 +1,6 @@
-
 /*
  *
- * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -18,13 +17,10 @@
 #include "platform/rk/custom_log.h"
 
 #include <mali_kbase.h>
-#include <mali_kbase_hwaccess_gpuprops.h>
 #include <mali_kbase_config_defaults.h>
 #include <mali_kbase_uku.h>
 #include <mali_midg_regmap.h>
-#include <mali_kbase_instr.h>
 #include <mali_kbase_gator.h>
-#include <backend/gpu/mali_kbase_js_affinity.h>
 #include <mali_kbase_mem_linux.h>
 #ifdef CONFIG_MALI_DEVFREQ
 #include <backend/gpu/mali_kbase_devfreq.h>
@@ -63,12 +59,10 @@
 #include <linux/fs.h>
 #include <linux/uaccess.h>
 #include <linux/interrupt.h>
-#include <linux/io.h>
 #include <linux/mm.h>
 #include <linux/compat.h>	/* is_compat_task */
 #include <linux/mman.h>
 #include <linux/version.h>
-#include <linux/security.h>
 #ifdef CONFIG_MALI_PLATFORM_DEVICETREE
 #include <linux/pm_runtime.h>
 #endif /* CONFIG_MALI_PLATFORM_DEVICETREE */
@@ -88,9 +82,6 @@
 
 #include <mali_kbase_config.h>
 
-#ifdef CONFIG_MACH_MANTA
-#include <plat/devs.h>
-#endif
 
 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0))
 #include <linux/pm_opp.h>
@@ -98,9 +89,9 @@
 #include <linux/opp.h>
 #endif
 
-#if defined(CONFIG_MALI_MIPE_ENABLED)
 #include <mali_kbase_tlstream.h>
-#endif
+
+#include <mali_kbase_as_fault_debugfs.h>
 
 /* GPU IRQ Tags */
 #define	JOB_IRQ_TAG	0
@@ -112,12 +103,9 @@ static struct kbase_exported_test_data shared_kernel_test_data;
 EXPORT_SYMBOL(shared_kernel_test_data);
 #endif /* MALI_UNIT_TEST */
 
-#define KBASE_DRV_NAME "mali"
 /** rk_ext : version of rk_ext on mali_ko, aka. rk_ko_ver. */
 #define ROCKCHIP_VERSION    (13)
 
-static const char kbase_drv_name[] = KBASE_DRV_NAME;
-
 static int kbase_dev_nr;
 
 static DEFINE_MUTEX(kbase_dev_list_lock);
@@ -160,204 +148,8 @@ static int kds_resource_release(struct inode *inode, struct file *file)
 	}
 	return 0;
 }
-
-static int kbasep_kds_allocate_resource_list_data(struct kbase_context *kctx, struct base_external_resource *ext_res, int num_elems, struct kbase_kds_resource_list_data *resources_list)
-{
-	struct base_external_resource *res = ext_res;
-	int res_id;
-
-	/* assume we have to wait for all */
-
-	KBASE_DEBUG_ASSERT(0 != num_elems);
-	resources_list->kds_resources = kmalloc_array(num_elems,
-			sizeof(struct kds_resource *), GFP_KERNEL);
-
-	if (NULL == resources_list->kds_resources)
-		return -ENOMEM;
-
-	KBASE_DEBUG_ASSERT(0 != num_elems);
-	resources_list->kds_access_bitmap = kzalloc(
-			sizeof(unsigned long) *
-			((num_elems + BITS_PER_LONG - 1) / BITS_PER_LONG),
-			GFP_KERNEL);
-
-	if (NULL == resources_list->kds_access_bitmap) {
-		kfree(resources_list->kds_access_bitmap);
-		return -ENOMEM;
-	}
-
-	kbase_gpu_vm_lock(kctx);
-	for (res_id = 0; res_id < num_elems; res_id++, res++) {
-		int exclusive;
-		struct kbase_va_region *reg;
-		struct kds_resource *kds_res = NULL;
-
-		exclusive = res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE;
-		reg = kbase_region_tracker_find_region_enclosing_address(kctx, res->ext_resource & ~BASE_EXT_RES_ACCESS_EXCLUSIVE);
-
-		/* did we find a matching region object? */
-		if (NULL == reg || (reg->flags & KBASE_REG_FREE))
-			break;
-
-		/* no need to check reg->alloc as only regions with an alloc has
-		 * a size, and kbase_region_tracker_find_region_enclosing_address
-		 * only returns regions with size > 0 */
-		switch (reg->gpu_alloc->type) {
-#if defined(CONFIG_UMP) && defined(CONFIG_KDS)
-		case KBASE_MEM_TYPE_IMPORTED_UMP:
-			kds_res = ump_dd_kds_resource_get(reg->gpu_alloc->imported.ump_handle);
-			break;
-#endif /* defined(CONFIG_UMP) && defined(CONFIG_KDS) */
-		default:
-			break;
-		}
-
-		/* no kds resource for the region ? */
-		if (!kds_res)
-			break;
-
-		resources_list->kds_resources[res_id] = kds_res;
-
-		if (exclusive)
-			set_bit(res_id, resources_list->kds_access_bitmap);
-	}
-	kbase_gpu_vm_unlock(kctx);
-
-	/* did the loop run to completion? */
-	if (res_id == num_elems)
-		return 0;
-
-	/* Clean up as the resource list is not valid. */
-	kfree(resources_list->kds_resources);
-	kfree(resources_list->kds_access_bitmap);
-
-	return -EINVAL;
-}
-
-static bool kbasep_validate_kbase_pointer(
-		struct kbase_context *kctx, union kbase_pointer *p)
-{
-	if (kctx->is_compat) {
-		if (p->compat_value == 0)
-			return false;
-	} else {
-		if (NULL == p->value)
-			return false;
-	}
-	return true;
-}
-
-static int kbase_external_buffer_lock(struct kbase_context *kctx,
-		struct kbase_uk_ext_buff_kds_data *args, u32 args_size)
-{
-	struct base_external_resource *ext_res_copy;
-	size_t ext_resource_size;
-	int ret = -EINVAL;
-	int fd = -EBADF;
-	struct base_external_resource __user *ext_res_user;
-	int __user *file_desc_usr;
-	struct kbasep_kds_resource_set_file_data *fdata;
-	struct kbase_kds_resource_list_data resource_list_data;
-
-	if (args_size != sizeof(struct kbase_uk_ext_buff_kds_data))
-		return -EINVAL;
-
-	/* Check user space has provided valid data */
-	if (!kbasep_validate_kbase_pointer(kctx, &args->external_resource) ||
-			!kbasep_validate_kbase_pointer(kctx, &args->file_descriptor) ||
-			(0 == args->num_res) ||
-			(args->num_res > KBASE_MAXIMUM_EXT_RESOURCES))
-		return -EINVAL;
-
-	ext_resource_size = sizeof(struct base_external_resource) * args->num_res;
-
-	KBASE_DEBUG_ASSERT(0 != ext_resource_size);
-	ext_res_copy = kmalloc(ext_resource_size, GFP_KERNEL);
-
-	if (!ext_res_copy)
-		return -EINVAL;
-#ifdef CONFIG_COMPAT
-	if (kctx->is_compat) {
-		ext_res_user = compat_ptr(args->external_resource.compat_value);
-		file_desc_usr = compat_ptr(args->file_descriptor.compat_value);
-	} else {
-#endif /* CONFIG_COMPAT */
-		ext_res_user = args->external_resource.value;
-		file_desc_usr = args->file_descriptor.value;
-#ifdef CONFIG_COMPAT
-	}
-#endif /* CONFIG_COMPAT */
-
-	/* Copy the external resources to lock from user space */
-	if (copy_from_user(ext_res_copy, ext_res_user, ext_resource_size))
-		goto out;
-
-	/* Allocate data to be stored in the file */
-	fdata = kmalloc(sizeof(*fdata), GFP_KERNEL);
-
-	if (!fdata) {
-		ret = -ENOMEM;
-		goto out;
-	}
-
-	/* Parse given elements and create resource and access lists */
-	ret = kbasep_kds_allocate_resource_list_data(kctx,
-			ext_res_copy, args->num_res, &resource_list_data);
-	if (!ret) {
-		long err;
-
-		fdata->lock = NULL;
-
-		fd = anon_inode_getfd("kds_ext", &kds_resource_fops, fdata, 0);
-
-		err = copy_to_user(file_desc_usr, &fd, sizeof(fd));
-
-		/* If the file descriptor was valid and we successfully copied
-		 * it to user space, then we can try and lock the requested
-		 * kds resources.
-		 */
-		if ((fd >= 0) && (0 == err)) {
-			struct kds_resource_set *lock;
-
-			lock = kds_waitall(args->num_res,
-					resource_list_data.kds_access_bitmap,
-					resource_list_data.kds_resources,
-					KDS_WAIT_BLOCKING);
-
-			if (!lock) {
-				ret = -EINVAL;
-			} else if (IS_ERR(lock)) {
-				ret = PTR_ERR(lock);
-			} else {
-				ret = 0;
-				fdata->lock = lock;
-			}
-		} else {
-			ret = -EINVAL;
-		}
-
-		kfree(resource_list_data.kds_resources);
-		kfree(resource_list_data.kds_access_bitmap);
-	}
-
-	if (ret) {
-		/* If the file was opened successfully then close it which will
-		 * clean up the file data, otherwise we clean up the file data
-		 * ourself.
-		 */
-		if (fd >= 0)
-			sys_close(fd);
-		else
-			kfree(fdata);
-	}
-out:
-	kfree(ext_res_copy);
-
-	return ret;
-}
 #endif /* CONFIG_KDS */
 
-#ifdef CONFIG_MALI_MIPE_ENABLED
 static void kbase_create_timeline_objects(struct kbase_context *kctx)
 {
 	struct kbase_device             *kbdev = kctx->kbdev;
@@ -397,7 +189,8 @@ static void kbase_create_timeline_objects(struct kbase_context *kctx)
 	list_for_each_entry(element, &kbdev->kctx_list, link) {
 		kbase_tlstream_tl_summary_new_ctx(
 				element->kctx,
-				(u32)(element->kctx->id));
+				(u32)(element->kctx->id),
+				(u32)(element->kctx->tgid));
 	}
 	/* Before releasing the lock, reset body stream buffers.
 	 * This will prevent context creation message to be directed to both
@@ -409,7 +202,6 @@ static void kbase_create_timeline_objects(struct kbase_context *kctx)
 	 * user space. */
 	kbase_tlstream_flush_streams();
 }
-#endif
 
 static void kbase_api_handshake(struct uku_version_check_args *version)
 {
@@ -476,6 +268,34 @@ enum mali_error {
 	MALI_ERROR_FUNCTION_FAILED,
 };
 
+enum {
+	inited_mem = (1u << 0),
+	inited_js = (1u << 1),
+	inited_pm_runtime_init = (1u << 2),
+#ifdef CONFIG_MALI_DEVFREQ
+	inited_devfreq = (1u << 3),
+#endif /* CONFIG_MALI_DEVFREQ */
+	inited_tlstream = (1u << 4),
+	inited_backend_early = (1u << 5),
+	inited_backend_late = (1u << 6),
+	inited_device = (1u << 7),
+	inited_vinstr = (1u << 8),
+#ifndef CONFIG_MALI_PRFCNT_SET_SECONDARY
+	inited_ipa = (1u << 9),
+#endif /* CONFIG_MALI_PRFCNT_SET_SECONDARY */
+	inited_job_fault = (1u << 10),
+	inited_misc_register = (1u << 11),
+	inited_get_device = (1u << 12),
+	inited_sysfs_group = (1u << 13),
+	inited_dev_list = (1u << 14),
+	inited_debugfs = (1u << 15),
+	inited_gpu_device = (1u << 16),
+	inited_registers_map = (1u << 17),
+	inited_power_control = (1u << 19),
+	inited_buslogger = (1u << 20)
+};
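+/*
+ * The bits above are recorded in kbdev->inited_subsys as each probe stage
+ * completes, so that kbase_platform_device_remove() can unwind a partially
+ * initialized device.
+ */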
+
+
 #ifdef CONFIG_MALI_DEBUG
 #define INACTIVE_WAIT_MS (5000)
 
@@ -561,6 +381,18 @@ static int kbase_dispatch(struct kbase_context *kctx, void * const args, u32 arg
 
 	/* setup complete, perform normal operation */
 	switch (id) {
+	case KBASE_FUNC_MEM_JIT_INIT:
+		{
+			struct kbase_uk_mem_jit_init *jit_init = args;
+
+			if (sizeof(*jit_init) != args_size)
+				goto bad_size;
+
+			if (kbase_region_tracker_init_jit(kctx,
+					jit_init->va_pages))
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+			break;
+		}
 	case KBASE_FUNC_MEM_ALLOC:
 		{
 			struct kbase_uk_mem_alloc *mem = args;
@@ -569,6 +401,13 @@ static int kbase_dispatch(struct kbase_context *kctx, void * const args, u32 arg
 			if (sizeof(*mem) != args_size)
 				goto bad_size;
 
+#if defined(CONFIG_64BIT)
+			if (!kctx->is_compat) {
+				/* force SAME_VA if a 64-bit client */
+				mem->flags |= BASE_MEM_SAME_VA;
+			}
+#endif
+
 			reg = kbase_mem_alloc(kctx, mem->va_pages,
 					mem->commit_pages, mem->extent,
 					&mem->flags, &mem->gpu_va,
@@ -595,10 +434,13 @@ static int kbase_dispatch(struct kbase_context *kctx, void * const args, u32 arg
 				break;
 			}
 
-			if (kbase_mem_import(kctx, mem_import->type, phandle,
-						&mem_import->gpu_va,
-						&mem_import->va_pages,
-						&mem_import->flags)) {
+			if (kbase_mem_import(kctx,
+					(enum base_mem_import_type)
+					mem_import->type,
+					phandle,
+					&mem_import->gpu_va,
+					&mem_import->va_pages,
+					&mem_import->flags)) {
 				mem_import->type = BASE_MEM_IMPORT_TYPE_INVALID;
 				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
 			}
@@ -779,7 +621,7 @@ copy_failed:
 			if (sizeof(*sn) != args_size)
 				goto bad_size;
 
-			if (sn->sset.basep_sset.mem_handle & ~PAGE_MASK) {
+			if (sn->sset.basep_sset.mem_handle.basep.handle & ~PAGE_MASK) {
 				dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_SYNC: sn->sset.basep_sset.mem_handle: passed parameter is invalid");
 				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
 				break;
@@ -954,26 +796,6 @@ copy_failed:
 			break;
 		}
 
-	case KBASE_FUNC_EXT_BUFFER_LOCK:
-		{
-#ifdef CONFIG_KDS
-			ret = kbase_external_buffer_lock(kctx,
-				(struct kbase_uk_ext_buff_kds_data *)args,
-				args_size);
-			switch (ret) {
-			case 0:
-				ukh->ret = MALI_ERROR_NONE;
-				break;
-			case -ENOMEM:
-				ukh->ret = MALI_ERROR_OUT_OF_MEMORY;
-				break;
-			default:
-				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
-			}
-#endif /* CONFIG_KDS */
-			break;
-		}
-
 	case KBASE_FUNC_SET_TEST_DATA:
 		{
 #if MALI_UNIT_TEST
@@ -1075,7 +897,7 @@ copy_failed:
 				goto bad_size;
 
 			if (add_data->len > KBASE_MEM_PROFILE_MAX_BUF_SIZE) {
-				dev_err(kbdev->dev, "buffer too big");
+				dev_err(kbdev->dev, "buffer too big\n");
 				goto out_bad;
 			}
 
@@ -1119,7 +941,6 @@ copy_failed:
 		}
 #endif /* CONFIG_MALI_NO_MALI */
 
-#ifdef CONFIG_MALI_MIPE_ENABLED
 	case KBASE_FUNC_TLSTREAM_ACQUIRE:
 		{
 			struct kbase_uk_tlstream_acquire *tlstream_acquire =
@@ -1179,7 +1000,6 @@ copy_failed:
 			break;
 		}
 #endif /* MALI_UNIT_TEST */
-#endif /* CONFIG_MALI_MIPE_ENABLED */
 
 	case KBASE_FUNC_GET_CONTEXT_ID:
 		{
@@ -1189,8 +1009,27 @@ copy_failed:
 			break;
 		}
 
+	case KBASE_FUNC_SOFT_EVENT_UPDATE:
+		{
+			struct kbase_uk_soft_event_update *update = args;
+
+			if (sizeof(*update) != args_size)
+				goto bad_size;
+
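+			/* Only SET/RESET transitions without flags are valid. */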
+			if (((update->new_status != BASE_JD_SOFT_EVENT_SET) &&
+			    (update->new_status != BASE_JD_SOFT_EVENT_RESET)) ||
+			    (update->flags != 0))
+				goto out_bad;
+
+			if (kbase_soft_event_update(kctx, update->evt,
+						update->new_status))
+				ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+
+			break;
+		}
+
 	default:
-		dev_err(kbdev->dev, "unknown ioctl %u", id);
+		dev_err(kbdev->dev, "unknown ioctl %u\n", id);
 		goto out_bad;
 	}
 
@@ -1207,6 +1046,47 @@ static struct kbase_device *to_kbase_device(struct device *dev)
 	return dev_get_drvdata(dev);
 }
 
+static int assign_irqs(struct platform_device *pdev)
+{
+	struct kbase_device *kbdev = to_kbase_device(&pdev->dev);
+	int i;
+
+	if (!kbdev)
+		return -ENODEV;
+
+	/* 3 IRQ resources */
+	for (i = 0; i < 3; i++) {
+		struct resource *irq_res;
+		int irqtag;
+
+		irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, i);
+		if (!irq_res) {
+			dev_err(kbdev->dev, "No IRQ resource at index %d\n", i);
+			return -ENOENT;
+		}
+
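+		/*
+		 * With device tree the interrupt resources are matched by
+		 * name ("JOB", "MMU", "GPU"); without it the resource index
+		 * is assumed to follow the same fixed order.
+		 */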
+#ifdef CONFIG_OF
+		if (!strcmp(irq_res->name, "JOB")) {
+			irqtag = JOB_IRQ_TAG;
+		} else if (!strcmp(irq_res->name, "MMU")) {
+			irqtag = MMU_IRQ_TAG;
+		} else if (!strcmp(irq_res->name, "GPU")) {
+			irqtag = GPU_IRQ_TAG;
+		} else {
+			dev_err(&pdev->dev, "Invalid irq res name: '%s'\n",
+				irq_res->name);
+			return -EINVAL;
+		}
+#else
+		irqtag = i;
+#endif /* CONFIG_OF */
+		kbdev->irqs[irqtag].irq = irq_res->start;
+		kbdev->irqs[irqtag].flags = irq_res->flags & IRQF_TRIGGER_MASK;
+	}
+
+	return 0;
+}
+
 /*
  * API to acquire device list mutex and
  * return pointer to the device list head
@@ -1309,7 +1189,8 @@ static int kbase_open(struct inode *inode, struct file *filp)
 
 	kbase_mem_pool_debugfs_add(kctx->kctx_dentry, &kctx->mem_pool);
 
-#endif /* CONFIG_DEBUGFS */
+	kbase_jit_debugfs_add(kctx);
+#endif /* CONFIG_DEBUG_FS */
 
 	dev_dbg(kbdev->dev, "created base context\n");
 
@@ -1321,11 +1202,10 @@ static int kbase_open(struct inode *inode, struct file *filp)
 			mutex_lock(&kbdev->kctx_list_lock);
 			element->kctx = kctx;
 			list_add(&element->link, &kbdev->kctx_list);
-#ifdef CONFIG_MALI_MIPE_ENABLED
 			kbase_tlstream_tl_new_ctx(
 					element->kctx,
-					(u32)(element->kctx->id));
-#endif
+					(u32)(element->kctx->id),
+					(u32)(element->kctx->tgid));
 			mutex_unlock(&kbdev->kctx_list_lock);
 		} else {
 			/* we don't treat this as a fail - just warn about it */
@@ -1346,9 +1226,7 @@ static int kbase_release(struct inode *inode, struct file *filp)
 	struct kbasep_kctx_list_element *element, *tmp;
 	bool found_element = false;
 
-#ifdef CONFIG_MALI_MIPE_ENABLED
 	kbase_tlstream_tl_del_ctx(kctx);
-#endif
 
 #ifdef CONFIG_DEBUG_FS
 	debugfs_remove_recursive(kctx->kctx_dentry);
@@ -1613,11 +1491,12 @@ static unsigned long kbase_get_unmapped_area(struct file *filp,
 				flags);
 
 	if (kbase_hw_has_feature(kctx->kbdev, BASE_HW_FEATURE_33BIT_VA)) {
-		info.high_limit = 1ul << 33;
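+		/*
+		 * Limit the search to the context's SAME_VA region instead of
+		 * a fixed 2^33 boundary.
+		 */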
+		info.high_limit = kctx->same_va_end << PAGE_SHIFT;
 		info.align_mask = 0;
 		info.align_offset = 0;
 	} else {
-		info.high_limit = mm->mmap_base;
+		info.high_limit = min_t(unsigned long, mm->mmap_base,
+					(kctx->same_va_end << PAGE_SHIFT));
 		if (len >= SZ_2M) {
 			info.align_offset = SZ_2M;
 			info.align_mask = SZ_2M - 1;
@@ -1661,7 +1540,6 @@ u32 kbase_os_reg_read(struct kbase_device *kbdev, u16 offset)
 }
 #endif /* !CONFIG_MALI_NO_MALI */
 
-
 /** Show callback for the @c power_policy sysfs file.
  *
  * This function is called to get the contents of the @c power_policy sysfs
@@ -1984,6 +1862,89 @@ static ssize_t set_core_mask(struct device *dev, struct device_attribute *attr,
  */
 static DEVICE_ATTR(core_mask, S_IRUGO | S_IWUSR, show_core_mask, set_core_mask);
 
+/**
+ * set_soft_job_timeout() - Store callback for the soft_job_timeout sysfs
+ * file.
+ *
+ * @dev: The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf: The value written to the sysfs file.
+ * @count: The number of bytes written to the sysfs file.
+ *
+ * This allows setting the timeout for software jobs. Waiting soft event wait
+ * jobs will be cancelled after this period expires, while soft fence wait jobs
+ * will print debug information if the fence debug feature is enabled.
+ *
+ * This is expressed in milliseconds.
+ *
+ * Return: count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_soft_job_timeout(struct device *dev,
+				      struct device_attribute *attr,
+				      const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int soft_job_timeout_ms;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	if ((kstrtoint(buf, 0, &soft_job_timeout_ms) != 0) ||
+	    (soft_job_timeout_ms <= 0))
+		return -EINVAL;
+
+	atomic_set(&kbdev->js_data.soft_job_timeout_ms,
+		   soft_job_timeout_ms);
+
+	return count;
+}
+
+/**
+ * show_soft_job_timeout() - Show callback for the soft_job_timeout sysfs
+ * file.
+ *
+ * This will return the timeout for the software jobs.
+ *
+ * @dev: The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf: The output buffer for the sysfs file contents.
+ *
+ * Return: The number of bytes output to buf.
+ */
+static ssize_t show_soft_job_timeout(struct device *dev,
+				       struct device_attribute *attr,
+				       char * const buf)
+{
+	struct kbase_device *kbdev;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	return scnprintf(buf, PAGE_SIZE, "%i\n",
+			 atomic_read(&kbdev->js_data.soft_job_timeout_ms));
+}
+
+static DEVICE_ATTR(soft_job_timeout, S_IRUGO | S_IWUSR,
+		   show_soft_job_timeout, set_soft_job_timeout);
+
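+/*
+ * Convert a millisecond timeout into scheduling ticks: positive values are
+ * converted using the current scheduling period (at least one tick),
+ * negative values select the supplied default and zero keeps the old value.
+ */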
+static u32 timeout_ms_to_ticks(struct kbase_device *kbdev, long timeout_ms,
+				int default_ticks, u32 old_ticks)
+{
+	if (timeout_ms > 0) {
+		u64 ticks = timeout_ms * 1000000ULL;
+		do_div(ticks, kbdev->js_data.scheduling_period_ns);
+		if (!ticks)
+			return 1;
+		return ticks;
+	} else if (timeout_ms < 0) {
+		return default_ticks;
+	} else {
+		return old_ticks;
+	}
+}
+
 /** Store callback for the @c js_timeouts sysfs file.
  *
  * This function is called to get the contents of the @c js_timeouts sysfs
@@ -2028,99 +1989,45 @@ static ssize_t set_js_timeouts(struct device *dev, struct device_attribute *attr
 			&js_reset_ms_cl, &js_reset_ms_dumping);
 
 	if (items == 8) {
-		u64 ticks;
-
-		if (js_soft_stop_ms >= 0) {
-			ticks = js_soft_stop_ms * 1000000ULL;
-			do_div(ticks, kbdev->js_data.scheduling_period_ns);
-			kbdev->js_soft_stop_ticks = ticks;
-		} else {
-			kbdev->js_soft_stop_ticks = -1;
-		}
-
-		if (js_soft_stop_ms_cl >= 0) {
-			ticks = js_soft_stop_ms_cl * 1000000ULL;
-			do_div(ticks, kbdev->js_data.scheduling_period_ns);
-			kbdev->js_soft_stop_ticks_cl = ticks;
-		} else {
-			kbdev->js_soft_stop_ticks_cl = -1;
-		}
-
-		if (js_hard_stop_ms_ss >= 0) {
-			ticks = js_hard_stop_ms_ss * 1000000ULL;
-			do_div(ticks, kbdev->js_data.scheduling_period_ns);
-			kbdev->js_hard_stop_ticks_ss = ticks;
-		} else {
-			kbdev->js_hard_stop_ticks_ss = -1;
-		}
-
-		if (js_hard_stop_ms_cl >= 0) {
-			ticks = js_hard_stop_ms_cl * 1000000ULL;
-			do_div(ticks, kbdev->js_data.scheduling_period_ns);
-			kbdev->js_hard_stop_ticks_cl = ticks;
-		} else {
-			kbdev->js_hard_stop_ticks_cl = -1;
-		}
-
-		if (js_hard_stop_ms_dumping >= 0) {
-			ticks = js_hard_stop_ms_dumping * 1000000ULL;
-			do_div(ticks, kbdev->js_data.scheduling_period_ns);
-			kbdev->js_hard_stop_ticks_dumping = ticks;
-		} else {
-			kbdev->js_hard_stop_ticks_dumping = -1;
-		}
-
-		if (js_reset_ms_ss >= 0) {
-			ticks = js_reset_ms_ss * 1000000ULL;
-			do_div(ticks, kbdev->js_data.scheduling_period_ns);
-			kbdev->js_reset_ticks_ss = ticks;
-		} else {
-			kbdev->js_reset_ticks_ss = -1;
-		}
-
-		if (js_reset_ms_cl >= 0) {
-			ticks = js_reset_ms_cl * 1000000ULL;
-			do_div(ticks, kbdev->js_data.scheduling_period_ns);
-			kbdev->js_reset_ticks_cl = ticks;
-		} else {
-			kbdev->js_reset_ticks_cl = -1;
-		}
-
-		if (js_reset_ms_dumping >= 0) {
-			ticks = js_reset_ms_dumping * 1000000ULL;
-			do_div(ticks, kbdev->js_data.scheduling_period_ns);
-			kbdev->js_reset_ticks_dumping = ticks;
-		} else {
-			kbdev->js_reset_ticks_dumping = -1;
-		}
-
-		kbdev->js_timeouts_updated = true;
-
-		dev_dbg(kbdev->dev, "Overriding JS_SOFT_STOP_TICKS with %lu ticks (%lu ms)\n",
-				(unsigned long)kbdev->js_soft_stop_ticks,
-				js_soft_stop_ms);
-		dev_dbg(kbdev->dev, "Overriding JS_SOFT_STOP_TICKS_CL with %lu ticks (%lu ms)\n",
-				(unsigned long)kbdev->js_soft_stop_ticks_cl,
-				js_soft_stop_ms_cl);
-		dev_dbg(kbdev->dev, "Overriding JS_HARD_STOP_TICKS_SS with %lu ticks (%lu ms)\n",
-				(unsigned long)kbdev->js_hard_stop_ticks_ss,
-				js_hard_stop_ms_ss);
-		dev_dbg(kbdev->dev, "Overriding JS_HARD_STOP_TICKS_CL with %lu ticks (%lu ms)\n",
-				(unsigned long)kbdev->js_hard_stop_ticks_cl,
-				js_hard_stop_ms_cl);
-		dev_dbg(kbdev->dev, "Overriding JS_HARD_STOP_TICKS_DUMPING with %lu ticks (%lu ms)\n",
-				(unsigned long)
-					kbdev->js_hard_stop_ticks_dumping,
-				js_hard_stop_ms_dumping);
-		dev_dbg(kbdev->dev, "Overriding JS_RESET_TICKS_SS with %lu ticks (%lu ms)\n",
-				(unsigned long)kbdev->js_reset_ticks_ss,
-				js_reset_ms_ss);
-		dev_dbg(kbdev->dev, "Overriding JS_RESET_TICKS_CL with %lu ticks (%lu ms)\n",
-				(unsigned long)kbdev->js_reset_ticks_cl,
-				js_reset_ms_cl);
-		dev_dbg(kbdev->dev, "Overriding JS_RESET_TICKS_DUMPING with %lu ticks (%lu ms)\n",
-				(unsigned long)kbdev->js_reset_ticks_dumping,
-				js_reset_ms_dumping);
+		struct kbasep_js_device_data *js_data = &kbdev->js_data;
+		unsigned long flags;
+
+		spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags);
+
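+		/*
+		 * Apply each "_ms" value: positive values are converted to
+		 * ticks, negative values restore the default and zero keeps
+		 * the current setting (see timeout_ms_to_ticks()).
+		 */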
+#define UPDATE_TIMEOUT(ticks_name, ms_name, default) do {\
+	js_data->ticks_name = timeout_ms_to_ticks(kbdev, ms_name, \
+			default, js_data->ticks_name); \
+	dev_dbg(kbdev->dev, "Overriding " #ticks_name \
+			" with %lu ticks (%lu ms)\n", \
+			(unsigned long)js_data->ticks_name, \
+			ms_name); \
+	} while (0)
+
+		UPDATE_TIMEOUT(soft_stop_ticks, js_soft_stop_ms,
+				DEFAULT_JS_SOFT_STOP_TICKS);
+		UPDATE_TIMEOUT(soft_stop_ticks_cl, js_soft_stop_ms_cl,
+				DEFAULT_JS_SOFT_STOP_TICKS_CL);
+		UPDATE_TIMEOUT(hard_stop_ticks_ss, js_hard_stop_ms_ss,
+				kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408) ?
+				DEFAULT_JS_HARD_STOP_TICKS_SS_8408 :
+				DEFAULT_JS_HARD_STOP_TICKS_SS);
+		UPDATE_TIMEOUT(hard_stop_ticks_cl, js_hard_stop_ms_cl,
+				DEFAULT_JS_HARD_STOP_TICKS_CL);
+		UPDATE_TIMEOUT(hard_stop_ticks_dumping,
+				js_hard_stop_ms_dumping,
+				DEFAULT_JS_HARD_STOP_TICKS_DUMPING);
+		UPDATE_TIMEOUT(gpu_reset_ticks_ss, js_reset_ms_ss,
+				kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408) ?
+				DEFAULT_JS_RESET_TICKS_SS_8408 :
+				DEFAULT_JS_RESET_TICKS_SS);
+		UPDATE_TIMEOUT(gpu_reset_ticks_cl, js_reset_ms_cl,
+				DEFAULT_JS_RESET_TICKS_CL);
+		UPDATE_TIMEOUT(gpu_reset_ticks_dumping, js_reset_ms_dumping,
+				DEFAULT_JS_RESET_TICKS_DUMPING);
+
+		kbase_js_set_timeouts(kbdev);
+
+		spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags);
 
 		return count;
 	}
@@ -2131,6 +2038,16 @@ static ssize_t set_js_timeouts(struct device *dev, struct device_attribute *attr
 	return -EINVAL;
 }
 
+static unsigned long get_js_timeout_in_ms(
+		u32 scheduling_period_ns,
+		u32 ticks)
+{
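+	/* Convert a stored tick count back to milliseconds for display. */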
+	u64 ms = (u64)ticks * scheduling_period_ns;
+
+	do_div(ms, 1000000UL);
+	return ms;
+}
+
 /** Show callback for the @c js_timeouts sysfs file.
  *
  * This function is called to get the contents of the @c js_timeouts sysfs
@@ -2147,7 +2064,6 @@ static ssize_t show_js_timeouts(struct device *dev, struct device_attribute *att
 {
 	struct kbase_device *kbdev;
 	ssize_t ret;
-	u64 ms;
 	unsigned long js_soft_stop_ms;
 	unsigned long js_soft_stop_ms_cl;
 	unsigned long js_hard_stop_ms_ss;
@@ -2156,90 +2072,34 @@ static ssize_t show_js_timeouts(struct device *dev, struct device_attribute *att
 	unsigned long js_reset_ms_ss;
 	unsigned long js_reset_ms_cl;
 	unsigned long js_reset_ms_dumping;
-	unsigned long ticks;
 	u32 scheduling_period_ns;
 
 	kbdev = to_kbase_device(dev);
 	if (!kbdev)
 		return -ENODEV;
 
-	/* If no contexts have been scheduled since js_timeouts was last written
-	 * to, the new timeouts might not have been latched yet. So check if an
-	 * update is pending and use the new values if necessary. */
-	if (kbdev->js_timeouts_updated && kbdev->js_scheduling_period_ns > 0)
-		scheduling_period_ns = kbdev->js_scheduling_period_ns;
-	else
-		scheduling_period_ns = kbdev->js_data.scheduling_period_ns;
+	scheduling_period_ns = kbdev->js_data.scheduling_period_ns;
 
-	if (kbdev->js_timeouts_updated && kbdev->js_soft_stop_ticks > 0)
-		ticks = kbdev->js_soft_stop_ticks;
-	else
-		ticks = kbdev->js_data.soft_stop_ticks;
-	ms = (u64)ticks * scheduling_period_ns;
-	do_div(ms, 1000000UL);
-	js_soft_stop_ms = (unsigned long)ms;
+#define GET_TIMEOUT(name) get_js_timeout_in_ms(\
+		scheduling_period_ns, \
+		kbdev->js_data.name)
 
-	if (kbdev->js_timeouts_updated && kbdev->js_soft_stop_ticks_cl > 0)
-		ticks = kbdev->js_soft_stop_ticks_cl;
-	else
-		ticks = kbdev->js_data.soft_stop_ticks_cl;
-	ms = (u64)ticks * scheduling_period_ns;
-	do_div(ms, 1000000UL);
-	js_soft_stop_ms_cl = (unsigned long)ms;
+	js_soft_stop_ms = GET_TIMEOUT(soft_stop_ticks);
+	js_soft_stop_ms_cl = GET_TIMEOUT(soft_stop_ticks_cl);
+	js_hard_stop_ms_ss = GET_TIMEOUT(hard_stop_ticks_ss);
+	js_hard_stop_ms_cl = GET_TIMEOUT(hard_stop_ticks_cl);
+	js_hard_stop_ms_dumping = GET_TIMEOUT(hard_stop_ticks_dumping);
+	js_reset_ms_ss = GET_TIMEOUT(gpu_reset_ticks_ss);
+	js_reset_ms_cl = GET_TIMEOUT(gpu_reset_ticks_cl);
+	js_reset_ms_dumping = GET_TIMEOUT(gpu_reset_ticks_dumping);
 
-	if (kbdev->js_timeouts_updated && kbdev->js_hard_stop_ticks_ss > 0)
-		ticks = kbdev->js_hard_stop_ticks_ss;
-	else
-		ticks = kbdev->js_data.hard_stop_ticks_ss;
-	ms = (u64)ticks * scheduling_period_ns;
-	do_div(ms, 1000000UL);
-	js_hard_stop_ms_ss = (unsigned long)ms;
+#undef GET_TIMEOUT
 
-	if (kbdev->js_timeouts_updated && kbdev->js_hard_stop_ticks_cl > 0)
-		ticks = kbdev->js_hard_stop_ticks_cl;
-	else
-		ticks = kbdev->js_data.hard_stop_ticks_cl;
-	ms = (u64)ticks * scheduling_period_ns;
-	do_div(ms, 1000000UL);
-	js_hard_stop_ms_cl = (unsigned long)ms;
-
-	if (kbdev->js_timeouts_updated && kbdev->js_hard_stop_ticks_dumping > 0)
-		ticks = kbdev->js_hard_stop_ticks_dumping;
-	else
-		ticks = kbdev->js_data.hard_stop_ticks_dumping;
-	ms = (u64)ticks * scheduling_period_ns;
-	do_div(ms, 1000000UL);
-	js_hard_stop_ms_dumping = (unsigned long)ms;
-
-	if (kbdev->js_timeouts_updated && kbdev->js_reset_ticks_ss > 0)
-		ticks = kbdev->js_reset_ticks_ss;
-	else
-		ticks = kbdev->js_data.gpu_reset_ticks_ss;
-	ms = (u64)ticks * scheduling_period_ns;
-	do_div(ms, 1000000UL);
-	js_reset_ms_ss = (unsigned long)ms;
-
-	if (kbdev->js_timeouts_updated && kbdev->js_reset_ticks_cl > 0)
-		ticks = kbdev->js_reset_ticks_cl;
-	else
-		ticks = kbdev->js_data.gpu_reset_ticks_cl;
-	ms = (u64)ticks * scheduling_period_ns;
-	do_div(ms, 1000000UL);
-	js_reset_ms_cl = (unsigned long)ms;
-
-	if (kbdev->js_timeouts_updated && kbdev->js_reset_ticks_dumping > 0)
-		ticks = kbdev->js_reset_ticks_dumping;
-	else
-		ticks = kbdev->js_data.gpu_reset_ticks_dumping;
-	ms = (u64)ticks * scheduling_period_ns;
-	do_div(ms, 1000000UL);
-	js_reset_ms_dumping = (unsigned long)ms;
-
-	ret = scnprintf(buf, PAGE_SIZE, "%lu %lu %lu %lu %lu %lu %lu %lu\n",
-			js_soft_stop_ms, js_soft_stop_ms_cl,
-			js_hard_stop_ms_ss, js_hard_stop_ms_cl,
-			js_hard_stop_ms_dumping, js_reset_ms_ss,
-			js_reset_ms_cl, js_reset_ms_dumping);
+	ret = scnprintf(buf, PAGE_SIZE, "%lu %lu %lu %lu %lu %lu %lu %lu\n",
+			js_soft_stop_ms, js_soft_stop_ms_cl,
+			js_hard_stop_ms_ss, js_hard_stop_ms_cl,
+			js_hard_stop_ms_dumping, js_reset_ms_ss,
+			js_reset_ms_cl, js_reset_ms_dumping);
 
 	if (ret >= PAGE_SIZE) {
 		buf[PAGE_SIZE - 2] = '\n';
@@ -2264,6 +2124,16 @@ static ssize_t show_js_timeouts(struct device *dev, struct device_attribute *att
  */
 static DEVICE_ATTR(js_timeouts, S_IRUGO | S_IWUSR, show_js_timeouts, set_js_timeouts);
 
+static u32 get_new_js_timeout(
+		u32 old_period,
+		u32 old_ticks,
+		u32 new_scheduling_period_ns)
+{
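+	/*
+	 * Rescale the tick count so the wall-clock timeout stays the same
+	 * under the new scheduling period, clamping to at least one tick.
+	 */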
+	u64 ticks = (u64)old_period * (u64)old_ticks;
+
+	do_div(ticks, new_scheduling_period_ns);
+
+	return ticks ? ticks : 1;
+}
+
 /**
  * set_js_scheduling_period - Store callback for the js_scheduling_period sysfs
  *                            file
@@ -2286,12 +2156,15 @@ static ssize_t set_js_scheduling_period(struct device *dev,
 	unsigned int js_scheduling_period;
 	u32 new_scheduling_period_ns;
 	u32 old_period;
-	u64 ticks;
+	struct kbasep_js_device_data *js_data;
+	unsigned long flags;
 
 	kbdev = to_kbase_device(dev);
 	if (!kbdev)
 		return -ENODEV;
 
+	js_data = &kbdev->js_data;
+
 	ret = kstrtouint(buf, 0, &js_scheduling_period);
 	if (ret || !js_scheduling_period) {
 		dev_err(kbdev->dev, "Couldn't process js_scheduling_period write operation.\n"
@@ -2302,86 +2175,39 @@ static ssize_t set_js_scheduling_period(struct device *dev,
 	new_scheduling_period_ns = js_scheduling_period * 1000000;
 
 	/* Update scheduling timeouts */
-	mutex_lock(&kbdev->js_data.runpool_mutex);
+	mutex_lock(&js_data->runpool_mutex);
+	spin_lock_irqsave(&js_data->runpool_irq.lock, flags);
 
 	/* If no contexts have been scheduled since js_timeouts was last written
 	 * to, the new timeouts might not have been latched yet. So check if an
 	 * update is pending and use the new values if necessary. */
 
 	/* Use previous 'new' scheduling period as a base if present. */
-	if (kbdev->js_timeouts_updated && kbdev->js_scheduling_period_ns)
-		old_period = kbdev->js_scheduling_period_ns;
-	else
-		old_period = kbdev->js_data.scheduling_period_ns;
-
-	if (kbdev->js_timeouts_updated && kbdev->js_soft_stop_ticks > 0)
-		ticks = (u64)kbdev->js_soft_stop_ticks * old_period;
-	else
-		ticks = (u64)kbdev->js_data.soft_stop_ticks *
-				kbdev->js_data.scheduling_period_ns;
-	do_div(ticks, new_scheduling_period_ns);
-	kbdev->js_soft_stop_ticks = ticks ? ticks : 1;
-
-	if (kbdev->js_timeouts_updated && kbdev->js_soft_stop_ticks_cl > 0)
-		ticks = (u64)kbdev->js_soft_stop_ticks_cl * old_period;
-	else
-		ticks = (u64)kbdev->js_data.soft_stop_ticks_cl *
-				kbdev->js_data.scheduling_period_ns;
-	do_div(ticks, new_scheduling_period_ns);
-	kbdev->js_soft_stop_ticks_cl = ticks ? ticks : 1;
-
-	if (kbdev->js_timeouts_updated && kbdev->js_hard_stop_ticks_ss > 0)
-		ticks = (u64)kbdev->js_hard_stop_ticks_ss * old_period;
-	else
-		ticks = (u64)kbdev->js_data.hard_stop_ticks_ss *
-				kbdev->js_data.scheduling_period_ns;
-	do_div(ticks, new_scheduling_period_ns);
-	kbdev->js_hard_stop_ticks_ss = ticks ? ticks : 1;
-
-	if (kbdev->js_timeouts_updated && kbdev->js_hard_stop_ticks_cl > 0)
-		ticks = (u64)kbdev->js_hard_stop_ticks_cl * old_period;
-	else
-		ticks = (u64)kbdev->js_data.hard_stop_ticks_cl *
-				kbdev->js_data.scheduling_period_ns;
-	do_div(ticks, new_scheduling_period_ns);
-	kbdev->js_hard_stop_ticks_cl = ticks ? ticks : 1;
+	old_period = js_data->scheduling_period_ns;
 
-	if (kbdev->js_timeouts_updated && kbdev->js_hard_stop_ticks_dumping > 0)
-		ticks = (u64)kbdev->js_hard_stop_ticks_dumping * old_period;
-	else
-		ticks = (u64)kbdev->js_data.hard_stop_ticks_dumping *
-				kbdev->js_data.scheduling_period_ns;
-	do_div(ticks, new_scheduling_period_ns);
-	kbdev->js_hard_stop_ticks_dumping = ticks ? ticks : 1;
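+	/* Rescale each stored timeout against the new scheduling period. */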
+#define SET_TIMEOUT(name) \
+		(js_data->name = get_new_js_timeout(\
+				old_period, \
+				kbdev->js_data.name, \
+				new_scheduling_period_ns))
 
-	if (kbdev->js_timeouts_updated && kbdev->js_reset_ticks_ss > 0)
-		ticks = (u64)kbdev->js_reset_ticks_ss * old_period;
-	else
-		ticks = (u64)kbdev->js_data.gpu_reset_ticks_ss *
-				kbdev->js_data.scheduling_period_ns;
-	do_div(ticks, new_scheduling_period_ns);
-	kbdev->js_reset_ticks_ss = ticks ? ticks : 1;
+	SET_TIMEOUT(soft_stop_ticks);
+	SET_TIMEOUT(soft_stop_ticks_cl);
+	SET_TIMEOUT(hard_stop_ticks_ss);
+	SET_TIMEOUT(hard_stop_ticks_cl);
+	SET_TIMEOUT(hard_stop_ticks_dumping);
+	SET_TIMEOUT(gpu_reset_ticks_ss);
+	SET_TIMEOUT(gpu_reset_ticks_cl);
+	SET_TIMEOUT(gpu_reset_ticks_dumping);
 
-	if (kbdev->js_timeouts_updated && kbdev->js_reset_ticks_cl > 0)
-		ticks = (u64)kbdev->js_reset_ticks_cl * old_period;
-	else
-		ticks = (u64)kbdev->js_data.gpu_reset_ticks_cl *
-				kbdev->js_data.scheduling_period_ns;
-	do_div(ticks, new_scheduling_period_ns);
-	kbdev->js_reset_ticks_cl = ticks ? ticks : 1;
+#undef SET_TIMEOUT
 
-	if (kbdev->js_timeouts_updated && kbdev->js_reset_ticks_dumping > 0)
-		ticks = (u64)kbdev->js_reset_ticks_dumping * old_period;
-	else
-		ticks = (u64)kbdev->js_data.gpu_reset_ticks_dumping *
-				kbdev->js_data.scheduling_period_ns;
-	do_div(ticks, new_scheduling_period_ns);
-	kbdev->js_reset_ticks_dumping = ticks ? ticks : 1;
+	js_data->scheduling_period_ns = new_scheduling_period_ns;
 
-	kbdev->js_scheduling_period_ns = new_scheduling_period_ns;
-	kbdev->js_timeouts_updated = true;
+	kbase_js_set_timeouts(kbdev);
 
-	mutex_unlock(&kbdev->js_data.runpool_mutex);
+	spin_unlock_irqrestore(&js_data->runpool_irq.lock, flags);
+	mutex_unlock(&js_data->runpool_mutex);
 
 	dev_dbg(kbdev->dev, "JS scheduling period: %dms\n",
 			js_scheduling_period);
@@ -2412,10 +2238,7 @@ static ssize_t show_js_scheduling_period(struct device *dev,
 	if (!kbdev)
 		return -ENODEV;
 
-	if (kbdev->js_timeouts_updated && kbdev->js_scheduling_period_ns > 0)
-		period = kbdev->js_scheduling_period_ns;
-	else
-		period = kbdev->js_data.scheduling_period_ns;
+	period = kbdev->js_data.scheduling_period_ns;
 
 	ret = scnprintf(buf, PAGE_SIZE, "%d\n",
 			period / 1000000);
@@ -2715,6 +2538,8 @@ static ssize_t kbase_show_gpuinfo(struct device *dev,
 		{ .id = GPU_ID_PI_T83X, .name = "Mali-T83x" },
 		{ .id = GPU_ID_PI_T86X, .name = "Mali-T86x" },
 		{ .id = GPU_ID_PI_TFRX, .name = "Mali-T88x" },
+		{ .id = GPU_ID2_PRODUCT_TMIX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+		  .name = "Mali-G71" },
 	};
 	const char *product_name = "(Unknown Mali GPU)";
 	struct kbase_device *kbdev;
@@ -3042,26 +2867,42 @@ static DEVICE_ATTR(mem_pool_max_size, S_IRUGO | S_IWUSR, show_mem_pool_max_size,
 		set_mem_pool_max_size);
 
 
-
-static int kbasep_secure_mode_init(struct kbase_device *kbdev)
+static int kbasep_protected_mode_enter(struct kbase_device *kbdev)
 {
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+		GPU_COMMAND_SET_PROTECTED_MODE, NULL);
+	return 0;
+}
 
-#ifdef SECURE_CALLBACKS
-	kbdev->secure_ops = SECURE_CALLBACKS;
-	kbdev->secure_mode_support = false;
+static bool kbasep_protected_mode_supported(struct kbase_device *kbdev)
+{
+	return true;
+}
 
-	if (kbdev->secure_ops) {
-		int err;
+static struct kbase_protected_ops kbasep_protected_ops = {
+	.protected_mode_enter = kbasep_protected_mode_enter,
+	.protected_mode_reset = NULL,
+	.protected_mode_supported = kbasep_protected_mode_supported,
+};
 
-		/* Make sure secure mode is disabled on startup */
-		err = kbdev->secure_ops->secure_mode_disable(kbdev);
+static void kbasep_protected_mode_init(struct kbase_device *kbdev)
+{
+	kbdev->protected_ops = NULL;
 
-		/* secure_mode_disable() returns -EINVAL if not supported */
-		kbdev->secure_mode_support = (err != -EINVAL);
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) {
+		/* Use native protected ops */
+		kbdev->protected_ops = &kbasep_protected_ops;
 	}
+#ifdef PROTECTED_CALLBACKS
+	else
+		kbdev->protected_ops = PROTECTED_CALLBACKS;
 #endif
 
-	return 0;
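+	/* Query the selected ops for whether protected mode can be used. */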
+	if (kbdev->protected_ops)
+		kbdev->protected_mode_support =
+				kbdev->protected_ops->protected_mode_supported(kbdev);
+	else
+		kbdev->protected_mode_support = false;
 }
 
 #ifdef CONFIG_MALI_NO_MALI
@@ -3100,11 +2941,146 @@ static int kbase_common_reg_map(struct kbase_device *kbdev)
 
 static void kbase_common_reg_unmap(struct kbase_device * const kbdev)
 {
-	iounmap(kbdev->reg);
-	release_mem_region(kbdev->reg_start, kbdev->reg_size);
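+	/* Tolerate being called more than once or without a prior mapping. */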
+	if (kbdev->reg) {
+		iounmap(kbdev->reg);
+		release_mem_region(kbdev->reg_start, kbdev->reg_size);
+		kbdev->reg = NULL;
+		kbdev->reg_start = 0;
+		kbdev->reg_size = 0;
+	}
 }
 #endif /* CONFIG_MALI_NO_MALI */
 
+static int registers_map(struct kbase_device * const kbdev)
+{
+	/* the first memory resource is the physical address of the GPU
+	 * registers */
+	struct platform_device *pdev = to_platform_device(kbdev->dev);
+	struct resource *reg_res;
+	int err;
+
+	reg_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!reg_res) {
+		dev_err(kbdev->dev, "Invalid register resource\n");
+		return -ENOENT;
+	}
+
+	kbdev->reg_start = reg_res->start;
+	kbdev->reg_size = resource_size(reg_res);
+
+	err = kbase_common_reg_map(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Failed to map registers\n");
+		return err;
+	}
+
+	return 0;
+}
+
+static void registers_unmap(struct kbase_device *kbdev)
+{
+	kbase_common_reg_unmap(kbdev);
+}
+
+static int power_control_init(struct platform_device *pdev)
+{
+	struct kbase_device *kbdev = to_kbase_device(&pdev->dev);
+	int err = 0;
+
+	if (!kbdev)
+		return -ENODEV;
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \
+			&& defined(CONFIG_REGULATOR)
+	kbdev->regulator = regulator_get_optional(kbdev->dev, "mali");
+	if (IS_ERR_OR_NULL(kbdev->regulator)) {
+		err = PTR_ERR(kbdev->regulator);
+		kbdev->regulator = NULL;
+		if (err == -EPROBE_DEFER) {
+			dev_err(&pdev->dev, "Failed to get regulator\n");
+			return err;
+		}
+		dev_info(kbdev->dev,
+			"Continuing without Mali regulator control\n");
+		/* Allow probe to continue without regulator */
+	}
+#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */
+
+	kbdev->clock = clk_get(kbdev->dev, "clk_mali");
+	if (IS_ERR_OR_NULL(kbdev->clock)) {
+		err = PTR_ERR(kbdev->clock);
+		kbdev->clock = NULL;
+		if (err == -EPROBE_DEFER) {
+			dev_err(&pdev->dev, "Failed to get clock\n");
+			goto fail;
+		}
+		dev_info(kbdev->dev, "Continuing without Mali clock control\n");
+		/* Allow probe to continue without clock. */
+	} else {
+		err = clk_prepare(kbdev->clock);
+		if (err) {
+			dev_err(kbdev->dev,
+				"Failed to prepare and enable clock (%d)\n",
+				err);
+			goto fail;
+		}
+	}
+
+#if defined(CONFIG_OF) && defined(CONFIG_PM_OPP)
+	/* Register the OPPs if they are available in device tree */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0))
+	err = dev_pm_opp_of_add_table(kbdev->dev);
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+	err = of_init_opp_table(kbdev->dev);
+#else
+	err = 0;
+#endif /* LINUX_VERSION_CODE */
+	if (err)
+		dev_dbg(kbdev->dev, "OPP table not found\n");
+#endif /* CONFIG_OF && CONFIG_PM_OPP */
+
+	return 0;
+
+fail:
+	if (kbdev->clock != NULL) {
+		clk_put(kbdev->clock);
+		kbdev->clock = NULL;
+	}
+
+#ifdef CONFIG_REGULATOR
+	if (NULL != kbdev->regulator) {
+		regulator_put(kbdev->regulator);
+		kbdev->regulator = NULL;
+	}
+#endif
+
+	return err;
+}
+
+static void power_control_term(struct kbase_device *kbdev)
+{
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0))
+	dev_pm_opp_of_remove_table(kbdev->dev);
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0))
+	of_free_opp_table(kbdev->dev);
+#endif
+
+	if (kbdev->clock) {
+		clk_unprepare(kbdev->clock);
+		clk_put(kbdev->clock);
+		kbdev->clock = NULL;
+	}
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \
+			&& defined(CONFIG_REGULATOR)
+	if (kbdev->regulator) {
+		regulator_put(kbdev->regulator);
+		kbdev->regulator = NULL;
+	}
+#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */
+}
 
 #ifdef CONFIG_DEBUG_FS
 
@@ -3145,30 +3121,6 @@ MAKE_QUIRK_ACCESSORS(mmu);
 
 #endif /* KBASE_GPU_RESET_EN */
 
-static int kbasep_secure_mode_seq_show(struct seq_file *m, void *p)
-{
-	struct kbase_device *kbdev = m->private;
-
-	if (!kbdev->secure_mode_support)
-		seq_puts(m, "unsupported\n");
-	else
-		seq_printf(m, "%s\n", kbdev->secure_mode ? "Y" : "N");
-
-	return 0;
-}
-
-static int kbasep_secure_mode_debugfs_open(struct inode *in, struct file *file)
-{
-	return single_open(file, kbasep_secure_mode_seq_show, in->i_private);
-}
-
-static const struct file_operations kbasep_secure_mode_debugfs_fops = {
-	.open = kbasep_secure_mode_debugfs_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = single_release,
-};
-
 static int kbase_device_debugfs_init(struct kbase_device *kbdev)
 {
 	struct dentry *debugfs_ctx_defaults_directory;
@@ -3204,6 +3156,7 @@ static int kbase_device_debugfs_init(struct kbase_device *kbdev)
 
 	kbase_debug_job_fault_debugfs_init(kbdev);
 	kbasep_gpu_memory_debugfs_init(kbdev);
+	kbase_as_fault_debugfs_init(kbdev);
 #if KBASE_GPU_RESET_EN
 	debugfs_create_file("quirks_sc", 0644,
 			kbdev->mali_debugfs_directory, kbdev,
@@ -3234,10 +3187,6 @@ static int kbase_device_debugfs_init(struct kbase_device *kbdev)
 	kbasep_trace_timeline_debugfs_init(kbdev);
 #endif /* CONFIG_MALI_TRACE_TIMELINE */
 
-	debugfs_create_file("secure_mode", S_IRUGO,
-			kbdev->mali_debugfs_directory, kbdev,
-			&kbasep_secure_mode_debugfs_fops);
-
 	return 0;
 
 out:
@@ -3316,535 +3265,412 @@ static void kbase_logging_started_cb(void *data)
 }
 #endif
 
+static struct attribute *kbase_attrs[] = {
+#ifdef CONFIG_MALI_DEBUG
+	&dev_attr_debug_command.attr,
+	&dev_attr_js_softstop_always.attr,
+#endif
+#if !MALI_CUSTOMER_RELEASE
+	&dev_attr_force_replay.attr,
+#endif
+	&dev_attr_js_timeouts.attr,
+	&dev_attr_soft_job_timeout.attr,
+	&dev_attr_gpuinfo.attr,
+	&dev_attr_dvfs_period.attr,
+	&dev_attr_pm_poweroff.attr,
+	&dev_attr_reset_timeout.attr,
+	&dev_attr_js_scheduling_period.attr,
+	&dev_attr_power_policy.attr,
+	&dev_attr_core_availability_policy.attr,
+	&dev_attr_core_mask.attr,
+	&dev_attr_mem_pool_size.attr,
+	&dev_attr_mem_pool_max_size.attr,
+	NULL
+};
+
+static const struct attribute_group kbase_attr_group = {
+	.attrs = kbase_attrs,
+};
 
-static int kbase_common_device_init(struct kbase_device *kbdev)
+static int kbase_platform_device_remove(struct platform_device *pdev)
 {
-	int err;
-	struct mali_base_gpu_core_props *core_props;
-	enum {
-		inited_mem = (1u << 0),
-		inited_js = (1u << 1),
-		inited_pm_runtime_init = (1u << 6),
-#ifdef CONFIG_MALI_DEVFREQ
-		inited_devfreq = (1u << 9),
-#endif /* CONFIG_MALI_DEVFREQ */
-#ifdef CONFIG_MALI_MIPE_ENABLED
-		inited_tlstream = (1u << 10),
-#endif /* CONFIG_MALI_MIPE_ENABLED */
-		inited_backend_early = (1u << 11),
-		inited_backend_late = (1u << 12),
-		inited_device = (1u << 13),
-		inited_vinstr = (1u << 19),
-		inited_ipa = (1u << 20),
-		inited_job_fault = (1u << 21)
-	};
+	struct kbase_device *kbdev = to_kbase_device(&pdev->dev);
+	const struct list_head *dev_list;
 
-	int inited = 0;
-	u32 gpu_id;
-#if defined(CONFIG_MALI_PLATFORM_VEXPRESS)
-	u32 ve_logic_tile = 0;
-#endif /* CONFIG_MALI_PLATFORM_VEXPRESS */
+	if (!kbdev)
+		return -ENODEV;
 
-	dev_set_drvdata(kbdev->dev, kbdev);
+#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
+	if (kbdev->inited_subsys & inited_buslogger) {
+		bl_core_client_unregister(kbdev->buslogger);
+		kbdev->inited_subsys &= ~inited_buslogger;
+	}
+#endif
 
-	err = kbase_backend_early_init(kbdev);
-	if (err)
-		goto out_partial;
-	inited |= inited_backend_early;
+	if (kbdev->inited_subsys & inited_sysfs_group) {
+		sysfs_remove_group(&kbdev->dev->kobj, &kbase_attr_group);
+		kbdev->inited_subsys &= ~inited_sysfs_group;
+	}
 
-	scnprintf(kbdev->devname, DEVNAME_SIZE, "%s%d", kbase_drv_name,
-			kbase_dev_nr);
+	if (kbdev->inited_subsys & inited_dev_list) {
+		dev_list = kbase_dev_list_get();
+		list_del(&kbdev->entry);
+		kbase_dev_list_put(dev_list);
+		kbdev->inited_subsys &= ~inited_dev_list;
+	}
 
-	kbase_disjoint_init(kbdev);
+	if (kbdev->inited_subsys & inited_misc_register) {
+		misc_deregister(&kbdev->mdev);
+		kbdev->inited_subsys &= ~inited_misc_register;
+	}
 
-	/* obtain min/max configured gpu frequencies */
-	core_props = &(kbdev->gpu_props.props.core_props);
+	if (kbdev->inited_subsys & inited_get_device) {
+		put_device(kbdev->dev);
+		kbdev->inited_subsys &= ~inited_get_device;
+	}
 
-	/* For versatile express platforms, min and max values of GPU frequency
-	 * depend on the type of the logic tile; these values may not be known
-	 * at the build time so in some cases a platform config file with wrong
-	 * GPU freguency values may be included; to ensure the correct value of
-	 * min and max GPU frequency is obtained, the type of the logic tile is
-	 * read from the corresponding register on the platform and frequency
-	 * values assigned accordingly.*/
-#if defined(CONFIG_MALI_PLATFORM_VEXPRESS)
-	ve_logic_tile = kbase_get_platform_logic_tile_type();
-
-	switch (ve_logic_tile) {
-	case 0x217:
-		/* Virtex 6, HBI0217 */
-		core_props->gpu_freq_khz_min = VE_VIRTEX6_GPU_FREQ_MIN;
-		core_props->gpu_freq_khz_max = VE_VIRTEX6_GPU_FREQ_MAX;
-		break;
-	case 0x247:
-		/* Virtex 7, HBI0247 */
-		core_props->gpu_freq_khz_min = VE_VIRTEX7_GPU_FREQ_MIN;
-		core_props->gpu_freq_khz_max = VE_VIRTEX7_GPU_FREQ_MAX;
-		break;
-	default:
-		/* all other logic tiles, i.e., Virtex 5 HBI0192
-		 * or unsuccessful reading from the platform -
-		 * fall back to the config_platform default */
-		core_props->gpu_freq_khz_min = GPU_FREQ_KHZ_MIN;
-		core_props->gpu_freq_khz_max = GPU_FREQ_KHZ_MAX;
-		break;
+	if (kbdev->inited_subsys & inited_debugfs) {
+		kbase_device_debugfs_term(kbdev);
+		kbdev->inited_subsys &= ~inited_debugfs;
 	}
-#else
-		core_props->gpu_freq_khz_min = GPU_FREQ_KHZ_MIN;
-		core_props->gpu_freq_khz_max = GPU_FREQ_KHZ_MAX;
-#endif /* CONFIG_MALI_PLATFORM_VEXPRESS */
 
-	kbdev->gpu_props.irq_throttle_time_us = DEFAULT_IRQ_THROTTLE_TIME_US;
+	if (kbdev->inited_subsys & inited_job_fault) {
+		kbase_debug_job_fault_dev_term(kbdev);
+		kbdev->inited_subsys &= ~inited_job_fault;
+	}
 
-	err = kbase_device_init(kbdev);
-	if (err) {
-		dev_err(kbdev->dev, "Can't initialize device (%d)\n", err);
-		goto out_partial;
+#ifndef CONFIG_MALI_PRFCNT_SET_SECONDARY
+	if (kbdev->inited_subsys & inited_ipa) {
+		kbase_ipa_term(kbdev->ipa_ctx);
+		kbdev->inited_subsys &= ~inited_ipa;
 	}
+#endif /* CONFIG_MALI_PRFCNT_SET_SECONDARY */
 
-	inited |= inited_device;
+	if (kbdev->inited_subsys & inited_vinstr) {
+		kbase_vinstr_term(kbdev->vinstr_ctx);
+		kbdev->inited_subsys &= ~inited_vinstr;
+	}
 
-	if (kbdev->pm.callback_power_runtime_init) {
-		err = kbdev->pm.callback_power_runtime_init(kbdev);
-		if (err)
-			goto out_partial;
+#ifdef CONFIG_MALI_DEVFREQ
+	if (kbdev->inited_subsys & inited_devfreq) {
+		kbase_devfreq_term(kbdev);
+		kbdev->inited_subsys &= ~inited_devfreq;
+	}
+#endif
 
-		inited |= inited_pm_runtime_init;
+	if (kbdev->inited_subsys & inited_backend_late) {
+		kbase_backend_late_term(kbdev);
+		kbdev->inited_subsys &= ~inited_backend_late;
 	}
 
-	err = kbase_mem_init(kbdev);
-	if (err)
-		goto out_partial;
+	if (kbdev->inited_subsys & inited_tlstream) {
+		kbase_tlstream_term();
+		kbdev->inited_subsys &= ~inited_tlstream;
+	}
 
-	inited |= inited_mem;
+	/* Bring job and mem sys to a halt before we continue termination */
 
-	gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
-	gpu_id &= GPU_ID_VERSION_PRODUCT_ID;
-	gpu_id = gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+	if (kbdev->inited_subsys & inited_js)
+		kbasep_js_devdata_halt(kbdev);
 
-	kbase_device_coherency_init(kbdev, gpu_id);
+	if (kbdev->inited_subsys & inited_mem)
+		kbase_mem_halt(kbdev);
 
-	err = kbasep_secure_mode_init(kbdev);
-	if (err)
-		goto out_partial;
+	if (kbdev->inited_subsys & inited_js) {
+		kbasep_js_devdata_term(kbdev);
+		kbdev->inited_subsys &= ~inited_js;
+	}
 
-	err = kbasep_js_devdata_init(kbdev);
-	if (err)
-		goto out_partial;
+	if (kbdev->inited_subsys & inited_mem) {
+		kbase_mem_term(kbdev);
+		kbdev->inited_subsys &= ~inited_mem;
+	}
 
-	inited |= inited_js;
+	if (kbdev->inited_subsys & inited_pm_runtime_init) {
+		kbdev->pm.callback_power_runtime_term(kbdev);
+		kbdev->inited_subsys &= ~inited_pm_runtime_init;
+	}
 
-#ifdef CONFIG_MALI_MIPE_ENABLED
-	err = kbase_tlstream_init();
-	if (err) {
-		dev_err(kbdev->dev, "Couldn't initialize timeline stream\n");
-		goto out_partial;
+	if (kbdev->inited_subsys & inited_device) {
+		kbase_device_term(kbdev);
+		kbdev->inited_subsys &= ~inited_device;
 	}
-	inited |= inited_tlstream;
-#endif /* CONFIG_MALI_MIPE_ENABLED */
 
-	err = kbase_backend_late_init(kbdev);
-	if (err)
-		goto out_partial;
-	inited |= inited_backend_late;
-
-#ifdef CONFIG_MALI_DEVFREQ
-	err = kbase_devfreq_init(kbdev);
-	if (err) {
-		dev_err(kbdev->dev, "Couldn't initialize devfreq\n");
-		goto out_partial;
+	if (kbdev->inited_subsys & inited_backend_early) {
+		kbase_backend_early_term(kbdev);
+		kbdev->inited_subsys &= ~inited_backend_early;
 	}
-	inited |= inited_devfreq;
-#endif /* CONFIG_MALI_DEVFREQ */
 
-	kbdev->vinstr_ctx = kbase_vinstr_init(kbdev);
-	if (!kbdev->vinstr_ctx) {
-		dev_err(kbdev->dev, "Can't initialize virtual instrumentation core\n");
-		goto out_partial;
+	if (kbdev->inited_subsys & inited_power_control) {
+		power_control_term(kbdev);
+		kbdev->inited_subsys &= ~inited_power_control;
 	}
 
-	inited |= inited_vinstr;
-
-	kbdev->ipa_ctx = kbase_ipa_init(kbdev);
-	if (!kbdev->ipa_ctx) {
-		dev_err(kbdev->dev, "Can't initialize IPA\n");
-		goto out_partial;
+	if (kbdev->inited_subsys & inited_registers_map) {
+		registers_unmap(kbdev);
+		kbdev->inited_subsys &= ~inited_registers_map;
 	}
 
-	inited |= inited_ipa;
-
-	err = kbase_debug_job_fault_dev_init(kbdev);
-	if (err)
-		goto out_partial;
-
-	inited |= inited_job_fault;
-
-	err = kbase_device_debugfs_init(kbdev);
-	if (err)
-		goto out_partial;
-
-	/* intialise the kctx list */
-	mutex_init(&kbdev->kctx_list_lock);
-	INIT_LIST_HEAD(&kbdev->kctx_list);
-
-	kbdev->mdev.minor = MISC_DYNAMIC_MINOR;
-	kbdev->mdev.name = kbdev->devname;
-	kbdev->mdev.fops = &kbase_fops;
-	kbdev->mdev.parent = get_device(kbdev->dev);
-
-	err = misc_register(&kbdev->mdev);
-	if (err) {
-		dev_err(kbdev->dev, "Couldn't register misc dev %s\n", kbdev->devname);
-		goto out_misc;
-	}
-
-	{
-		const struct list_head *dev_list = kbase_dev_list_get();
-
-		list_add(&kbdev->entry, &kbase_dev_list);
-		kbase_dev_list_put(dev_list);
+#ifdef CONFIG_MALI_NO_MALI
+	if (kbdev->inited_subsys & inited_gpu_device) {
+		gpu_device_destroy(kbdev);
+		kbdev->inited_subsys &= ~inited_gpu_device;
 	}
+#endif /* CONFIG_MALI_NO_MALI */
 
-	dev_info(kbdev->dev, "Probed as %s\n", dev_name(kbdev->mdev.this_device));
+	if (kbdev->inited_subsys != 0)
+		dev_err(kbdev->dev, "Missing subsystem termination\n");
 
-	kbase_dev_nr++;
+	kbase_device_free(kbdev);
 
 	return 0;
-
-out_misc:
-	put_device(kbdev->dev);
-	kbase_device_debugfs_term(kbdev);
-out_partial:
-	if (inited & inited_job_fault)
-		kbase_debug_job_fault_dev_term(kbdev);
-	if (inited & inited_ipa)
-		kbase_ipa_term(kbdev->ipa_ctx);
-	if (inited & inited_vinstr)
-		kbase_vinstr_term(kbdev->vinstr_ctx);
-#ifdef CONFIG_MALI_DEVFREQ
-	if (inited & inited_devfreq)
-		kbase_devfreq_term(kbdev);
-#endif /* CONFIG_MALI_DEVFREQ */
-	if (inited & inited_backend_late)
-		kbase_backend_late_term(kbdev);
-#ifdef CONFIG_MALI_MIPE_ENABLED
-	if (inited & inited_tlstream)
-		kbase_tlstream_term();
-#endif /* CONFIG_MALI_MIPE_ENABLED */
-
-	if (inited & inited_js)
-		kbasep_js_devdata_halt(kbdev);
-
-	if (inited & inited_mem)
-		kbase_mem_halt(kbdev);
-
-	if (inited & inited_js)
-		kbasep_js_devdata_term(kbdev);
-
-	if (inited & inited_mem)
-		kbase_mem_term(kbdev);
-
-	if (inited & inited_pm_runtime_init) {
-		if (kbdev->pm.callback_power_runtime_term)
-			kbdev->pm.callback_power_runtime_term(kbdev);
-	}
-
-	if (inited & inited_device)
-		kbase_device_term(kbdev);
-
-	if (inited & inited_backend_early)
-		kbase_backend_early_term(kbdev);
-
-	return err;
 }
 
+extern void kbase_platform_rk_shutdown(struct kbase_device *kbdev);
+static void kbase_platform_device_shutdown(struct platform_device *pdev)
+{
+	struct kbase_device *kbdev = to_kbase_device(&pdev->dev);
 
-static struct attribute *kbase_attrs[] = {
-#ifdef CONFIG_MALI_DEBUG
-	&dev_attr_debug_command.attr,
-	&dev_attr_js_softstop_always.attr,
-#endif
-#if !MALI_CUSTOMER_RELEASE
-	&dev_attr_force_replay.attr,
-#endif
-	&dev_attr_js_timeouts.attr,
-	&dev_attr_gpuinfo.attr,
-	&dev_attr_dvfs_period.attr,
-	&dev_attr_pm_poweroff.attr,
-	&dev_attr_reset_timeout.attr,
-	&dev_attr_js_scheduling_period.attr,
-	&dev_attr_power_policy.attr,
-	&dev_attr_core_availability_policy.attr,
-	&dev_attr_core_mask.attr,
-	&dev_attr_mem_pool_size.attr,
-	&dev_attr_mem_pool_max_size.attr,
-	NULL
-};
-
-static const struct attribute_group kbase_attr_group = {
-	.attrs = kbase_attrs,
-};
-
-static int kbase_common_device_remove(struct kbase_device *kbdev);
+	kbase_platform_rk_shutdown(kbdev);
+}
 
 static int kbase_platform_device_probe(struct platform_device *pdev)
 {
 	struct kbase_device *kbdev;
-	struct resource *reg_res;
+	struct mali_base_gpu_core_props *core_props;
+	u32 gpu_id;
+	const struct list_head *dev_list;
 	int err = 0;
-	int i;
 
 #ifdef CONFIG_OF
 	err = kbase_platform_early_init();
 	if (err) {
 		dev_err(&pdev->dev, "Early platform initialization failed\n");
+		kbase_platform_device_remove(pdev);
 		return err;
 	}
 #endif
 
 	kbdev = kbase_device_alloc();
 	if (!kbdev) {
-		dev_err(&pdev->dev, "Can't allocate device\n");
-		err = -ENOMEM;
-		goto out;
+		dev_err(&pdev->dev, "Device allocation failed\n");
+		kbase_platform_device_remove(pdev);
+		return -ENOMEM;
 	}
+
+	kbdev->dev = &pdev->dev;
+	dev_set_drvdata(kbdev->dev, kbdev);
+
 #ifdef CONFIG_MALI_NO_MALI
 	err = gpu_device_create(kbdev);
 	if (err) {
-		dev_err(&pdev->dev, "Can't initialize dummy model\n");
-		goto out_midg;
+		dev_err(&pdev->dev, "Dummy model initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
 	}
+	kbdev->inited_subsys |= inited_gpu_device;
 #endif /* CONFIG_MALI_NO_MALI */
 
-	kbdev->dev = &pdev->dev;
-	/* 3 IRQ resources */
-	for (i = 0; i < 3; i++) {
-		struct resource *irq_res;
-		int irqtag;
+	err = assign_irqs(pdev);
+	if (err) {
+		dev_err(&pdev->dev, "IRQ search failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
 
-		irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, i);
-		if (!irq_res) {
-			dev_err(kbdev->dev, "No IRQ resource at index %d\n", i);
-			err = -ENOENT;
-			goto out_platform_irq;
-		}
+	err = registers_map(kbdev);
+	if (err) {
+		dev_err(&pdev->dev, "Register map failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_registers_map;
 
-#ifdef CONFIG_OF
-		if (!strcmp(irq_res->name, "JOB")) {
-			irqtag = JOB_IRQ_TAG;
-		} else if (!strcmp(irq_res->name, "MMU")) {
-			irqtag = MMU_IRQ_TAG;
-		} else if (!strcmp(irq_res->name, "GPU")) {
-			irqtag = GPU_IRQ_TAG;
-		} else {
-			dev_err(&pdev->dev, "Invalid irq res name: '%s'\n",
-				irq_res->name);
-			err = -EINVAL;
-			goto out_irq_name;
-		}
-#else
-		irqtag = i;
-#endif /* CONFIG_OF */
-		kbdev->irqs[irqtag].irq = irq_res->start;
-		kbdev->irqs[irqtag].flags = (irq_res->flags & IRQF_TRIGGER_MASK);
+	err = power_control_init(pdev);
+	if (err) {
+		dev_err(&pdev->dev, "Power control initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
 	}
-		/* the first memory resource is the physical address of the GPU
-		 * registers */
-		reg_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-		if (!reg_res) {
-			dev_err(kbdev->dev, "Invalid register resource\n");
-			err = -ENOENT;
-			goto out_platform_mem;
-		}
+	kbdev->inited_subsys |= inited_power_control;
 
-		kbdev->reg_start = reg_res->start;
-		kbdev->reg_size = resource_size(reg_res);
+	err = kbase_backend_early_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Early backend initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_backend_early;
 
-		err = kbase_common_reg_map(kbdev);
-		if (err)
-			goto out_reg_map;
+	scnprintf(kbdev->devname, DEVNAME_SIZE, "%s%d", kbase_drv_name,
+			kbase_dev_nr);
 
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \
-			&& defined(CONFIG_REGULATOR)
-	kbdev->regulator = regulator_get_optional(kbdev->dev, "mali");
-	if (IS_ERR_OR_NULL(kbdev->regulator)) {
-		err = PTR_ERR(kbdev->regulator);
+	kbase_disjoint_init(kbdev);
 
-		kbdev->regulator = NULL;
-		if (err == -EPROBE_DEFER)
-			goto out_regulator;
-		dev_info(kbdev->dev, "Continuing without Mali regulator control\n");
-		/* Allow probe to continue without regulator */
-	}
-#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */
+	/* obtain min/max configured gpu frequencies */
+	core_props = &(kbdev->gpu_props.props.core_props);
+	core_props->gpu_freq_khz_min = GPU_FREQ_KHZ_MIN;
+	core_props->gpu_freq_khz_max = GPU_FREQ_KHZ_MAX;
 
-#ifdef CONFIG_MALI_PLATFORM_DEVICETREE
-	pm_runtime_enable(kbdev->dev);
-#endif
+	kbdev->gpu_props.irq_throttle_time_us = DEFAULT_IRQ_THROTTLE_TIME_US;
 
-	kbdev->clock = clk_get(kbdev->dev, "clk_mali");
-	if (IS_ERR_OR_NULL(kbdev->clock)) {
-		err = PTR_ERR(kbdev->clock);
+	err = kbase_device_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Device initialization failed (%d)\n", err);
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_device;
 
-		kbdev->clock = NULL;
-		if (err == -EPROBE_DEFER)
-			goto out_clock_prepare;
-		dev_info(kbdev->dev, "Continuing without Mali clock control\n");
-		/* Allow probe to continue without clock. */
-	} else {
-		err = clk_prepare_enable(kbdev->clock);
+	if (kbdev->pm.callback_power_runtime_init) {
+		err = kbdev->pm.callback_power_runtime_init(kbdev);
 		if (err) {
 			dev_err(kbdev->dev,
-				"Failed to prepare and enable clock (%d)\n", err);
-			goto out_clock_prepare;
+				"Runtime PM initialization failed\n");
+			kbase_platform_device_remove(pdev);
+			return err;
 		}
+		kbdev->inited_subsys |= inited_pm_runtime_init;
 	}
 
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)) && defined(CONFIG_OF) \
-			&& defined(CONFIG_PM_OPP)
-	/* Register the OPPs if they are available in device tree */
-	if (dev_pm_opp_of_add_table(kbdev->dev) < 0)
-		dev_dbg(kbdev->dev, "OPP table not found\n");
-#endif
+	err = kbase_mem_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Memory subsystem initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_mem;
 
+	gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	gpu_id &= GPU_ID_VERSION_PRODUCT_ID;
+	gpu_id = gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
 
-	err = kbase_common_device_init(kbdev);
+	kbase_device_coherency_init(kbdev, gpu_id);
+
+	kbasep_protected_mode_init(kbdev);
+
+	err = kbasep_js_devdata_init(kbdev);
 	if (err) {
-		dev_err(kbdev->dev, "Failed kbase_common_device_init\n");
-		goto out_common_init;
+		dev_err(kbdev->dev, "Job JS devdata initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
 	}
+	kbdev->inited_subsys |= inited_js;
 
-	err = sysfs_create_group(&kbdev->dev->kobj, &kbase_attr_group);
+	err = kbase_tlstream_init();
 	if (err) {
-		dev_err(&pdev->dev, "Failed to create sysfs entries\n");
-		goto out_sysfs;
+		dev_err(kbdev->dev, "Timeline stream initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
 	}
+	kbdev->inited_subsys |= inited_tlstream;
 
-#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
-	err = bl_core_client_register(kbdev->devname,
-						kbase_logging_started_cb,
-						kbdev, &kbdev->buslogger,
-						THIS_MODULE, NULL);
+	err = kbase_backend_late_init(kbdev);
 	if (err) {
-		dev_err(kbdev->dev, "Couldn't register bus log client\n");
-		goto out_bl_core_register;
+		dev_err(kbdev->dev, "Late backend initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
 	}
+	kbdev->inited_subsys |= inited_backend_late;
 
-	bl_core_set_threshold(kbdev->buslogger, 1024*1024*1024);
-#endif
-	return 0;
-
-#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
-out_bl_core_register:
-	sysfs_remove_group(&kbdev->dev->kobj, &kbase_attr_group);
-#endif
-
-out_sysfs:
-	kbase_common_device_remove(kbdev);
-out_common_init:
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0))
-	dev_pm_opp_of_remove_table(kbdev->dev);
-#endif
-	clk_disable_unprepare(kbdev->clock);
-out_clock_prepare:
-	clk_put(kbdev->clock);
-#ifdef CONFIG_MALI_PLATFORM_DEVICETREE
-	pm_runtime_disable(kbdev->dev);
-#endif
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \
-			&& defined(CONFIG_REGULATOR)
-out_regulator:
-	regulator_put(kbdev->regulator);
-#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */
-		kbase_common_reg_unmap(kbdev);
-out_reg_map:
-out_platform_mem:
-#ifdef CONFIG_OF
-out_irq_name:
-#endif
-out_platform_irq:
-#ifdef CONFIG_MALI_NO_MALI
-	gpu_device_destroy(kbdev);
-out_midg:
-#endif /* CONFIG_MALI_NO_MALI */
-	kbase_device_free(kbdev);
-out:
-	return err;
-}
+#ifdef CONFIG_MALI_DEVFREQ
+	err = kbase_devfreq_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Devfreq initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_devfreq;
+#endif /* CONFIG_MALI_DEVFREQ */
 
-static int kbase_common_device_remove(struct kbase_device *kbdev)
-{
-	kbase_debug_job_fault_dev_term(kbdev);
-	kbase_ipa_term(kbdev->ipa_ctx);
-	kbase_vinstr_term(kbdev->vinstr_ctx);
-	sysfs_remove_group(&kbdev->dev->kobj, &kbase_attr_group);
+	kbdev->vinstr_ctx = kbase_vinstr_init(kbdev);
+	if (!kbdev->vinstr_ctx) {
+		dev_err(kbdev->dev,
+			"Virtual instrumentation initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return -EINVAL;
+	}
+	kbdev->inited_subsys |= inited_vinstr;
 
-#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
-	if (kbdev->buslogger)
-		bl_core_client_unregister(kbdev->buslogger);
-#endif
+#ifndef CONFIG_MALI_PRFCNT_SET_SECONDARY
+	kbdev->ipa_ctx = kbase_ipa_init(kbdev);
+	if (!kbdev->ipa_ctx) {
+		dev_err(kbdev->dev, "IPA initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return -EINVAL;
+	}
 
-#ifdef CONFIG_DEBUG_FS
-	debugfs_remove_recursive(kbdev->mali_debugfs_directory);
-#endif
-#ifdef CONFIG_MALI_DEVFREQ
-	kbase_devfreq_term(kbdev);
-#endif
+	kbdev->inited_subsys |= inited_ipa;
+#endif  /* CONFIG_MALI_PRFCNT_SET_SECONDARY */
 
-	kbase_backend_late_term(kbdev);
+	err = kbase_debug_job_fault_dev_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Job fault debug initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_job_fault;
 
-	if (kbdev->pm.callback_power_runtime_term)
-		kbdev->pm.callback_power_runtime_term(kbdev);
-#ifdef CONFIG_MALI_PLATFORM_DEVICETREE
-	pm_runtime_disable(kbdev->dev);
-#endif
+	err = kbase_device_debugfs_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "DebugFS initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_debugfs;
 
-#ifdef CONFIG_MALI_MIPE_ENABLED
-	kbase_tlstream_term();
-#endif /* CONFIG_MALI_MIPE_ENABLED */
+	/* initialize the kctx list */
+	mutex_init(&kbdev->kctx_list_lock);
+	INIT_LIST_HEAD(&kbdev->kctx_list);
 
-	kbasep_js_devdata_halt(kbdev);
-	kbase_mem_halt(kbdev);
+	kbdev->mdev.minor = MISC_DYNAMIC_MINOR;
+	kbdev->mdev.name = kbdev->devname;
+	kbdev->mdev.fops = &kbase_fops;
+	kbdev->mdev.parent = get_device(kbdev->dev);
+	kbdev->inited_subsys |= inited_get_device;
 
-	kbasep_js_devdata_term(kbdev);
-	kbase_mem_term(kbdev);
-	kbase_backend_early_term(kbdev);
+	err = misc_register(&kbdev->mdev);
+	if (err) {
+		dev_err(kbdev->dev, "Misc device registration failed for %s\n",
+			kbdev->devname);
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_misc_register;
 
-	{
-		const struct list_head *dev_list = kbase_dev_list_get();
+	dev_list = kbase_dev_list_get();
+	list_add(&kbdev->entry, &kbase_dev_list);
+	kbase_dev_list_put(dev_list);
+	kbdev->inited_subsys |= inited_dev_list;
 
-		list_del(&kbdev->entry);
-		kbase_dev_list_put(dev_list);
-	}
-	misc_deregister(&kbdev->mdev);
-	put_device(kbdev->dev);
-		kbase_common_reg_unmap(kbdev);
-	kbase_device_term(kbdev);
-	if (kbdev->clock) {
-		clk_disable_unprepare(kbdev->clock);
-		clk_put(kbdev->clock);
-		kbdev->clock = NULL;
+	err = sysfs_create_group(&kbdev->dev->kobj, &kbase_attr_group);
+	if (err) {
+		dev_err(&pdev->dev, "SysFS group creation failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
 	}
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \
-			&& defined(CONFIG_REGULATOR)
-	regulator_put(kbdev->regulator);
-#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */
-#ifdef CONFIG_MALI_NO_MALI
-	gpu_device_destroy(kbdev);
-#endif /* CONFIG_MALI_NO_MALI */
-	kbase_device_free(kbdev);
+	kbdev->inited_subsys |= inited_sysfs_group;
 
-	return 0;
-}
+#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
+	err = bl_core_client_register(kbdev->devname,
+						kbase_logging_started_cb,
+						kbdev, &kbdev->buslogger,
+						THIS_MODULE, NULL);
+	if (err == 0) {
+		kbdev->inited_subsys |= inited_buslogger;
+		bl_core_set_threshold(kbdev->buslogger, 1024*1024*1024);
+	} else {
+		dev_warn(kbdev->dev, "Bus log client registration failed\n");
+		err = 0;
+	}
+#endif
 
-static int kbase_platform_device_remove(struct platform_device *pdev)
-{
-	struct kbase_device *kbdev = to_kbase_device(&pdev->dev);
+	dev_info(kbdev->dev,
+			"Probed as %s\n", dev_name(kbdev->mdev.this_device));
 
-	if (!kbdev)
-		return -ENODEV;
+	kbase_dev_nr++;
 
-	return kbase_common_device_remove(kbdev);
+	return err;
 }
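The rewritten probe path above drops the old chain of goto labels in favour of the inited_subsys bitmask: every stage that succeeds sets its bit, and any failure simply calls kbase_platform_device_remove(), which tears down only the stages whose bits are set, in reverse order (as at the start of this hunk). A minimal sketch of that pattern, using hypothetical subsystem names and a plain context struct rather than the real kbase_device:

/* Minimal sketch of the inited_subsys pattern; all names here are hypothetical. */
#include <stdio.h>

enum {
	INITED_MEM   = (1u << 0),
	INITED_IRQS  = (1u << 1),
	INITED_SYSFS = (1u << 2),
};

struct demo_device {
	unsigned int inited_subsys;	/* one bit per successfully initialized stage */
};

static void demo_device_remove(struct demo_device *dev)
{
	/* Tear down only what was actually brought up, in reverse order. */
	if (dev->inited_subsys & INITED_SYSFS) {
		puts("sysfs: removed");
		dev->inited_subsys &= ~INITED_SYSFS;
	}
	if (dev->inited_subsys & INITED_IRQS) {
		puts("irqs: released");
		dev->inited_subsys &= ~INITED_IRQS;
	}
	if (dev->inited_subsys & INITED_MEM) {
		puts("mem: terminated");
		dev->inited_subsys &= ~INITED_MEM;
	}
}

static int demo_device_probe(struct demo_device *dev, int fail_at_sysfs)
{
	dev->inited_subsys |= INITED_MEM;	/* stage 1 succeeded */
	dev->inited_subsys |= INITED_IRQS;	/* stage 2 succeeded */

	if (fail_at_sysfs) {
		/* Stage 3 failed: one call unwinds everything done so far. */
		demo_device_remove(dev);
		return -1;
	}
	dev->inited_subsys |= INITED_SYSFS;
	return 0;
}

int main(void)
{
	struct demo_device dev = { 0 };

	demo_device_probe(&dev, 1);
	return 0;
}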
 
 /** Suspend callback from the OS.
@@ -4008,6 +3834,7 @@ MODULE_DEVICE_TABLE(of, kbase_dt_ids);
 static struct platform_driver kbase_platform_driver = {
 	.probe = kbase_platform_device_probe,
 	.remove = kbase_platform_device_remove,
+	.shutdown = kbase_platform_device_shutdown,
 	.driver = {
 		   .name = kbase_drv_name,
 		   .owner = THIS_MODULE,
@@ -4038,19 +3865,15 @@ static int __init kbase_driver_init(void)
 	if (ret)
 		return ret;
 
-#ifndef CONFIG_MACH_MANTA
 #ifdef CONFIG_MALI_PLATFORM_FAKE
 	ret = kbase_platform_fake_register();
 	if (ret)
 		return ret;
-#endif
 #endif
 	ret = platform_driver_register(&kbase_platform_driver);
-#ifndef CONFIG_MACH_MANTA
 #ifdef CONFIG_MALI_PLATFORM_FAKE
 	if (ret)
 		kbase_platform_fake_unregister();
-#endif
 #endif
 	return ret;
 }
@@ -4058,11 +3881,9 @@ static int __init kbase_driver_init(void)
 static void __exit kbase_driver_exit(void)
 {
 	platform_driver_unregister(&kbase_platform_driver);
-#ifndef CONFIG_MACH_MANTA
 #ifdef CONFIG_MALI_PLATFORM_FAKE
 	kbase_platform_fake_unregister();
 #endif
-#endif
 }
 
 module_init(kbase_driver_init);
@@ -4091,7 +3912,6 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(mali_page_fault_insert_pages);
 EXPORT_TRACEPOINT_SYMBOL_GPL(mali_mmu_as_in_use);
 EXPORT_TRACEPOINT_SYMBOL_GPL(mali_mmu_as_released);
 EXPORT_TRACEPOINT_SYMBOL_GPL(mali_total_alloc_pages_change);
-EXPORT_TRACEPOINT_SYMBOL_GPL(mali_sw_counter);
 
 void kbase_trace_mali_pm_status(u32 event, u64 value)
 {
diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_debug_job_fault.c b/drivers/gpu/arm/midgard_for_linux/mali_kbase_debug_job_fault.c
index f3e426f9539b..83c5c37942bd 100644
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_debug_job_fault.c
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_debug_job_fault.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -15,51 +15,71 @@
 
 
 
-#include "mali_kbase_debug_job_fault.h"
+#include <mali_kbase.h>
+#include <linux/spinlock.h>
 
 #ifdef CONFIG_DEBUG_FS
 
-static bool kbase_is_job_fault_event_pending(struct list_head *event_list)
+static bool kbase_is_job_fault_event_pending(struct kbase_device *kbdev)
 {
-	bool ret;
+	struct list_head *event_list = &kbdev->job_fault_event_list;
+	unsigned long    flags;
+	bool             ret;
 
-	ret = (!list_empty(event_list));
+	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	ret = !list_empty(event_list);
+	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
 
 	return ret;
 }
 
-static bool kbase_ctx_has_no_event_pending(
-		struct kbase_context *kctx, struct list_head *event_list)
+static bool kbase_ctx_has_no_event_pending(struct kbase_context *kctx)
 {
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct list_head *event_list = &kctx->kbdev->job_fault_event_list;
 	struct base_job_fault_event *event;
+	unsigned long               flags;
 
-	if (list_empty(event_list))
+	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	if (list_empty(event_list)) {
+		spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
 		return true;
+	}
 	list_for_each_entry(event, event_list, head) {
-		if (event->katom->kctx == kctx)
+		if (event->katom->kctx == kctx) {
+			spin_unlock_irqrestore(&kbdev->job_fault_event_lock,
+					flags);
 			return false;
+		}
 	}
-	return false;
+	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+	return true;
 }
 
 /* wait until the fault happen and copy the event */
 static int kbase_job_fault_event_wait(struct kbase_device *kbdev,
-		struct list_head *event_list,
 		struct base_job_fault_event *event)
 {
+	struct list_head            *event_list = &kbdev->job_fault_event_list;
 	struct base_job_fault_event *event_in;
+	unsigned long               flags;
 
+	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
 	if (list_empty(event_list)) {
+		spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
 		if (wait_event_interruptible(kbdev->job_fault_wq,
-				kbase_is_job_fault_event_pending(event_list)))
+				 kbase_is_job_fault_event_pending(kbdev)))
 			return -ERESTARTSYS;
+		spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
 	}
 
 	event_in = list_entry(event_list->next,
 			struct base_job_fault_event, head);
-
 	event->event_code = event_in->event_code;
 	event->katom = event_in->katom;
+
+	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+
 	return 0;
 
 }
@@ -102,12 +122,16 @@ static void kbase_job_fault_resume_event_cleanup(struct kbase_context *kctx)
 static void kbase_job_fault_event_cleanup(struct kbase_device *kbdev)
 {
 	struct list_head *event_list = &kbdev->job_fault_event_list;
+	unsigned long    flags;
 
+	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
 	while (!list_empty(event_list)) {
-
 		kbase_job_fault_event_dequeue(kbdev, event_list);
+		spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
 		wake_up(&kbdev->job_fault_resume_wq);
+		spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
 	}
+	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
 }
 
 static void kbase_job_fault_resume_worker(struct work_struct *data)
@@ -129,8 +153,7 @@ static void kbase_job_fault_resume_worker(struct work_struct *data)
 	 * atoms belong to the same context.
 	 */
 	wait_event(kctx->kbdev->job_fault_resume_wq,
-			kbase_ctx_has_no_event_pending(kctx,
-					&kctx->kbdev->job_fault_event_list));
+			 kbase_ctx_has_no_event_pending(kctx));
 
 	atomic_set(&kctx->job_fault_count, 0);
 	kbase_jd_done_worker(&katom->work);
@@ -166,9 +189,12 @@ static void kbase_job_fault_event_post(struct kbase_device *kbdev,
 		struct kbase_jd_atom *katom, u32 completion_code)
 {
 	struct base_job_fault_event *event;
+	unsigned long flags;
 
+	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
 	event = kbase_job_fault_event_queue(&kbdev->job_fault_event_list,
 				katom, completion_code);
+	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
 
 	wake_up_interruptible(&kbdev->job_fault_wq);
 
@@ -293,9 +319,10 @@ static void *debug_job_fault_start(struct seq_file *m, loff_t *pos)
 	 */
 	if (*pos == 0) {
 		event = kmalloc(sizeof(*event), GFP_KERNEL);
+		if (!event)
+			return NULL;
 		event->reg_offset = 0;
-		if (kbase_job_fault_event_wait(kbdev,
-				&kbdev->job_fault_event_list, event)) {
+		if (kbase_job_fault_event_wait(kbdev, event)) {
 			kfree(event);
 			return NULL;
 		}
@@ -329,11 +356,15 @@ static void debug_job_fault_stop(struct seq_file *m, void *v)
 		dev_info(kbdev->dev, "debug job fault seq stop stage 1");
 
 	} else {
+		unsigned long flags;
+
+		spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
 		if (!list_empty(&kbdev->job_fault_event_list)) {
 			kbase_job_fault_event_dequeue(kbdev,
 				&kbdev->job_fault_event_list);
 			wake_up(&kbdev->job_fault_resume_wq);
 		}
+		spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
 		dev_info(kbdev->dev, "debug job fault seq stop stage 2");
 	}
 
@@ -404,6 +435,7 @@ int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev)
 
 	init_waitqueue_head(&(kbdev->job_fault_wq));
 	init_waitqueue_head(&(kbdev->job_fault_resume_wq));
+	spin_lock_init(&kbdev->job_fault_event_lock);
 
 	kbdev->job_fault_resume_workq = alloc_workqueue(
 			"kbase_job_fault_resume_work_queue", WQ_MEM_RECLAIM, 1);
diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_debug_job_fault.h b/drivers/gpu/arm/midgard_for_linux/mali_kbase_debug_job_fault.h
index 0930f905e4ef..a2bf8983c37c 100644
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_debug_job_fault.h
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_debug_job_fault.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -18,7 +18,6 @@
 #ifndef _KBASE_DEBUG_JOB_FAULT_H
 #define _KBASE_DEBUG_JOB_FAULT_H
 
-#include <mali_kbase.h>
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
 
diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_debug_mem_view.c b/drivers/gpu/arm/midgard_for_linux/mali_kbase_debug_mem_view.c
index 42d1d832c0a3..a98355e33d07 100644
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_debug_mem_view.c
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_debug_mem_view.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2013-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -125,6 +125,8 @@ static int debug_mem_show(struct seq_file *m, void *v)
 
 	page = pfn_to_page(PFN_DOWN(map->alloc->pages[data->offset]));
 	mapping = vmap(&page, 1, VM_MAP, prot);
+	if (!mapping)
+		goto out;
 
 	for (i = 0; i < PAGE_SIZE; i += 4*sizeof(*mapping)) {
 		seq_printf(m, "%016llx:", i + ((map->start_pfn +
@@ -160,11 +162,15 @@ static int debug_mem_open(struct inode *i, struct file *file)
 	int ret;
 
 	ret = seq_open(file, &ops);
-
 	if (ret)
 		return ret;
 
 	mem_data = kmalloc(sizeof(*mem_data), GFP_KERNEL);
+	if (!mem_data) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
 	mem_data->kctx = kctx;
 
 	INIT_LIST_HEAD(&mem_data->mapping_list);
@@ -184,6 +190,11 @@ static int debug_mem_open(struct inode *i, struct file *file)
 			continue;
 
 		mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
+		if (!mapping) {
+			ret = -ENOMEM;
+			kbase_gpu_vm_unlock(kctx);
+			goto out;
+		}
 
 		mapping->alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
 		mapping->start_pfn = reg->start_pfn;
@@ -197,6 +208,23 @@ static int debug_mem_open(struct inode *i, struct file *file)
 	((struct seq_file *)file->private_data)->private = mem_data;
 
 	return 0;
+
+out:
+	if (mem_data) {
+		while (!list_empty(&mem_data->mapping_list)) {
+			struct debug_mem_mapping *mapping;
+
+			mapping = list_first_entry(&mem_data->mapping_list,
+					struct debug_mem_mapping, node);
+			kbase_mem_phy_alloc_put(mapping->alloc);
+			list_del(&mapping->node);
+			kfree(mapping);
+		}
+		fput(kctx_file);
+		kfree(mem_data);
+	}
+	seq_release(i, file);
+	return ret;
 }
 
 static int debug_mem_release(struct inode *inode, struct file *file)
diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_defs.h b/drivers/gpu/arm/midgard_for_linux/mali_kbase_defs.h
index 0fc5ff95234e..4bb8c2c7aec2 100644
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_defs.h
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_defs.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -33,7 +33,7 @@
 #include <mali_kbase_mem_lowlevel.h>
 #include <mali_kbase_mmu_hw.h>
 #include <mali_kbase_mmu_mode.h>
-#include <mali_kbase_instr.h>
+#include <mali_kbase_instr_defs.h>
 
 #include <linux/atomic.h>
 #include <linux/mempool.h>
@@ -53,6 +53,8 @@
 #include "sync.h"
 #endif				/* CONFIG_SYNC */
 
+#include "mali_kbase_dma_fence.h"
+
 #ifdef CONFIG_DEBUG_FS
 #include <linux/debugfs.h>
 #endif				/* CONFIG_DEBUG_FS */
@@ -175,18 +177,18 @@
 #define KBASE_KATOM_FLAG_BEEN_HARD_STOPPED (1<<4)
 /** Atom has caused us to enter disjoint state */
 #define KBASE_KATOM_FLAG_IN_DISJOINT (1<<5)
-/* Atom has fail dependency on same-slot dependency */
-#define KBASE_KATOM_FLAG_FAIL_PREV (1<<6)
 /* Atom blocked on cross-slot dependency */
 #define KBASE_KATOM_FLAG_X_DEP_BLOCKED (1<<7)
 /* Atom has fail dependency on cross-slot dependency */
 #define KBASE_KATOM_FLAG_FAIL_BLOCKER (1<<8)
-/* Atom has been submitted to JSCTX ringbuffers */
-#define KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED (1<<9)
+/* Atom is currently in the list of atoms blocked on cross-slot dependencies */
+#define KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST (1<<9)
 /* Atom is currently holding a context reference */
 #define KBASE_KATOM_FLAG_HOLDING_CTX_REF (1<<10)
-/* Atom requires GPU to be in secure mode */
-#define KBASE_KATOM_FLAG_SECURE (1<<11)
+/* Atom requires GPU to be in protected mode */
+#define KBASE_KATOM_FLAG_PROTECTED (1<<11)
+/* Atom has been stored in runnable_tree */
+#define KBASE_KATOM_FLAG_JSCTX_IN_TREE (1<<12)
 
 /* SW related flags about types of JS_COMMAND action
  * NOTE: These must be masked off by JS_COMMAND_MASK */
@@ -233,11 +235,11 @@ struct kbase_jd_atom_dependency {
  *
  * @return readonly reference to dependent ATOM.
  */
-static inline const struct kbase_jd_atom *const kbase_jd_katom_dep_atom(const struct kbase_jd_atom_dependency *dep)
+static inline const struct kbase_jd_atom * kbase_jd_katom_dep_atom(const struct kbase_jd_atom_dependency *dep)
 {
 	LOCAL_ASSERT(dep != NULL);
 
-	return (const struct kbase_jd_atom * const)(dep->atom);
+	return (const struct kbase_jd_atom *)(dep->atom);
 }
 
 /**
@@ -248,7 +250,7 @@ static inline const struct kbase_jd_atom *const kbase_jd_katom_dep_atom(const st
  *
  * @return A dependency type value.
  */
-static inline const u8 kbase_jd_katom_dep_type(const struct kbase_jd_atom_dependency *dep)
+static inline u8 kbase_jd_katom_dep_type(const struct kbase_jd_atom_dependency *dep)
 {
 	LOCAL_ASSERT(dep != NULL);
 
@@ -299,13 +301,15 @@ enum kbase_atom_gpu_rb_state {
 	KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB,
 	/* Atom is in slot ringbuffer but is blocked on a previous atom */
 	KBASE_ATOM_GPU_RB_WAITING_BLOCKED,
+	/* Atom is in slot ringbuffer but is waiting for protected mode exit */
+	KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_EXIT,
 	/* Atom is in slot ringbuffer but is waiting for cores to become
 	 * available */
 	KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE,
 	/* Atom is in slot ringbuffer but is blocked on affinity */
 	KBASE_ATOM_GPU_RB_WAITING_AFFINITY,
-	/* Atom is in slot ringbuffer but is waiting for secure mode switch */
-	KBASE_ATOM_GPU_RB_WAITING_SECURE_MODE,
+	/* Atom is in slot ringbuffer but is waiting for protected mode entry */
+	KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_ENTRY,
 	/* Atom is in slot ringbuffer and ready to run */
 	KBASE_ATOM_GPU_RB_READY,
 	/* Atom is in slot ringbuffer and has been submitted to the GPU */
@@ -315,6 +319,23 @@ enum kbase_atom_gpu_rb_state {
 	KBASE_ATOM_GPU_RB_RETURN_TO_JS
 };
 
+enum kbase_atom_exit_protected_state {
+	/*
+	 * Starting state:
+	 * Check if a transition out of protected mode is required.
+	 */
+	KBASE_ATOM_EXIT_PROTECTED_CHECK,
+	/* Wait for the L2 to become idle in preparation for the reset. */
+	KBASE_ATOM_EXIT_PROTECTED_IDLE_L2,
+	/* Issue the protected reset. */
+	KBASE_ATOM_EXIT_PROTECTED_RESET,
+	/*
+	 * End state;
+	 * Wait for the reset to complete.
+	 */
+	KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT,
+};
+
 struct kbase_ext_res {
 	u64 gpu_address;
 	struct kbase_mem_phy_alloc *alloc;
@@ -331,6 +352,13 @@ struct kbase_jd_atom {
 	struct list_head dep_head[2];
 	struct list_head dep_item[2];
 	const struct kbase_jd_atom_dependency dep[2];
+	/* List head used during job dispatch job_done processing - as
+	 * dependencies may not be entirely resolved at this point, we need to
+	 * use a separate list head. */
+	struct list_head jd_item;
+	/* true if atom's jd_item is currently on a list. Prevents atom being
+	 * processed twice. */
+	bool in_jd_list;
 
 	u16 nr_extres;
 	struct kbase_ext_res *extres;
@@ -348,6 +376,59 @@ struct kbase_jd_atom {
 	struct sync_fence *fence;
 	struct sync_fence_waiter sync_waiter;
 #endif				/* CONFIG_SYNC */
+#ifdef CONFIG_MALI_DMA_FENCE
+	struct {
+		/* This points to the dma-buf fence for this atom. If this is
+		 * NULL then there is no fence for this atom and the other
+		 * fields related to dma_fence may have invalid data.
+		 *
+		 * The context and seqno fields contain the details for this
+		 * fence.
+		 *
+		 * This fence is signaled when the katom is completed,
+		 * regardless of the event_code of the katom (signal also on
+		 * failure).
+		 */
+		struct fence *fence;
+		/* The dma-buf fence context number for this atom. A unique
+		 * context number is allocated to each katom in the context on
+		 * context creation.
+		 */
+		unsigned int context;
+		/* The dma-buf fence sequence number for this atom. This is
+		 * increased every time this katom uses dma-buf fence.
+		 */
+		atomic_t seqno;
+		/* This contains a list of all callbacks set up to wait on
+		 * other fences.  This atom must be held back from JS until all
+		 * these callbacks have been called and dep_count have reached
+		 * these callbacks have been called and dep_count has reached
+		 * number of callbacks on this list.
+		 *
+		 * This list is protected by jctx.lock. Callbacks are added to
+		 * this list when the atom is built and the waits are set up.
+		 * All the callbacks then stay on the list until all callbacks
+		 * have been called and the atom is queued, or cancelled, and
+		 * then all callbacks are taken off the list and freed.
+		 */
+		struct list_head callbacks;
+		/* Atomic counter of number of outstanding dma-buf fence
+		 * dependencies for this atom. When dep_count reaches 0 the
+		 * atom may be queued.
+		 *
+		 * The special value "-1" may only be set after the count
+		 * reaches 0, while holding jctx.lock. This indicates that the
+		 * atom has been handled, either queued in JS or cancelled.
+		 *
+		 * If anyone but the dma-fence worker sets this to -1 they must
+		 * ensure that any potentially queued worker has
+		 * completed before allowing the atom to be marked as unused.
+		 * This can be done by flushing the fence work queue:
+		 * kctx->dma_fence.wq.
+		 */
+		atomic_t dep_count;
+	} dma_fence;
+#endif /* CONFIG_MALI_DMA_FENCE */
 
 	/* Note: refer to kbasep_js_atom_retained_state, which will take a copy of some of the following members */
 	enum base_jd_event_code event_code;
@@ -383,6 +464,11 @@ struct kbase_jd_atom {
 
 	atomic_t blocked;
 
+	/* Pointer to atom that this atom has same-slot dependency on */
+	struct kbase_jd_atom *pre_dep;
+	/* Pointer to atom that has same-slot dependency on this atom */
+	struct kbase_jd_atom *post_dep;
+
 	/* Pointer to atom that this atom has cross-slot dependency on */
 	struct kbase_jd_atom *x_pre_dep;
 	/* Pointer to atom that has cross-slot dependency on this atom */
@@ -396,11 +482,32 @@ struct kbase_jd_atom {
 #ifdef CONFIG_DEBUG_FS
 	struct base_job_fault_event fault_event;
 #endif
+
+	/* List head used for two different purposes:
+	 *  1. Overflow list for JS ring buffers. If an atom is ready to run,
+	 *     but there is no room in the JS ring buffer, then the atom is put
+	 *     on the ring buffer's overflow list using this list node.
+	 *  2. List of waiting soft jobs.
+	 */
+	struct list_head queue;
+
+	struct kbase_va_region *jit_addr_reg;
+
+	/* If non-zero, this indicates that the atom will fail with the set
+	 * event_code when the atom is processed. */
+	enum base_jd_event_code will_fail_event_code;
+
+	enum kbase_atom_exit_protected_state exit_protected_state;
+
+	struct rb_node runnable_tree_node;
+
+	/* 'Age' of atom relative to other atoms in the context. */
+	u32 age;
 };
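The dma_fence.dep_count protocol documented inside the struct above boils down to: one callback is registered per fence the atom waits on, and whichever callback brings the count to zero hands the atom back to the scheduler. A hedged sketch of that last step, with hypothetical types and a work item standing in for the real queuing path:

/* Illustrative only; demo_atom and its fields are not the driver's types. */
#include <linux/atomic.h>
#include <linux/fence.h>
#include <linux/kernel.h>
#include <linux/workqueue.h>

struct demo_atom {
	atomic_t dep_count;		/* outstanding dma-buf fence dependencies */
	struct fence_cb cb;		/* the real code keeps one cb per awaited fence */
	struct work_struct queue_work;	/* requeues the atom once unblocked */
	struct workqueue_struct *wq;
};

/* Registered with fence_add_callback(fence, &atom->cb, demo_fence_callback). */
static void demo_fence_callback(struct fence *fence, struct fence_cb *cb)
{
	struct demo_atom *atom = container_of(cb, struct demo_atom, cb);

	/* The callback that resolves the last dependency queues the atom. */
	if (atomic_dec_return(&atom->dep_count) == 0)
		queue_work(atom->wq, &atom->queue_work);
}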
 
-static inline bool kbase_jd_katom_is_secure(const struct kbase_jd_atom *katom)
+static inline bool kbase_jd_katom_is_protected(const struct kbase_jd_atom *katom)
 {
-	return (bool)(katom->atom_flags & KBASE_KATOM_FLAG_SECURE);
+	return (bool)(katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED);
 }
 
 /*
@@ -476,6 +583,7 @@ typedef u32 kbase_as_poke_state;
 struct kbase_mmu_setup {
 	u64	transtab;
 	u64	memattr;
+	u64	transcfg;
 };
 
 /**
@@ -494,6 +602,7 @@ struct kbase_as {
 	enum kbase_mmu_fault_type fault_type;
 	u32 fault_status;
 	u64 fault_addr;
+	u64 fault_extra_addr;
 	struct mutex transaction_mutex;
 
 	struct kbase_mmu_setup current_setup;
@@ -720,27 +829,36 @@ struct kbase_pm_device_data {
 };
 
 /**
- * struct kbase_secure_ops - Platform specific functions for GPU secure mode
- * operations
- * @secure_mode_enable:  Callback to enable secure mode on the GPU
- * @secure_mode_disable: Callback to disable secure mode on the GPU
+ * struct kbase_protected_ops - Platform specific functions for GPU protected
+ * mode operations
+ * @protected_mode_enter: Callback to enter protected mode on the GPU
+ * @protected_mode_reset: Callback to reset the GPU and exit protected mode.
+ * @protected_mode_supported: Callback to check if protected mode is supported.
  */
-struct kbase_secure_ops {
+struct kbase_protected_ops {
+	/**
+	 * protected_mode_enter() - Enter protected mode on the GPU
+	 * @kbdev:	The kbase device
+	 *
+	 * Return: 0 on success, non-zero on error
+	 */
+	int (*protected_mode_enter)(struct kbase_device *kbdev);
+
 	/**
-	 * secure_mode_enable() - Enable secure mode on the GPU
+	 * protected_mode_reset() - Reset the GPU and exit protected mode
 	 * @kbdev:	The kbase device
 	 *
 	 * Return: 0 on success, non-zero on error
 	 */
-	int (*secure_mode_enable)(struct kbase_device *kbdev);
+	int (*protected_mode_reset)(struct kbase_device *kbdev);
 
 	/**
-	 * secure_mode_disable() - Disable secure mode on the GPU
+	 * protected_mode_supported() - Check if protected mode is supported
 	 * @kbdev:	The kbase device
 	 *
 	 * Return: 0 on success, non-zero on error
 	 */
-	int (*secure_mode_disable)(struct kbase_device *kbdev);
+	bool (*protected_mode_supported)(struct kbase_device *kbdev);
 };
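Platform glue is expected to supply an instance of these callbacks, much as platforms could provide the old secure-mode ops. A hedged sketch of what such an instance could look like; the SMC helper and its function IDs are invented for illustration:

/* Hypothetical platform implementation; demo_smc_call() and its IDs are made up. */
static int demo_protected_mode_enter(struct kbase_device *kbdev)
{
	/* e.g. ask trusted firmware to put the GPU into protected mode */
	return demo_smc_call(DEMO_SMC_GPU_PROTECTED_ENTER);
}

static int demo_protected_mode_reset(struct kbase_device *kbdev)
{
	/* resetting the GPU is also how protected mode is exited */
	return demo_smc_call(DEMO_SMC_GPU_PROTECTED_RESET);
}

static bool demo_protected_mode_supported(struct kbase_device *kbdev)
{
	return true;
}

struct kbase_protected_ops demo_protected_ops = {
	.protected_mode_enter     = demo_protected_mode_enter,
	.protected_mode_reset     = demo_protected_mode_reset,
	.protected_mode_supported = demo_protected_mode_supported,
};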
 
 
@@ -787,13 +905,13 @@ struct kbase_device {
 	u64 reg_start;
 	size_t reg_size;
 	void __iomem *reg;
+
 	struct {
 		int irq;
 		int flags;
 	} irqs[3];
-#ifdef CONFIG_HAVE_CLK
+
 	struct clk *clock;
-#endif
 #ifdef CONFIG_REGULATOR
 	struct regulator *regulator;
 #endif
@@ -807,7 +925,7 @@ struct kbase_device {
 	atomic_t serving_gpu_irq;
 	atomic_t serving_mmu_irq;
 	spinlock_t reg_op_lock;
-#endif				/* CONFIG_MALI_NO_MALI */
+#endif	/* CONFIG_MALI_NO_MALI */
 
 	struct kbase_pm_device_data pm;
 	struct kbasep_js_device_data js_data;
@@ -879,16 +997,13 @@ struct kbase_device {
 	s8 nr_user_address_spaces;			  /**< Number of address spaces available to user contexts */
 
 	/* Structure used for instrumentation and HW counters dumping */
-	struct {
+	struct kbase_hwcnt {
 		/* The lock should be used when accessing any of the following members */
 		spinlock_t lock;
 
 		struct kbase_context *kctx;
 		u64 addr;
 
-		struct kbase_context *suspended_kctx;
-		struct kbase_uk_hwcnt_setup suspended_state;
-
 		struct kbase_instr_backend backend;
 	} hwcnt;
 
@@ -904,30 +1019,6 @@ struct kbase_device {
 	struct kbase_trace            *trace_rbuf;
 #endif
 
-	/* This is used to override the current job scheduler values for
-	 * JS_SCHEDULING_PERIOD_NS
-	 * JS_SOFT_STOP_TICKS
-	 * JS_SOFT_STOP_TICKS_CL
-	 * JS_HARD_STOP_TICKS_SS
-	 * JS_HARD_STOP_TICKS_CL
-	 * JS_HARD_STOP_TICKS_DUMPING
-	 * JS_RESET_TICKS_SS
-	 * JS_RESET_TICKS_CL
-	 * JS_RESET_TICKS_DUMPING.
-	 *
-	 * These values are set via the js_timeouts sysfs file.
-	 */
-	u32 js_scheduling_period_ns;
-	int js_soft_stop_ticks;
-	int js_soft_stop_ticks_cl;
-	int js_hard_stop_ticks_ss;
-	int js_hard_stop_ticks_cl;
-	int js_hard_stop_ticks_dumping;
-	int js_reset_ticks_ss;
-	int js_reset_ticks_cl;
-	int js_reset_ticks_dumping;
-	bool js_timeouts_updated;
-
 	u32 reset_timeout_ms;
 
 	struct mutex cacheclean_lock;
@@ -945,8 +1036,12 @@ struct kbase_device {
 	unsigned long current_freq;
 	unsigned long current_voltage;
 #ifdef CONFIG_DEVFREQ_THERMAL
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)
+	struct devfreq_cooling_device *devfreq_cooling;
+#else
 	struct thermal_cooling_device *devfreq_cooling;
 #endif
+#endif
 #endif
 
 	struct kbase_ipa_context *ipa_ctx;
@@ -967,11 +1062,17 @@ struct kbase_device {
 	/* Root directory for per context entry */
 	struct dentry *debugfs_ctx_directory;
 
+#ifdef CONFIG_MALI_DEBUG
+	/* bit for each as, set if there is new data to report */
+	u64 debugfs_as_read_bitmap;
+#endif /* CONFIG_MALI_DEBUG */
+
 	/* failed job dump, used for separate debug process */
 	wait_queue_head_t job_fault_wq;
 	wait_queue_head_t job_fault_resume_wq;
 	struct workqueue_struct *job_fault_resume_workq;
 	struct list_head job_fault_event_list;
+	spinlock_t job_fault_event_lock;
 	struct kbase_context *kctx_fault;
 
 #if !MALI_CUSTOMER_RELEASE
@@ -1017,24 +1118,41 @@ struct kbase_device {
 
 
 	/* defaults for new context created for this device */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0))
+	bool infinite_cache_active_default;
+#else
 	u32 infinite_cache_active_default;
+#endif
 	size_t mem_pool_max_size_default;
 
 	/* system coherency mode  */
 	u32 system_coherency;
+	/* Flag to track when cci snoops have been enabled on the interface */
+	bool cci_snoop_enabled;
+
+	/* SMC function IDs to call into Trusted firmware to enable/disable
+	 * cache snooping. Value of 0 indicates that they are not used
+	 */
+	u32 snoop_enable_smc;
+	u32 snoop_disable_smc;
 
-	/* Secure operations */
-	struct kbase_secure_ops *secure_ops;
+	/* Protected operations */
+	struct kbase_protected_ops *protected_ops;
 
 	/*
-	 * true when GPU is put into secure mode
+	 * true when GPU is put into protected mode
 	 */
-	bool secure_mode;
+	bool protected_mode;
 
 	/*
-	 * true if secure mode is supported
+	 * true when GPU is transitioning into or out of protected mode
 	 */
-	bool secure_mode_support;
+	bool protected_mode_transition;
+
+	/*
+	 * true if protected mode is supported
+		dev_err(kbdev->dev, "JS devdata initialization failed\n");
+	bool protected_mode_support;
 
 
 #ifdef CONFIG_MALI_DEBUG
@@ -1050,47 +1168,27 @@ struct kbase_device {
 #endif
 	/* Boolean indicating if an IRQ flush during reset is in progress. */
 	bool irq_reset_flush;
-};
 
-/* JSCTX ringbuffer size must always be a power of 2 */
-#define JSCTX_RB_SIZE 256
-#define JSCTX_RB_MASK (JSCTX_RB_SIZE-1)
-
-/**
- * struct jsctx_rb_entry - Entry in &struct jsctx_rb ring buffer
- * @atom_id: Atom ID
- */
-struct jsctx_rb_entry {
-	u16 atom_id;
+	/* List of initialized subsystems; used during termination/error recovery */
+	u32 inited_subsys;
 };
 
 /**
- * struct jsctx_rb - JS context atom ring buffer
- * @entries:     Array of size %JSCTX_RB_SIZE which holds the &struct
- *               kbase_jd_atom pointers which make up the contents of the ring
- *               buffer.
- * @read_idx:    Index into @entries. Indicates the next entry in @entries to
- *               read, and is incremented when pulling an atom, and decremented
- *               when unpulling.
- *               HW access lock must be held when accessing.
- * @write_idx:   Index into @entries. Indicates the next entry to use when
- *               adding atoms into the ring buffer, and is incremented when
- *               adding a new atom.
- *               jctx->lock must be held when accessing.
- * @running_idx: Index into @entries. Indicates the last valid entry, and is
- *               incremented when remving atoms from the ring buffer.
- *               HW access lock must be held when accessing.
+ * struct jsctx_queue - JS context atom queue
+ * @runnable_tree: Root of RB-tree containing currently runnable atoms on this
+ *                 job slot.
+ * @x_dep_head:    Head item of the linked list of atoms blocked on cross-slot
+ *                 dependencies. Atoms on this list will be moved to the
+ *                 runnable_tree when the blocking atom completes.
  *
- * &struct jsctx_rb is a ring buffer of &struct kbase_jd_atom.
+ * runpool_irq.lock must be held when accessing this structure.
  */
-struct jsctx_rb {
-	struct jsctx_rb_entry entries[JSCTX_RB_SIZE];
-
-	u16 read_idx; /* HW access lock must be held when accessing */
-	u16 write_idx; /* jctx->lock must be held when accessing */
-	u16 running_idx; /* HW access lock must be held when accessing */
+struct jsctx_queue {
+	struct rb_root runnable_tree;
+	struct list_head x_dep_head;
 };
 
+
 #define KBASE_API_VERSION(major, minor) ((((major) & 0xFFF) << 20)  | \
 					 (((minor) & 0xFFF) << 8) | \
 					 ((0 & 0xFF) << 0))
@@ -1102,10 +1200,12 @@ struct kbase_context {
 	unsigned long api_version;
 	phys_addr_t pgd;
 	struct list_head event_list;
+	struct list_head event_coalesce_list;
 	struct mutex event_mutex;
 	atomic_t event_closed;
 	struct workqueue_struct *event_workq;
 	atomic_t event_count;
+	int event_coalesce_count;
 
 	bool is_compat;
 
@@ -1116,6 +1216,7 @@ struct kbase_context {
 
 	struct page *aliasing_sink_page;
 
+	struct mutex            mmu_lock;
 	struct mutex            reg_lock; /* To be converted to a rwlock? */
 	struct rb_root          reg_rbtree; /* Red-Black tree of GPU regions (live regions) */
 
@@ -1132,10 +1233,21 @@ struct kbase_context {
 
 	struct kbase_mem_pool mem_pool;
 
+	struct shrinker         reclaim;
+	struct list_head        evict_list;
+	struct mutex            evict_lock;
+
 	struct list_head waiting_soft_jobs;
+	spinlock_t waiting_soft_jobs_lock;
 #ifdef CONFIG_KDS
 	struct list_head waiting_kds_resource;
 #endif
+#ifdef CONFIG_MALI_DMA_FENCE
+	struct {
+		struct list_head waiting_resource;
+		struct workqueue_struct *wq;
+	} dma_fence;
+#endif /* CONFIG_MALI_DMA_FENCE */
 	/** This is effectively part of the Run Pool, because it only has a valid
 	 * setting (!=KBASEP_AS_NR_INVALID) whilst the context is scheduled in
 	 *
@@ -1157,6 +1269,8 @@ struct kbase_context {
 	 * All other flags must be added there */
 	spinlock_t         mm_update_lock;
 	struct mm_struct *process_mm;
+	/* End of the SAME_VA zone */
+	u64 same_va_end;
 
 #ifdef CONFIG_MALI_TRACE_TIMELINE
 	struct kbase_trace_kctx_timeline timeline;
@@ -1182,7 +1296,7 @@ struct kbase_context {
 
 #endif /* CONFIG_DEBUG_FS */
 
-	struct jsctx_rb jsctx_rb
+	struct jsctx_queue jsctx_queue
 		[KBASE_JS_ATOM_SCHED_PRIO_COUNT][BASE_JM_MAX_NR_SLOTS];
 
 	/* Number of atoms currently pulled from this context */
@@ -1193,13 +1307,14 @@ struct kbase_context {
 	bool pulled;
 	/* true if infinite cache is to be enabled for new allocations. Existing
 	 * allocations will not change. bool stored as a u32 per Linux API */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0))
+	bool infinite_cache_active;
+#else
 	u32 infinite_cache_active;
+#endif
 	/* Bitmask of slots that can be pulled from */
 	u32 slots_pullable;
 
-	/* true if address space assignment is pending */
-	bool as_pending;
-
 	/* Backend specific data */
 	struct kbase_context_backend backend;
 
@@ -1220,6 +1335,52 @@ struct kbase_context {
 
 	/* true if context is counted in kbdev->js_data.nr_contexts_runnable */
 	bool ctx_runnable_ref;
+
+	/* Waiting soft-jobs will fail when this timer expires */
+	struct timer_list soft_job_timeout;
+
+	/* JIT allocation management */
+	struct kbase_va_region *jit_alloc[256];
+	struct list_head jit_active_head;
+	struct list_head jit_pool_head;
+	struct list_head jit_destroy_head;
+	struct mutex jit_lock;
+	struct work_struct jit_work;
+
+	/* External sticky resource management */
+	struct list_head ext_res_meta_head;
+
+	/* Used to record that a drain was requested from atomic context */
+	atomic_t drain_pending;
+
+	/* Current age count, used to determine age for newly submitted atoms */
+	u32 age_count;
+};
+
+/**
+ * struct kbase_ctx_ext_res_meta - Structure which binds an external resource
+ *                                 to a @kbase_context.
+ * @ext_res_node:                  List head for adding the metadata to a
+ *                                 @kbase_context.
+ * @alloc:                         The physical memory allocation structure
+ *                                 which is mapped.
+ * @gpu_addr:                      The GPU virtual address the resource is
+ *                                 mapped to.
+ *
+ * External resources can be mapped into multiple contexts as well as the same
+ * context multiple times.
+ * As kbase_va_region itself isn't refcounted we can't attach our extra
+ * information to it as it could be removed under our feet leaving external
+ * resources pinned.
+ * This metadata structure binds a single external resource to a single
+ * context, ensuring that per context mapping is tracked separately so it can
+ * be overridden when needed, and abuse by the application (freeing the resource
+ * multiple times) doesn't affect the refcount of the physical allocation.
+ */
+struct kbase_ctx_ext_res_meta {
+	struct list_head ext_res_node;
+	struct kbase_mem_phy_alloc *alloc;
+	u64 gpu_addr;
 };
 
 enum kbase_reg_access_type {
@@ -1249,7 +1410,7 @@ static inline bool kbase_device_is_cpu_coherent(struct kbase_device *kbdev)
 }
 
 /* Conversion helpers for setting up high resolution timers */
-#define HR_TIMER_DELAY_MSEC(x) (ns_to_ktime((x)*1000000U))
+#define HR_TIMER_DELAY_MSEC(x) (ns_to_ktime(((u64)(x))*1000000U))
 #define HR_TIMER_DELAY_NSEC(x) (ns_to_ktime(x))
 
 /* Maximum number of loops polling the GPU for a cache flush before we assume it must have completed */
@@ -1260,4 +1421,29 @@ static inline bool kbase_device_is_cpu_coherent(struct kbase_device *kbdev)
 /* Maximum number of times a job can be replayed */
 #define BASEP_JD_REPLAY_LIMIT 15
 
+/* JobDescriptorHeader - taken from the architecture specifications, the layout
+ * is currently identical for all GPU archs. */
+struct job_descriptor_header {
+	u32 exception_status;
+	u32 first_incomplete_task;
+	u64 fault_pointer;
+	u8 job_descriptor_size : 1;
+	u8 job_type : 7;
+	u8 job_barrier : 1;
+	u8 _reserved_01 : 1;
+	u8 _reserved_1 : 1;
+	u8 _reserved_02 : 1;
+	u8 _reserved_03 : 1;
+	u8 _reserved_2 : 1;
+	u8 _reserved_04 : 1;
+	u8 _reserved_05 : 1;
+	u16 job_index;
+	u16 job_dependency_index_1;
+	u16 job_dependency_index_2;
+	union {
+		u64 _64;
+		u32 _32;
+	} next_job;
+};
+
 #endif				/* _KBASE_DEFS_H_ */
diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_device.c b/drivers/gpu/arm/midgard_for_linux/mali_kbase_device.c
index c22e099db6f9..62ab0caf9858 100644
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_device.c
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_device.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -30,6 +30,7 @@
 
 #include <mali_kbase.h>
 #include <mali_kbase_defs.h>
+#include <mali_kbase_hwaccess_instr.h>
 #include <mali_kbase_hw.h>
 #include <mali_kbase_config_defaults.h>
 
@@ -145,8 +146,32 @@ static void kbase_device_all_as_term(struct kbase_device *kbdev)
 int kbase_device_init(struct kbase_device * const kbdev)
 {
 	int i, err;
+#ifdef CONFIG_ARM64
+	struct device_node *np = NULL;
+#endif /* CONFIG_ARM64 */
 
 	spin_lock_init(&kbdev->mmu_mask_change);
+#ifdef CONFIG_ARM64
+	kbdev->cci_snoop_enabled = false;
+	np = kbdev->dev->of_node;
+	if (np != NULL) {
+		if (of_property_read_u32(np, "snoop_enable_smc",
+					&kbdev->snoop_enable_smc))
+			kbdev->snoop_enable_smc = 0;
+		if (of_property_read_u32(np, "snoop_disable_smc",
+					&kbdev->snoop_disable_smc))
+			kbdev->snoop_disable_smc = 0;
+		/* Either both or none of the calls should be provided. */
+		if (!((kbdev->snoop_disable_smc == 0
+			&& kbdev->snoop_enable_smc == 0)
+			|| (kbdev->snoop_disable_smc != 0
+			&& kbdev->snoop_enable_smc != 0))) {
+			WARN_ON(1);
+			err = -EINVAL;
+			goto fail;
+		}
+	}
+#endif /* CONFIG_ARM64 */
 	/* Get the list of workarounds for issues on the current HW
 	 * (identified by the GPU_ID register)
 	 */
@@ -220,7 +245,11 @@ int kbase_device_init(struct kbase_device * const kbdev)
 
 	kbdev->reset_timeout_ms = DEFAULT_RESET_TIMEOUT_MS;
 
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+	kbdev->mmu_mode = kbase_mmu_mode_get_aarch64();
+#else
 	kbdev->mmu_mode = kbase_mmu_mode_get_lpae();
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
 
 #ifdef CONFIG_MALI_DEBUG
 	init_waitqueue_head(&kbdev->driver_inactive_wait);
diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_event.c b/drivers/gpu/arm/midgard_for_linux/mali_kbase_event.c
index 0d0c5258aaa4..bf8c304610eb 100644
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_event.c
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_event.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -19,10 +19,7 @@
 
 #include <mali_kbase.h>
 #include <mali_kbase_debug.h>
-
-#if defined(CONFIG_MALI_MIPE_ENABLED)
 #include <mali_kbase_tlstream.h>
-#endif
 
 static struct base_jd_udata kbase_event_process(struct kbase_context *kctx, struct kbase_jd_atom *katom)
 {
@@ -38,10 +35,8 @@ static struct base_jd_udata kbase_event_process(struct kbase_context *kctx, stru
 
 	KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, atomic_sub_return(1, &kctx->timeline.jd_atoms_in_flight));
 
-#if defined(CONFIG_MALI_MIPE_ENABLED)
 	kbase_tlstream_tl_nret_atom_ctx(katom, kctx);
 	kbase_tlstream_tl_del_atom(katom);
-#endif
 
 	katom->status = KBASE_JD_ATOM_STATE_UNUSED;
 
@@ -147,6 +142,29 @@ static void kbase_event_process_noreport(struct kbase_context *kctx,
 	}
 }
 
+/**
+ * kbase_event_coalesce - Move pending events to the main event list
+ * @kctx:  Context pointer
+ *
+ * kctx->event_list and kctx->event_coalesce_count must be protected
+ * by a lock unless this is the last thread using them
+ * (and we're about to terminate the lock).
+ *
+ * Return: The number of pending events moved to the main event list
+ */
+static int kbase_event_coalesce(struct kbase_context *kctx)
+{
+	const int event_count = kctx->event_coalesce_count;
+
+	/* Join the list of pending events onto the tail of the main list
+	   and reset it */
+	list_splice_tail_init(&kctx->event_coalesce_list, &kctx->event_list);
+	kctx->event_coalesce_count = 0;
+
+	/* Return the number of events moved */
+	return event_count;
+}
+
 void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *atom)
 {
 	if (atom->core_req & BASE_JD_REQ_EVENT_ONLY_ON_FAILURE) {
@@ -163,12 +181,24 @@ void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *atom)
 		return;
 	}
 
-	mutex_lock(&ctx->event_mutex);
-	atomic_inc(&ctx->event_count);
-	list_add_tail(&atom->dep_item[0], &ctx->event_list);
-	mutex_unlock(&ctx->event_mutex);
+	if (atom->core_req & BASE_JD_REQ_EVENT_COALESCE) {
+		/* Don't report the event until other event(s) have completed */
+		mutex_lock(&ctx->event_mutex);
+		list_add_tail(&atom->dep_item[0], &ctx->event_coalesce_list);
+		++ctx->event_coalesce_count;
+		mutex_unlock(&ctx->event_mutex);
+	} else {
+		/* Report the event and any pending events now */
+		int event_count = 1;
 
-	kbase_event_wakeup(ctx);
+		mutex_lock(&ctx->event_mutex);
+		event_count += kbase_event_coalesce(ctx);
+		list_add_tail(&atom->dep_item[0], &ctx->event_list);
+		atomic_add(event_count, &ctx->event_count);
+		mutex_unlock(&ctx->event_mutex);
+
+		kbase_event_wakeup(ctx);
+	}
 }
 KBASE_EXPORT_TEST_API(kbase_event_post);
 
@@ -185,8 +215,10 @@ int kbase_event_init(struct kbase_context *kctx)
 	KBASE_DEBUG_ASSERT(kctx);
 
 	INIT_LIST_HEAD(&kctx->event_list);
+	INIT_LIST_HEAD(&kctx->event_coalesce_list);
 	mutex_init(&kctx->event_mutex);
 	atomic_set(&kctx->event_count, 0);
+	kctx->event_coalesce_count = 0;
 	atomic_set(&kctx->event_closed, false);
 	kctx->event_workq = alloc_workqueue("kbase_event", WQ_MEM_RECLAIM, 1);
 
@@ -200,6 +232,8 @@ KBASE_EXPORT_TEST_API(kbase_event_init);
 
 void kbase_event_cleanup(struct kbase_context *kctx)
 {
+	int event_count;
+
 	KBASE_DEBUG_ASSERT(kctx);
 	KBASE_DEBUG_ASSERT(kctx->event_workq);
 
@@ -212,6 +246,9 @@ void kbase_event_cleanup(struct kbase_context *kctx)
 	 * Note: use of kctx->event_list without a lock is safe because this must be the last
 	 * thread using it (because we're about to terminate the lock)
 	 */
+	event_count = kbase_event_coalesce(kctx);
+	atomic_add(event_count, &kctx->event_count);
+
 	while (!list_empty(&kctx->event_list)) {
 		struct base_jd_event_v2 event;
 
diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_gator_api.c b/drivers/gpu/arm/midgard_for_linux/mali_kbase_gator_api.c
index a2174b24ac3c..4af3e4815e95 100644
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_gator_api.c
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_gator_api.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -20,7 +20,6 @@
 #include "mali_kbase_mem_linux.h"
 #include "mali_kbase_gator_api.h"
 #include "mali_kbase_gator_hwcnt_names.h"
-#include "mali_kbase_instr.h"
 
 #define MALI_MAX_CORES_PER_GROUP		4
 #define MALI_MAX_NUM_BLOCKS_PER_GROUP	8
@@ -28,18 +27,23 @@
 #define MALI_BYTES_PER_COUNTER			4
 
 struct kbase_gator_hwcnt_handles {
-	struct kbase_device  *kbdev;
-	struct kbase_context *kctx;
-	u64 hwcnt_gpu_va;
-	void *hwcnt_cpu_va;
-	struct kbase_vmap_struct hwcnt_map;
+	struct kbase_device *kbdev;
+	struct kbase_vinstr_client *vinstr_cli;
+	void *vinstr_buffer;
+	struct work_struct dump_work;
+	int dump_complete;
+	spinlock_t dump_lock;
 };
 
+static void dump_worker(struct work_struct *work);
+
 const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_counters)
 {
-	uint32_t gpu_id;
 	const char * const *hardware_counters;
 	struct kbase_device *kbdev;
+	uint32_t gpu_id;
+	uint32_t product_id;
+	uint32_t count;
 
 	if (!total_counters)
 		return NULL;
@@ -50,58 +54,78 @@ const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_counters)
 		return NULL;
 
 	gpu_id = kbdev->gpu_props.props.core_props.product_id;
-
-	switch (gpu_id) {
-	/* If we are using a Mali-T60x device */
-	case GPU_ID_PI_T60X:
-		hardware_counters = hardware_counters_mali_t60x;
-		*total_counters = ARRAY_SIZE(hardware_counters_mali_t60x);
-		break;
-	/* If we are using a Mali-T62x device */
-	case GPU_ID_PI_T62X:
-		hardware_counters = hardware_counters_mali_t62x;
-		*total_counters = ARRAY_SIZE(hardware_counters_mali_t62x);
-		break;
-	/* If we are using a Mali-T72x device */
-	case GPU_ID_PI_T72X:
-		hardware_counters = hardware_counters_mali_t72x;
-		*total_counters = ARRAY_SIZE(hardware_counters_mali_t72x);
-		break;
-	/* If we are using a Mali-T76x device */
-	case GPU_ID_PI_T76X:
-		hardware_counters = hardware_counters_mali_t76x;
-		*total_counters = ARRAY_SIZE(hardware_counters_mali_t76x);
-		break;
-	/* If we are using a Mali-T82x device */
-	case GPU_ID_PI_T82X:
-		hardware_counters = hardware_counters_mali_t82x;
-		*total_counters = ARRAY_SIZE(hardware_counters_mali_t82x);
-		break;
-	/* If we are using a Mali-T83x device */
-	case GPU_ID_PI_T83X:
-		hardware_counters = hardware_counters_mali_t83x;
-		*total_counters = ARRAY_SIZE(hardware_counters_mali_t83x);
-		break;
-	/* If we are using a Mali-T86x device */
-	case GPU_ID_PI_T86X:
-		hardware_counters = hardware_counters_mali_t86x;
-		*total_counters = ARRAY_SIZE(hardware_counters_mali_t86x);
-		break;
-	/* If we are using a Mali-T88x device */
-	case GPU_ID_PI_TFRX:
-		hardware_counters = hardware_counters_mali_t88x;
-		*total_counters = ARRAY_SIZE(hardware_counters_mali_t88x);
-		 break;
-	default:
-		hardware_counters = NULL;
-		*total_counters = 0;
-		dev_err(kbdev->dev, "Unrecognized gpu ID: %u\n", gpu_id);
-		break;
+	product_id = gpu_id & GPU_ID_VERSION_PRODUCT_ID;
+	product_id >>= GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+
+	if (GPU_ID_IS_NEW_FORMAT(product_id)) {
+		switch (gpu_id & GPU_ID2_PRODUCT_MODEL) {
+		case GPU_ID2_PRODUCT_TMIX:
+			hardware_counters = hardware_counters_mali_tMIx;
+			count = ARRAY_SIZE(hardware_counters_mali_tMIx);
+			break;
+		default:
+			hardware_counters = NULL;
+			count = 0;
+			dev_err(kbdev->dev, "Unrecognized gpu ID: %u\n",
+				gpu_id);
+			break;
+		}
+	} else {
+		switch (gpu_id) {
+			/* If we are using a Mali-T60x device */
+		case GPU_ID_PI_T60X:
+			hardware_counters = hardware_counters_mali_t60x;
+			count = ARRAY_SIZE(hardware_counters_mali_t60x);
+			break;
+			/* If we are using a Mali-T62x device */
+		case GPU_ID_PI_T62X:
+			hardware_counters = hardware_counters_mali_t62x;
+			count = ARRAY_SIZE(hardware_counters_mali_t62x);
+			break;
+			/* If we are using a Mali-T72x device */
+		case GPU_ID_PI_T72X:
+			hardware_counters = hardware_counters_mali_t72x;
+			count = ARRAY_SIZE(hardware_counters_mali_t72x);
+			break;
+			/* If we are using a Mali-T76x device */
+		case GPU_ID_PI_T76X:
+			hardware_counters = hardware_counters_mali_t76x;
+			count = ARRAY_SIZE(hardware_counters_mali_t76x);
+			break;
+			/* If we are using a Mali-T82x device */
+		case GPU_ID_PI_T82X:
+			hardware_counters = hardware_counters_mali_t82x;
+			count = ARRAY_SIZE(hardware_counters_mali_t82x);
+			break;
+			/* If we are using a Mali-T83x device */
+		case GPU_ID_PI_T83X:
+			hardware_counters = hardware_counters_mali_t83x;
+			count = ARRAY_SIZE(hardware_counters_mali_t83x);
+			break;
+			/* If we are using a Mali-T86x device */
+		case GPU_ID_PI_T86X:
+			hardware_counters = hardware_counters_mali_t86x;
+			count = ARRAY_SIZE(hardware_counters_mali_t86x);
+			break;
+			/* If we are using a Mali-T88x device */
+		case GPU_ID_PI_TFRX:
+			hardware_counters = hardware_counters_mali_t88x;
+			count = ARRAY_SIZE(hardware_counters_mali_t88x);
+			break;
+		default:
+			hardware_counters = NULL;
+			count = 0;
+			dev_err(kbdev->dev, "Unrecognized gpu ID: %u\n",
+				gpu_id);
+			break;
+		}
 	}
 
 	/* Release the kbdev reference. */
 	kbase_release_device(kbdev);
 
+	*total_counters = count;
+
 	/* If we return a string array take a reference on the module (or fail). */
 	if (hardware_counters && !try_module_get(THIS_MODULE))
 		return NULL;
@@ -120,13 +144,8 @@ KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_term_names);
 struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcnt_info *in_out_info)
 {
 	struct kbase_gator_hwcnt_handles *hand;
-	struct kbase_uk_hwcnt_setup setup;
-	int err;
+	struct kbase_uk_hwcnt_reader_setup setup;
 	uint32_t dump_size = 0, i = 0;
-	struct kbase_va_region *reg;
-	u64 flags;
-	u64 nr_pages;
-	u16 va_alignment = 0;
 
 	if (!in_out_info)
 		return NULL;
@@ -135,15 +154,19 @@ struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcn
 	if (!hand)
 		return NULL;
 
+	INIT_WORK(&hand->dump_work, dump_worker);
+	spin_lock_init(&hand->dump_lock);
+
 	/* Get the first device */
 	hand->kbdev = kbase_find_device(-1);
 	if (!hand->kbdev)
 		goto free_hand;
 
-	/* Create a kbase_context */
-	hand->kctx = kbase_create_context(hand->kbdev, true);
-	if (!hand->kctx)
+	dump_size = kbase_vinstr_dump_size(hand->kbdev);
+	hand->vinstr_buffer = kzalloc(dump_size, GFP_KERNEL);
+	if (!hand->vinstr_buffer)
 		goto release_device;
+	in_out_info->kernel_dump_buffer = hand->vinstr_buffer;
 
 	in_out_info->nr_cores = hand->kbdev->gpu_props.num_cores;
 	in_out_info->nr_core_groups = hand->kbdev->gpu_props.num_core_groups;
@@ -160,7 +183,7 @@ struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcn
 			in_out_info->nr_core_groups, GFP_KERNEL);
 
 		if (!in_out_info->hwc_layout)
-			goto destroy_context;
+			goto free_vinstr_buffer;
 
 		dump_size = in_out_info->nr_core_groups *
 			MALI_MAX_NUM_BLOCKS_PER_GROUP *
@@ -189,23 +212,23 @@ struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcn
 		}
 	/* If we are using any other device */
 	} else {
-		uint32_t nr_l2, nr_sc, j;
+		uint32_t nr_l2, nr_sc_bits, j;
 		uint64_t core_mask;
 
 		nr_l2 = hand->kbdev->gpu_props.props.l2_props.num_l2_slices;
 
 		core_mask = hand->kbdev->gpu_props.props.coherency_info.group[0].core_mask;
 
-		nr_sc = hand->kbdev->gpu_props.props.coherency_info.group[0].num_cores;
+		nr_sc_bits = fls64(core_mask);
 
 		/* The job manager and tiler sets of counters
 		 * are always present */
-		in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) * (2 + nr_sc + nr_l2), GFP_KERNEL);
+		in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) * (2 + nr_sc_bits + nr_l2), GFP_KERNEL);
 
 		if (!in_out_info->hwc_layout)
-			goto destroy_context;
+			goto free_vinstr_buffer;
 
-		dump_size = (2 + nr_sc + nr_l2) * MALI_COUNTERS_PER_BLOCK * MALI_BYTES_PER_COUNTER;
+		dump_size = (2 + nr_sc_bits + nr_l2) * MALI_COUNTERS_PER_BLOCK * MALI_BYTES_PER_COUNTER;
 
 		in_out_info->hwc_layout[i++] = JM_BLOCK;
 		in_out_info->hwc_layout[i++] = TILER_BLOCK;
@@ -223,58 +246,32 @@ struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcn
 	}
 
 	in_out_info->nr_hwc_blocks = i;
-
 	in_out_info->size = dump_size;
 
-	flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_CPU_WR | BASE_MEM_PROT_GPU_WR;
-	nr_pages = PFN_UP(dump_size);
-	reg = kbase_mem_alloc(hand->kctx, nr_pages, nr_pages, 0,
-			&flags, &hand->hwcnt_gpu_va, &va_alignment);
-	if (!reg)
-		goto free_layout;
-
-	hand->hwcnt_cpu_va = kbase_vmap(hand->kctx, hand->hwcnt_gpu_va,
-			dump_size, &hand->hwcnt_map);
-
-	if (!hand->hwcnt_cpu_va)
-		goto free_buffer;
-
-	in_out_info->kernel_dump_buffer = hand->hwcnt_cpu_va;
-	memset(in_out_info->kernel_dump_buffer, 0, nr_pages * PAGE_SIZE);
-
-	/*setup.dump_buffer = (uintptr_t)in_out_info->kernel_dump_buffer;*/
-	setup.dump_buffer = hand->hwcnt_gpu_va;
 	setup.jm_bm = in_out_info->bitmask[0];
 	setup.tiler_bm = in_out_info->bitmask[1];
 	setup.shader_bm = in_out_info->bitmask[2];
 	setup.mmu_l2_bm = in_out_info->bitmask[3];
-
-	err = kbase_instr_hwcnt_enable(hand->kctx, &setup);
-	if (err)
-		goto free_unmap;
-
-	kbase_instr_hwcnt_clear(hand->kctx);
+	hand->vinstr_cli = kbase_vinstr_hwcnt_kernel_setup(hand->kbdev->vinstr_ctx,
+			&setup, hand->vinstr_buffer);
+	if (!hand->vinstr_cli) {
+		dev_err(hand->kbdev->dev, "Failed to register gator with vinstr core");
+		goto free_layout;
+	}
 
 	return hand;
 
-free_unmap:
-	kbase_vunmap(hand->kctx, &hand->hwcnt_map);
-
-free_buffer:
-	kbase_mem_free(hand->kctx, hand->hwcnt_gpu_va);
-
 free_layout:
 	kfree(in_out_info->hwc_layout);
 
-destroy_context:
-	kbase_destroy_context(hand->kctx);
+free_vinstr_buffer:
+	kfree(hand->vinstr_buffer);
 
 release_device:
 	kbase_release_device(hand->kbdev);
 
 free_hand:
 	kfree(hand);
-
 	return NULL;
 }
 KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_init);
@@ -285,27 +282,39 @@ void kbase_gator_hwcnt_term(struct kbase_gator_hwcnt_info *in_out_info, struct k
 		kfree(in_out_info->hwc_layout);
 
 	if (opaque_handles) {
-		kbase_instr_hwcnt_disable(opaque_handles->kctx);
-		kbase_vunmap(opaque_handles->kctx, &opaque_handles->hwcnt_map);
-		kbase_mem_free(opaque_handles->kctx, opaque_handles->hwcnt_gpu_va);
-		kbase_destroy_context(opaque_handles->kctx);
+		cancel_work_sync(&opaque_handles->dump_work);
+		kbase_vinstr_detach_client(opaque_handles->vinstr_cli);
+		kfree(opaque_handles->vinstr_buffer);
 		kbase_release_device(opaque_handles->kbdev);
 		kfree(opaque_handles);
 	}
 }
 KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_term);
 
+static void dump_worker(struct work_struct *work)
+{
+	struct kbase_gator_hwcnt_handles *hand;
+
+	hand = container_of(work, struct kbase_gator_hwcnt_handles, dump_work);
+	if (!kbase_vinstr_hwc_dump(hand->vinstr_cli,
+			BASE_HWCNT_READER_EVENT_MANUAL)) {
+		spin_lock_bh(&hand->dump_lock);
+		hand->dump_complete = 1;
+		spin_unlock_bh(&hand->dump_lock);
+	} else {
+		schedule_work(&hand->dump_work);
+	}
+}
+
 uint32_t kbase_gator_instr_hwcnt_dump_complete(
 		struct kbase_gator_hwcnt_handles *opaque_handles,
 		uint32_t * const success)
 {
-	bool ret_res, success_res;
 
 	if (opaque_handles && success) {
-		ret_res = kbase_instr_hwcnt_dump_complete(opaque_handles->kctx,
-				&success_res);
-		*success = (uint32_t)success_res;
-		return (uint32_t)(ret_res != 0);
+		*success = opaque_handles->dump_complete;
+		opaque_handles->dump_complete = 0;
+		return *success;
 	}
 	return 0;
 }
@@ -314,9 +323,7 @@ KBASE_EXPORT_SYMBOL(kbase_gator_instr_hwcnt_dump_complete);
 uint32_t kbase_gator_instr_hwcnt_dump_irq(struct kbase_gator_hwcnt_handles *opaque_handles)
 {
 	if (opaque_handles)
-		return (kbase_instr_hwcnt_request_dump(
-				opaque_handles->kctx) == 0);
-
+		schedule_work(&opaque_handles->dump_work);
 	return 0;
 }
 KBASE_EXPORT_SYMBOL(kbase_gator_instr_hwcnt_dump_irq);
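
The gator path above no longer drives the instrumentation registers through its own kbase_context: kbase_gator_instr_hwcnt_dump_irq() merely schedules dump_work, dump_worker() asks the vinstr core for a manual dump (re-queueing itself if vinstr is busy), and kbase_gator_instr_hwcnt_dump_complete() polls the dump_complete flag under dump_lock. A minimal userspace analogue of that kick-and-poll flow, with a POSIX thread standing in for the kernel workqueue; every name, the sleep, and the single-shot worker are illustrative only, not part of the driver.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

struct demo_dump_handle {
	pthread_mutex_t lock;		/* plays the role of dump_lock */
	bool complete;			/* plays the role of dump_complete */
};

static void *demo_dump_worker(void *arg)
{
	struct demo_dump_handle *h = arg;

	usleep(1000);			/* stands in for kbase_vinstr_hwc_dump() */
	pthread_mutex_lock(&h->lock);
	h->complete = true;
	pthread_mutex_unlock(&h->lock);
	return NULL;
}

int main(void)
{
	struct demo_dump_handle h = {
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.complete = false,
	};
	pthread_t worker;
	bool done = false;

	/* "dump_irq": kick the deferred work and return immediately */
	pthread_create(&worker, NULL, demo_dump_worker, &h);

	/* "dump_complete": poll the flag until the worker has finished */
	while (!done) {
		pthread_mutex_lock(&h.lock);
		done = h.complete;
		pthread_mutex_unlock(&h.lock);
	}
	pthread_join(worker, NULL);
	printf("dump complete\n");
	return 0;
}

Build with gcc -pthread; the busy-wait loop mirrors how gator repeatedly calls the dump_complete entry point rather than blocking on the result.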
diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_gator_hwcnt_names.h b/drivers/gpu/arm/midgard_for_linux/mali_kbase_gator_hwcnt_names.h
index eb76f01b0fda..c247dd698e19 100644
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_gator_hwcnt_names.h
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_gator_hwcnt_names.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -2156,4 +2156,8 @@ static const char * const hardware_counters_mali_t88x[] = {
 	"T88x_L2_REPLAY_FULL"
 };
 
+#include "mali_kbase_gator_hwcnt_names_tmix.h"
+
+
+
 #endif
diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_gator_hwcnt_names_thex.h b/drivers/gpu/arm/midgard_for_linux/mali_kbase_gator_hwcnt_names_thex.h
new file mode 100644
index 000000000000..bcceef4fc9bc
--- /dev/null
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_gator_hwcnt_names_thex.h
@@ -0,0 +1,291 @@
+/*
+ *
+ * (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * This header was autogenerated, it should not be edited.
+ */
+
+#ifndef _KBASE_GATOR_HWCNT_NAMES_THEX_H_
+#define _KBASE_GATOR_HWCNT_NAMES_THEX_H_
+
+static const char * const hardware_counters_mali_tHEx[] = {
+	/* Performance counters for the Job Manager */
+	"",
+	"",
+	"",
+	"",
+	"THEx_MESSAGES_SENT",
+	"THEx_MESSAGES_RECEIVED",
+	"THEx_GPU_ACTIVE",
+	"THEx_IRQ_ACTIVE",
+	"THEx_JS0_JOBS",
+	"THEx_JS0_TASKS",
+	"THEx_JS0_ACTIVE",
+	"",
+	"THEx_JS0_WAIT_READ",
+	"THEx_JS0_WAIT_ISSUE",
+	"THEx_JS0_WAIT_DEPEND",
+	"THEx_JS0_WAIT_FINISH",
+	"THEx_JS1_JOBS",
+	"THEx_JS1_TASKS",
+	"THEx_JS1_ACTIVE",
+	"",
+	"THEx_JS1_WAIT_READ",
+	"THEx_JS1_WAIT_ISSUE",
+	"THEx_JS1_WAIT_DEPEND",
+	"THEx_JS1_WAIT_FINISH",
+	"THEx_JS2_JOBS",
+	"THEx_JS2_TASKS",
+	"THEx_JS2_ACTIVE",
+	"",
+	"THEx_JS2_WAIT_READ",
+	"THEx_JS2_WAIT_ISSUE",
+	"THEx_JS2_WAIT_DEPEND",
+	"THEx_JS2_WAIT_FINISH",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+
+	/* Performance counters for the Tiler */
+	"",
+	"",
+	"",
+	"",
+	"THEx_TILER_ACTIVE",
+	"THEx_JOBS_PROCESSED",
+	"THEx_TRIANGLES",
+	"THEx_LINES",
+	"THEx_POINTS",
+	"THEx_FRONT_FACING",
+	"THEx_BACK_FACING",
+	"THEx_PRIM_VISIBLE",
+	"THEx_PRIM_CULLED",
+	"THEx_PRIM_CLIPPED",
+	"THEx_PRIM_SAT_CULLED",
+	"",
+	"",
+	"THEx_BUS_READ",
+	"",
+	"THEx_BUS_WRITE",
+	"THEx_LOADING_DESC",
+	"THEx_IDVS_POS_SHAD_REQ",
+	"THEx_IDVS_POS_SHAD_WAIT",
+	"THEx_IDVS_POS_SHAD_STALL",
+	"THEx_IDVS_POS_FIFO_FULL",
+	"THEx_PREFETCH_STALL",
+	"THEx_VCACHE_HIT",
+	"THEx_VCACHE_MISS",
+	"THEx_VCACHE_LINE_WAIT",
+	"THEx_VFETCH_POS_READ_WAIT",
+	"THEx_VFETCH_VERTEX_WAIT",
+	"THEx_VFETCH_STALL",
+	"THEx_PRIMASSY_STALL",
+	"THEx_BBOX_GEN_STALL",
+	"THEx_IDVS_VBU_HIT",
+	"THEx_IDVS_VBU_MISS",
+	"THEx_IDVS_VBU_LINE_DEALLOCATE",
+	"THEx_IDVS_VAR_SHAD_REQ",
+	"THEx_IDVS_VAR_SHAD_STALL",
+	"THEx_BINNER_STALL",
+	"THEx_ITER_STALL",
+	"THEx_COMPRESS_MISS",
+	"THEx_COMPRESS_STALL",
+	"THEx_PCACHE_HIT",
+	"THEx_PCACHE_MISS",
+	"THEx_PCACHE_MISS_STALL",
+	"THEx_PCACHE_EVICT_STALL",
+	"THEx_PMGR_PTR_WR_STALL",
+	"THEx_PMGR_PTR_RD_STALL",
+	"THEx_PMGR_CMD_WR_STALL",
+	"THEx_WRBUF_ACTIVE",
+	"THEx_WRBUF_HIT",
+	"THEx_WRBUF_MISS",
+	"THEx_WRBUF_NO_FREE_LINE_STALL",
+	"THEx_WRBUF_NO_AXI_ID_STALL",
+	"THEx_WRBUF_AXI_STALL",
+	"",
+	"",
+	"",
+	"THEx_UTLB_TRANS",
+	"THEx_UTLB_TRANS_HIT",
+	"THEx_UTLB_TRANS_STALL",
+	"THEx_UTLB_TRANS_MISS_DELAY",
+	"THEx_UTLB_MMU_REQ",
+
+	/* Performance counters for the Shader Core */
+	"",
+	"",
+	"",
+	"",
+	"THEx_FRAG_ACTIVE",
+	"THEx_FRAG_PRIMITIVES",
+	"THEx_FRAG_PRIM_RAST",
+	"THEx_FRAG_FPK_ACTIVE",
+	"THEx_FRAG_STARVING",
+	"THEx_FRAG_WARPS",
+	"THEx_FRAG_PARTIAL_WARPS",
+	"THEx_FRAG_QUADS_RAST",
+	"THEx_FRAG_QUADS_EZS_TEST",
+	"THEx_FRAG_QUADS_EZS_UPDATE",
+	"THEx_FRAG_QUADS_EZS_KILL",
+	"THEx_FRAG_LZS_TEST",
+	"THEx_FRAG_LZS_KILL",
+	"",
+	"THEx_FRAG_PTILES",
+	"THEx_FRAG_TRANS_ELIM",
+	"THEx_QUAD_FPK_KILLER",
+	"",
+	"THEx_COMPUTE_ACTIVE",
+	"THEx_COMPUTE_TASKS",
+	"THEx_COMPUTE_WARPS",
+	"THEx_COMPUTE_STARVING",
+	"THEx_EXEC_CORE_ACTIVE",
+	"THEx_EXEC_ACTIVE",
+	"THEx_EXEC_INSTR_COUNT",
+	"THEx_EXEC_INSTR_DIVERGED",
+	"THEx_EXEC_INSTR_STARVING",
+	"THEx_ARITH_INSTR_SINGLE_FMA",
+	"THEx_ARITH_INSTR_DOUBLE",
+	"THEx_ARITH_INSTR_MSG",
+	"THEx_ARITH_INSTR_MSG_ONLY",
+	"THEx_TEX_INSTR",
+	"THEx_TEX_INSTR_MIPMAP",
+	"THEx_TEX_INSTR_COMPRESSED",
+	"THEx_TEX_INSTR_3D",
+	"THEx_TEX_INSTR_TRILINEAR",
+	"THEx_TEX_COORD_ISSUE",
+	"THEx_TEX_COORD_STALL",
+	"THEx_TEX_STARVE_CACHE",
+	"THEx_TEX_STARVE_FILTER",
+	"THEx_LS_MEM_READ_FULL",
+	"THEx_LS_MEM_READ_SHORT",
+	"THEx_LS_MEM_WRITE_FULL",
+	"THEx_LS_MEM_WRITE_SHORT",
+	"THEx_LS_MEM_ATOMIC",
+	"THEx_VARY_INSTR",
+	"THEx_VARY_SLOT_32",
+	"THEx_VARY_SLOT_16",
+	"THEx_ATTR_INSTR",
+	"THEx_ARITH_INSTR_FP_MUL",
+	"THEx_BEATS_RD_FTC",
+	"THEx_BEATS_RD_FTC_EXT",
+	"THEx_BEATS_RD_LSC",
+	"THEx_BEATS_RD_LSC_EXT",
+	"THEx_BEATS_RD_TEX",
+	"THEx_BEATS_RD_TEX_EXT",
+	"THEx_BEATS_RD_OTHER",
+	"THEx_BEATS_WR_LSC",
+	"THEx_BEATS_WR_TIB",
+	"",
+
+	/* Performance counters for the Memory System */
+	"",
+	"",
+	"",
+	"",
+	"THEx_MMU_REQUESTS",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"THEx_L2_RD_MSG_IN",
+	"THEx_L2_RD_MSG_IN_STALL",
+	"THEx_L2_WR_MSG_IN",
+	"THEx_L2_WR_MSG_IN_STALL",
+	"THEx_L2_SNP_MSG_IN",
+	"THEx_L2_SNP_MSG_IN_STALL",
+	"THEx_L2_RD_MSG_OUT",
+	"THEx_L2_RD_MSG_OUT_STALL",
+	"THEx_L2_WR_MSG_OUT",
+	"THEx_L2_ANY_LOOKUP",
+	"THEx_L2_READ_LOOKUP",
+	"THEx_L2_WRITE_LOOKUP",
+	"THEx_L2_EXT_SNOOP_LOOKUP",
+	"THEx_L2_EXT_READ",
+	"THEx_L2_EXT_READ_NOSNP",
+	"THEx_L2_EXT_READ_UNIQUE",
+	"THEx_L2_EXT_READ_BEATS",
+	"THEx_L2_EXT_AR_STALL",
+	"THEx_L2_EXT_AR_CNT_Q1",
+	"THEx_L2_EXT_AR_CNT_Q2",
+	"THEx_L2_EXT_AR_CNT_Q3",
+	"THEx_L2_EXT_RRESP_0_127",
+	"THEx_L2_EXT_RRESP_128_191",
+	"THEx_L2_EXT_RRESP_192_255",
+	"THEx_L2_EXT_RRESP_256_319",
+	"THEx_L2_EXT_RRESP_320_383",
+	"THEx_L2_EXT_WRITE",
+	"THEx_L2_EXT_WRITE_NOSNP_FULL",
+	"THEx_L2_EXT_WRITE_NOSNP_PTL",
+	"THEx_L2_EXT_WRITE_SNP_FULL",
+	"THEx_L2_EXT_WRITE_SNP_PTL",
+	"THEx_L2_EXT_WRITE_BEATS",
+	"THEx_L2_EXT_W_STALL",
+	"THEx_L2_EXT_AW_CNT_Q1",
+	"THEx_L2_EXT_AW_CNT_Q2",
+	"THEx_L2_EXT_AW_CNT_Q3",
+	"THEx_L2_EXT_SNOOP",
+	"THEx_L2_EXT_SNOOP_STALL",
+	"THEx_L2_EXT_SNOOP_RESP_CLEAN",
+	"THEx_L2_EXT_SNOOP_RESP_DATA",
+	"THEx_L2_EXT_SNOOP_INTERNAL",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+};
+
+#endif /* _KBASE_GATOR_HWCNT_NAMES_THEX_H_ */
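
As with the other per-GPU name tables, hardware_counters_mali_tHEx[] is laid out as fixed-size per-block sections (job manager, tiler, shader core, memory system), so a counter name is located by block index times the per-block stride plus the counter offset within the block. A toy illustration of that indexing; the stride is shortened to 4 here, whereas the driver uses its MALI_COUNTERS_PER_BLOCK constant, and the names are made up.

#include <stdio.h>

#define DEMO_COUNTERS_PER_BLOCK 4	/* shortened stand-in for MALI_COUNTERS_PER_BLOCK */

/* Stand-in for hardware_counters_mali_tHEx[]: two blocks of names. */
static const char * const demo_counters[] = {
	"", "", "DEMO_GPU_ACTIVE", "DEMO_IRQ_ACTIVE",		/* block 0 */
	"", "DEMO_TILER_ACTIVE", "DEMO_TRIANGLES", "DEMO_LINES",	/* block 1 */
};

static const char *counter_name(unsigned int block, unsigned int counter)
{
	return demo_counters[block * DEMO_COUNTERS_PER_BLOCK + counter];
}

int main(void)
{
	printf("%s\n", counter_name(1, 2));	/* prints DEMO_TRIANGLES */
	return 0;
}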
diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_gpu_id.h b/drivers/gpu/arm/midgard_for_linux/mali_kbase_gpu_id.h
index dc8af2d6e794..a962ecb3f9c6 100644
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_gpu_id.h
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_gpu_id.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -77,8 +77,9 @@
 /* Helper macro to create a complete GPU_ID (new format) */
 #define GPU_ID2_MAKE(arch_major, arch_minor, arch_rev, product_major, \
 	version_major, version_minor, version_status) \
-		(GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev) | \
-		 GPU_ID2_VERSION_MAKE(version_major, version_minor,       \
+		(GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, \
+			product_major) | \
+		 GPU_ID2_VERSION_MAKE(version_major, version_minor,     \
 			version_status))
 
 /* Helper macro to create a partial GPU_ID (new format) that identifies
@@ -94,6 +95,7 @@
 		(((product_id) << GPU_ID2_PRODUCT_MAJOR_SHIFT) & \
 		    GPU_ID2_PRODUCT_MODEL)
 
+#define GPU_ID2_PRODUCT_TMIX              GPU_ID2_MODEL_MAKE(6, 0)
 
 /* Values for GPU_ID_VERSION_STATUS field for PRODUCT_ID GPU_ID_PI_T60X */
 #define GPU_ID_S_15DEV0                   0x1
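
GPU_ID2_PRODUCT_TMIX is built with GPU_ID2_MODEL_MAKE() so that callers can compare only the product-model bits of a new-format GPU ID and ignore the version fields, which is exactly how the gator and hw-config code above selects the tMIx tables. A standalone sketch of that masking idea; the shift and mask values below are placeholders, not the driver's real bit layout.

#include <stdint.h>
#include <stdio.h>

/* Placeholder layout: the real driver packs the product model into
 * different bits via GPU_ID2_PRODUCT_MODEL and its shift macros. */
#define DEMO_PRODUCT_MAJOR_SHIFT	16u
#define DEMO_PRODUCT_MODEL_MASK		(0xFu << DEMO_PRODUCT_MAJOR_SHIFT)
#define DEMO_MODEL_MAKE(id) \
	(((uint32_t)(id) << DEMO_PRODUCT_MAJOR_SHIFT) & DEMO_PRODUCT_MODEL_MASK)
#define DEMO_PRODUCT_TMIX		DEMO_MODEL_MAKE(6)

int main(void)
{
	/* Fabricated ID: tMIx product model plus some version bits. */
	uint32_t gpu_id = DEMO_PRODUCT_TMIX | 0x0123;

	if ((gpu_id & DEMO_PRODUCT_MODEL_MASK) == DEMO_PRODUCT_TMIX)
		printf("tMIx tables selected\n");
	else
		printf("unrecognized product model\n");
	return 0;
}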
diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_gpu_memory_debugfs.c b/drivers/gpu/arm/midgard_for_linux/mali_kbase_gpu_memory_debugfs.c
index 82f4c36d509e..6df0a1cb1264 100644
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_gpu_memory_debugfs.c
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_gpu_memory_debugfs.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -15,7 +15,7 @@
 
 
 
-#include <mali_kbase_gpu_memory_debugfs.h>
+#include <mali_kbase.h>
 
 #ifdef CONFIG_DEBUG_FS
 /** Show callback for the @c gpu_memory debugfs file.
diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_gpu_memory_debugfs.h b/drivers/gpu/arm/midgard_for_linux/mali_kbase_gpu_memory_debugfs.h
index 3cf30a4e767e..7045693eb910 100644
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_gpu_memory_debugfs.h
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_gpu_memory_debugfs.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2014, 2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -23,10 +23,9 @@
  *
  */
 
-#ifndef _KBASE_GPU_MEMORY_H
-#define _KBASE_GPU_MEMORY_H
+#ifndef _KBASE_GPU_MEMORY_DEBUGFS_H
+#define _KBASE_GPU_MEMORY_DEBUGFS_H
 
-#include <mali_kbase.h>
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
 
@@ -35,4 +34,4 @@
  */
 void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev);
 
-#endif  /*_KBASE_GPU_MEMORY_H*/
+#endif  /*_KBASE_GPU_MEMORY_DEBUGFS_H*/
diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_gpuprops_types.h b/drivers/gpu/arm/midgard_for_linux/mali_kbase_gpuprops_types.h
index 781375a9a97f..f42e91b6daa1 100644
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_gpuprops_types.h
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_gpuprops_types.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_hw.c b/drivers/gpu/arm/midgard_for_linux/mali_kbase_hw.c
index f2f93de9d2e8..de2461fb8de4 100644
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_hw.c
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_hw.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -37,6 +37,16 @@ void kbase_hw_set_features_mask(struct kbase_device *kbdev)
 	product_id = gpu_id & GPU_ID_VERSION_PRODUCT_ID;
 	product_id >>= GPU_ID_VERSION_PRODUCT_ID_SHIFT;
 
+	if (GPU_ID_IS_NEW_FORMAT(product_id)) {
+		switch (gpu_id & GPU_ID2_PRODUCT_MODEL) {
+		case GPU_ID2_PRODUCT_TMIX:
+			features = base_hw_features_tMIx;
+			break;
+		default:
+			features = base_hw_features_generic;
+			break;
+		}
+	} else {
 		switch (product_id) {
 		case GPU_ID_PI_TFRX:
 			/* FALLTHROUGH */
@@ -65,7 +75,7 @@ void kbase_hw_set_features_mask(struct kbase_device *kbdev)
 			features = base_hw_features_generic;
 			break;
 		}
-
+	}
 
 	for (; *features != BASE_HW_FEATURE_END; features++)
 		set_bit(*features, &kbdev->hw_features_mask[0]);
@@ -84,6 +94,25 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev)
 	impl_tech = kbdev->gpu_props.props.thread_props.impl_tech;
 
 	if (impl_tech != IMPLEMENTATION_MODEL) {
+		if (GPU_ID_IS_NEW_FORMAT(product_id)) {
+			switch (gpu_id) {
+			case GPU_ID2_MAKE(6, 0, 10, 0, 0, 0, 1):
+				issues = base_hw_issues_tMIx_r0p0_05dev0;
+				break;
+			case GPU_ID2_MAKE(6, 0, 10, 0, 0, 0, 2):
+				issues = base_hw_issues_tMIx_r0p0;
+				break;
+			default:
+				if ((gpu_id & GPU_ID2_PRODUCT_MODEL) ==
+							GPU_ID2_PRODUCT_TMIX) {
+					issues = base_hw_issues_tMIx_r0p0;
+				} else {
+					dev_err(kbdev->dev,
+						"Unknown GPU ID %x", gpu_id);
+					return -EINVAL;
+				}
+			}
+		} else {
 			switch (gpu_id) {
 			case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 0, GPU_ID_S_15DEV0):
 				issues = base_hw_issues_t60x_r0p0_15dev0;
@@ -174,11 +203,24 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev)
 				issues = base_hw_issues_t82x_r1p0;
 				break;
 			default:
-				dev_err(kbdev->dev, "Unknown GPU ID %x", gpu_id);
+				dev_err(kbdev->dev,
+					"Unknown GPU ID %x", gpu_id);
 				return -EINVAL;
 			}
+		}
 	} else {
 		/* Software model */
+		if (GPU_ID_IS_NEW_FORMAT(product_id)) {
+			switch (gpu_id & GPU_ID2_PRODUCT_MODEL) {
+			case GPU_ID2_PRODUCT_TMIX:
+				issues = base_hw_issues_model_tMIx;
+				break;
+			default:
+				dev_err(kbdev->dev,
+					"Unknown GPU ID %x", gpu_id);
+				return -EINVAL;
+			}
+		} else {
 			switch (product_id) {
 			case GPU_ID_PI_T60X:
 				issues = base_hw_issues_model_t60x;
@@ -209,6 +251,7 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev)
 					gpu_id);
 				return -EINVAL;
 			}
+		}
 	}
 
 	dev_info(kbdev->dev, "GPU identified as 0x%04x r%dp%d status %d", (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, (gpu_id & GPU_ID_VERSION_MAJOR) >> GPU_ID_VERSION_MAJOR_SHIFT, (gpu_id & GPU_ID_VERSION_MINOR) >> GPU_ID_VERSION_MINOR_SHIFT, (gpu_id & GPU_ID_VERSION_STATUS) >> GPU_ID_VERSION_STATUS_SHIFT);
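
Whichever branch runs above, the chosen table (base_hw_features_tMIx, base_hw_features_generic, and so on) is a BASE_HW_FEATURE_END-terminated array that gets folded into kbdev->hw_features_mask one bit at a time. A small userspace sketch of that sentinel-terminated walk; the feature IDs and the plain bitmask are stand-ins for the driver's enums and set_bit().

#include <stdio.h>

enum demo_feature {
	DEMO_FEATURE_A = 1,
	DEMO_FEATURE_B = 5,
	DEMO_FEATURE_END = 64,		/* sentinel, like BASE_HW_FEATURE_END */
};

static const enum demo_feature demo_features_tmix[] = {
	DEMO_FEATURE_A,
	DEMO_FEATURE_B,
	DEMO_FEATURE_END,
};

int main(void)
{
	unsigned long mask = 0;
	const enum demo_feature *f;

	for (f = demo_features_tmix; *f != DEMO_FEATURE_END; f++)
		mask |= 1ul << *f;	/* stands in for set_bit() */

	printf("feature mask: 0x%lx\n", mask);
	return 0;
}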
diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_hwaccess_jm.h b/drivers/gpu/arm/midgard_for_linux/mali_kbase_hwaccess_jm.h
index 2efa293088a1..abe66078029f 100644
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_hwaccess_jm.h
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_hwaccess_jm.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -71,9 +71,7 @@ void kbase_backend_release_free_address_space(struct kbase_device *kbdev,
  *
  * kbase_gpu_next_job() will pull atoms from the active context.
  *
- * Return: true if successful, false if ASID not assigned. If kctx->as_pending
- *         is true then ASID assignment will complete at some point in the
- *         future and will re-start scheduling, otherwise no ASIDs are available
+ * Return: true if successful, false if ASID not assigned.
  */
 bool kbase_backend_use_ctx(struct kbase_device *kbdev,
 				struct kbase_context *kctx,
@@ -213,6 +211,15 @@ int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js);
  */
 void kbase_backend_ctx_count_changed(struct kbase_device *kbdev);
 
+/**
+ * kbase_backend_timeouts_changed() - Job Scheduler timeouts have changed.
+ * @kbdev:	Device pointer
+ *
+ * Perform any required backend-specific actions (e.g. updating timeouts of
+ * currently running atoms).
+ */
+void kbase_backend_timeouts_changed(struct kbase_device *kbdev);
+
 /**
  * kbase_backend_slot_free() - Return the number of jobs that can be currently
  *			       submitted to slot @js.
@@ -319,6 +326,28 @@ bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev);
  * signalled to know when the reset has completed.
  */
 void kbase_reset_gpu_locked(struct kbase_device *kbdev);
+
+/**
+ * kbase_reset_gpu_silent - Reset the GPU silently
+ * @kbdev: Device pointer
+ *
+ * Reset the GPU without trying to cancel jobs and without emitting
+ * messages into the kernel log while doing the reset.
+ *
+ * This function should be used in cases where we are doing a controlled reset
+ * of the GPU as part of normal processing (e.g. exiting protected mode) where
+ * the driver will have ensured the scheduler has been idled and all other
+ * users of the GPU (e.g. instrumentation) have been suspended.
+ */
+void kbase_reset_gpu_silent(struct kbase_device *kbdev);
+
+/**
+ * kbase_reset_gpu_active - Reports if the GPU is being reset
+ * @kbdev: Device pointer
+ *
+ * Return: True if the GPU is in the process of being reset.
+ */
+bool kbase_reset_gpu_active(struct kbase_device *kbdev);
 #endif
 
 /**
diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_ipa.c b/drivers/gpu/arm/midgard_for_linux/mali_kbase_ipa.c
index 6ac97eb7937c..c579d0a589f7 100644
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_ipa.c
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_ipa.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -135,7 +135,7 @@ static void init_ipa_groups(struct kbase_ipa_context *ctx)
 	memcpy(ctx->groups, ipa_groups_def, sizeof(ctx->groups));
 }
 
-#if defined(CONFIG_OF) && (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 6, 0))
+#if defined(CONFIG_OF) && (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
 static int update_ipa_groups_from_dt(struct kbase_ipa_context *ctx)
 {
 	struct kbase_device *kbdev = ctx->kbdev;
@@ -145,7 +145,7 @@ static int update_ipa_groups_from_dt(struct kbase_ipa_context *ctx)
 	size_t i;
 	int err;
 
-	np = of_find_node_by_name(kbdev->dev->of_node, "ipa-groups");
+	np = of_get_child_by_name(kbdev->dev->of_node, "ipa-groups");
 	if (!np)
 		return 0;
 
diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_jd.c b/drivers/gpu/arm/midgard_for_linux/mali_kbase_jd.c
index 1f9fbd9ee6d0..3e0a5892cc7a 100644
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_jd.c
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_jd.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -25,20 +25,15 @@
 #endif
 #include <mali_kbase.h>
 #include <mali_kbase_uku.h>
-#ifdef CONFIG_UMP
-#include <linux/ump.h>
-#endif				/* CONFIG_UMP */
 #include <linux/random.h>
 #include <linux/version.h>
 #include <linux/ratelimit.h>
-#include <linux/pagemap.h>
 
 #include <mali_kbase_jm.h>
 #include <mali_kbase_hwaccess_jm.h>
-
-#if defined(CONFIG_MALI_MIPE_ENABLED)
 #include <mali_kbase_tlstream.h>
-#endif
+
+#include "mali_kbase_dma_fence.h"
 
 #define beenthere(kctx, f, a...)  dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a)
 
@@ -50,7 +45,7 @@
 /* Return whether katom will run on the GPU or not. Currently only soft jobs and
  * dependency-only atoms do not run on the GPU */
 #define IS_GPU_ATOM(katom) (!((katom->core_req & BASE_JD_REQ_SOFT_JOB) ||  \
-			((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) ==    \
+			((katom->core_req & BASE_JD_REQ_ATOM_TYPE) ==    \
 							BASE_JD_REQ_DEP)))
 /*
  * This is the kernel side of the API. Only entry points are:
@@ -85,22 +80,23 @@ static int jd_run_atom(struct kbase_jd_atom *katom)
 
 	KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED);
 
-	if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP) {
+	if ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP) {
 		/* Dependency only atom */
 		katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
 		return 0;
 	} else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) {
 		/* Soft-job */
-		if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE)
+		if (katom->will_fail_event_code) {
+			katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+			return 0;
+		}
+		if ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE)
 						  == BASE_JD_REQ_SOFT_REPLAY) {
 			if (!kbase_replay_process(katom))
 				katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
 		} else if (kbase_process_soft_job(katom) == 0) {
 			kbase_finish_soft_job(katom);
 			katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
-		} else {
-			/* The job has not completed */
-			list_add_tail(&katom->dep_item[0], &kctx->waiting_soft_jobs);
 		}
 		return 0;
 	}
@@ -110,6 +106,39 @@ static int jd_run_atom(struct kbase_jd_atom *katom)
 	return kbasep_js_add_job(kctx, katom);
 }
 
+#if defined(CONFIG_KDS) || defined(CONFIG_MALI_DMA_FENCE)
+void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom)
+{
+	struct kbase_device *kbdev;
+
+	KBASE_DEBUG_ASSERT(katom);
+	kbdev = katom->kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Check whether the atom's other dependencies were already met. If
+	 * katom is a GPU atom then the job scheduler may be able to represent
+	 * the dependencies, hence we may attempt to submit it before they are
+	 * met. Other atoms must have had both dependencies resolved.
+	 */
+	if (IS_GPU_ATOM(katom) ||
+			(!kbase_jd_katom_dep_atom(&katom->dep[0]) &&
+			!kbase_jd_katom_dep_atom(&katom->dep[1]))) {
+		/* katom dep complete, attempt to run it */
+		bool resched = false;
+
+		resched = jd_run_atom(katom);
+
+		if (katom->status == KBASE_JD_ATOM_STATE_COMPLETED) {
+			/* The atom has already finished */
+			resched |= jd_done_nolock(katom, NULL);
+		}
+
+		if (resched)
+			kbase_js_sched_all(kbdev);
+	}
+}
+#endif
+
 #ifdef CONFIG_KDS
 
 /* Add the katom to the kds waiting list.
@@ -142,44 +171,20 @@ static void kds_dep_clear(void *callback_parameter, void *callback_extra_paramet
 {
 	struct kbase_jd_atom *katom;
 	struct kbase_jd_context *ctx;
-	struct kbase_device *kbdev;
 
 	katom = (struct kbase_jd_atom *)callback_parameter;
 	KBASE_DEBUG_ASSERT(katom);
+
 	ctx = &katom->kctx->jctx;
-	kbdev = katom->kctx->kbdev;
-	KBASE_DEBUG_ASSERT(kbdev);
 
+	/* If KDS resource has already been satisfied (e.g. due to zapping)
+	 * do nothing.
+	 */
 	mutex_lock(&ctx->lock);
-
-	/* KDS resource has already been satisfied (e.g. due to zapping) */
-	if (katom->kds_dep_satisfied)
-		goto out;
-
-	/* This atom's KDS dependency has now been met */
-	katom->kds_dep_satisfied = true;
-
-	/* Check whether the atom's other dependencies were already met. If
-	 * katom is a GPU atom then the job scheduler may be able to represent
-	 * the dependencies, hence we may attempt to submit it before they are
-	 * met. Other atoms must have had both dependencies resolved */
-	if (IS_GPU_ATOM(katom) ||
-			(!kbase_jd_katom_dep_atom(&katom->dep[0]) &&
-			!kbase_jd_katom_dep_atom(&katom->dep[1]))) {
-		/* katom dep complete, attempt to run it */
-		bool resched = false;
-
-		resched = jd_run_atom(katom);
-
-		if (katom->status == KBASE_JD_ATOM_STATE_COMPLETED) {
-			/* The atom has already finished */
-			resched |= jd_done_nolock(katom, NULL);
-		}
-
-		if (resched)
-			kbase_js_sched_all(kbdev);
+	if (!katom->kds_dep_satisfied) {
+		katom->kds_dep_satisfied = true;
+		kbase_jd_dep_clear_locked(katom);
 	}
- out:
 	mutex_unlock(&ctx->lock);
 }
 
@@ -199,208 +204,6 @@ static void kbase_cancel_kds_wait_job(struct kbase_jd_atom *katom)
 }
 #endif				/* CONFIG_KDS */
 
-static int kbase_jd_user_buf_map(struct kbase_context *kctx,
-		struct kbase_va_region *reg)
-{
-	long pinned_pages;
-	struct kbase_mem_phy_alloc *alloc;
-	struct page **pages;
-	phys_addr_t *pa;
-	long i;
-	int err = -ENOMEM;
-	unsigned long address;
-	struct task_struct *owner;
-	struct device *dev;
-	unsigned long offset;
-	unsigned long local_size;
-
-	alloc = reg->gpu_alloc;
-	pa = kbase_get_gpu_phy_pages(reg);
-	address = alloc->imported.user_buf.address;
-	owner = alloc->imported.user_buf.owner;
-
-	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF);
-
-	pages = alloc->imported.user_buf.pages;
-
-	down_read(&owner->mm->mmap_sem);
-	pinned_pages = get_user_pages(owner, owner->mm,
-			address,
-			alloc->imported.user_buf.nr_pages,
-			reg->flags & KBASE_REG_GPU_WR,
-			0, pages, NULL);
-	up_read(&owner->mm->mmap_sem);
-
-	if (pinned_pages <= 0)
-		return pinned_pages;
-
-	if (pinned_pages != alloc->imported.user_buf.nr_pages) {
-		for (i = 0; i < pinned_pages; i++)
-			put_page(pages[i]);
-		return -ENOMEM;
-	}
-
-	dev = kctx->kbdev->dev;
-	offset = address & ~PAGE_MASK;
-	local_size = alloc->imported.user_buf.size;
-
-	for (i = 0; i < pinned_pages; i++) {
-		dma_addr_t dma_addr;
-		unsigned long min;
-
-		min = MIN(PAGE_SIZE - offset, local_size);
-		dma_addr = dma_map_page(dev, pages[i],
-				offset, min,
-				DMA_BIDIRECTIONAL);
-		if (dma_mapping_error(dev, dma_addr))
-			goto unwind;
-
-		alloc->imported.user_buf.dma_addrs[i] = dma_addr;
-		pa[i] = page_to_phys(pages[i]);
-
-		local_size -= min;
-		offset = 0;
-	}
-
-	alloc->nents = pinned_pages;
-
-	err = kbase_mmu_insert_pages(kctx, reg->start_pfn, pa,
-			kbase_reg_current_backed_size(reg),
-			reg->flags);
-	if (err == 0)
-		return 0;
-
-	alloc->nents = 0;
-	/* fall down */
-unwind:
-	while (i--) {
-		dma_unmap_page(kctx->kbdev->dev,
-				alloc->imported.user_buf.dma_addrs[i],
-				PAGE_SIZE, DMA_BIDIRECTIONAL);
-		put_page(pages[i]);
-		pages[i] = NULL;
-	}
-
-	return err;
-}
-
-static void kbase_jd_user_buf_unmap(struct kbase_context *kctx,
-		struct kbase_mem_phy_alloc *alloc, bool writeable)
-{
-	long i;
-	struct page **pages;
-	unsigned long size = alloc->imported.user_buf.size;
-
-	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF);
-	pages = alloc->imported.user_buf.pages;
-	for (i = 0; i < alloc->imported.user_buf.nr_pages; i++) {
-		unsigned long local_size;
-		dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i];
-
-		local_size = MIN(size, PAGE_SIZE - (dma_addr & ~PAGE_MASK));
-		dma_unmap_page(kctx->kbdev->dev, dma_addr, local_size,
-				DMA_BIDIRECTIONAL);
-		if (writeable)
-			set_page_dirty_lock(pages[i]);
-		put_page(pages[i]);
-		pages[i] = NULL;
-
-		size -= local_size;
-	}
-	alloc->nents = 0;
-}
-
-/* not to use sg_dma_len. */
-#define MALI_SG_DMA_LEN(sg)		((sg)->length)
-
-#ifdef CONFIG_DMA_SHARED_BUFFER
-static int kbase_jd_umm_map(struct kbase_context *kctx, struct kbase_va_region *reg)
-{
-	struct sg_table *sgt;	/* scatterlist_table */
-	struct scatterlist *s;
-	int i;
-	phys_addr_t *pa;
-	int err;
-	size_t count = 0;
-	struct kbase_mem_phy_alloc *alloc;
-
-	alloc = reg->gpu_alloc;
-
-	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM);
-	KBASE_DEBUG_ASSERT(NULL == alloc->imported.umm.sgt);
-	sgt = dma_buf_map_attachment(alloc->imported.umm.dma_attachment, DMA_BIDIRECTIONAL);
-
-	if (IS_ERR_OR_NULL(sgt))
-		return -EINVAL;
-
-	/* save for later */
-	alloc->imported.umm.sgt = sgt;
-
-	pa = kbase_get_gpu_phy_pages(reg);
-	KBASE_DEBUG_ASSERT(pa);
-
-	for_each_sg(sgt->sgl, s, sgt->nents, i) {
-		int j;
-		/* size_t pages = PFN_UP(sg_dma_len(s)); */
-		size_t pages = PFN_UP(MALI_SG_DMA_LEN(s));
-
-		WARN_ONCE(MALI_SG_DMA_LEN(s) & (PAGE_SIZE-1),
-		"MALI_SG_DMA_LEN(s)=%u is not a multiple of PAGE_SIZE\n",
-		MALI_SG_DMA_LEN(s));
-		/*
-		WARN_ONCE(sg_dma_len(s) & (PAGE_SIZE-1),
-		"sg_dma_len(s)=%u is not a multiple of PAGE_SIZE\n",
-		sg_dma_len(s));
-		*/
-
-		WARN_ONCE(sg_dma_address(s) & (PAGE_SIZE-1),
-		"sg_dma_address(s)=%llx is not aligned to PAGE_SIZE\n",
-		(unsigned long long) sg_dma_address(s));
-
-		for (j = 0; (j < pages) && (count < reg->nr_pages); j++, count++)
-			*pa++ = sg_dma_address(s) + (j << PAGE_SHIFT);
-
-		WARN_ONCE(j < pages,
-		"sg list from dma_buf_map_attachment > dma_buf->size=%zu\n",
-		alloc->imported.umm.dma_buf->size);
-	}
-
-	if (WARN_ONCE(count < reg->nr_pages,
-			"sg list from dma_buf_map_attachment < dma_buf->size=%zu, count : %zu, reg->nr_pages : %zu. \n",
-			alloc->imported.umm.dma_buf->size,
-			count,
-			reg->nr_pages)) {
-		err = -EINVAL;
-		goto out;
-	}
-
-	/* Update nents as we now have pages to map */
-	alloc->nents = count;
-
-	err = kbase_mmu_insert_pages(kctx, reg->start_pfn, kbase_get_gpu_phy_pages(reg), kbase_reg_current_backed_size(reg), reg->flags | KBASE_REG_GPU_WR | KBASE_REG_GPU_RD);
-
-out:
-	if (err) {
-		dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment, alloc->imported.umm.sgt, DMA_BIDIRECTIONAL);
-		alloc->imported.umm.sgt = NULL;
-	}
-
-	return err;
-}
-
-static void kbase_jd_umm_unmap(struct kbase_context *kctx, struct kbase_mem_phy_alloc *alloc)
-{
-	KBASE_DEBUG_ASSERT(kctx);
-	KBASE_DEBUG_ASSERT(alloc);
-	KBASE_DEBUG_ASSERT(alloc->imported.umm.dma_attachment);
-	KBASE_DEBUG_ASSERT(alloc->imported.umm.sgt);
-	dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment,
-	    alloc->imported.umm.sgt, DMA_BIDIRECTIONAL);
-	alloc->imported.umm.sgt = NULL;
-	alloc->nents = 0;
-}
-#endif				/* CONFIG_DMA_SHARED_BUFFER */
-
 void kbase_jd_free_external_resources(struct kbase_jd_atom *katom)
 {
 #ifdef CONFIG_KDS
@@ -420,6 +223,16 @@ void kbase_jd_free_external_resources(struct kbase_jd_atom *katom)
 		kds_resource_set_release_sync(&katom->kds_rset);
 	}
 #endif				/* CONFIG_KDS */
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	/* Flush dma-fence workqueue to ensure that any callbacks that may have
+	 * been queued are done before continuing.
+	 * Any successfully completed atom would have had all it's callbacks
+	 * completed before the atom was run, so only flush for failed atoms.
+	 */
+	if (katom->event_code != BASE_JD_EVENT_DONE)
+		flush_workqueue(katom->kctx->dma_fence.wq);
+#endif /* CONFIG_MALI_DMA_FENCE */
 }
 
 static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom)
@@ -433,6 +246,10 @@ static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom)
 		katom->kds_dep_satisfied = true;
 #endif				/* CONFIG_KDS */
 
+#ifdef CONFIG_MALI_DMA_FENCE
+	kbase_dma_fence_signal(katom);
+#endif /* CONFIG_MALI_DMA_FENCE */
+
 	kbase_gpu_vm_lock(katom->kctx);
 	/* only roll back if extres is non-NULL */
 	if (katom->extres) {
@@ -441,56 +258,12 @@ static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom)
 		res_no = katom->nr_extres;
 		while (res_no-- > 0) {
 			struct kbase_mem_phy_alloc *alloc = katom->extres[res_no].alloc;
+			struct kbase_va_region *reg;
 
-			switch (alloc->type) {
-#ifdef CONFIG_DMA_SHARED_BUFFER
-			case KBASE_MEM_TYPE_IMPORTED_UMM: {
-				alloc->imported.umm.current_mapping_usage_count--;
-
-				if (0 == alloc->imported.umm.current_mapping_usage_count) {
-					struct kbase_va_region *reg;
-
-					reg = kbase_region_tracker_find_region_base_address(
-							katom->kctx,
-							katom->extres[res_no].gpu_address);
-
-					if (reg && reg->gpu_alloc == alloc)
-						kbase_mmu_teardown_pages(
-								katom->kctx,
-								reg->start_pfn,
-								kbase_reg_current_backed_size(reg));
-
-					kbase_jd_umm_unmap(katom->kctx, alloc);
-				}
-			}
-			break;
-#endif /* CONFIG_DMA_SHARED_BUFFER */
-			case KBASE_MEM_TYPE_IMPORTED_USER_BUF: {
-				alloc->imported.user_buf.current_mapping_usage_count--;
-
-				if (0 == alloc->imported.user_buf.current_mapping_usage_count) {
-					struct kbase_va_region *reg;
-
-					reg = kbase_region_tracker_find_region_base_address(
-							katom->kctx,
-							katom->extres[res_no].gpu_address);
-
-					if (reg && reg->gpu_alloc == alloc)
-						kbase_mmu_teardown_pages(
-								katom->kctx,
-								reg->start_pfn,
-								kbase_reg_current_backed_size(reg));
-
-					kbase_jd_user_buf_unmap(katom->kctx,
-							alloc,
-							reg->flags & KBASE_REG_GPU_WR);
-				}
-			}
-			break;
-			default:
-			break;
-			}
-			kbase_mem_phy_alloc_put(katom->extres[res_no].alloc);
+			reg = kbase_region_tracker_find_region_base_address(
+					katom->kctx,
+					katom->extres[res_no].gpu_address);
+			kbase_unmap_external_resource(katom->kctx, reg, alloc);
 		}
 		kfree(katom->extres);
 		katom->extres = NULL;
@@ -498,24 +271,6 @@ static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom)
 	kbase_gpu_vm_unlock(katom->kctx);
 }
 
-#if (defined(CONFIG_KDS) && defined(CONFIG_UMP)) || defined(CONFIG_DMA_SHARED_BUFFER_USES_KDS)
-static void add_kds_resource(struct kds_resource *kds_res, struct kds_resource **kds_resources, u32 *kds_res_count, unsigned long *kds_access_bitmap, bool exclusive)
-{
-	u32 i;
-
-	for (i = 0; i < *kds_res_count; i++) {
-		/* Duplicate resource, ignore */
-		if (kds_resources[i] == kds_res)
-			return;
-	}
-
-	kds_resources[*kds_res_count] = kds_res;
-	if (exclusive)
-		set_bit(*kds_res_count, kds_access_bitmap);
-	(*kds_res_count)++;
-}
-#endif
-
 /*
  * Set up external resources needed by this job.
  *
@@ -531,6 +286,11 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st
 	struct kds_resource **kds_resources = NULL;
 	unsigned long *kds_access_bitmap = NULL;
 #endif				/* CONFIG_KDS */
+#ifdef CONFIG_MALI_DMA_FENCE
+	struct kbase_dma_fence_resv_info info = {
+		.dma_fence_resv_count = 0,
+	};
+#endif
 	struct base_external_resource *input_extres;
 
 	KBASE_DEBUG_ASSERT(katom);
@@ -566,27 +326,53 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st
 	KBASE_DEBUG_ASSERT(0 != katom->nr_extres);
 	kds_resources = kmalloc_array(katom->nr_extres, sizeof(struct kds_resource *), GFP_KERNEL);
 
-	if (NULL == kds_resources) {
+	if (!kds_resources) {
 		err_ret_val = -ENOMEM;
 		goto early_err_out;
 	}
 
 	KBASE_DEBUG_ASSERT(0 != katom->nr_extres);
-	kds_access_bitmap = kzalloc(sizeof(unsigned long) * ((katom->nr_extres + BITS_PER_LONG - 1) / BITS_PER_LONG), GFP_KERNEL);
-
-	if (NULL == kds_access_bitmap) {
+	kds_access_bitmap = kcalloc(BITS_TO_LONGS(katom->nr_extres),
+				    sizeof(unsigned long),
+				    GFP_KERNEL);
+	if (!kds_access_bitmap) {
 		err_ret_val = -ENOMEM;
 		goto early_err_out;
 	}
 #endif				/* CONFIG_KDS */
 
+#ifdef CONFIG_MALI_DMA_FENCE
+	info.resv_objs = kmalloc_array(katom->nr_extres,
+				       sizeof(struct reservation_object *),
+				       GFP_KERNEL);
+	if (!info.resv_objs) {
+		err_ret_val = -ENOMEM;
+		goto early_err_out;
+	}
+
+	info.dma_fence_excl_bitmap = kcalloc(BITS_TO_LONGS(katom->nr_extres),
+					     sizeof(unsigned long),
+					     GFP_KERNEL);
+	if (!info.dma_fence_excl_bitmap) {
+		err_ret_val = -ENOMEM;
+		goto early_err_out;
+	}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+	/* Take the process's mmap lock */
+	down_read(&current->mm->mmap_sem);
+
 	/* need to keep the GPU VM locked while we set up UMM buffers */
 	kbase_gpu_vm_lock(katom->kctx);
 	for (res_no = 0; res_no < katom->nr_extres; res_no++) {
 		struct base_external_resource *res;
 		struct kbase_va_region *reg;
+		struct kbase_mem_phy_alloc *alloc;
+		bool exclusive;
 
 		res = &input_extres[res_no];
+		exclusive = (res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE)
+				? true : false;
 		reg = kbase_region_tracker_find_region_enclosing_address(
 				katom->kctx,
 				res->ext_resource & ~BASE_EXT_RES_ACCESS_EXCLUSIVE);
@@ -598,80 +384,32 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st
 
 		if (!(katom->core_req & BASE_JD_REQ_SOFT_JOB) &&
 				(reg->flags & KBASE_REG_SECURE)) {
-			katom->atom_flags |= KBASE_KATOM_FLAG_SECURE;
-			if ((katom->core_req & BASE_JD_REQ_FS) == 0) {
-				WARN_RATELIMIT(1, "Secure non-fragment jobs not supported");
-				goto failed_loop;
-			}
+			katom->atom_flags |= KBASE_KATOM_FLAG_PROTECTED;
 		}
 
-		/* decide what needs to happen for this resource */
-		switch (reg->gpu_alloc->type) {
-		case BASE_MEM_IMPORT_TYPE_USER_BUFFER: {
-			reg->gpu_alloc->imported.user_buf.current_mapping_usage_count++;
-			if (1 == reg->gpu_alloc->imported.user_buf.current_mapping_usage_count) {
-					/* use a local variable to not pollute
-					 * err_ret_val with a potential success
-					 * value as some other gotos depend on
-					 * the default error code stored in
-					 * err_ret_val */
-					int tmp;
-
-					tmp = kbase_jd_user_buf_map(katom->kctx,
-							reg);
-					if (0 != tmp) {
-						/* failed to map this buffer,
-						 * roll back */
-						err_ret_val = tmp;
-						reg->gpu_alloc->imported.user_buf.current_mapping_usage_count--;
-						goto failed_loop;
-					}
-			}
-		}
-		break;
-		case BASE_MEM_IMPORT_TYPE_UMP: {
-#if defined(CONFIG_KDS) && defined(CONFIG_UMP)
-				struct kds_resource *kds_res;
-
-				kds_res = ump_dd_kds_resource_get(reg->gpu_alloc->imported.ump_handle);
-				if (kds_res)
-					add_kds_resource(kds_res, kds_resources, &kds_res_count,
-							kds_access_bitmap,
-							res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE);
-#endif				/*defined(CONFIG_KDS) && defined(CONFIG_UMP) */
-				break;
-		}
-#ifdef CONFIG_DMA_SHARED_BUFFER
-		case BASE_MEM_IMPORT_TYPE_UMM: {
-#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
-				struct kds_resource *kds_res;
-
-				kds_res = get_dma_buf_kds_resource(reg->gpu_alloc->imported.umm.dma_buf);
-				if (kds_res)
-					add_kds_resource(kds_res, kds_resources, &kds_res_count, kds_access_bitmap, res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE);
-#endif
-				reg->gpu_alloc->imported.umm.current_mapping_usage_count++;
-				if (1 == reg->gpu_alloc->imported.umm.current_mapping_usage_count) {
-					/* use a local variable to not pollute err_ret_val
-					 * with a potential success value as some other gotos depend
-					 * on the default error code stored in err_ret_val */
-					int tmp;
-
-					tmp = kbase_jd_umm_map(katom->kctx, reg);
-					if (tmp) {
-						/* failed to map this buffer, roll back */
-						err_ret_val = tmp;
-						reg->gpu_alloc->imported.umm.current_mapping_usage_count--;
-						goto failed_loop;
-					}
-				}
-				break;
-		}
+		alloc = kbase_map_external_resource(katom->kctx, reg,
+				current->mm
+#ifdef CONFIG_KDS
+				, &kds_res_count, kds_resources,
+				kds_access_bitmap, exclusive
 #endif
-		default:
+				);
+		if (!alloc) {
+			err_ret_val = -EINVAL;
 			goto failed_loop;
 		}
 
+#ifdef CONFIG_MALI_DMA_FENCE
+		if (reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) {
+			struct reservation_object *resv;
+
+			resv = reg->gpu_alloc->imported.umm.dma_buf->resv;
+			if (resv)
+				kbase_dma_fence_add_reservation(resv, &info,
+								exclusive);
+		}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
 		/* finish with updating out array with the data we found */
 		/* NOTE: It is important that this is the last thing we do (or
 		 * at least not before the first write) as we overwrite elements
@@ -679,12 +417,15 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st
 		 * until the last read for an element.
 		 * */
 		katom->extres[res_no].gpu_address = reg->start_pfn << PAGE_SHIFT; /* save the start_pfn (as an address, not pfn) to use fast lookup later */
-		katom->extres[res_no].alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+		katom->extres[res_no].alloc = alloc;
 	}
 	/* successfully parsed the extres array */
 	/* drop the vm lock before we call into kds */
 	kbase_gpu_vm_unlock(katom->kctx);
 
+	/* Release the process's mmap lock */
+	up_read(&current->mm->mmap_sem);
+
 #ifdef CONFIG_KDS
 	if (kds_res_count) {
 		int wait_failed;
@@ -709,46 +450,63 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st
 	kfree(kds_access_bitmap);
 #endif				/* CONFIG_KDS */
 
+#ifdef CONFIG_MALI_DMA_FENCE
+	if (info.dma_fence_resv_count) {
+		int ret;
+
+		ret = kbase_dma_fence_wait(katom, &info);
+		if (ret < 0)
+			goto failed_dma_fence_setup;
+	}
+
+	kfree(info.resv_objs);
+	kfree(info.dma_fence_excl_bitmap);
+#endif /* CONFIG_MALI_DMA_FENCE */
+
 	/* all done OK */
 	return 0;
 
 /* error handling section */
 
+#ifdef CONFIG_MALI_DMA_FENCE
+failed_dma_fence_setup:
 #ifdef CONFIG_KDS
- failed_kds_setup:
+	/* If we are here, dma_fence setup failed but KDS didn't.
+	 * Revert KDS setup if any.
+	 */
+	if (kds_res_count) {
+		mutex_unlock(&katom->kctx->jctx.lock);
+		kds_resource_set_release_sync(&katom->kds_rset);
+		mutex_lock(&katom->kctx->jctx.lock);
+
+		kbase_jd_kds_waiters_remove(katom);
+		katom->kds_dep_satisfied = true;
+	}
+#endif /* CONFIG_KDS */
+#endif /* CONFIG_MALI_DMA_FENCE */
+#ifdef CONFIG_KDS
+failed_kds_setup:
+#endif
+#if defined(CONFIG_KDS) || defined(CONFIG_MALI_DMA_FENCE)
+	/* Take the process's mmap lock */
+	down_read(&current->mm->mmap_sem);
 
 	/* lock before we unmap */
 	kbase_gpu_vm_lock(katom->kctx);
-#endif				/* CONFIG_KDS */
+#endif
 
  failed_loop:
 	/* undo the loop work */
 	while (res_no-- > 0) {
 		struct kbase_mem_phy_alloc *alloc = katom->extres[res_no].alloc;
-#ifdef CONFIG_DMA_SHARED_BUFFER
-		if (alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) {
-			alloc->imported.umm.current_mapping_usage_count--;
-
-			if (0 == alloc->imported.umm.current_mapping_usage_count) {
-				struct kbase_va_region *reg;
-
-				reg = kbase_region_tracker_find_region_base_address(
-						katom->kctx,
-						katom->extres[res_no].gpu_address);
 
-				if (reg && reg->gpu_alloc == alloc)
-					kbase_mmu_teardown_pages(katom->kctx,
-							reg->start_pfn,
-							kbase_reg_current_backed_size(reg));
-
-				kbase_jd_umm_unmap(katom->kctx, alloc);
-			}
-		}
-#endif				/* CONFIG_DMA_SHARED_BUFFER */
-		kbase_mem_phy_alloc_put(alloc);
+		kbase_unmap_external_resource(katom->kctx, NULL, alloc);
 	}
 	kbase_gpu_vm_unlock(katom->kctx);
 
+	/* Release the process's mmap lock */
+	up_read(&current->mm->mmap_sem);
+
  early_err_out:
 	kfree(katom->extres);
 	katom->extres = NULL;
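
The attach path above keeps a strict map-all-or-unwind discipline: if mapping resource res_no fails, failed_loop walks back over the already-mapped entries with kbase_unmap_external_resource() before the GPU VM lock and mmap_sem are dropped. A self-contained illustration of that unwind shape; map_resource() and unmap_resource() are invented stand-ins, and the forced failure only exists to exercise the error path.

#include <stdio.h>

/* Invented stand-ins for kbase_map/unmap_external_resource(). */
static int map_resource(int i)
{
	return (i == 3) ? -1 : 0;	/* force a failure on resource 3 */
}

static void unmap_resource(int i)
{
	printf("unmapped resource %d\n", i);
}

static int map_all(int nr)
{
	int res_no;

	for (res_no = 0; res_no < nr; res_no++) {
		if (map_resource(res_no) != 0)
			goto failed_loop;
	}
	return 0;

failed_loop:
	/* undo the loop work, newest mapping first */
	while (res_no-- > 0)
		unmap_resource(res_no);
	return -1;
}

int main(void)
{
	return map_all(5) ? 1 : 0;
}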
@@ -756,35 +514,33 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st
 	kfree(kds_resources);
 	kfree(kds_access_bitmap);
 #endif				/* CONFIG_KDS */
+#ifdef CONFIG_MALI_DMA_FENCE
+	kfree(info.resv_objs);
+	kfree(info.dma_fence_excl_bitmap);
+#endif
 	return err_ret_val;
 }
 
 static inline void jd_resolve_dep(struct list_head *out_list,
 					struct kbase_jd_atom *katom,
-					u8 d,
-					bool ctx_is_dying)
+					u8 d, bool ctx_is_dying)
 {
 	u8 other_d = !d;
 
 	while (!list_empty(&katom->dep_head[d])) {
 		struct kbase_jd_atom *dep_atom;
+		struct kbase_jd_atom *other_dep_atom;
 		u8 dep_type;
 
 		dep_atom = list_entry(katom->dep_head[d].next,
 				struct kbase_jd_atom, dep_item[d]);
-
 		list_del(katom->dep_head[d].next);
 
 		dep_type = kbase_jd_katom_dep_type(&dep_atom->dep[d]);
 		kbase_jd_katom_dep_clear(&dep_atom->dep[d]);
 
 		if (katom->event_code != BASE_JD_EVENT_DONE &&
-			(dep_type != BASE_JD_DEP_TYPE_ORDER || ctx_is_dying)) {
-			/* Atom failed, so remove the other dependencies and immediately fail the atom */
-			if (kbase_jd_katom_dep_atom(&dep_atom->dep[other_d])) {
-				list_del(&dep_atom->dep_item[other_d]);
-				kbase_jd_katom_dep_clear(&dep_atom->dep[other_d]);
-			}
+			(dep_type != BASE_JD_DEP_TYPE_ORDER)) {
 #ifdef CONFIG_KDS
 			if (!dep_atom->kds_dep_satisfied) {
 				/* Just set kds_dep_satisfied to true. If the callback happens after this then it will early out and
@@ -794,17 +550,67 @@ static inline void jd_resolve_dep(struct list_head *out_list,
 			}
 #endif
 
+#ifdef CONFIG_MALI_DMA_FENCE
+			kbase_dma_fence_cancel_callbacks(dep_atom);
+#endif
+
 			dep_atom->event_code = katom->event_code;
 			KBASE_DEBUG_ASSERT(dep_atom->status !=
 						KBASE_JD_ATOM_STATE_UNUSED);
-			dep_atom->status = KBASE_JD_ATOM_STATE_COMPLETED;
 
-			list_add_tail(&dep_atom->dep_item[0], out_list);
-		} else if (!kbase_jd_katom_dep_atom(&dep_atom->dep[other_d])) {
+			if ((dep_atom->core_req & BASE_JD_REQ_SOFT_REPLAY)
+					!= BASE_JD_REQ_SOFT_REPLAY) {
+				dep_atom->will_fail_event_code =
+					dep_atom->event_code;
+			} else {
+				dep_atom->status =
+					KBASE_JD_ATOM_STATE_COMPLETED;
+			}
+		}
+		other_dep_atom = (struct kbase_jd_atom *)
+			kbase_jd_katom_dep_atom(&dep_atom->dep[other_d]);
+
+		if (!dep_atom->in_jd_list && (!other_dep_atom ||
+				(IS_GPU_ATOM(dep_atom) && !ctx_is_dying &&
+				!dep_atom->will_fail_event_code &&
+				!other_dep_atom->will_fail_event_code))) {
+			bool dep_satisfied = true;
+#ifdef CONFIG_MALI_DMA_FENCE
+			int dep_count;
+
+			dep_count = atomic_read(&dep_atom->dma_fence.dep_count);
+			if (likely(dep_count == -1)) {
+				dep_satisfied = true;
+			} else if (dep_count == 0) {
+				/*
+				 * All fences for this atom have signaled, but
+				 * the worker that will queue the atom has not
+				 * yet run.
+				 *
+				 * Mark the atom as handled by setting
+				 * dep_count to -1 so that the worker doesn't
+				 * queue the atom again.
+				 */
+				atomic_set(&dep_atom->dma_fence.dep_count, -1);
+				/*
+				 * Remove the atom from the list of dma-fence
+				 * waiting atoms.
+				 */
+				kbase_dma_fence_waiters_remove(dep_atom);
+				dep_satisfied = true;
+			} else {
+				dep_satisfied = false;
+			}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
 #ifdef CONFIG_KDS
-			if (dep_atom->kds_dep_satisfied)
+			dep_satisfied = dep_satisfied && dep_atom->kds_dep_satisfied;
 #endif
-				list_add_tail(&dep_atom->dep_item[0], out_list);
+
+			if (dep_satisfied) {
+				dep_atom->in_jd_list = true;
+				list_add_tail(&dep_atom->jd_item, out_list);
+			}
 		}
 	}
 }
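
The dma-fence handling above treats dma_fence.dep_count as a small state machine: a positive value means fences are still outstanding, 0 means every fence has signalled but the queueing worker has not yet run, and -1 means the atom has been taken over (or never had fence dependencies). The driver serializes these transitions under the context's jctx lock; the sketch below compresses the read-then-set step into a compare-and-swap purely so it stands alone, and every name in it is invented.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* dep_count semantics mirrored from the hunk above:
 *   >0  fences still outstanding
 *    0  all fences signalled, queueing worker not yet run
 *   -1  atom handed over (or it never had fence dependencies)
 */
struct demo_atom {
	atomic_int dep_count;
};

static void fence_signalled(struct demo_atom *a)
{
	atomic_fetch_sub(&a->dep_count, 1);
}

/* Returns true if the caller may queue the atom itself (the 0 -> -1 step). */
static bool try_take_over(struct demo_atom *a)
{
	int zero = 0;

	return atomic_compare_exchange_strong(&a->dep_count, &zero, -1);
}

int main(void)
{
	struct demo_atom a = { .dep_count = 2 };

	fence_signalled(&a);
	fence_signalled(&a);			/* now 0: signalled, not queued */

	if (try_take_over(&a))
		printf("atom taken over for queueing\n");
	if (!try_take_over(&a))
		printf("already handled (dep_count == -1)\n");
	return 0;
}

In the driver the equivalent 0 -> -1 step is the atomic_set() in jd_resolve_dep() above, which also drops the atom from the dma-fence waiters list via kbase_dma_fence_waiters_remove().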
@@ -847,7 +653,7 @@ static void jd_check_force_failure(struct kbase_jd_atom *katom)
 		    kbase_jd_katom_dep_atom(&kctx->jctx.atoms[i].dep[1]) == katom) {
 			struct kbase_jd_atom *dep_atom = &kctx->jctx.atoms[i];
 
-			if ((dep_atom->core_req & BASEP_JD_REQ_ATOM_TYPE) ==
+			if ((dep_atom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) ==
 						     BASE_JD_REQ_SOFT_REPLAY &&
 			    (dep_atom->core_req & kbdev->force_replay_core_req)
 					     == kbdev->force_replay_core_req) {
@@ -859,6 +665,36 @@ static void jd_check_force_failure(struct kbase_jd_atom *katom)
 }
 #endif
 
+static void jd_try_submitting_deps(struct list_head *out_list,
+		struct kbase_jd_atom *node)
+{
+	int i;
+
+	for (i = 0; i < 2; i++) {
+		struct list_head *pos;
+
+		list_for_each(pos, &node->dep_head[i]) {
+			struct kbase_jd_atom *dep_atom = list_entry(pos,
+					struct kbase_jd_atom, dep_item[i]);
+
+			if (IS_GPU_ATOM(dep_atom) && !dep_atom->in_jd_list) {
+				/* Check if atom deps look sane */
+				bool dep0_valid = !dep_atom->dep[0].atom ||
+						(dep_atom->dep[0].atom->status
+						>= KBASE_JD_ATOM_STATE_IN_JS);
+				bool dep1_valid = !dep_atom->dep[1].atom ||
+						(dep_atom->dep[1].atom->status
+						>= KBASE_JD_ATOM_STATE_IN_JS);
+
+				if (dep0_valid && dep1_valid) {
+					dep_atom->in_jd_list = true;
+					list_add(&dep_atom->jd_item, out_list);
+				}
+			}
+		}
+	}
+}
+
 /*
  * Perform the necessary handling of an atom that has finished running
  * on the GPU.
@@ -873,7 +709,6 @@ bool jd_done_nolock(struct kbase_jd_atom *katom,
 {
 	struct kbase_context *kctx = katom->kctx;
 	struct kbase_device *kbdev = kctx->kbdev;
-	struct kbasep_js_kctx_info *js_kctx_info = &kctx->jctx.sched_info;
 	struct list_head completed_jobs;
 	struct list_head runnable_jobs;
 	bool need_to_try_schedule_context = false;
@@ -888,7 +723,6 @@ bool jd_done_nolock(struct kbase_jd_atom *katom,
 	jd_check_force_failure(katom);
 #endif
 
-
 	/* This is needed in case an atom is failed due to being invalid, this
 	 * can happen *before* the jobs that the atom depends on have completed */
 	for (i = 0; i < 2; i++) {
@@ -915,17 +749,16 @@ bool jd_done_nolock(struct kbase_jd_atom *katom,
 	}
 
 	katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
-	list_add_tail(&katom->dep_item[0], &completed_jobs);
+	list_add_tail(&katom->jd_item, &completed_jobs);
 
 	while (!list_empty(&completed_jobs)) {
-		katom = list_entry(completed_jobs.prev, struct kbase_jd_atom, dep_item[0]);
+		katom = list_entry(completed_jobs.prev, struct kbase_jd_atom, jd_item);
 		list_del(completed_jobs.prev);
-
 		KBASE_DEBUG_ASSERT(katom->status == KBASE_JD_ATOM_STATE_COMPLETED);
 
 		for (i = 0; i < 2; i++)
 			jd_resolve_dep(&runnable_jobs, katom, i,
-						js_kctx_info->ctx.is_dying);
+					kctx->jctx.sched_info.ctx.is_dying);
 
 		if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES)
 			kbase_jd_post_external_resources(katom);
@@ -934,35 +767,47 @@ bool jd_done_nolock(struct kbase_jd_atom *katom,
 			struct kbase_jd_atom *node;
 
 			node = list_entry(runnable_jobs.next,
-					struct kbase_jd_atom, dep_item[0]);
-
+					struct kbase_jd_atom, jd_item);
 			list_del(runnable_jobs.next);
+			node->in_jd_list = false;
 
 			KBASE_DEBUG_ASSERT(node->status != KBASE_JD_ATOM_STATE_UNUSED);
 
-			if (node->status != KBASE_JD_ATOM_STATE_COMPLETED) {
+			if (node->status != KBASE_JD_ATOM_STATE_COMPLETED &&
+					!kctx->jctx.sched_info.ctx.is_dying) {
 				need_to_try_schedule_context |= jd_run_atom(node);
 			} else {
 				node->event_code = katom->event_code;
 
-				if ((node->core_req & BASEP_JD_REQ_ATOM_TYPE)
-						  == BASE_JD_REQ_SOFT_REPLAY) {
+				if ((node->core_req &
+					BASE_JD_REQ_SOFT_JOB_TYPE) ==
+					BASE_JD_REQ_SOFT_REPLAY) {
 					if (kbase_replay_process(node))
 						/* Don't complete this atom */
 						continue;
 				} else if (node->core_req &
 							BASE_JD_REQ_SOFT_JOB) {
-					/* If this is a fence wait then remove it from the list of sync waiters. */
+					/* If this is a fence wait soft job
+					 * then remove it from the list of sync
+					 * waiters.
+					 */
 					if (BASE_JD_REQ_SOFT_FENCE_WAIT == node->core_req)
-						list_del(&node->dep_item[0]);
+						kbasep_remove_waiting_soft_job(node);
 
 					kbase_finish_soft_job(node);
 				}
 				node->status = KBASE_JD_ATOM_STATE_COMPLETED;
 			}
 
-			if (node->status == KBASE_JD_ATOM_STATE_COMPLETED)
-				list_add_tail(&node->dep_item[0], &completed_jobs);
+			if (node->status == KBASE_JD_ATOM_STATE_COMPLETED) {
+				list_add_tail(&node->jd_item, &completed_jobs);
+			} else if (node->status == KBASE_JD_ATOM_STATE_IN_JS &&
+					!node->will_fail_event_code) {
+				/* Node successfully submitted, try submitting
+				 * dependencies as they may now be representable
+				 * in JS */
+				jd_try_submitting_deps(&runnable_jobs, node);
+			}
 		}
 
 		/* Register a completed job as a disjoint event when the GPU
@@ -970,7 +815,7 @@ bool jd_done_nolock(struct kbase_jd_atom *katom,
 		 */
 		kbase_disjoint_event_potential(kctx->kbdev);
 		if (completed_jobs_ctx)
-			list_add_tail(&katom->dep_item[0], completed_jobs_ctx);
+			list_add_tail(&katom->jd_item, completed_jobs_ctx);
 		else
 			kbase_event_post(kctx, katom);
 
@@ -1042,23 +887,19 @@ static const char *kbasep_map_core_reqs_to_string(base_jd_core_req core_req)
 }
 #endif
 
-bool jd_submit_atom(struct kbase_context *kctx,
-			 const struct base_jd_atom_v2 *user_atom,
-			 struct kbase_jd_atom *katom)
+bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *user_atom, struct kbase_jd_atom *katom)
 {
 	struct kbase_jd_context *jctx = &kctx->jctx;
-	base_jd_core_req core_req;
 	int queued = 0;
 	int i;
 	int sched_prio;
 	bool ret;
+	bool will_fail = false;
 
 	/* Update the TOTAL number of jobs. This includes those not tracked by
 	 * the scheduler: 'not ready to run' and 'dependency-only' jobs. */
 	jctx->job_nr++;
 
-	core_req = user_atom->core_req;
-
 	katom->start_timestamp.tv64 = 0;
 	katom->time_spent_us = 0;
 	katom->udata = user_atom->udata;
@@ -1069,18 +910,28 @@ bool jd_submit_atom(struct kbase_context *kctx,
 	katom->affinity = 0;
 	katom->jc = user_atom->jc;
 	katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
-	katom->core_req = core_req;
+	katom->core_req = user_atom->core_req;
 	katom->atom_flags = 0;
 	katom->retry_count = 0;
 	katom->need_cache_flush_cores_retained = 0;
+	katom->pre_dep = NULL;
+	katom->post_dep = NULL;
 	katom->x_pre_dep = NULL;
 	katom->x_post_dep = NULL;
+	katom->will_fail_event_code = BASE_JD_EVENT_NOT_STARTED;
+	katom->exit_protected_state = KBASE_ATOM_EXIT_PROTECTED_CHECK;
+	katom->age = kctx->age_count++;
+
+	INIT_LIST_HEAD(&katom->jd_item);
 #ifdef CONFIG_KDS
 	/* Start by assuming that the KDS dependencies are satisfied,
 	 * kbase_jd_pre_external_resources will correct this if there are dependencies */
 	katom->kds_dep_satisfied = true;
 	katom->kds_rset = NULL;
 #endif				/* CONFIG_KDS */
+#ifdef CONFIG_MALI_DMA_FENCE
+	atomic_set(&katom->dma_fence.dep_count, -1);
+#endif
 
 	/* Don't do anything if there is a mess up with dependencies.
 	   This is done in a separate cycle to check both the dependencies at once, otherwise
@@ -1096,7 +947,7 @@ bool jd_submit_atom(struct kbase_context *kctx,
 					dep_atom_type != BASE_JD_DEP_TYPE_DATA) {
 				katom->event_code = BASE_JD_EVENT_JOB_CONFIG_FAULT;
 				katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
-#if defined(CONFIG_MALI_MIPE_ENABLED)
+
 				/* Wrong dependency setup. Atom will be sent
 				 * back to user space. Do not record any
 				 * dependencies. */
@@ -1105,7 +956,7 @@ bool jd_submit_atom(struct kbase_context *kctx,
 						kbase_jd_atom_id(kctx, katom));
 				kbase_tlstream_tl_ret_atom_ctx(
 						katom, kctx);
-#endif
+
 				ret = jd_done_nolock(katom, NULL);
 				goto out;
 			}
@@ -1138,16 +989,10 @@ bool jd_submit_atom(struct kbase_context *kctx,
 				continue;
 			}
 
-			if (i == 1 && kbase_jd_katom_dep_atom(&katom->dep[0])) {
-				/* Remove the previous dependency */
-				list_del(&katom->dep_item[0]);
-				kbase_jd_katom_dep_clear(&katom->dep[0]);
-			}
-
 			/* Atom has completed, propagate the error code if any */
 			katom->event_code = dep_atom->event_code;
 			katom->status = KBASE_JD_ATOM_STATE_QUEUED;
-#if defined(CONFIG_MALI_MIPE_ENABLED)
+
 			/* This atom is going through soft replay or
 			 * will be sent back to user space. Do not record any
 			 * dependencies. */
@@ -1155,17 +1000,16 @@ bool jd_submit_atom(struct kbase_context *kctx,
 					katom,
 					kbase_jd_atom_id(kctx, katom));
 			kbase_tlstream_tl_ret_atom_ctx(katom, kctx);
-#endif
-			if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE)
+
+			if ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE)
 					 == BASE_JD_REQ_SOFT_REPLAY) {
 				if (kbase_replay_process(katom)) {
 					ret = false;
 					goto out;
 				}
 			}
-			ret = jd_done_nolock(katom, NULL);
+			will_fail = true;
 
-			goto out;
 		} else {
 			/* Atom is in progress, add this atom to the list */
 			list_add_tail(&katom->dep_item[i], &dep_atom->dep_head[i]);
@@ -1174,12 +1018,25 @@ bool jd_submit_atom(struct kbase_context *kctx,
 		}
 	}
 
-	/* These must occur after the above loop to ensure that an atom that
-	 * depends on a previous atom with the same number behaves as expected */
-	katom->event_code = BASE_JD_EVENT_DONE;
-	katom->status = KBASE_JD_ATOM_STATE_QUEUED;
+	if (will_fail) {
+		if (!queued) {
+			ret = jd_done_nolock(katom, NULL);
+
+			goto out;
+		} else {
+			katom->will_fail_event_code = katom->event_code;
+			ret = false;
+
+			goto out;
+		}
+	} else {
+		/* These must occur after the above loop to ensure that an atom
+		 * that depends on a previous atom with the same number behaves
+		 * as expected */
+		katom->event_code = BASE_JD_EVENT_DONE;
+		katom->status = KBASE_JD_ATOM_STATE_QUEUED;
+	}
 
-#if defined(CONFIG_MALI_MIPE_ENABLED)
 	/* Create a new atom recording all dependencies it was set up with. */
 	kbase_tlstream_tl_new_atom(
 			katom,
@@ -1187,15 +1044,26 @@ bool jd_submit_atom(struct kbase_context *kctx,
 	kbase_tlstream_tl_ret_atom_ctx(katom, kctx);
 	for (i = 0; i < 2; i++)
 		if (BASE_JD_DEP_TYPE_INVALID != kbase_jd_katom_dep_type(
-					&katom->dep[i]))
+					&katom->dep[i])) {
 			kbase_tlstream_tl_dep_atom_atom(
 					(void *)kbase_jd_katom_dep_atom(
 						&katom->dep[i]),
 					(void *)katom);
-#endif
+		} else if (BASE_JD_DEP_TYPE_INVALID !=
+				user_atom->pre_dep[i].dependency_type) {
+			/* Resolved dependency. */
+			int dep_atom_number =
+				user_atom->pre_dep[i].atom_id;
+			struct kbase_jd_atom *dep_atom =
+				&jctx->atoms[dep_atom_number];
+
+			kbase_tlstream_tl_rdep_atom_atom(
+					(void *)dep_atom,
+					(void *)katom);
+		}
 
 	/* Reject atoms with job chain = NULL, as these cause issues with soft-stop */
-	if (!katom->jc && (katom->core_req & BASEP_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) {
+	if (!katom->jc && (katom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) {
 		dev_warn(kctx->kbdev->dev, "Rejecting atom with jc = NULL");
 		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
 		ret = jd_done_nolock(katom, NULL);
@@ -1213,6 +1081,17 @@ bool jd_submit_atom(struct kbase_context *kctx,
 		goto out;
 	}
 
+	/* Reject atoms with invalid core requirements */
+	if ((katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) &&
+			(katom->core_req & BASE_JD_REQ_EVENT_COALESCE)) {
+		dev_warn(kctx->kbdev->dev,
+				"Rejecting atom with invalid core requirements");
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		katom->core_req &= ~BASE_JD_REQ_EVENT_COALESCE;
+		ret = jd_done_nolock(katom, NULL);
+		goto out;
+	}
+
 	/* For invalid priority, be most lenient and choose the default */
 	sched_prio = kbasep_js_atom_prio_to_sched_prio(user_atom->prio);
 	if (sched_prio == KBASE_JS_ATOM_SCHED_PRIO_INVALID)
@@ -1269,7 +1148,15 @@ bool jd_submit_atom(struct kbase_context *kctx,
 	}
 #endif				/* CONFIG_KDS */
 
-	if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE)
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	if (atomic_read(&katom->dma_fence.dep_count) != -1) {
+		ret = false;
+		goto out;
+	}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+	if ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE)
 						  == BASE_JD_REQ_SOFT_REPLAY) {
 		if (kbase_replay_process(katom))
 			ret = false;
@@ -1283,10 +1170,9 @@ bool jd_submit_atom(struct kbase_context *kctx,
 			ret = jd_done_nolock(katom, NULL);
 			goto out;
 		}
-		/* The job has not yet completed */
-		list_add_tail(&katom->dep_item[0], &kctx->waiting_soft_jobs);
+
 		ret = false;
-	} else if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) {
+	} else if ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) {
 		katom->status = KBASE_JD_ATOM_STATE_IN_JS;
 		ret = kbasep_js_add_job(kctx, katom);
 		/* If job was cancelled then resolve immediately */
@@ -1372,7 +1258,7 @@ int kbase_jd_submit(struct kbase_context *kctx,
 			user_atom.udata = user_atom_v6.udata;
 			user_atom.extres_list = user_atom_v6.extres_list;
 			user_atom.nr_extres = user_atom_v6.nr_extres;
-			user_atom.core_req = user_atom_v6.core_req;
+			user_atom.core_req = (u32)(user_atom_v6.core_req & 0x7fff);
 
 			/* atom number 0 is used for no dependency atoms */
 			if (!user_atom_v6.pre_dep[0])
@@ -1404,6 +1290,12 @@ int kbase_jd_submit(struct kbase_context *kctx,
 		}
 #endif /* BASE_LEGACY_UK6_SUPPORT */
 
+#ifdef BASE_LEGACY_UK10_2_SUPPORT
+		if (KBASE_API_VERSION(10, 3) > kctx->api_version)
+			user_atom.core_req = (u32)(user_atom.compat_core_req
+					      & 0x7fff);
+#endif /* BASE_LEGACY_UK10_2_SUPPORT */
+
 		user_addr = (void __user *)((uintptr_t) user_addr + submit_data->stride);
 
 		mutex_lock(&jctx->lock);
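
Both legacy paths above (UK6 and, for API versions older than 10.3, UK10.2) narrow the old core_req field into the new 32-bit one by keeping only its low 15 bits. A minimal stand-alone illustration of that masking, using a hypothetical legacy value:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Hypothetical legacy core_req with the (discarded) top bit set */
	uint16_t compat_core_req = 0x8011;

	/* Same masking as the compatibility paths above: only the low
	 * 15 bits are carried into the new u32 core_req field. */
	uint32_t core_req = (uint32_t)(compat_core_req & 0x7fff);

	printf("0x%04x -> 0x%08x\n", (unsigned)compat_core_req,
	       (unsigned)core_req);
	return 0;
}
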
@@ -1478,7 +1370,6 @@ void kbase_jd_done_worker(struct work_struct *data)
 	struct kbase_jd_context *jctx;
 	struct kbase_context *kctx;
 	struct kbasep_js_kctx_info *js_kctx_info;
-	union kbasep_js_policy *js_policy;
 	struct kbase_device *kbdev;
 	struct kbasep_js_device_data *js_devdata;
 	u64 cache_jc = katom->jc;
@@ -1497,7 +1388,6 @@ void kbase_jd_done_worker(struct work_struct *data)
 	kbdev = kctx->kbdev;
 	js_kctx_info = &kctx->jctx.sched_info;
 	js_devdata = &kbdev->js_data;
-	js_policy = &kbdev->js_data.policy;
 
 	KBASE_TRACE_ADD(kbdev, JD_DONE_WORKER, kctx, katom, katom->jc, 0);
 
@@ -1522,7 +1412,6 @@ void kbase_jd_done_worker(struct work_struct *data)
 
 		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
 		mutex_unlock(&js_devdata->queue_mutex);
-		mutex_unlock(&jctx->lock);
 
 		spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
 
@@ -1530,6 +1419,7 @@ void kbase_jd_done_worker(struct work_struct *data)
 		kbase_js_unpull(kctx, katom);
 
 		spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+		mutex_unlock(&jctx->lock);
 
 		return;
 	}
@@ -1639,7 +1529,7 @@ void kbase_jd_done_worker(struct work_struct *data)
 		while (!list_empty(&kctx->completed_jobs)) {
 			struct kbase_jd_atom *atom = list_entry(
 					kctx->completed_jobs.next,
-					struct kbase_jd_atom, dep_item[0]);
+					struct kbase_jd_atom, jd_item);
 			list_del(kctx->completed_jobs.next);
 
 			kbase_event_post(kctx, atom);
@@ -1714,51 +1604,6 @@ static void jd_cancel_worker(struct work_struct *data)
 		kbase_js_sched_all(kbdev);
 }
 
-/**
- * jd_evict_worker - Work queue job evict function
- * @data: a &struct work_struct
- *
- * Only called as part of evicting failed jobs. This is only called on jobs that
- * were never submitted to HW Access. Jobs that were submitted are handled
- * through kbase_jd_done_worker().
- * Operates serially with the kbase_jd_done_worker() on the work queue.
- *
- * We don't need to release most of the resources that would occur on
- * kbase_jd_done() or kbase_jd_done_worker(), because the atoms here must not be
- * running (by virtue of having not been submitted to HW Access).
- */
-static void jd_evict_worker(struct work_struct *data)
-{
-	struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom,
-									work);
-	struct kbase_jd_context *jctx;
-	struct kbase_context *kctx;
-	struct kbasep_js_kctx_info *js_kctx_info;
-	struct kbase_device *kbdev;
-
-	/* Soft jobs should never reach this function */
-	KBASE_DEBUG_ASSERT((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0);
-
-	kctx = katom->kctx;
-	kbdev = kctx->kbdev;
-	jctx = &kctx->jctx;
-	js_kctx_info = &kctx->jctx.sched_info;
-
-	KBASE_TRACE_ADD(kbdev, JD_CANCEL_WORKER, kctx, katom, katom->jc, 0);
-
-	/* Scheduler: Remove the job from the system */
-	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
-	kbasep_js_remove_cancelled_job(kbdev, kctx, katom);
-	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
-
-	mutex_lock(&jctx->lock);
-	jd_done_nolock(katom, NULL);
-	/* katom may have been freed now, do not use! */
-	mutex_unlock(&jctx->lock);
-
-	kbase_js_sched_all(kbdev);
-}
-
 /**
  * kbase_jd_done - Complete a job that has been removed from the Hardware
  * @katom: atom which has been completed
@@ -1802,7 +1647,8 @@ void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr,
 
 #ifdef CONFIG_DEBUG_FS
 	/* a failed job happened and is waiting for dumping */
-	if (kbase_debug_job_fault_process(katom, katom->event_code))
+	if (!katom->will_fail_event_code &&
+			kbase_debug_job_fault_process(katom, katom->event_code))
 		return;
 #endif
 
@@ -1840,30 +1686,6 @@ void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom)
 	queue_work(kctx->jctx.job_done_wq, &katom->work);
 }
 
-void kbase_jd_evict(struct kbase_device *kbdev, struct kbase_jd_atom *katom)
-{
-	struct kbase_context *kctx;
-	struct kbasep_js_kctx_info *js_kctx_info;
-
-	KBASE_DEBUG_ASSERT(NULL != kbdev);
-	KBASE_DEBUG_ASSERT(NULL != katom);
-	kctx = katom->kctx;
-	KBASE_DEBUG_ASSERT(NULL != kctx);
-
-	js_kctx_info = &kctx->jctx.sched_info;
-
-	KBASE_TRACE_ADD(kbdev, JD_CANCEL, kctx, katom, katom->jc, 0);
-
-	/* This should only be done from a context that is currently scheduled
-	 */
-	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled);
-
-	WARN_ON(work_pending(&katom->work));
-
-	KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work));
-	INIT_WORK(&katom->work, jd_evict_worker);
-	queue_work(kctx->jctx.job_done_wq, &katom->work);
-}
 
 void kbase_jd_zap_context(struct kbase_context *kctx)
 {
@@ -1886,8 +1708,9 @@ void kbase_jd_zap_context(struct kbase_context *kctx)
 	 * queued outside the job scheduler.
 	 */
 
+	del_timer_sync(&kctx->soft_job_timeout);
 	list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) {
-		katom = list_entry(entry, struct kbase_jd_atom, dep_item[0]);
+		katom = list_entry(entry, struct kbase_jd_atom, queue);
 		kbase_cancel_soft_job(katom);
 	}
 
@@ -1910,8 +1733,19 @@ void kbase_jd_zap_context(struct kbase_context *kctx)
 	}
 #endif
 
+#ifdef CONFIG_MALI_DMA_FENCE
+	kbase_dma_fence_cancel_all_atoms(kctx);
+#endif
+
 	mutex_unlock(&kctx->jctx.lock);
 
+#ifdef CONFIG_MALI_DMA_FENCE
+	/* Flush dma-fence workqueue to ensure that any callbacks that may have
+	 * been queued are done before continuing.
+	 */
+	flush_workqueue(kctx->dma_fence.wq);
+#endif
+
 	kbase_jm_wait_for_zero_jobs(kctx);
 }
 
@@ -1927,7 +1761,8 @@ int kbase_jd_init(struct kbase_context *kctx)
 
 	KBASE_DEBUG_ASSERT(kctx);
 
-	kctx->jctx.job_done_wq = alloc_workqueue("mali_jd", 0, 1);
+	kctx->jctx.job_done_wq = alloc_workqueue("mali_jd",
+			WQ_HIGHPRI | WQ_UNBOUND, 1);
 	if (NULL == kctx->jctx.job_done_wq) {
 		mali_err = -ENOMEM;
 		goto out1;
@@ -1942,6 +1777,12 @@ int kbase_jd_init(struct kbase_context *kctx)
 		/* Catch userspace attempting to use an atom which doesn't exist as a pre-dependency */
 		kctx->jctx.atoms[i].event_code = BASE_JD_EVENT_JOB_INVALID;
 		kctx->jctx.atoms[i].status = KBASE_JD_ATOM_STATE_UNUSED;
+
+#ifdef CONFIG_MALI_DMA_FENCE
+		kctx->jctx.atoms[i].dma_fence.context = fence_context_alloc(1);
+		atomic_set(&kctx->jctx.atoms[i].dma_fence.seqno, 0);
+		INIT_LIST_HEAD(&kctx->jctx.atoms[i].dma_fence.callbacks);
+#endif
 	}
 
 	mutex_init(&kctx->jctx.lock);
diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_js.c b/drivers/gpu/arm/midgard_for_linux/mali_kbase_js.c
index 78761e69d04d..ac6c3ce333ed 100644
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_js.c
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_js.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -25,9 +25,7 @@
 #if defined(CONFIG_MALI_GATOR_SUPPORT)
 #include <mali_kbase_gator.h>
 #endif
-#if defined(CONFIG_MALI_MIPE_ENABLED)
 #include <mali_kbase_tlstream.h>
-#endif
 #include <mali_kbase_hw.h>
 
 #include <mali_kbase_defs.h>
@@ -79,13 +77,6 @@ static int kbase_js_get_slot(struct kbase_device *kbdev,
 static void kbase_js_foreach_ctx_job(struct kbase_context *kctx,
 		kbasep_js_policy_ctx_job_cb callback);
 
-static bool kbase_js_evict_atom(struct kbase_context *kctx,
-				struct kbase_jd_atom *katom_evict,
-				struct kbase_jd_atom *start_katom,
-				struct kbase_jd_atom *head_katom,
-				struct list_head *evict_list,
-				struct jsctx_rb *rb, int idx);
-
 /* Helper for trace subcodes */
 #if KBASE_TRACE_ENABLE
 static int kbasep_js_trace_get_refcnt(struct kbase_device *kbdev,
@@ -239,26 +230,6 @@ bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev,
 	return result;
 }
 
-/**
- * jsctx_rb_is_empty_prio(): - Check if ring buffer is empty
- * @kctx: Pointer to kbase context with ring buffer.
- * @js:   Job slot id to check.
- * @prio: Priority to check.
- *
- * Caller must hold runpool_irq.lock
- *
- * Return: true if the ring buffer is empty, false otherwise.
- */
-static inline bool
-jsctx_rb_is_empty_prio(struct kbase_context *kctx, int js, int prio)
-{
-	struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js];
-
-	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
-
-	return rb->running_idx == rb->write_idx;
-}
-
 /**
  * jsctx_rb_none_to_pull_prio(): - Check if there are no pullable atoms
  * @kctx: Pointer to kbase context with ring buffer.
@@ -270,18 +241,16 @@ jsctx_rb_is_empty_prio(struct kbase_context *kctx, int js, int prio)
  * ring buffer to be full (with running atoms) when this function returns
  * true.
  *
- * Caller must hold runpool_irq.lock
- *
  * Return: true if there are no atoms to pull, false otherwise.
  */
 static inline bool
 jsctx_rb_none_to_pull_prio(struct kbase_context *kctx, int js, int prio)
 {
-	struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js];
+	struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
 
 	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
 
-	return rb->read_idx == rb->write_idx;
+	return RB_EMPTY_ROOT(&rb->runnable_tree);
 }
 
 /**
@@ -311,115 +280,68 @@ jsctx_rb_none_to_pull(struct kbase_context *kctx, int js)
 }
 
 /**
- * jsctx_rb_compact_prio(): - Compact a ring buffer
- * @kctx: Pointer to kbase context with ring buffer.
- * @js:   Job slot id to compact.
- * @prio: Priority id to compact.
- */
-static inline void
-jsctx_rb_compact_prio(struct kbase_context *kctx, int js, int prio)
-{
-	struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js];
-	u16 compact_idx = rb->write_idx - 1;
-	u16 end_idx = rb->running_idx - 1;
-	u16 i;
-
-	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
-	lockdep_assert_held(&kctx->jctx.lock);
-
-	for (i = compact_idx; i != end_idx; i--) {
-		if (rb->entries[i & JSCTX_RB_MASK].atom_id !=
-				KBASEP_ATOM_ID_INVALID) {
-			WARN_ON(compact_idx < rb->running_idx);
-			rb->entries[compact_idx & JSCTX_RB_MASK].atom_id =
-					rb->entries[i & JSCTX_RB_MASK].atom_id;
-
-			compact_idx--;
-		}
-		if (rb->read_idx == i)
-			rb->read_idx = compact_idx + 1;
-	}
-
-	rb->running_idx = compact_idx + 1;
-}
-
-/**
- * jsctx_rb_compact(): - Compact all priority ring buffers
- * @kctx: Pointer to kbase context with ring buffer.
- * @js:   Job slot id to compact.
- */
-static inline void
-jsctx_rb_compact(struct kbase_context *kctx, int js)
-{
-	int prio;
-
-	for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++)
-		jsctx_rb_compact_prio(kctx, js, prio);
-}
-
-/**
- * jsctx_rb_foreach_prio(): - Execute callback for each entry in ring buffer
- * @kctx:     Pointer to kbase context with ring buffer.
+ * jsctx_queue_foreach_prio(): - Execute callback for each entry in the queue.
+ * @kctx:     Pointer to kbase context with the queue.
  * @js:       Job slot id to iterate.
  * @prio:     Priority id to iterate.
  * @callback: Function pointer to callback.
  *
- * Iterate over a ring buffer and invoke @callback for each entry in buffer, and
- * remove the entry from the buffer.
+ * Iterate over a queue and invoke @callback for each entry in the queue, and
+ * remove the entry from the queue.
  *
- * If entries are added to the ring buffer while this is running those entries
- * may, or may not be covered. To ensure that all entries in the buffer have
- * been enumerated when this function returns jsctx->lock must be held when
- * calling this function.
+ * If entries are added to the queue while this is running those entries may or
+ * may not be covered. To ensure that all entries in the queue have been
+ * enumerated when this function returns, jsctx->lock must be held when calling
+ * this function.
  *
  * The HW access lock, js_data.runpool_irq.lock, must always be held when
  * calling this function.
  */
 static void
-jsctx_rb_foreach_prio(struct kbase_context *kctx, int js, int prio,
+jsctx_queue_foreach_prio(struct kbase_context *kctx, int js, int prio,
 		kbasep_js_policy_ctx_job_cb callback)
 {
-	struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js];
-	struct kbase_jd_atom *katom;
-	u16 write_idx = ACCESS_ONCE(rb->write_idx);
+	struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js];
 
 	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
 
-	/* There must be no jobs currently in HW access */
-	WARN_ON(rb->read_idx != rb->running_idx);
+	while (!RB_EMPTY_ROOT(&queue->runnable_tree)) {
+		struct rb_node *node = rb_first(&queue->runnable_tree);
+		struct kbase_jd_atom *entry = rb_entry(node,
+				struct kbase_jd_atom, runnable_tree_node);
 
-	/* Invoke callback on all kbase_jd_atoms in the ring buffer, and
-	 * removes them from the buffer */
-	while (rb->read_idx != write_idx) {
-		int id = rb->entries[rb->read_idx & JSCTX_RB_MASK].atom_id;
+		rb_erase(node, &queue->runnable_tree);
+		callback(kctx->kbdev, entry);
+	}
 
-		katom = kbase_jd_atom_from_id(kctx, id);
+	while (!list_empty(&queue->x_dep_head)) {
+		struct kbase_jd_atom *entry = list_entry(queue->x_dep_head.next,
+				struct kbase_jd_atom, queue);
 
-		rb->read_idx++;
-		rb->running_idx++;
+		list_del(queue->x_dep_head.next);
 
-		callback(kctx->kbdev, katom);
+		callback(kctx->kbdev, entry);
 	}
 }
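
jsctx_queue_foreach_prio() drains the per-slot, per-priority queue in two phases: first the age-ordered runnable tree, then the list of atoms parked behind cross-slot dependencies. The helper below is an illustrative, non-destructive counterpart (the name is not part of the driver) that walks the same two containers; like the function above it would need runpool_irq.lock held:

static int jsctx_queue_count_prio(struct kbase_context *kctx, int js, int prio)
{
	struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js];
	struct kbase_jd_atom *katom;
	struct rb_node *node;
	int count = 0;

	/* Atoms ready to run, ordered by age in the rbtree */
	for (node = rb_first(&queue->runnable_tree); node;
			node = rb_next(node))
		count++;

	/* Atoms still parked behind a cross-slot dependency */
	list_for_each_entry(katom, &queue->x_dep_head, queue)
		count++;

	return count;
}
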
 
 /**
- * jsctx_rb_foreach(): - Execute callback for each entry in all priority rb
- * @kctx:     Pointer to kbase context with ring buffer.
+ * jsctx_queue_foreach(): - Execute callback for each entry in every queue
+ * @kctx:     Pointer to kbase context with queue.
  * @js:       Job slot id to iterate.
  * @callback: Function pointer to callback.
  *
  * Iterate over all the different priorities, and for each call
- * jsctx_rb_foreach_prio() to iterate over the ring buffer and invoke @callback
- * for each entry in buffer, and remove the entry from the buffer.
+ * jsctx_queue_foreach_prio() to iterate over the queue and invoke @callback
+ * for each entry, and remove the entry from the queue.
  */
 static inline void
-jsctx_rb_foreach(struct kbase_context *kctx, int js,
+jsctx_queue_foreach(struct kbase_context *kctx, int js,
 		kbasep_js_policy_ctx_job_cb callback)
 {
 	int prio;
 
 	for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++)
-		jsctx_rb_foreach_prio(kctx, js, prio, callback);
+		jsctx_queue_foreach_prio(kctx, js, prio, callback);
 }
 
 /**
@@ -436,16 +358,16 @@ jsctx_rb_foreach(struct kbase_context *kctx, int js,
 static inline struct kbase_jd_atom *
 jsctx_rb_peek_prio(struct kbase_context *kctx, int js, int prio)
 {
-	struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js];
-	int id;
+	struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
+	struct rb_node *node;
 
 	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
 
-	if (jsctx_rb_none_to_pull_prio(kctx, js, prio))
+	node = rb_first(&rb->runnable_tree);
+	if (!node)
 		return NULL;
 
-	id = rb->entries[rb->read_idx & JSCTX_RB_MASK].atom_id;
-	return kbase_jd_atom_from_id(kctx, id);
+	return rb_entry(node, struct kbase_jd_atom, runnable_tree_node);
 }
 
 /**
@@ -457,6 +379,8 @@ jsctx_rb_peek_prio(struct kbase_context *kctx, int js, int prio)
  * KBASE_JS_ATOM_SCHED_PRIO_HIGH, for the specified @js and @prio and return a
  * pointer to the next atom, unless all the priority's ring buffers are empty.
  *
+ * Caller must hold the runpool_irq.lock.
+ *
  * Return: Pointer to next atom in buffer, or NULL if there is no atom.
  */
 static inline struct kbase_jd_atom *
@@ -464,6 +388,8 @@ jsctx_rb_peek(struct kbase_context *kctx, int js)
 {
 	int prio;
 
+	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+
 	for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) {
 		struct kbase_jd_atom *katom;
 
@@ -475,35 +401,6 @@ jsctx_rb_peek(struct kbase_context *kctx, int js)
 	return NULL;
 }
 
-/**
- * jsctx_rb_peek_last(): - Check a ring buffer and get the last atom
- * @kctx: Pointer to kbase context with ring buffer.
- * @js:   Job slot id to check.
- * @prio: Priority id to check.
- *
- * Check the ring buffer for the specified @js and @prio and return a
- * pointer to the last atom, unless all the priority's ring buffers are empty.
- *
- * The last atom is the atom that was added using jsctx_rb_add() most recently.
- *
- * Return: Pointer to last atom in buffer, or NULL if there is no atom.
- */
-static inline struct kbase_jd_atom *
-jsctx_rb_peek_last(struct kbase_context *kctx, int js, int prio)
-{
-	struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js];
-	int id;
-
-	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
-	lockdep_assert_held(&kctx->jctx.lock);
-
-	if (jsctx_rb_is_empty_prio(kctx, js, prio))
-		return NULL;
-
-	id = rb->entries[(rb->write_idx - 1) & JSCTX_RB_MASK].atom_id;
-	return kbase_jd_atom_from_id(kctx, id);
-}
-
 /**
  * jsctx_rb_pull(): - Mark atom in list as running
  * @kctx:  Pointer to kbase context with ring buffer.
@@ -518,158 +415,71 @@ jsctx_rb_pull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
 {
 	int prio = katom->sched_priority;
 	int js = katom->slot_nr;
-	struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js];
+	struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
 
 	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
 
 	/* Atoms must be pulled in the correct order. */
 	WARN_ON(katom != jsctx_rb_peek_prio(kctx, js, prio));
 
-	rb->read_idx++;
+	rb_erase(&katom->runnable_tree_node, &rb->runnable_tree);
 }
 
-/**
- * jsctx_rb_unpull(): - Undo marking of atom in list as running
- * @kctx:  Pointer to kbase context with ring buffer.
- * @katom: Pointer to katom to unpull.
- *
- * Undo jsctx_rb_pull() and put @katom back in the queue.
- *
- * jsctx_rb_unpull() must be called on atoms in the same order the atoms were
- * pulled.
- */
-static inline void
-jsctx_rb_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
-{
-	int prio = katom->sched_priority;
-	int js = katom->slot_nr;
-	struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js];
+#define LESS_THAN_WRAP(a, b) ((s32)(a - b) < 0)
 
-	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
-
-	/* Atoms must be unpulled in correct order. */
-	WARN_ON(rb->entries[(rb->read_idx - 1) & JSCTX_RB_MASK].atom_id !=
-			kbase_jd_atom_id(kctx, katom));
-
-	rb->read_idx--;
-}
-
-/**
- * jsctx_rb_add(): - Add atom to ring buffer
- * @kctx:  Pointer to kbase context with ring buffer.
- * @katom: Pointer to katom to add.
- *
- * Add @katom to the ring buffer determined by the atom's priority and job slot
- * number.
- *
- * If the ring buffer is full -EBUSY will be returned.
- *
- * Return: On success 0 is returned, on failure a negative error code.
- */
-static int
-jsctx_rb_add_atom(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+static void
+jsctx_tree_add(struct kbase_context *kctx, struct kbase_jd_atom *katom)
 {
 	int prio = katom->sched_priority;
 	int js = katom->slot_nr;
-	struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js];
+	struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js];
+	struct rb_node **new = &(queue->runnable_tree.rb_node), *parent = NULL;
 
-	lockdep_assert_held(&kctx->jctx.lock);
+	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
 
-	/* Check if the ring buffer is full */
-	if ((rb->write_idx - rb->running_idx) >= JSCTX_RB_SIZE)
-		return -EBUSY;
+	while (*new) {
+		struct kbase_jd_atom *entry = container_of(*new,
+				struct kbase_jd_atom, runnable_tree_node);
 
-	rb->entries[rb->write_idx & JSCTX_RB_MASK].atom_id =
-			kbase_jd_atom_id(kctx, katom);
-	rb->write_idx++;
+		parent = *new;
+		if (LESS_THAN_WRAP(katom->age, entry->age))
+			new = &((*new)->rb_left);
+		else
+			new = &((*new)->rb_right);
+	}
 
-	return 0;
+	/* Add new node and rebalance tree. */
+	rb_link_node(&katom->runnable_tree_node, parent, new);
+	rb_insert_color(&katom->runnable_tree_node, &queue->runnable_tree);
 }
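
The runnable tree is ordered by each atom's age, assigned from the per-context age_count at submit time and compared with LESS_THAN_WRAP() so that ordering stays correct when the 32-bit counter wraps. A quick stand-alone check of that property (macro restated with <stdint.h> types):

#include <assert.h>
#include <stdint.h>

#define LESS_THAN_WRAP(a, b) ((int32_t)((a) - (b)) < 0)

int main(void)
{
	/* Plain ordering, far from the wrap point */
	assert(LESS_THAN_WRAP(1u, 2u));
	assert(!LESS_THAN_WRAP(2u, 1u));

	/* An atom submitted just before the age counter wraps still
	 * sorts before one submitted just after the wrap. */
	assert(LESS_THAN_WRAP(0xfffffffeu, 0x00000001u));
	assert(!LESS_THAN_WRAP(0x00000001u, 0xfffffffeu));

	return 0;
}
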
 
 /**
- * jsctx_rb_remove(): - Remove atom from ring buffer
+ * jsctx_rb_unpull(): - Undo marking of atom in list as running
  * @kctx:  Pointer to kbase context with ring buffer.
- * @katom: Pointer to katom to remove.
+ * @katom: Pointer to katom to unpull.
  *
- * Remove @katom from the ring buffer.
+ * Undo jsctx_rb_pull() and put @katom back in the queue.
  *
- * @katom must have been pulled from the buffer earlier by jsctx_rb_pull(), and
- * atoms must be removed in the same order they were pulled from the ring
- * buffer.
+ * jsctx_rb_unpull() must be called on atoms in the same order the atoms were
+ * pulled.
  */
 static inline void
-jsctx_rb_remove(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+jsctx_rb_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
 {
-	int prio = katom->sched_priority;
-	int js = katom->slot_nr;
-	struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js];
-
 	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
 
-	/* Atoms must be completed in order. */
-	WARN_ON(rb->entries[rb->running_idx & JSCTX_RB_MASK].atom_id !=
-			kbase_jd_atom_id(kctx, katom));
-
-	rb->running_idx++;
+	jsctx_tree_add(kctx, katom);
 }
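
Taken together, the old ring-buffer index arithmetic is replaced by rbtree primitives: RB_EMPTY_ROOT() for the emptiness check, rb_first()/rb_entry() for peek, rb_erase() for pull, and a re-insert for unpull. The sketch below (an invented helper, not driver code) shows the peek-then-pull pattern using the same primitives; runpool_irq.lock would have to be held as in the functions above:

static struct kbase_jd_atom *
jsctx_queue_take_first(struct kbase_context *kctx, int js, int prio)
{
	struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js];
	struct rb_node *node = rb_first(&queue->runnable_tree);

	if (!node)
		return NULL;	/* nothing runnable on this slot/priority */

	/* Oldest atom (leftmost node) leaves the tree, as in jsctx_rb_pull() */
	rb_erase(node, &queue->runnable_tree);
	return rb_entry(node, struct kbase_jd_atom, runnable_tree_node);
}
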
 
-/**
- * jsctx_rb_evict(): - Evict atom, and dependents, from ring buffer
- * @kctx:        Pointer to kbase context with ring buffer.
- * @start_katom: Pointer to the first katom to evict.
- * @head_katom:  Pointer to head katom.
- * @evict_list:  Pointer to head of list where evicted atoms are added.
- *
- * Iterate over the ring buffer starting at @start_katom and evict @start_atom
- * and dependent atoms in ring buffer.
- *
- * @evict_list and @head_katom is passed on to kbase_js_evict_atom() which will
- * examine the atom dependencies.
- *
- * jsctx_rb_evict() is only called by kbase_js_evict_deps().
- */
-static void
-jsctx_rb_evict(struct kbase_context *kctx,
-		struct kbase_jd_atom *start_katom,
-		struct kbase_jd_atom *head_katom,
-		struct list_head *evict_list)
-{
-	int prio = start_katom->sched_priority;
-	int js = start_katom->slot_nr;
-	struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js];
-	bool atom_in_rb = false;
-	u16 i, start_idx;
-
-	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
-	lockdep_assert_held(&kctx->jctx.lock);
-
-	for (i = rb->running_idx; i != rb->write_idx; i++) {
-		if (rb->entries[i & JSCTX_RB_MASK].atom_id ==
-				kbase_jd_atom_id(kctx, start_katom)) {
-			start_idx = i;
-			atom_in_rb = true;
-			break;
-		}
-	}
-
-	/* start_katom must still be in ring buffer. */
-	if (i == rb->write_idx || !atom_in_rb)
-		return;
-
-	/* Evict all dependencies on same slot. */
-	for (i = start_idx; i != rb->write_idx; i++) {
-		u8 katom_evict;
-
-		katom_evict = rb->entries[i & JSCTX_RB_MASK].atom_id;
-		if (katom_evict != KBASEP_ATOM_ID_INVALID) {
-			if (!kbase_js_evict_atom(kctx,
-						&kctx->jctx.atoms[katom_evict],
-						start_katom, head_katom,
-						evict_list, rb, i))
-				break;
-		}
-	}
-}
+static bool kbase_js_ctx_pullable(struct kbase_context *kctx,
+					int js,
+					bool is_scheduled);
+static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						int js);
+static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						int js);
 
 /*
  * Functions private to KBase ('Protected' functions)
@@ -742,6 +552,7 @@ int kbasep_js_devdata_init(struct kbase_device * const kbdev)
 		DEFAULT_JS_CFS_CTX_RUNTIME_INIT_SLICES;
 	jsdd->cfs_ctx_runtime_min_slices =
 		DEFAULT_JS_CFS_CTX_RUNTIME_MIN_SLICES;
+	atomic_set(&jsdd->soft_job_timeout_ms, DEFAULT_JS_SOFT_JOB_TIMEOUT);
 
 	dev_dbg(kbdev->dev, "JS Config Attribs: ");
 	dev_dbg(kbdev->dev, "\tscheduling_period_ns:%u",
@@ -768,6 +579,8 @@ int kbasep_js_devdata_init(struct kbase_device * const kbdev)
 			jsdd->cfs_ctx_runtime_init_slices);
 	dev_dbg(kbdev->dev, "\tcfs_ctx_runtime_min_slices:%u",
 			jsdd->cfs_ctx_runtime_min_slices);
+	dev_dbg(kbdev->dev, "\tsoft_job_timeout:%i",
+		atomic_read(&jsdd->soft_job_timeout_ms));
 
 	if (!(jsdd->soft_stop_ticks < jsdd->hard_stop_ticks_ss &&
 			jsdd->hard_stop_ticks_ss < jsdd->gpu_reset_ticks_ss &&
@@ -872,7 +685,7 @@ int kbasep_js_kctx_init(struct kbase_context * const kctx)
 	struct kbase_device *kbdev;
 	struct kbasep_js_kctx_info *js_kctx_info;
 	int err;
-	int i;
+	int i, j;
 
 	KBASE_DEBUG_ASSERT(kctx != NULL);
 
@@ -912,6 +725,13 @@ int kbasep_js_kctx_init(struct kbase_context * const kctx)
 	if (js_kctx_info->init_status != JS_KCTX_INIT_ALL)
 		return -EINVAL;
 
+	for (i = 0; i < KBASE_JS_ATOM_SCHED_PRIO_COUNT; i++) {
+		for (j = 0; j < BASE_JM_MAX_NR_SLOTS; j++) {
+			INIT_LIST_HEAD(&kctx->jsctx_queue[i][j].x_dep_head);
+			kctx->jsctx_queue[i][j].runnable_tree = RB_ROOT;
+		}
+	}
+
 	return 0;
 }
 
@@ -966,30 +786,25 @@ void kbasep_js_kctx_term(struct kbase_context *kctx)
 }
 
 /**
- * kbase_js_ctx_list_add_pullable - Add context to the tail of the per-slot
- *                                  pullable context queue
+ * kbase_js_ctx_list_add_pullable_nolock - Variant of
+ *                                         kbase_js_ctx_list_add_pullable()
+ *                                         where the caller must hold
+ *                                         runpool_irq.lock
  * @kbdev:  Device pointer
  * @kctx:   Context to add to queue
  * @js:     Job slot to use
  *
- * If the context is on either the pullable or unpullable queues, then it is
- * removed before being added to the tail.
- *
- * This function should be used when queueing a context for the first time, or
- * re-queueing a context that has been pulled from.
- *
- * Caller must hold kbasep_jd_device_data.queue_mutex
+ * Caller must hold runpool_irq.lock
  *
  * Return: true if caller should call kbase_backend_ctx_count_changed()
  */
-static bool kbase_js_ctx_list_add_pullable(struct kbase_device *kbdev,
+static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev,
 						struct kbase_context *kctx,
 						int js)
 {
 	bool ret = false;
 
-	lockdep_assert_held(&kbdev->js_data.queue_mutex);
-	lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
 
 	if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]))
 		list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
@@ -1012,30 +827,24 @@ static bool kbase_js_ctx_list_add_pullable(struct kbase_device *kbdev,
 }
 
 /**
- * kbase_js_ctx_list_add_pullable_head - Add context to the head of the
- *                                       per-slot pullable context queue
+ * kbase_js_ctx_list_add_pullable_head_nolock - Variant of
+ *                                              kbase_js_ctx_list_add_pullable_head()
+ *                                              where the caller must hold
+ *                                              runpool_irq.lock
  * @kbdev:  Device pointer
  * @kctx:   Context to add to queue
  * @js:     Job slot to use
  *
- * If the context is on either the pullable or unpullable queues, then it is
- * removed before being added to the head.
- *
- * This function should be used when a context has been scheduled, but no jobs
- * can currently be pulled from it.
- *
- * Caller must hold kbasep_jd_device_data.queue_mutex
+ * Caller must hold runpool_irq.lock
  *
  * Return:  true if caller should call kbase_backend_ctx_count_changed()
  */
-static bool kbase_js_ctx_list_add_pullable_head(struct kbase_device *kbdev,
-						struct kbase_context *kctx,
-						int js)
+static bool kbase_js_ctx_list_add_pullable_head_nolock(
+		struct kbase_device *kbdev, struct kbase_context *kctx, int js)
 {
 	bool ret = false;
 
-	lockdep_assert_held(&kbdev->js_data.queue_mutex);
-	lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
 
 	if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]))
 		list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
@@ -1058,8 +867,37 @@ static bool kbase_js_ctx_list_add_pullable_head(struct kbase_device *kbdev,
 }
 
 /**
- * kbase_js_ctx_list_add_unpullable - Add context to the tail of the per-slot
- *                                    unpullable context queue
+ * kbase_js_ctx_list_add_pullable_head - Add context to the head of the
+ *                                       per-slot pullable context queue
+ * @kbdev:  Device pointer
+ * @kctx:   Context to add to queue
+ * @js:     Job slot to use
+ *
+ * If the context is on either the pullable or unpullable queues, then it is
+ * removed before being added to the head.
+ *
+ * This function should be used when a context has been scheduled, but no jobs
+ * can currently be pulled from it.
+ *
+ * Return:  true if caller should call kbase_backend_ctx_count_changed()
+ */
+static bool kbase_js_ctx_list_add_pullable_head(struct kbase_device *kbdev,
+						struct kbase_context *kctx,
+						int js)
+{
+	bool ret;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags);
+	ret = kbase_js_ctx_list_add_pullable_head_nolock(kbdev, kctx, js);
+	spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags);
+
+	return ret;
+}
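
The context-queue helpers now come in _nolock/locking pairs: code that already holds runpool_irq.lock calls the _nolock variants inside its existing critical section (as kbasep_js_add_job() does further down), while other paths use the wrapper that takes the lock itself. A condensed, illustrative calling pattern (the surrounding function is invented; the real callers also hold the scheduler mutexes):

static void example_requeue(struct kbase_device *kbdev,
		struct kbase_context *kctx, int js)
{
	unsigned long flags;
	bool timer_sync;

	/* Caller does not hold runpool_irq.lock: use the wrapper. */
	if (kbase_js_ctx_list_add_pullable_head(kbdev, kctx, js))
		kbase_js_sync_timers(kbdev);

	/* Caller already holds runpool_irq.lock: use the _nolock variant
	 * and act on the return value only after dropping the lock. */
	spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags);
	timer_sync = kbase_js_ctx_list_add_pullable_nolock(kbdev, kctx, js);
	spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags);

	if (timer_sync)
		kbase_backend_ctx_count_changed(kbdev);
}
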
+
+/**
+ * kbase_js_ctx_list_add_unpullable_nolock - Add context to the tail of the
+ *                                           per-slot unpullable context queue
  * @kbdev:  Device pointer
  * @kctx:   Context to add to queue
  * @js:     Job slot to use
@@ -1070,18 +908,17 @@ static bool kbase_js_ctx_list_add_pullable_head(struct kbase_device *kbdev,
  * This function should be used when a context has been pulled from, and there
  * are no jobs remaining on the specified slot.
  *
- * Caller must hold kbasep_jd_device_data.queue_mutex
+ * Caller must hold runpool_irq.lock
  *
  * Return:  true if caller should call kbase_backend_ctx_count_changed()
  */
-static bool kbase_js_ctx_list_add_unpullable(struct kbase_device *kbdev,
+static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev,
 						struct kbase_context *kctx,
 						int js)
 {
 	bool ret = false;
 
-	lockdep_assert_held(&kbdev->js_data.queue_mutex);
-	lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
 
 	list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js],
 				&kbdev->js_data.ctx_list_unpullable[js]);
@@ -1101,8 +938,8 @@ static bool kbase_js_ctx_list_add_unpullable(struct kbase_device *kbdev,
 }
 
 /**
- * kbase_js_ctx_list_remove - Remove context from the per-slot pullable or
- *                            unpullable context queues
+ * kbase_js_ctx_list_remove_nolock - Remove context from the per-slot pullable
+ *                                   or unpullable context queues
  * @kbdev:  Device pointer
  * @kctx:   Context to remove from queue
  * @js:     Job slot to use
@@ -1112,18 +949,17 @@ static bool kbase_js_ctx_list_add_unpullable(struct kbase_device *kbdev,
  * This function should be used when a context has no jobs on the GPU, and no
  * jobs remaining for the specified slot.
  *
- * Caller must hold kbasep_jd_device_data.queue_mutex
+ * Caller must hold runpool_irq.lock
  *
  * Return:  true if caller should call kbase_backend_ctx_count_changed()
  */
-static bool kbase_js_ctx_list_remove(struct kbase_device *kbdev,
+static bool kbase_js_ctx_list_remove_nolock(struct kbase_device *kbdev,
 					struct kbase_context *kctx,
 					int js)
 {
 	bool ret = false;
 
-	lockdep_assert_held(&kbdev->js_data.queue_mutex);
-	lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
 
 	WARN_ON(list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]));
 
@@ -1144,23 +980,24 @@ static bool kbase_js_ctx_list_remove(struct kbase_device *kbdev,
 }
 
 /**
- * kbase_js_ctx_list_pop_head - Pop the head context off the per-slot pullable
- *                              queue.
+ * kbase_js_ctx_list_pop_head_nolock - Variant of kbase_js_ctx_list_pop_head()
+ *                                     where the caller must hold
+ *                                     runpool_irq.lock
  * @kbdev:  Device pointer
  * @js:     Job slot to use
  *
- * Caller must hold kbasep_jd_device_data::queue_mutex
+ * Caller must hold runpool_irq.lock
  *
  * Return:  Context to use for specified slot.
  *          NULL if no contexts present for specified slot
  */
-static struct kbase_context *kbase_js_ctx_list_pop_head(
+static struct kbase_context *kbase_js_ctx_list_pop_head_nolock(
 						struct kbase_device *kbdev,
 						int js)
 {
 	struct kbase_context *kctx;
 
-	lockdep_assert_held(&kbdev->js_data.queue_mutex);
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
 
 	if (list_empty(&kbdev->js_data.ctx_list_pullable[js]))
 		return NULL;
@@ -1174,6 +1011,28 @@ static struct kbase_context *kbase_js_ctx_list_pop_head(
 	return kctx;
 }
 
+/**
+ * kbase_js_ctx_list_pop_head - Pop the head context off the per-slot pullable
+ *                              queue.
+ * @kbdev:  Device pointer
+ * @js:     Job slot to use
+ *
+ * Return:  Context to use for specified slot.
+ *          NULL if no contexts present for specified slot
+ */
+static struct kbase_context *kbase_js_ctx_list_pop_head(
+		struct kbase_device *kbdev, int js)
+{
+	struct kbase_context *kctx;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags);
+	kctx = kbase_js_ctx_list_pop_head_nolock(kbdev, js);
+	spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags);
+
+	return kctx;
+}
+
 /**
  * kbase_js_ctx_pullable - Return if a context can be pulled from on the
  *                         specified slot
@@ -1207,7 +1066,8 @@ static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js,
 		return false; /* next atom blocked */
 	if (katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) {
 		if (katom->x_pre_dep->gpu_rb_state ==
-					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB)
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB ||
+					katom->x_pre_dep->will_fail_event_code)
 			return false;
 		if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) &&
 				kbase_backend_nr_atoms_on_slot(kctx->kbdev, js))
@@ -1236,7 +1096,7 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx,
 
 			/* Dependent atom must already have been submitted */
 			if (!(dep_atom->atom_flags &
-					KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED)) {
+					KBASE_KATOM_FLAG_JSCTX_IN_TREE)) {
 				ret = false;
 				break;
 			}
@@ -1255,6 +1115,12 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx,
 					ret = false;
 					break;
 				}
+				/* Each dependee atom can only have one
+				 * same-slot dependency */
+				if (dep_atom->post_dep) {
+					ret = false;
+					break;
+				}
 				has_dep = true;
 			} else {
 				/* Only one cross-slot dependency can be
@@ -1294,21 +1160,6 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx,
 				has_x_dep = true;
 			}
 
-			if (kbase_jd_katom_dep_type(&katom->dep[i]) ==
-						BASE_JD_DEP_TYPE_DATA &&
-					js == dep_js) {
-				struct kbase_jd_atom *last_atom =
-						jsctx_rb_peek_last(kctx, js,
-								prio);
-
-				/* Last atom on slot must be pre-dep for this
-				 * atom */
-				if (last_atom != dep_atom) {
-					ret = false;
-					break;
-				}
-			}
-
 			/* Dependency can be represented in ringbuffers */
 		}
 	}
@@ -1342,9 +1193,10 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx,
 				}
 				if ((kbase_jd_katom_dep_type(&katom->dep[i])
 						== BASE_JD_DEP_TYPE_DATA) &&
-								(js == dep_js))
-					katom->atom_flags |=
-						KBASE_KATOM_FLAG_FAIL_PREV;
+						(js == dep_js)) {
+					katom->pre_dep = dep_atom;
+					dep_atom->post_dep = katom;
+				}
 
 				list_del(&katom->dep_item[i]);
 				kbase_jd_katom_dep_clear(&katom->dep[i]);
@@ -1410,17 +1262,7 @@ bool kbasep_js_add_job(struct kbase_context *kctx,
 
 	KBASE_TIMELINE_ATOM_READY(kctx, kbase_jd_atom_id(kctx, atom));
 
-	if (kbase_js_dep_resolved_submit(kctx, atom, &enqueue_required) != 0) {
-		/* Ringbuffer was full (should be impossible) - fail the job */
-		--(js_kctx_info->ctx.nr_jobs);
-
-		spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
-		mutex_unlock(&js_devdata->runpool_mutex);
-
-		atom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
-
-		goto out_unlock;
-	}
+	enqueue_required = kbase_js_dep_resolved_submit(kctx, atom);
 
 	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_ADD_JOB, kctx, atom, atom->jc,
 				kbasep_js_trace_get_refcnt_nolock(kbdev, kctx));
@@ -1430,11 +1272,11 @@ bool kbasep_js_add_job(struct kbase_context *kctx,
 
 	if (enqueue_required) {
 		if (kbase_js_ctx_pullable(kctx, atom->slot_nr, false))
-			timer_sync = kbase_js_ctx_list_add_pullable(kbdev, kctx,
-								atom->slot_nr);
+			timer_sync = kbase_js_ctx_list_add_pullable_nolock(
+					kbdev, kctx, atom->slot_nr);
 		else
-			timer_sync = kbase_js_ctx_list_add_unpullable(kbdev,
-					kctx, atom->slot_nr);
+			timer_sync = kbase_js_ctx_list_add_unpullable_nolock(
+					kbdev, kctx, atom->slot_nr);
 	}
 	/* If this context is active and the atom is the first on its slot,
 	 * kick the job manager to attempt to fast-start the atom */
@@ -1762,9 +1604,7 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal(
 #if defined(CONFIG_MALI_GATOR_SUPPORT)
 		kbase_trace_mali_mmu_as_released(kctx->as_nr);
 #endif
-#if defined(CONFIG_MALI_MIPE_ENABLED)
 		kbase_tlstream_tl_nret_as_ctx(&kbdev->as[kctx->as_nr], kctx);
-#endif
 
 		kbase_backend_release_ctx_irq(kbdev, kctx);
 
@@ -1817,7 +1657,8 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal(
 		wake_up(&js_kctx_info->ctx.is_scheduled_wait);
 
 		/* Queue an action to occur after we've dropped the lock */
-		release_result |= KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED;
+		release_result |= KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED |
+			KBASEP_JS_RELEASE_RESULT_SCHED_ALL;
 	} else {
 		kbasep_js_run_jobs_after_ctx_and_atom_release(kbdev, kctx,
 				katom_retained_state, runpool_ctx_attr_change);
@@ -1875,14 +1716,12 @@ void kbasep_js_runpool_release_ctx_and_katom_retained_state(
 {
 	struct kbasep_js_device_data *js_devdata;
 	struct kbasep_js_kctx_info *js_kctx_info;
-	base_jd_event_code event_code;
 	kbasep_js_release_result release_result;
 
 	KBASE_DEBUG_ASSERT(kbdev != NULL);
 	KBASE_DEBUG_ASSERT(kctx != NULL);
 	js_kctx_info = &kctx->jctx.sched_info;
 	js_devdata = &kbdev->js_data;
-	event_code = katom_retained_state->event_code;
 
 	mutex_lock(&js_devdata->queue_mutex);
 	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
@@ -1954,77 +1793,11 @@ static void kbasep_js_runpool_release_ctx_no_schedule(
 	 */
 }
 
-/**
- * kbase_js_set_timeouts - update all JS timeouts with user specified data
- * @kbdev: Device pointer
- *
- * Timeouts are specified through the 'js_timeouts' sysfs file. If a timeout is
- * set to a positive number then that becomes the new value used, if a timeout
- * is negative then the default is set.
- */
-static void kbase_js_set_timeouts(struct kbase_device *kbdev)
+void kbase_js_set_timeouts(struct kbase_device *kbdev)
 {
-	struct kbasep_js_device_data *js_data = &kbdev->js_data;
-
-	if (kbdev->js_scheduling_period_ns < 0)
-		js_data->scheduling_period_ns = DEFAULT_JS_SCHEDULING_PERIOD_NS;
-	else if (kbdev->js_scheduling_period_ns > 0)
-		js_data->scheduling_period_ns = kbdev->js_scheduling_period_ns;
-
-	if (kbdev->js_soft_stop_ticks < 0)
-		js_data->soft_stop_ticks = DEFAULT_JS_SOFT_STOP_TICKS;
-	else if (kbdev->js_soft_stop_ticks > 0)
-		js_data->soft_stop_ticks = kbdev->js_soft_stop_ticks;
-
-	if (kbdev->js_soft_stop_ticks_cl < 0)
-		js_data->soft_stop_ticks_cl = DEFAULT_JS_SOFT_STOP_TICKS_CL;
-	else if (kbdev->js_soft_stop_ticks_cl > 0)
-		js_data->soft_stop_ticks_cl = kbdev->js_soft_stop_ticks_cl;
-
-	if (kbdev->js_hard_stop_ticks_ss < 0) {
-		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408))
-			js_data->hard_stop_ticks_ss =
-					DEFAULT_JS_HARD_STOP_TICKS_SS_8408;
-		else
-			js_data->hard_stop_ticks_ss =
-					DEFAULT_JS_HARD_STOP_TICKS_SS;
-	} else if (kbdev->js_hard_stop_ticks_ss > 0) {
-		js_data->hard_stop_ticks_ss = kbdev->js_hard_stop_ticks_ss;
-	}
-
-	if (kbdev->js_hard_stop_ticks_cl < 0)
-		js_data->hard_stop_ticks_cl = DEFAULT_JS_HARD_STOP_TICKS_CL;
-	else if (kbdev->js_hard_stop_ticks_cl > 0)
-		js_data->hard_stop_ticks_cl = kbdev->js_hard_stop_ticks_cl;
-
-	if (kbdev->js_hard_stop_ticks_dumping < 0)
-		js_data->hard_stop_ticks_dumping =
-				DEFAULT_JS_HARD_STOP_TICKS_DUMPING;
-	else if (kbdev->js_hard_stop_ticks_dumping > 0)
-		js_data->hard_stop_ticks_dumping =
-				kbdev->js_hard_stop_ticks_dumping;
-
-	if (kbdev->js_reset_ticks_ss < 0) {
-		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408))
-			js_data->gpu_reset_ticks_ss =
-					DEFAULT_JS_RESET_TICKS_SS_8408;
-		else
-			js_data->gpu_reset_ticks_ss = DEFAULT_JS_RESET_TICKS_SS;
-	} else if (kbdev->js_reset_ticks_ss > 0) {
-		js_data->gpu_reset_ticks_ss = kbdev->js_reset_ticks_ss;
-	}
-
-	if (kbdev->js_reset_ticks_cl < 0)
-		js_data->gpu_reset_ticks_cl = DEFAULT_JS_RESET_TICKS_CL;
-	else if (kbdev->js_reset_ticks_cl > 0)
-		js_data->gpu_reset_ticks_cl = kbdev->js_reset_ticks_cl;
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
 
-	if (kbdev->js_reset_ticks_dumping < 0)
-		js_data->gpu_reset_ticks_dumping =
-				DEFAULT_JS_RESET_TICKS_DUMPING;
-	else if (kbdev->js_reset_ticks_dumping > 0)
-		js_data->gpu_reset_ticks_dumping =
-				kbdev->js_reset_ticks_dumping;
+	kbase_backend_timeouts_changed(kbdev);
 }
 
 static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev,
@@ -2071,16 +1844,6 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev,
 				0u,
 				kbasep_js_trace_get_refcnt(kbdev, kctx));
 
-	if (js_devdata->nr_user_contexts_running == 0 &&
-			kbdev->js_timeouts_updated) {
-		/* Only when there are no other contexts submitting jobs:
-		 * Latch in run-time job scheduler timeouts that were set
-		 * through js_timeouts sysfs file */
-		kbase_js_set_timeouts(kbdev);
-
-		kbdev->js_timeouts_updated = false;
-	}
-
 	js_kctx_info->ctx.is_scheduled = true;
 
 	mutex_lock(&new_address_space->transaction_mutex);
@@ -2090,13 +1853,10 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev,
 	if (!kbase_backend_use_ctx(kbdev, kctx, as_nr)) {
 		spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
 		mutex_unlock(&new_address_space->transaction_mutex);
-		/* If address space is not pending, then kbase_backend_use_ctx()
-		 * failed. Roll back the transaction so far and return */
-		if (!kctx->as_pending) {
-			js_kctx_info->ctx.is_scheduled = false;
+		/* Roll back the transaction so far and return */
+		js_kctx_info->ctx.is_scheduled = false;
 
-			kbase_backend_release_free_address_space(kbdev, as_nr);
-		}
+		kbase_backend_release_free_address_space(kbdev, as_nr);
 
 		mutex_unlock(&js_devdata->runpool_mutex);
 
@@ -2109,9 +1869,7 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev,
 #if defined(CONFIG_MALI_GATOR_SUPPORT)
 	kbase_trace_mali_mmu_as_in_use(kctx->as_nr);
 #endif
-#if defined(CONFIG_MALI_MIPE_ENABLED)
 	kbase_tlstream_tl_ret_as_ctx(&kbdev->as[kctx->as_nr], kctx);
-#endif
 
 	/* Cause any future waiter-on-termination to wait until the context is
 	 * descheduled */
@@ -2169,11 +1927,6 @@ static bool kbase_js_use_ctx(struct kbase_device *kbdev,
 	unsigned long flags;
 
 	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
-	if (kctx->as_pending) {
-		/* Context waiting for AS to be assigned */
-		spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
-		return false;
-	}
 	if (kbase_backend_use_ctx_sched(kbdev, kctx)) {
 		/* Context already has ASID - mark as active */
 		kbdev->hwaccess.active_kctx = kctx;
@@ -2212,7 +1965,7 @@ void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev,
 	is_scheduled = js_kctx_info->ctx.is_scheduled;
 	if (!is_scheduled) {
 		/* Add the context to the pullable list */
-		if (kbase_js_ctx_list_add_pullable(kbdev, kctx, 0))
+		if (kbase_js_ctx_list_add_pullable_head(kbdev, kctx, 0))
 			kbase_js_sync_timers(kbdev);
 
 		/* Fast-starting requires the jsctx_mutex to be dropped,
@@ -2240,7 +1993,6 @@ void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev,
 		struct kbase_context *kctx)
 {
 	struct kbasep_js_kctx_info *js_kctx_info;
-	bool pending;
 
 	KBASE_DEBUG_ASSERT(kctx != NULL);
 	js_kctx_info = &kctx->jctx.sched_info;
@@ -2248,13 +2000,10 @@ void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev,
 	/* We don't need to use the address space anymore */
 	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
 	js_kctx_info->ctx.flags &= (~KBASE_CTX_FLAG_PRIVILEGED);
-	pending = kctx->as_pending;
 	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
 
-	/* Release the context - it will be scheduled out if there is no
-	 * pending job */
-	if (!pending)
-		kbasep_js_runpool_release_ctx(kbdev, kctx);
+	/* Release the context - it will be scheduled out */
+	kbasep_js_runpool_release_ctx(kbdev, kctx);
 
 	kbase_js_sched_all(kbdev);
 }
@@ -2293,8 +2042,10 @@ void kbasep_js_suspend(struct kbase_device *kbdev)
 			 * the instrumented context. It'll be suspended by
 			 * disabling instrumentation */
 			if (kctx->jctx.sched_info.ctx.flags &
-					KBASE_CTX_FLAG_PRIVILEGED)
-				KBASE_DEBUG_ASSERT(++nr_privileged_ctx == 1);
+					KBASE_CTX_FLAG_PRIVILEGED) {
+				++nr_privileged_ctx;
+				WARN_ON(nr_privileged_ctx != 1);
+			}
 		}
 	}
 	CSTD_UNUSED(nr_privileged_ctx);
@@ -2345,7 +2096,8 @@ void kbasep_js_resume(struct kbase_device *kbdev)
 
 			if (!js_kctx_info->ctx.is_scheduled &&
 				kbase_js_ctx_pullable(kctx, js, false))
-				timer_sync = kbase_js_ctx_list_add_pullable(
+				timer_sync =
+					kbase_js_ctx_list_add_pullable_nolock(
 							kbdev, kctx, js);
 
 			spin_unlock_irqrestore(&js_devdata->runpool_irq.lock,
@@ -2397,37 +2149,118 @@ static int kbase_js_get_slot(struct kbase_device *kbdev,
 	return 1;
 }
 
-int kbase_js_dep_resolved_submit(struct kbase_context *kctx,
-					struct kbase_jd_atom *katom,
-					bool *enqueue_required)
+bool kbase_js_dep_resolved_submit(struct kbase_context *kctx,
+					struct kbase_jd_atom *katom)
 {
+	bool enqueue_required;
+
 	katom->slot_nr = kbase_js_get_slot(kctx->kbdev, katom);
 
 	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+	lockdep_assert_held(&kctx->jctx.lock);
 
 	/* If slot will transition from unpullable to pullable then add to
 	 * pullable list */
 	if (jsctx_rb_none_to_pull(kctx, katom->slot_nr)) {
-		*enqueue_required = true;
+		enqueue_required = true;
 	} else {
-		*enqueue_required = false;
+		enqueue_required = false;
 	}
 	/* Check if there are lower priority jobs to soft stop */
 	kbase_job_slot_ctx_priority_check_locked(kctx, katom);
 
-	/* Add atom to ring buffer. */
-	if (unlikely(jsctx_rb_add_atom(kctx, katom))) {
-		/* The ring buffer is full. This should be impossible as the
-		 * job dispatcher can not submit enough atoms to exceed the
-		 * ring buffer size. Fail the job.
-		 */
-		WARN(1, "Job submit while JSCTX ringbuffer already full\n");
-		return -EINVAL;
+	if ((katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) ||
+			(katom->pre_dep && (katom->pre_dep->atom_flags &
+			KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST))) {
+		int prio = katom->sched_priority;
+		int js = katom->slot_nr;
+		struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js];
+
+		list_add_tail(&katom->queue, &queue->x_dep_head);
+		katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST;
+		enqueue_required = false;
+	} else {
+		/* Add atom to ring buffer. */
+		jsctx_tree_add(kctx, katom);
+		katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_TREE;
 	}
 
-	katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED;
+	return enqueue_required;
+}
 
-	return 0;
+/**
+ * kbase_js_move_to_tree - Move atom (and any dependent atoms) to the
+ *                         runnable_tree, ready for execution
+ * @katom: Atom to submit
+ *
+ * It is assumed that @katom does not have KBASE_KATOM_FLAG_X_DEP_BLOCKED set,
+ * but is still present in the x_dep list. If @katom has a same-slot dependent
+ * atom then that atom (and any dependents) will also be moved.
+ */
+static void kbase_js_move_to_tree(struct kbase_jd_atom *katom)
+{
+	lockdep_assert_held(&katom->kctx->kbdev->js_data.runpool_irq.lock);
+
+	while (katom) {
+		WARN_ON(!(katom->atom_flags &
+				KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST));
+
+		if (!(katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED)) {
+			list_del(&katom->queue);
+			katom->atom_flags &=
+					~KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST;
+			jsctx_tree_add(katom->kctx, katom);
+			katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_TREE;
+		} else {
+			break;
+		}
+
+		katom = katom->post_dep;
+	}
+}
+
+
+/**
+ * kbase_js_evict_deps - Evict dependencies of a failed atom.
+ * @kctx:       Context pointer
+ * @katom:      Pointer to the atom that has failed.
+ * @js:         The job slot the katom was run on.
+ * @prio:       Priority of the katom.
+ *
+ * Remove all post dependencies of an atom from the context ringbuffers.
+ *
+ * The original atom's event_code will be propagated to all dependent atoms.
+ *
+ * Context: Caller must hold the HW access lock
+ */
+static void kbase_js_evict_deps(struct kbase_context *kctx,
+				struct kbase_jd_atom *katom, int js, int prio)
+{
+	struct kbase_jd_atom *x_dep = katom->x_post_dep;
+	struct kbase_jd_atom *next_katom = katom->post_dep;
+
+	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+
+	if (next_katom) {
+		KBASE_DEBUG_ASSERT(next_katom->status !=
+				KBASE_JD_ATOM_STATE_HW_COMPLETED);
+		next_katom->will_fail_event_code = katom->event_code;
+
+	}
+
+	/* Has a cross-slot dependency. */
+	if (x_dep && (x_dep->atom_flags & (KBASE_KATOM_FLAG_JSCTX_IN_TREE |
+				KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST))) {
+		/* Remove dependency. */
+		x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED;
+
+		/* Fail if it had a data dependency. */
+		if (x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) {
+			x_dep->will_fail_event_code = katom->event_code;
+		}
+		if (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST)
+			kbase_js_move_to_tree(x_dep);
+	}
 }
 
 struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js)
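Editorial note: the hunk above changes kbase_js_dep_resolved_submit() from returning an error code plus a bool out-parameter to returning the enqueue decision directly. An atom that is cross-slot blocked (or queued behind one) is parked on the per-priority/per-slot x_dep list instead of the runnable tree and never requires the context to be enqueued. The following is a minimal standalone sketch of that decision only; the helper names and main() are illustrative and are not driver code.

#include <stdbool.h>
#include <stdio.h>

/* Toy model of the new submit decision: the context only needs enqueueing
 * when the slot had nothing to pull before the atom was added, and the atom
 * is not parked on the cross-slot dependency list. */
static bool dep_resolved_submit(bool slot_had_nothing_to_pull,
				bool parked_on_x_dep_list)
{
	bool enqueue_required = slot_had_nothing_to_pull;

	if (parked_on_x_dep_list)
		enqueue_required = false;

	return enqueue_required;
}

int main(void)
{
	printf("%d %d %d\n",
	       dep_resolved_submit(true, false),   /* 1: context must be enqueued */
	       dep_resolved_submit(true, true),    /* 0: atom waits on the x_dep list */
	       dep_resolved_submit(false, false)); /* 0: slot was already pullable */
	return 0;
}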
@@ -2456,8 +2289,7 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js)
 	/* Due to ordering restrictions when unpulling atoms on failure, we do
 	 * not allow multiple runs of fail-dep atoms from the same context to be
 	 * present on the same slot */
-	if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_PREV) &&
-				atomic_read(&kctx->atoms_pulled_slot[js])) {
+	if (katom->pre_dep && atomic_read(&kctx->atoms_pulled_slot[js])) {
 		struct kbase_jd_atom *prev_atom =
 				kbase_backend_inspect_tail(kctx->kbdev, js);
 
@@ -2467,7 +2299,8 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js)
 
 	if (katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) {
 		if (katom->x_pre_dep->gpu_rb_state ==
-					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB)
+					KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB ||
+					katom->x_pre_dep->will_fail_event_code)
 			return NULL;
 		if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) &&
 				kbase_backend_nr_atoms_on_slot(kctx->kbdev, js))
@@ -2510,6 +2343,8 @@ static void js_return_worker(struct work_struct *data)
 	u64 affinity = katom->affinity;
 	enum kbase_atom_coreref_state coreref_state = katom->coreref_state;
 
+	kbase_tlstream_aux_job_softstop_ex(katom);
+
 	kbase_backend_complete_wq(kbdev, katom);
 
 	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
@@ -2529,13 +2364,14 @@ static void js_return_worker(struct work_struct *data)
 
 	if (!atomic_read(&kctx->atoms_pulled_slot[js]) &&
 			jsctx_rb_none_to_pull(kctx, js))
-		timer_sync |= kbase_js_ctx_list_remove(kbdev, kctx, js);
+		timer_sync |= kbase_js_ctx_list_remove_nolock(kbdev, kctx, js);
 
 	if (!atomic_read(&kctx->atoms_pulled)) {
 		if (!kctx->slots_pullable) {
 			WARN_ON(!kctx->ctx_runnable_ref);
 			kctx->ctx_runnable_ref = false;
 			atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+			timer_sync = true;
 		}
 
 		if (kctx->as_nr != KBASEP_AS_NR_INVALID &&
@@ -2549,7 +2385,7 @@ static void js_return_worker(struct work_struct *data)
 			for (slot = 0; slot < num_slots; slot++) {
 				if (kbase_js_ctx_pullable(kctx, slot, true))
 					timer_sync |=
-						kbase_js_ctx_list_add_pullable(
+					kbase_js_ctx_list_add_pullable_nolock(
 							kbdev, kctx, slot);
 			}
 		}
@@ -2601,112 +2437,6 @@ void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
 	queue_work(kctx->jctx.job_done_wq, &katom->work);
 }
 
-static bool kbase_js_evict_atom(struct kbase_context *kctx,
-				struct kbase_jd_atom *katom_evict,
-				struct kbase_jd_atom *start_katom,
-				struct kbase_jd_atom *head_katom,
-				struct list_head *evict_list,
-				struct jsctx_rb *rb, int idx)
-{
-	struct kbase_jd_atom *x_dep = katom_evict->x_post_dep;
-
-	if (!(katom_evict->atom_flags & KBASE_KATOM_FLAG_FAIL_PREV) &&
-						katom_evict != start_katom)
-		return false;
-
-	if (katom_evict->gpu_rb_state != KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
-		WARN_ON(katom_evict->event_code != head_katom->event_code);
-
-		return false;
-	}
-
-	if (katom_evict->status == KBASE_JD_ATOM_STATE_HW_COMPLETED &&
-						katom_evict != head_katom)
-		return false;
-
-	/* Evict cross dependency if present */
-	if (x_dep && (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED)
-			&& (x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER))
-		list_add_tail(&x_dep->dep_item[0], evict_list);
-
-	/* If cross dependency is present and does not have a data dependency
-	 * then unblock */
-	if (x_dep && (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED)
-			&& !(x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER))
-		x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED;
-
-	if (katom_evict != head_katom) {
-		rb->entries[idx & JSCTX_RB_MASK].atom_id =
-				KBASEP_ATOM_ID_INVALID;
-
-		katom_evict->event_code = head_katom->event_code;
-		katom_evict->atom_flags &=
-					~KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED;
-
-		if (katom_evict->atom_flags & KBASE_KATOM_FLAG_HOLDING_CTX_REF)
-			kbase_jd_done(katom_evict, katom_evict->slot_nr, NULL,
-									0);
-		else
-			kbase_jd_evict(kctx->kbdev, katom_evict);
-	}
-
-	return true;
-}
-
-/**
- * kbase_js_evict_deps - Evict dependencies
- * @kctx:       Context pointer
- * @head_katom: Pointer to the atom to evict
- *
- * Remove all post dependencies of an atom from the context ringbuffers.
- *
- * The original atom's event_code will be propogated to all dependent atoms.
- *
- * Context: Caller must hold both jctx and HW access locks
- */
-static void kbase_js_evict_deps(struct kbase_context *kctx,
-				struct kbase_jd_atom *head_katom)
-{
-	struct list_head evict_list;
-
-	lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
-	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
-
-	INIT_LIST_HEAD(&evict_list);
-
-	list_add_tail(&head_katom->dep_item[0], &evict_list);
-
-	while (!list_empty(&evict_list)) {
-		struct kbase_jd_atom *start_katom;
-
-		start_katom = list_entry(evict_list.prev, struct kbase_jd_atom,
-								dep_item[0]);
-		list_del(evict_list.prev);
-
-		jsctx_rb_evict(kctx, start_katom, head_katom, &evict_list);
-	}
-}
-
-/**
- * kbase_js_compact - Compact JSCTX ringbuffers
- * @kctx:  Context pointer
- *
- * Compact the JSCTX ringbuffers, removing any NULL entries
- *
- * Context: Caller must hold both jctx and HW access locks
- */
-static void kbase_js_compact(struct kbase_context *kctx)
-{
-	struct kbase_device *kbdev = kctx->kbdev;
-	int js;
-
-	lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
-	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
-
-	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++)
-		jsctx_rb_compact(kctx, js);
-}
-
 bool kbase_js_complete_atom_wq(struct kbase_context *kctx,
 						struct kbase_jd_atom *katom)
 {
@@ -2729,12 +2459,7 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx,
 	mutex_lock(&js_devdata->runpool_mutex);
 	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
 
-	if (katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED) {
-		if (katom->event_code != BASE_JD_EVENT_DONE)
-			kbase_js_evict_deps(kctx, katom);
-
-		jsctx_rb_remove(kctx, katom);
-
+	if (katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE) {
 		context_idle = !atomic_dec_return(&kctx->atoms_pulled);
 		atomic_dec(&kctx->atoms_pulled_slot[atom_slot]);
 
@@ -2743,16 +2468,18 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx,
 			WARN_ON(!kctx->ctx_runnable_ref);
 			kctx->ctx_runnable_ref = false;
 			atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+			timer_sync = true;
 		}
-
-		if (katom->event_code != BASE_JD_EVENT_DONE)
-			kbase_js_compact(kctx);
 	}
+	WARN_ON(!(katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE));
 
 	if (!atomic_read(&kctx->atoms_pulled_slot[atom_slot]) &&
-			jsctx_rb_none_to_pull(kctx, atom_slot))
-		timer_sync |= kbase_js_ctx_list_remove(kctx->kbdev, kctx,
-				atom_slot);
+			jsctx_rb_none_to_pull(kctx, atom_slot)) {
+		if (!list_empty(
+			&kctx->jctx.sched_info.ctx.ctx_list_entry[atom_slot]))
+			timer_sync |= kbase_js_ctx_list_remove_nolock(
+					kctx->kbdev, kctx, atom_slot);
+	}
 
 	/*
 	 * If submission is disabled on this context (most likely due to an
@@ -2768,7 +2495,8 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx,
 
 		for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
 			if (kbase_js_ctx_pullable(kctx, js, true))
-				timer_sync |= kbase_js_ctx_list_add_pullable(
+				timer_sync |=
+					kbase_js_ctx_list_add_pullable_nolock(
 							kbdev, kctx, js);
 		}
 	} else if (katom->x_post_dep &&
@@ -2777,7 +2505,8 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx,
 
 		for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
 			if (kbase_js_ctx_pullable(kctx, js, true))
-				timer_sync |= kbase_js_ctx_list_add_pullable(
+				timer_sync |=
+					kbase_js_ctx_list_add_pullable_nolock(
 							kbdev, kctx, js);
 		}
 	}
@@ -2802,32 +2531,29 @@ void kbase_js_complete_atom(struct kbase_jd_atom *katom, ktime_t *end_timestamp)
 	struct kbase_device *kbdev;
 	struct kbase_context *kctx = katom->kctx;
 	union kbasep_js_policy *js_policy;
-	struct kbasep_js_device_data *js_devdata;
+	struct kbase_jd_atom *x_dep = katom->x_post_dep;
 
 	kbdev = kctx->kbdev;
 
 	js_policy = &kbdev->js_data.policy;
-	js_devdata = &kbdev->js_data;
 
 	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
 
+	if (katom->will_fail_event_code)
+		katom->event_code = katom->will_fail_event_code;
+
 	katom->status = KBASE_JD_ATOM_STATE_HW_COMPLETED;
 
+	if (katom->event_code != BASE_JD_EVENT_DONE) {
+		kbase_js_evict_deps(kctx, katom, katom->slot_nr,
+				katom->sched_priority);
+	}
+
 #if defined(CONFIG_MALI_GATOR_SUPPORT)
 	kbase_trace_mali_job_slots_event(GATOR_MAKE_EVENT(GATOR_JOB_SLOT_STOP,
 				katom->slot_nr), NULL, 0);
 #endif
-#if defined(CONFIG_MALI_MIPE_ENABLED)
-	kbase_tlstream_tl_nret_atom_lpu(
-			katom,
-			&kbdev->gpu_props.props.raw_props.js_features[
-				katom->slot_nr]);
-	kbase_tlstream_tl_nret_atom_as(katom, &kbdev->as[kctx->as_nr]);
-	kbase_tlstream_tl_nret_ctx_lpu(
-			kctx,
-			&kbdev->gpu_props.props.raw_props.js_features[
-				katom->slot_nr]);
-#endif
+
 	/* Calculate the job's time used */
 	if (end_timestamp != NULL) {
 		/* Only calculating it for jobs that really run on the HW (e.g.
@@ -2851,21 +2577,26 @@ void kbase_js_complete_atom(struct kbase_jd_atom *katom, ktime_t *end_timestamp)
 	kbase_jd_done(katom, katom->slot_nr, end_timestamp, 0);
 
 	/* Unblock cross dependency if present */
-	if (katom->x_post_dep && (katom->event_code == BASE_JD_EVENT_DONE ||
-			!(katom->x_post_dep->atom_flags &
-						KBASE_KATOM_FLAG_FAIL_BLOCKER)))
-		katom->x_post_dep->atom_flags &=
-					~KBASE_KATOM_FLAG_X_DEP_BLOCKED;
+	if (x_dep && (katom->event_code == BASE_JD_EVENT_DONE ||
+			!(x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER)) &&
+			(x_dep->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED)) {
+		bool was_pullable = kbase_js_ctx_pullable(kctx, x_dep->slot_nr,
+				false);
+		x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED;
+		kbase_js_move_to_tree(x_dep);
+		if (!was_pullable && kbase_js_ctx_pullable(kctx, x_dep->slot_nr,
+				false))
+			kbase_js_ctx_list_add_pullable_nolock(kbdev, kctx,
+					x_dep->slot_nr);
+	}
 }
 
 void kbase_js_sched(struct kbase_device *kbdev, int js_mask)
 {
 	struct kbasep_js_device_data *js_devdata;
-	union kbasep_js_policy *js_policy;
 	bool timer_sync = false;
 
 	js_devdata = &kbdev->js_data;
-	js_policy = &js_devdata->policy;
 
 	down(&js_devdata->schedule_sem);
 	mutex_lock(&js_devdata->queue_mutex);
@@ -2915,16 +2646,15 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask)
 				/* Context can not be used at this time */
 				spin_lock_irqsave(&js_devdata->runpool_irq.lock,
 									flags);
-				if (kctx->as_pending ||
-					kbase_js_ctx_pullable(kctx, js, false)
+				if (kbase_js_ctx_pullable(kctx, js, false)
 					|| (kctx->jctx.sched_info.ctx.flags &
 						KBASE_CTX_FLAG_PRIVILEGED))
 					timer_sync |=
-					kbase_js_ctx_list_add_pullable_head(
+					kbase_js_ctx_list_add_pullable_head_nolock(
 							kctx->kbdev, kctx, js);
 				else
 					timer_sync |=
-					kbase_js_ctx_list_add_unpullable(
+					kbase_js_ctx_list_add_unpullable_nolock(
 							kctx->kbdev, kctx, js);
 				spin_unlock_irqrestore(
 					&js_devdata->runpool_irq.lock, flags);
@@ -2953,12 +2683,12 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask)
 				/* Failed to pull jobs - push to head of list */
 				if (kbase_js_ctx_pullable(kctx, js, true))
 					timer_sync |=
-					kbase_js_ctx_list_add_pullable_head(
+					kbase_js_ctx_list_add_pullable_head_nolock(
 								kctx->kbdev,
 								kctx, js);
 				else
 					timer_sync |=
-					kbase_js_ctx_list_add_unpullable(
+					kbase_js_ctx_list_add_unpullable_nolock(
 								kctx->kbdev,
 								kctx, js);
 
@@ -2984,10 +2714,12 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask)
 
 			/* Push to back of list */
 			if (kbase_js_ctx_pullable(kctx, js, true))
-				timer_sync |= kbase_js_ctx_list_add_pullable(
+				timer_sync |=
+					kbase_js_ctx_list_add_pullable_nolock(
 							kctx->kbdev, kctx, js);
 			else
-				timer_sync |= kbase_js_ctx_list_add_unpullable(
+				timer_sync |=
+					kbase_js_ctx_list_add_unpullable_nolock(
 							kctx->kbdev, kctx, js);
 			spin_unlock_irqrestore(&js_devdata->runpool_irq.lock,
 									flags);
@@ -3018,6 +2750,7 @@ void kbase_js_zap_context(struct kbase_context *kctx)
 	/* First, atomically do the following:
 	 * - mark the context as dying
 	 * - try to evict it from the policy queue */
+	mutex_lock(&kctx->jctx.lock);
 	mutex_lock(&js_devdata->queue_mutex);
 	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
 	js_kctx_info->ctx.is_dying = true;
@@ -3093,6 +2826,7 @@ void kbase_js_zap_context(struct kbase_context *kctx)
 
 		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
 		mutex_unlock(&js_devdata->queue_mutex);
+		mutex_unlock(&kctx->jctx.lock);
 	} else {
 		unsigned long flags;
 		bool was_retained;
@@ -3128,6 +2862,7 @@ void kbase_js_zap_context(struct kbase_context *kctx)
 		spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
 		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
 		mutex_unlock(&js_devdata->queue_mutex);
+		mutex_unlock(&kctx->jctx.lock);
 
 		dev_dbg(kbdev->dev, "Zap: Ctx %p Release (may or may not schedule out immediately)",
 									kctx);
@@ -3194,7 +2929,6 @@ static void kbase_js_foreach_ctx_job(struct kbase_context *kctx,
 	u32 js;
 
 	kbdev = kctx->kbdev;
-
 	js_devdata = &kbdev->js_data;
 
 	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
@@ -3204,7 +2938,7 @@ static void kbase_js_foreach_ctx_job(struct kbase_context *kctx,
 
 	/* Invoke callback on jobs on each slot in turn */
 	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++)
-		jsctx_rb_foreach(kctx, js, callback);
+		jsctx_queue_foreach(kctx, js, callback);
 
 	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
 }
diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_js.h b/drivers/gpu/arm/midgard_for_linux/mali_kbase_js.h
index 868c6808d628..66b213293016 100644
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_js.h
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_js.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -491,15 +491,22 @@ void kbasep_js_resume(struct kbase_device *kbdev);
  * @param[in] kctx  Context pointer
  * @param[in] atom  Pointer to the atom to submit
  *
- * @return 0 if submit succeeded
- *         error code if the atom can not be submitted at this
- *         time, due to insufficient space in the ringbuffer, or dependencies
- *         that can not be represented.
- */
-int kbase_js_dep_resolved_submit(struct kbase_context *kctx,
-					struct kbase_jd_atom *katom,
-					bool *enqueue_required);
+ * @return Whether the context needs to be enqueued. */
+bool kbase_js_dep_resolved_submit(struct kbase_context *kctx,
+					struct kbase_jd_atom *katom);
 
+/**
+ * jsctx_ll_flush_to_rb() - Push atoms from the linked list to the ringbuffer.
+ * @kctx:  Context pointer
+ * @prio:  Priority (specifies the queue together with @js).
+ * @js:    Job slot (specifies the queue together with @prio).
+ *
+ * Pushes as many atoms as possible from the linked list to the ringbuffer.
+ * The number of atoms pushed is limited by the free space in the ringbuffer
+ * and the number of atoms available in the linked list.
+ */
+void jsctx_ll_flush_to_rb(struct kbase_context *kctx, int prio, int js);
 /**
  * @brief Pull an atom from a context in the job scheduler for execution.
  *
@@ -601,6 +608,16 @@ void kbase_js_zap_context(struct kbase_context *kctx);
 bool kbase_js_is_atom_valid(struct kbase_device *kbdev,
 				struct kbase_jd_atom *katom);
 
+/**
+ * kbase_js_set_timeouts - update all JS timeouts with user specified data
+ * @kbdev: Device pointer
+ *
+ * Timeouts are specified through the 'js_timeouts' sysfs file. If a timeout is
+ * set to a positive number then that becomes the new value used, if a timeout
+ * is negative then the default is set.
+ */
+void kbase_js_set_timeouts(struct kbase_device *kbdev);
+
 /*
  * Helpers follow
  */
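Editorial note: kbase_js_set_timeouts() (declared above) latches values written to the js_timeouts sysfs file into the backend scheduler. A standalone sketch of the documented rule follows, where a positive value overrides and a negative value restores the default; the default constant below is a placeholder, not the driver's real DEFAULT_JS_* value.

#include <stdio.h>

/* Placeholder default; the driver substitutes its own DEFAULT_JS_* constant. */
#define EXAMPLE_DEFAULT_TICKS 1UL

static unsigned long latch_timeout(long sysfs_value, unsigned long default_ticks)
{
	if (sysfs_value > 0)
		return (unsigned long)sysfs_value;
	return default_ticks;	/* negative (or zero, in this sketch) restores the default */
}

int main(void)
{
	printf("%lu %lu\n",
	       latch_timeout(50, EXAMPLE_DEFAULT_TICKS),	/* 50: user override */
	       latch_timeout(-1, EXAMPLE_DEFAULT_TICKS));	/* 1: back to the default */
	return 0;
}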
diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_js_defs.h b/drivers/gpu/arm/midgard_for_linux/mali_kbase_js_defs.h
index 04f7809f79d3..e1342045b394 100644
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_js_defs.h
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_js_defs.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -348,6 +348,9 @@ struct kbasep_js_device_data {
 	u32 cfs_ctx_runtime_init_slices; /**< Value for DEFAULT_JS_CFS_CTX_RUNTIME_INIT_SLICES */
 	u32 cfs_ctx_runtime_min_slices;	 /**< Value for  DEFAULT_JS_CFS_CTX_RUNTIME_MIN_SLICES */
 
+	/**< Value for JS_SOFT_JOB_TIMEOUT */
+	atomic_t soft_job_timeout_ms;
+
 	/** List of suspended soft jobs */
 	struct list_head suspended_soft_jobs_list;
 
@@ -402,7 +405,7 @@ struct kbasep_js_kctx_info {
 	 *
 	 * You may not access any of these members from IRQ context.
 	 */
-	struct {
+	struct kbase_jsctx {
 		struct mutex jsctx_mutex;		    /**< Job Scheduler Context lock */
 
 		/** Number of jobs <b>ready to run</b> - does \em not include the jobs waiting in
diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_js_policy_cfs.c b/drivers/gpu/arm/midgard_for_linux/mali_kbase_js_policy_cfs.c
index 692460710ce0..90c13458ec7c 100644
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_js_policy_cfs.c
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_js_policy_cfs.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -235,16 +235,11 @@ int kbasep_js_policy_init_ctx(struct kbase_device *kbdev, struct kbase_context *
 
 void kbasep_js_policy_term_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx)
 {
-	struct kbasep_js_policy_cfs_ctx *ctx_info;
-	struct kbasep_js_policy_cfs *policy_info;
 	struct kbase_device *kbdev;
 
 	KBASE_DEBUG_ASSERT(js_policy != NULL);
 	KBASE_DEBUG_ASSERT(kctx != NULL);
 
-	policy_info = &js_policy->cfs;
-	ctx_info = &kctx->jctx.sched_info.runpool.policy_ctx.cfs;
-
 	kbdev = container_of(js_policy, struct kbase_device, js_data.policy);
 	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_POLICY_TERM_CTX, kctx, NULL, 0u, kbasep_js_policy_trace_get_refcnt(kbdev, kctx));
 
diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem.c b/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem.c
index ffc12a538af6..c1851caa95a0 100644
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem.c
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -24,22 +24,21 @@
 #ifdef CONFIG_DMA_SHARED_BUFFER
 #include <linux/dma-buf.h>
 #endif				/* CONFIG_DMA_SHARED_BUFFER */
-
+#ifdef CONFIG_UMP
+#include <linux/ump.h>
+#endif				/* CONFIG_UMP */
 #include <linux/kernel.h>
 #include <linux/bug.h>
 #include <linux/compat.h>
+#include <linux/version.h>
 
 #include <mali_kbase_config.h>
 #include <mali_kbase.h>
 #include <mali_midg_regmap.h>
 #include <mali_kbase_cache_policy.h>
 #include <mali_kbase_hw.h>
-#include <mali_kbase_gator.h>
 #include <mali_kbase_hwaccess_time.h>
-
-#if defined(CONFIG_MALI_MIPE_ENABLED)
 #include <mali_kbase_tlstream.h>
-#endif
 
 /**
  * @brief Check the zone compatibility of two regions.
@@ -392,13 +391,33 @@ int kbase_add_va_region(struct kbase_context *kctx,
 	{
 		u64 start_pfn;
 
-		tmp = kbase_region_tracker_find_region_meeting_reqs(kctx, reg, nr_pages, align);
-		if (!tmp) {
+		/*
+		 * Depending on the zone the allocation request is for
+		 * we might need to retry it.
+		 */
+		do {
+			tmp = kbase_region_tracker_find_region_meeting_reqs(
+					kctx, reg, nr_pages, align);
+			if (tmp) {
+				start_pfn = (tmp->start_pfn + align - 1) &
+						~(align - 1);
+				err = kbase_insert_va_region_nolock(kctx, reg,
+						tmp, start_pfn, nr_pages);
+				break;
+			}
+
+			/*
+			 * If the allocation is not from the same zone as JIT
+			 * then don't retry, we're out of VA and there is
+			 * nothing which can be done about it.
+			 */
+			if ((reg->flags & KBASE_REG_ZONE_MASK) !=
+					KBASE_REG_ZONE_CUSTOM_VA)
+				break;
+		} while (kbase_jit_evict(kctx));
+
+		if (!tmp)
 			err = -ENOMEM;
-			goto exit;
-		}
-		start_pfn = (tmp->start_pfn + align - 1) & ~(align - 1);
-		err = kbase_insert_va_region_nolock(kctx, reg, tmp, start_pfn, nr_pages);
 	}
 
  exit:
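Editorial note: the retry loop added to kbase_add_va_region() keeps re-running the VA search for CUSTOM_VA requests, evicting one pooled JIT region between attempts, until either the search succeeds or kbase_jit_evict() reports nothing left to evict. A standalone model of that control flow follows; the stand-ins for the VA search and the eviction are illustrative, not driver code.

#include <stdbool.h>
#include <stdio.h>

static int free_slots;	/* stands in for free VA in the CUSTOM_VA zone */
static int evictable;	/* stands in for pooled JIT regions */

static bool try_find_region(int needed)
{
	return free_slots >= needed;
}

static bool evict_one(void)
{
	if (!evictable)
		return false;
	evictable--;
	free_slots++;	/* eviction returns VA to the zone */
	return true;
}

int main(void)
{
	bool found;

	free_slots = 1;
	evictable = 3;

	do {
		found = try_find_region(3);
		if (found)
			break;
	} while (evict_one());

	/* Prints found=1 free_slots=3 evictable=1: two evictions were enough. */
	printf("found=%d free_slots=%d evictable=%d\n",
	       found, free_slots, evictable);
	return 0;
}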
@@ -410,7 +429,10 @@ KBASE_EXPORT_TEST_API(kbase_add_va_region);
 /**
  * @brief Initialize the internal region tracker data structure.
  */
-static void kbase_region_tracker_ds_init(struct kbase_context *kctx, struct kbase_va_region *same_va_reg, struct kbase_va_region *exec_reg, struct kbase_va_region *custom_va_reg)
+static void kbase_region_tracker_ds_init(struct kbase_context *kctx,
+		struct kbase_va_region *same_va_reg,
+		struct kbase_va_region *exec_reg,
+		struct kbase_va_region *custom_va_reg)
 {
 	kctx->reg_rbtree = RB_ROOT;
 	kbase_region_tracker_insert(kctx, same_va_reg);
@@ -448,6 +470,11 @@ int kbase_region_tracker_init(struct kbase_context *kctx)
 	size_t same_va_bits = sizeof(void *) * BITS_PER_BYTE;
 	u64 custom_va_size = KBASE_REG_ZONE_CUSTOM_VA_SIZE;
 	u64 gpu_va_limit = (1ULL << kctx->kbdev->gpu_props.mmu.va_bits) >> PAGE_SHIFT;
+	u64 same_va_pages;
+	int err;
+
+	/* Take the lock as kbase_free_alloced_region requires it */
+	kbase_gpu_vm_lock(kctx);
 
 #if defined(CONFIG_ARM64)
 	same_va_bits = VA_BITS;
@@ -464,24 +491,29 @@ int kbase_region_tracker_init(struct kbase_context *kctx)
 		same_va_bits = 33;
 #endif
 
-	if (kctx->kbdev->gpu_props.mmu.va_bits < same_va_bits)
-		return -EINVAL;
+	if (kctx->kbdev->gpu_props.mmu.va_bits < same_va_bits) {
+		err = -EINVAL;
+		goto fail_unlock;
+	}
 
+	same_va_pages = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1;
 	/* all have SAME_VA */
 	same_va_reg = kbase_alloc_free_region(kctx, 1,
-			(1ULL << (same_va_bits - PAGE_SHIFT)) - 1,
+			same_va_pages,
 			KBASE_REG_ZONE_SAME_VA);
 
-	if (!same_va_reg)
-		return -ENOMEM;
+	if (!same_va_reg) {
+		err = -ENOMEM;
+		goto fail_unlock;
+	}
 
 #ifdef CONFIG_64BIT
-	/* only 32-bit clients have the other two zones */
+	/* 32-bit clients have exec and custom VA zones */
 	if (kctx->is_compat) {
 #endif
 		if (gpu_va_limit <= KBASE_REG_ZONE_CUSTOM_VA_BASE) {
-			kbase_free_alloced_region(same_va_reg);
-			return -EINVAL;
+			err = -EINVAL;
+			goto fail_free_same_va;
 		}
 		/* If the current size of TMEM is out of range of the
 		 * virtual address space addressable by the MMU then
@@ -496,8 +528,8 @@ int kbase_region_tracker_init(struct kbase_context *kctx)
 				KBASE_REG_ZONE_EXEC);
 
 		if (!exec_reg) {
-			kbase_free_alloced_region(same_va_reg);
-			return -ENOMEM;
+			err = -ENOMEM;
+			goto fail_free_same_va;
 		}
 
 		custom_va_reg = kbase_alloc_free_region(kctx,
@@ -505,9 +537,8 @@ int kbase_region_tracker_init(struct kbase_context *kctx)
 				custom_va_size, KBASE_REG_ZONE_CUSTOM_VA);
 
 		if (!custom_va_reg) {
-			kbase_free_alloced_region(same_va_reg);
-			kbase_free_alloced_region(exec_reg);
-			return -ENOMEM;
+			err = -ENOMEM;
+			goto fail_free_exec;
 		}
 #ifdef CONFIG_64BIT
 	}
@@ -515,7 +546,108 @@ int kbase_region_tracker_init(struct kbase_context *kctx)
 
 	kbase_region_tracker_ds_init(kctx, same_va_reg, exec_reg, custom_va_reg);
 
+	kctx->same_va_end = same_va_pages + 1;
+
+	kbase_gpu_vm_unlock(kctx);
+	return 0;
+
+fail_free_exec:
+	kbase_free_alloced_region(exec_reg);
+fail_free_same_va:
+	kbase_free_alloced_region(same_va_reg);
+fail_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return err;
+}
+
+int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages)
+{
+#ifdef CONFIG_64BIT
+	struct kbase_va_region *same_va;
+	struct kbase_va_region *custom_va_reg;
+	u64 same_va_bits;
+	u64 total_va_size;
+	int err;
+
+	/*
+	 * Nothing to do for 32-bit clients, JIT uses the existing
+	 * custom VA zone.
+	 */
+	if (kctx->is_compat)
+		return 0;
+
+#if defined(CONFIG_ARM64)
+	same_va_bits = VA_BITS;
+#elif defined(CONFIG_X86_64)
+	same_va_bits = 47;
+#elif defined(CONFIG_64BIT)
+#error Unsupported 64-bit architecture
+#endif
+
+	if (kbase_hw_has_feature(kctx->kbdev, BASE_HW_FEATURE_33BIT_VA))
+		same_va_bits = 33;
+
+	total_va_size = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1;
+
+	kbase_gpu_vm_lock(kctx);
+
+	/*
+	 * Modify the same VA free region after creation. Be careful to ensure
+	 * that allocations haven't been made, as existing same VA
+	 * allocations could otherwise overlap with the custom VA zone.
+	 */
+	same_va = kbase_region_tracker_find_region_base_address(kctx,
+			PAGE_SIZE);
+	if (!same_va) {
+		err = -ENOMEM;
+		goto fail_unlock;
+	}
+
+	/* The region flag or region size has changed since creation so bail. */
+	if ((!(same_va->flags & KBASE_REG_FREE)) ||
+			(same_va->nr_pages != total_va_size)) {
+		err = -ENOMEM;
+		goto fail_unlock;
+	}
+
+	if (same_va->nr_pages < jit_va_pages ||
+			kctx->same_va_end < jit_va_pages) {
+		err = -ENOMEM;
+		goto fail_unlock;
+	}
+
+	/* It's safe to adjust the same VA zone now */
+	same_va->nr_pages -= jit_va_pages;
+	kctx->same_va_end -= jit_va_pages;
+
+	/*
+	 * Create a custom VA zone at the end of the VA for allocations which
+	 * JIT can use so it doesn't have to allocate VA from the kernel.
+	 */
+	custom_va_reg = kbase_alloc_free_region(kctx,
+				kctx->same_va_end,
+				jit_va_pages,
+				KBASE_REG_ZONE_CUSTOM_VA);
+	if (!custom_va_reg) {
+		/*
+		 * The context will be destroyed if we fail here so no point
+		 * reverting the change we made to same_va.
+		 */
+		err = -ENOMEM;
+		goto fail_unlock;
+	}
+
+	kbase_region_tracker_insert(kctx, custom_va_reg);
+
+	kbase_gpu_vm_unlock(kctx);
+	return 0;
+
+fail_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return err;
+#else
 	return 0;
+#endif
 }
 
 int kbase_mem_init(struct kbase_device *kbdev)
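Editorial note: kbase_region_tracker_init_jit() shrinks the SAME_VA zone and places the new CUSTOM_VA (JIT) zone immediately after it, so the carve is pure page arithmetic. A standalone sketch of that arithmetic follows, assuming 47 usable VA bits and 4 KiB pages (the x86_64 case above); the JIT size used is an arbitrary example, not a driver default.

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12

int main(void)
{
	uint64_t same_va_bits = 47;
	uint64_t same_va_pages = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1;
	uint64_t same_va_end = same_va_pages + 1;	/* as set at region tracker init */
	uint64_t jit_va_pages = 1ULL << 20;		/* example: 4 GiB of JIT VA */

	/* Shrink SAME_VA and place the custom (JIT) zone right after it. */
	same_va_pages -= jit_va_pages;
	same_va_end -= jit_va_pages;

	printf("SAME_VA: pages 1..%llu, JIT zone: %llu pages starting at pfn %llu\n",
	       (unsigned long long)same_va_pages,
	       (unsigned long long)jit_va_pages,
	       (unsigned long long)same_va_end);
	return 0;
}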
@@ -613,8 +745,46 @@ KBASE_EXPORT_TEST_API(kbase_alloc_free_region);
  */
 void kbase_free_alloced_region(struct kbase_va_region *reg)
 {
-	KBASE_DEBUG_ASSERT(NULL != reg);
 	if (!(reg->flags & KBASE_REG_FREE)) {
+		/*
+		 * The physical allocation should have been removed from the
+		 * eviction list before this function is called. However, in the
+		 * case of abnormal process termination or the app leaking the
+		 * memory, kbase_mem_free_region is not called, so it can still
+		 * be on the list when the region tracker is terminated.
+		 */
+		if (!list_empty(&reg->gpu_alloc->evict_node)) {
+			/*
+			 * Unlink the physical allocation before unmaking it
+			 * evictable so that the allocation isn't grown back to
+			 * its last backed size as we're going to unmap it
+			 * anyway.
+			 */
+			reg->cpu_alloc->reg = NULL;
+			if (reg->cpu_alloc != reg->gpu_alloc)
+				reg->gpu_alloc->reg = NULL;
+
+			/*
+			 * If a region has been made evictable then we must
+			 * unmake it before trying to free it.
+			 * If the memory hasn't been reclaimed it will be
+			 * unmapped and freed below, if it has been reclaimed
+			 * then the operations below are no-ops.
+			 */
+			if (reg->flags & KBASE_REG_DONT_NEED) {
+				KBASE_DEBUG_ASSERT(reg->cpu_alloc->type ==
+						   KBASE_MEM_TYPE_NATIVE);
+				kbase_mem_evictable_unmake(reg->gpu_alloc);
+			}
+		}
+
+		/*
+		 * Remove the region from the sticky resource metadata
+		 * list should it be there.
+		 */
+		kbase_sticky_resource_release(reg->kctx, NULL,
+				reg->start_pfn << PAGE_SHIFT);
+
 		kbase_mem_phy_alloc_put(reg->cpu_alloc);
 		kbase_mem_phy_alloc_put(reg->gpu_alloc);
 		/* To detect use-after-free in debug builds */
@@ -625,41 +795,6 @@ void kbase_free_alloced_region(struct kbase_va_region *reg)
 
 KBASE_EXPORT_TEST_API(kbase_free_alloced_region);
 
-void kbase_mmu_update(struct kbase_context *kctx)
-{
-	KBASE_DEBUG_ASSERT(NULL != kctx);
-	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
-	/* ASSERT that the context has a valid as_nr, which is only the case
-	 * when it's scheduled in.
-	 *
-	 * as_nr won't change because the caller has the runpool_irq lock */
-	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
-	lockdep_assert_held(&kctx->kbdev->as[kctx->as_nr].transaction_mutex);
-
-	kctx->kbdev->mmu_mode->update(kctx);
-}
-
-KBASE_EXPORT_TEST_API(kbase_mmu_update);
-
-void kbase_mmu_disable(struct kbase_context *kctx)
-{
-	KBASE_DEBUG_ASSERT(NULL != kctx);
-	/* ASSERT that the context has a valid as_nr, which is only the case
-	 * when it's scheduled in.
-	 *
-	 * as_nr won't change because the caller has the runpool_irq lock */
-	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
-
-	kctx->kbdev->mmu_mode->disable_as(kctx->kbdev, kctx->as_nr);
-}
-
-KBASE_EXPORT_TEST_API(kbase_mmu_disable);
-
-void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr)
-{
-	kbdev->mmu_mode->disable_as(kbdev, as_nr);
-}
-
 int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align)
 {
 	int err;
@@ -891,10 +1026,10 @@ static int kbase_do_syncset(struct kbase_context *kctx,
 
 	/* find the region where the virtual address is contained */
 	reg = kbase_region_tracker_find_region_enclosing_address(kctx,
-			sset->mem_handle);
+			sset->mem_handle.basep.handle);
 	if (!reg) {
 		dev_warn(kctx->kbdev->dev, "Can't find region at VA 0x%016llX",
-				sset->mem_handle);
+				sset->mem_handle.basep.handle);
 		err = -EINVAL;
 		goto out_unlock;
 	}
@@ -908,7 +1043,7 @@ static int kbase_do_syncset(struct kbase_context *kctx,
 	map = kbasep_find_enclosing_cpu_mapping_of_region(reg, start, size);
 	if (!map) {
 		dev_warn(kctx->kbdev->dev, "Can't find CPU mapping 0x%016lX for VA 0x%016llX",
-				start, sset->mem_handle);
+				start, sset->mem_handle.basep.handle);
 		err = -EINVAL;
 		goto out_unlock;
 	}
@@ -989,17 +1124,34 @@ int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *re
 	KBASE_DEBUG_ASSERT(NULL != kctx);
 	KBASE_DEBUG_ASSERT(NULL != reg);
 	lockdep_assert_held(&kctx->reg_lock);
+
+	/*
+	 * Unlink the physical allocation before unmaking it evictable so
+	 * that the allocation isn't grown back to its last backed size
+	 * as we're going to unmap it anyway.
+	 */
+	reg->cpu_alloc->reg = NULL;
+	if (reg->cpu_alloc != reg->gpu_alloc)
+		reg->gpu_alloc->reg = NULL;
+
+	/*
+	 * If a region has been made evictable then we must unmake it
+	 * before trying to free it.
+	 * If the memory hasn't been reclaimed it will be unmapped and freed
+	 * below, if it has been reclaimed then the operations below are no-ops.
+	 */
+	if (reg->flags & KBASE_REG_DONT_NEED) {
+		KBASE_DEBUG_ASSERT(reg->cpu_alloc->type ==
+				   KBASE_MEM_TYPE_NATIVE);
+		kbase_mem_evictable_unmake(reg->gpu_alloc);
+	}
+
 	err = kbase_gpu_munmap(kctx, reg);
 	if (err) {
 		dev_warn(reg->kctx->kbdev->dev, "Could not unmap from the GPU...\n");
 		goto out;
 	}
-#ifndef CONFIG_MALI_NO_MALI
-	if (kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_6367)) {
-		/* Wait for GPU to flush write buffer before freeing physical pages */
-		kbase_wait_write_flush(kctx);
-	}
-#endif
+
 	/* This will also free the physical pages */
 	kbase_free_alloced_region(reg);
 
@@ -1046,7 +1198,6 @@ int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr)
 		kbase_free_alloced_region(reg);
 	} else {
 		/* A real GPU va */
-
 		/* Validate the region */
 		reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
 		if (!reg || (reg->flags & KBASE_REG_FREE)) {
@@ -1063,7 +1214,6 @@ int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr)
 			err = -EINVAL;
 			goto out_unlock;
 		}
-
 		err = kbase_mem_free_region(kctx, reg);
 	}
 
@@ -1125,8 +1275,8 @@ int kbase_alloc_phy_pages_helper(
 	size_t nr_pages_requested)
 {
 	int new_page_count __maybe_unused;
+	size_t old_page_count = alloc->nents;
 
-	KBASE_DEBUG_ASSERT(alloc);
 	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
 	KBASE_DEBUG_ASSERT(alloc->imported.kctx);
 
@@ -1142,14 +1292,21 @@ int kbase_alloc_phy_pages_helper(
 	kbase_process_page_usage_inc(alloc->imported.kctx, nr_pages_requested);
 
 	if (kbase_mem_pool_alloc_pages(&alloc->imported.kctx->mem_pool,
-			nr_pages_requested, alloc->pages + alloc->nents) != 0)
+			nr_pages_requested, alloc->pages + old_page_count) != 0)
 		goto no_alloc;
 
-#if defined(CONFIG_MALI_MIPE_ENABLED)
+	/*
+	 * Request a zone cache update; this scans only the new pages and
+	 * appends their information to the zone cache. If the update
+	 * fails then clear the cache so we fall back to doing things
+	 * page by page.
+	 */
+	if (kbase_zone_cache_update(alloc, old_page_count) != 0)
+		kbase_zone_cache_clear(alloc);
+
 	kbase_tlstream_aux_pagesalloc(
 			(u32)alloc->imported.kctx->id,
 			(u64)new_page_count);
-#endif
 
 	alloc->nents += nr_pages_requested;
 done:
@@ -1167,11 +1324,12 @@ int kbase_free_phy_pages_helper(
 	struct kbase_mem_phy_alloc *alloc,
 	size_t nr_pages_to_free)
 {
+	struct kbase_context *kctx = alloc->imported.kctx;
 	bool syncback;
+	bool reclaimed = (alloc->evicted != 0);
 	phys_addr_t *start_free;
 	int new_page_count __maybe_unused;
 
-	KBASE_DEBUG_ASSERT(alloc);
 	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
 	KBASE_DEBUG_ASSERT(alloc->imported.kctx);
 	KBASE_DEBUG_ASSERT(alloc->nents >= nr_pages_to_free);
@@ -1184,22 +1342,37 @@ int kbase_free_phy_pages_helper(
 
 	syncback = alloc->properties & KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED;
 
-	kbase_mem_pool_free_pages(&alloc->imported.kctx->mem_pool,
+	/*
+	 * Clear the zone cache; we don't expect JIT allocations to be
+	 * shrunk in parts so there is no point trying to optimize for that
+	 * by scanning for the changes caused by freeing this memory and
+	 * updating the existing cache entries.
+	 */
+	kbase_zone_cache_clear(alloc);
+
+	kbase_mem_pool_free_pages(&kctx->mem_pool,
 				  nr_pages_to_free,
 				  start_free,
-				  syncback);
+				  syncback,
+				  reclaimed);
 
 	alloc->nents -= nr_pages_to_free;
-	kbase_process_page_usage_dec(alloc->imported.kctx, nr_pages_to_free);
-	new_page_count = kbase_atomic_sub_pages(
-			nr_pages_to_free, &alloc->imported.kctx->used_pages);
-	kbase_atomic_sub_pages(nr_pages_to_free, &alloc->imported.kctx->kbdev->memdev.used_pages);
 
-#if defined(CONFIG_MALI_MIPE_ENABLED)
-	kbase_tlstream_aux_pagesalloc(
-			(u32)alloc->imported.kctx->id,
-			(u64)new_page_count);
-#endif
+	/*
+	 * If the allocation was not evicted (i.e. evicted == 0) then
+	 * the page accounting needs to be done.
+	 */
+	if (!reclaimed) {
+		kbase_process_page_usage_dec(kctx, nr_pages_to_free);
+		new_page_count = kbase_atomic_sub_pages(nr_pages_to_free,
+							&kctx->used_pages);
+		kbase_atomic_sub_pages(nr_pages_to_free,
+				       &kctx->kbdev->memdev.used_pages);
+
+		kbase_tlstream_aux_pagesalloc(
+				(u32)kctx->id,
+				(u64)new_page_count);
+	}
 
 	return 0;
 }
@@ -1212,7 +1385,12 @@ void kbase_mem_kref_free(struct kref *kref)
 
 	switch (alloc->type) {
 	case KBASE_MEM_TYPE_NATIVE: {
-		KBASE_DEBUG_ASSERT(alloc->imported.kctx);
+		WARN_ON(!alloc->imported.kctx);
+		/*
+		 * The physical allocation must have been removed from the
+		 * eviction list before trying to free it.
+		 */
+		WARN_ON(!list_empty(&alloc->evict_node));
 		kbase_free_phy_pages_helper(alloc, alloc->nents);
 		break;
 	}
@@ -1246,6 +1424,8 @@ void kbase_mem_kref_free(struct kref *kref)
 		break;
 #endif
 	case KBASE_MEM_TYPE_IMPORTED_USER_BUF:
+		if (alloc->imported.user_buf.mm)
+			mmdrop(alloc->imported.user_buf.mm);
 		kfree(alloc->imported.user_buf.pages);
 		break;
 	case KBASE_MEM_TYPE_TB:{
@@ -1290,9 +1470,11 @@ int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size
 	if (kbase_alloc_phy_pages_helper(reg->cpu_alloc, size) != 0)
 		goto out_term;
 
+	reg->cpu_alloc->reg = reg;
 	if (reg->cpu_alloc != reg->gpu_alloc) {
 		if (kbase_alloc_phy_pages_helper(reg->gpu_alloc, size) != 0)
 			goto out_rollback;
+		reg->gpu_alloc->reg = reg;
 	}
 
 	return 0;
@@ -1332,6 +1514,10 @@ bool kbase_check_alloc_flags(unsigned long flags)
 	if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0)
 		return false;
 
+	/* BASE_MEM_IMPORT_SHARED is only valid for imported memory */
+	if ((flags & BASE_MEM_IMPORT_SHARED) == BASE_MEM_IMPORT_SHARED)
+		return false;
+
 	return true;
 }
 
@@ -1386,3 +1572,923 @@ void kbase_gpu_vm_unlock(struct kbase_context *kctx)
 }
 
 KBASE_EXPORT_TEST_API(kbase_gpu_vm_unlock);
+
+#ifdef CONFIG_DEBUG_FS
+struct kbase_jit_debugfs_data {
+	int (*func)(struct kbase_jit_debugfs_data *);
+	struct mutex lock;
+	struct kbase_context *kctx;
+	u64 active_value;
+	u64 pool_value;
+	u64 destroy_value;
+	char buffer[50];
+};
+
+static int kbase_jit_debugfs_common_open(struct inode *inode,
+		struct file *file, int (*func)(struct kbase_jit_debugfs_data *))
+{
+	struct kbase_jit_debugfs_data *data;
+
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	data->func = func;
+	mutex_init(&data->lock);
+	data->kctx = (struct kbase_context *) inode->i_private;
+
+	file->private_data = data;
+
+	return nonseekable_open(inode, file);
+}
+
+static ssize_t kbase_jit_debugfs_common_read(struct file *file,
+		char __user *buf, size_t len, loff_t *ppos)
+{
+	struct kbase_jit_debugfs_data *data;
+	size_t size;
+	int ret;
+
+	data = (struct kbase_jit_debugfs_data *) file->private_data;
+	mutex_lock(&data->lock);
+
+	if (*ppos) {
+		size = strnlen(data->buffer, sizeof(data->buffer));
+	} else {
+		if (!data->func) {
+			ret = -EACCES;
+			goto out_unlock;
+		}
+
+		if (data->func(data)) {
+			ret = -EACCES;
+			goto out_unlock;
+		}
+
+		size = scnprintf(data->buffer, sizeof(data->buffer),
+				"%llu,%llu,%llu", data->active_value,
+				data->pool_value, data->destroy_value);
+	}
+
+	ret = simple_read_from_buffer(buf, len, ppos, data->buffer, size);
+
+out_unlock:
+	mutex_unlock(&data->lock);
+	return ret;
+}
+
+static int kbase_jit_debugfs_common_release(struct inode *inode,
+		struct file *file)
+{
+	kfree(file->private_data);
+	return 0;
+}
+
+#define KBASE_JIT_DEBUGFS_DECLARE(__fops, __func) \
+static int __fops ## _open(struct inode *inode, struct file *file) \
+{ \
+	return kbase_jit_debugfs_common_open(inode, file, __func); \
+} \
+static const struct file_operations __fops = { \
+	.owner = THIS_MODULE, \
+	.open = __fops ## _open, \
+	.release = kbase_jit_debugfs_common_release, \
+	.read = kbase_jit_debugfs_common_read, \
+	.write = NULL, \
+	.llseek = generic_file_llseek, \
+}
+
+static int kbase_jit_debugfs_count_get(struct kbase_jit_debugfs_data *data)
+{
+	struct kbase_context *kctx = data->kctx;
+	struct list_head *tmp;
+
+	mutex_lock(&kctx->jit_lock);
+	list_for_each(tmp, &kctx->jit_active_head) {
+		data->active_value++;
+	}
+
+	list_for_each(tmp, &kctx->jit_pool_head) {
+		data->pool_value++;
+	}
+
+	list_for_each(tmp, &kctx->jit_destroy_head) {
+		data->destroy_value++;
+	}
+	mutex_unlock(&kctx->jit_lock);
+
+	return 0;
+}
+KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_count_fops,
+		kbase_jit_debugfs_count_get);
+
+static int kbase_jit_debugfs_vm_get(struct kbase_jit_debugfs_data *data)
+{
+	struct kbase_context *kctx = data->kctx;
+	struct kbase_va_region *reg;
+
+	mutex_lock(&kctx->jit_lock);
+	list_for_each_entry(reg, &kctx->jit_active_head, jit_node) {
+		data->active_value += reg->nr_pages;
+	}
+
+	list_for_each_entry(reg, &kctx->jit_pool_head, jit_node) {
+		data->pool_value += reg->nr_pages;
+	}
+
+	list_for_each_entry(reg, &kctx->jit_destroy_head, jit_node) {
+		data->destroy_value += reg->nr_pages;
+	}
+	mutex_unlock(&kctx->jit_lock);
+
+	return 0;
+}
+KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_vm_fops,
+		kbase_jit_debugfs_vm_get);
+
+static int kbase_jit_debugfs_phys_get(struct kbase_jit_debugfs_data *data)
+{
+	struct kbase_context *kctx = data->kctx;
+	struct kbase_va_region *reg;
+
+	mutex_lock(&kctx->jit_lock);
+	list_for_each_entry(reg, &kctx->jit_active_head, jit_node) {
+		data->active_value += reg->gpu_alloc->nents;
+	}
+
+	list_for_each_entry(reg, &kctx->jit_pool_head, jit_node) {
+		data->pool_value += reg->gpu_alloc->nents;
+	}
+
+	list_for_each_entry(reg, &kctx->jit_destroy_head, jit_node) {
+		data->destroy_value += reg->gpu_alloc->nents;
+	}
+	mutex_unlock(&kctx->jit_lock);
+
+	return 0;
+}
+KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_phys_fops,
+		kbase_jit_debugfs_phys_get);
+
+void kbase_jit_debugfs_add(struct kbase_context *kctx)
+{
+	/* Debugfs entry for getting the number of JIT allocations. */
+	debugfs_create_file("mem_jit_count", S_IRUGO, kctx->kctx_dentry,
+			kctx, &kbase_jit_debugfs_count_fops);
+
+	/*
+	 * Debugfs entry for getting the total number of virtual pages
+	 * used by JIT allocations.
+	 */
+	debugfs_create_file("mem_jit_vm", S_IRUGO, kctx->kctx_dentry,
+			kctx, &kbase_jit_debugfs_vm_fops);
+
+	/*
+	 * Debugfs entry for getting the number of physical pages used
+	 * by JIT allocations.
+	 */
+	debugfs_create_file("mem_jit_phys", S_IRUGO, kctx->kctx_dentry,
+			kctx, &kbase_jit_debugfs_phys_fops);
+}
+#endif /* CONFIG_DEBUG_FS */
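Usage note: each of the three files above is created in the context's debugfs directory (kctx_dentry) and, when read, prints a comma-separated triple in the order active,pool,destroy (region counts for mem_jit_count, virtual pages for mem_jit_vm, physical pages for mem_jit_phys). For example, reading mem_jit_count might return "3,10,0": 3 active JIT regions, 10 pooled and none pending destruction. The exact debugfs path depends on where debugfs is mounted on the platform, so it is not spelled out here.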
+
+/**
+ * kbase_jit_destroy_worker - Deferred worker which frees JIT allocations
+ * @work: Work item
+ *
+ * This function does the work of freeing JIT allocations whose physical
+ * backing has been released.
+ */
+static void kbase_jit_destroy_worker(struct work_struct *work)
+{
+	struct kbase_context *kctx;
+	struct kbase_va_region *reg;
+
+	kctx = container_of(work, struct kbase_context, jit_work);
+	do {
+		mutex_lock(&kctx->jit_lock);
+		if (list_empty(&kctx->jit_destroy_head))
+			reg = NULL;
+		else
+			reg = list_first_entry(&kctx->jit_destroy_head,
+				struct kbase_va_region, jit_node);
+
+		if (reg) {
+			list_del(&reg->jit_node);
+			mutex_unlock(&kctx->jit_lock);
+
+			kbase_gpu_vm_lock(kctx);
+			kbase_mem_free_region(kctx, reg);
+			kbase_gpu_vm_unlock(kctx);
+		} else
+			mutex_unlock(&kctx->jit_lock);
+	} while (reg);
+}
+
+int kbase_jit_init(struct kbase_context *kctx)
+{
+	INIT_LIST_HEAD(&kctx->jit_active_head);
+	INIT_LIST_HEAD(&kctx->jit_pool_head);
+	INIT_LIST_HEAD(&kctx->jit_destroy_head);
+	mutex_init(&kctx->jit_lock);
+	INIT_WORK(&kctx->jit_work, kbase_jit_destroy_worker);
+
+	return 0;
+}
+
+struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
+		struct base_jit_alloc_info *info)
+{
+	struct kbase_va_region *reg = NULL;
+	struct kbase_va_region *walker;
+	struct kbase_va_region *temp;
+	size_t current_diff = SIZE_MAX;
+
+	int ret;
+
+	mutex_lock(&kctx->jit_lock);
+	/*
+	 * Scan the pool for an existing allocation which meets our
+	 * requirements and remove it.
+	 */
+	list_for_each_entry_safe(walker, temp, &kctx->jit_pool_head, jit_node) {
+
+		if (walker->nr_pages >= info->va_pages) {
+			size_t min_size, max_size, diff;
+
+			/*
+			 * The JIT allocation's VA requirements have been
+			 * met; it's suitable, but another allocation
+			 * might be a better fit.
+			 */
+			min_size = min_t(size_t, walker->gpu_alloc->nents,
+					info->commit_pages);
+			max_size = max_t(size_t, walker->gpu_alloc->nents,
+					info->commit_pages);
+			diff = max_size - min_size;
+
+			if (current_diff > diff) {
+				current_diff = diff;
+				reg = walker;
+			}
+
+			/* The allocation is an exact match, stop looking */
+			if (current_diff == 0)
+				break;
+		}
+	}
+
+	if (reg) {
+		/*
+		 * Remove the found region from the pool and add it to the
+		 * active list.
+		 */
+		list_del_init(&reg->jit_node);
+		list_add(&reg->jit_node, &kctx->jit_active_head);
+
+		/* Release the jit lock before modifying the allocation */
+		mutex_unlock(&kctx->jit_lock);
+
+		kbase_gpu_vm_lock(kctx);
+
+		/* Make the physical backing no longer reclaimable */
+		if (!kbase_mem_evictable_unmake(reg->gpu_alloc))
+			goto update_failed;
+
+		/* Grow the backing if required */
+		if (reg->gpu_alloc->nents < info->commit_pages) {
+			size_t delta;
+			size_t old_size = reg->gpu_alloc->nents;
+
+			/* Allocate some more pages */
+			delta = info->commit_pages - reg->gpu_alloc->nents;
+			if (kbase_alloc_phy_pages_helper(reg->gpu_alloc, delta)
+					!= 0)
+				goto update_failed;
+
+			if (reg->cpu_alloc != reg->gpu_alloc) {
+				if (kbase_alloc_phy_pages_helper(
+						reg->cpu_alloc, delta) != 0) {
+					kbase_free_phy_pages_helper(
+							reg->gpu_alloc, delta);
+					goto update_failed;
+				}
+			}
+
+			ret = kbase_mem_grow_gpu_mapping(kctx, reg,
+					info->commit_pages, old_size);
+			/*
+			 * The grow failed so put the allocation back in the
+			 * pool and return failure.
+			 */
+			if (ret)
+				goto update_failed;
+		}
+		kbase_gpu_vm_unlock(kctx);
+	} else {
+		/* No suitable JIT allocation was found so create a new one */
+		u64 flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD |
+				BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF |
+				BASE_MEM_COHERENT_LOCAL;
+		u64 gpu_addr;
+		u16 alignment;
+
+		mutex_unlock(&kctx->jit_lock);
+
+		reg = kbase_mem_alloc(kctx, info->va_pages, info->commit_pages,
+				info->extent, &flags, &gpu_addr, &alignment);
+		if (!reg)
+			goto out_unlocked;
+
+		mutex_lock(&kctx->jit_lock);
+		list_add(&reg->jit_node, &kctx->jit_active_head);
+		mutex_unlock(&kctx->jit_lock);
+	}
+
+	return reg;
+
+update_failed:
+	/*
+	 * An update to an allocation from the pool failed; chances
+	 * are slim that a new allocation would fare any better, so return
+	 * the allocation to the pool and report failure.
+	 */
+	kbase_gpu_vm_unlock(kctx);
+	mutex_lock(&kctx->jit_lock);
+	list_del_init(&reg->jit_node);
+	list_add(&reg->jit_node, &kctx->jit_pool_head);
+	mutex_unlock(&kctx->jit_lock);
+out_unlocked:
+	return NULL;
+}
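Editorial note: kbase_jit_allocate() first requires a pooled region's VA size (nr_pages) to cover info->va_pages, then picks the region whose physical backing (gpu_alloc->nents) is closest to info->commit_pages, stopping early on an exact match. A standalone sketch of that best-fit selection over example sizes follows; the array and request values are made up for illustration.

#include <stddef.h>
#include <stdio.h>

int main(void)
{
	/* Physical sizes (nents) of pooled JIT regions that already satisfy
	 * the VA-size check; the request wants 8 committed pages. */
	size_t pool_nents[] = { 4, 16, 7 };
	size_t commit_pages = 8;
	size_t best = (size_t)-1, current_diff = (size_t)-1;

	for (size_t i = 0; i < 3; i++) {
		size_t min = pool_nents[i] < commit_pages ? pool_nents[i] : commit_pages;
		size_t max = pool_nents[i] > commit_pages ? pool_nents[i] : commit_pages;
		size_t diff = max - min;

		if (diff < current_diff) {
			current_diff = diff;
			best = i;
		}
		if (current_diff == 0)	/* exact match, stop looking */
			break;
	}

	/* Prints: picked pool entry 2 (nents=7, diff=1) */
	printf("picked pool entry %zu (nents=%zu, diff=%zu)\n",
	       best, pool_nents[best], current_diff);
	return 0;
}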
+
+void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg)
+{
+	/* The physical backing of memory in the pool is always reclaimable */
+	down_read(&kctx->process_mm->mmap_sem);
+	kbase_gpu_vm_lock(kctx);
+	kbase_mem_evictable_make(reg->gpu_alloc);
+	kbase_gpu_vm_unlock(kctx);
+	up_read(&kctx->process_mm->mmap_sem);
+
+	mutex_lock(&kctx->jit_lock);
+	list_del_init(&reg->jit_node);
+	list_add(&reg->jit_node, &kctx->jit_pool_head);
+	mutex_unlock(&kctx->jit_lock);
+}
+
+void kbase_jit_backing_lost(struct kbase_va_region *reg)
+{
+	struct kbase_context *kctx = reg->kctx;
+
+	/*
+	 * JIT allocations will always be on a list; if the region
+	 * is not on a list then it's not a JIT allocation.
+	 */
+	if (list_empty(&reg->jit_node))
+		return;
+
+	/*
+	 * Freeing the allocation requires locks we might not be able
+	 * to take now, so move the allocation to the free list and kick
+	 * the worker which will do the freeing.
+	 */
+	mutex_lock(&kctx->jit_lock);
+	list_del_init(&reg->jit_node);
+	list_add(&reg->jit_node, &kctx->jit_destroy_head);
+	mutex_unlock(&kctx->jit_lock);
+
+	schedule_work(&kctx->jit_work);
+}
+
+bool kbase_jit_evict(struct kbase_context *kctx)
+{
+	struct kbase_va_region *reg = NULL;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/* Free the oldest allocation from the pool */
+	mutex_lock(&kctx->jit_lock);
+	if (!list_empty(&kctx->jit_pool_head)) {
+		reg = list_entry(kctx->jit_pool_head.prev,
+				struct kbase_va_region, jit_node);
+		list_del(&reg->jit_node);
+	}
+	mutex_unlock(&kctx->jit_lock);
+
+	if (reg)
+		kbase_mem_free_region(kctx, reg);
+
+	return (reg != NULL);
+}
+
+void kbase_jit_term(struct kbase_context *kctx)
+{
+	struct kbase_va_region *walker;
+
+	/* Free all allocations for this context */
+
+	/*
+	 * Flush the freeing of allocations whose backing has been freed
+	 * (i.e. everything in jit_destroy_head).
+	 */
+	cancel_work_sync(&kctx->jit_work);
+
+	kbase_gpu_vm_lock(kctx);
+	/* Free all allocations from the pool */
+	while (!list_empty(&kctx->jit_pool_head)) {
+		walker = list_first_entry(&kctx->jit_pool_head,
+				struct kbase_va_region, jit_node);
+		list_del(&walker->jit_node);
+		kbase_mem_free_region(kctx, walker);
+	}
+
+	/* Free all allocations from active list */
+	while (!list_empty(&kctx->jit_active_head)) {
+		walker = list_first_entry(&kctx->jit_active_head,
+				struct kbase_va_region, jit_node);
+		list_del(&walker->jit_node);
+		kbase_mem_free_region(kctx, walker);
+	}
+	kbase_gpu_vm_unlock(kctx);
+}
+
+static int kbase_jd_user_buf_map(struct kbase_context *kctx,
+		struct kbase_va_region *reg)
+{
+	long pinned_pages;
+	struct kbase_mem_phy_alloc *alloc;
+	struct page **pages;
+	phys_addr_t *pa;
+	long i;
+	int err = -ENOMEM;
+	unsigned long address;
+	struct mm_struct *mm;
+	struct device *dev;
+	unsigned long offset;
+	unsigned long local_size;
+
+	alloc = reg->gpu_alloc;
+	pa = kbase_get_gpu_phy_pages(reg);
+	address = alloc->imported.user_buf.address;
+	mm = alloc->imported.user_buf.mm;
+
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF);
+
+	pages = alloc->imported.user_buf.pages;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+	pinned_pages = get_user_pages(NULL, mm,
+			address,
+			alloc->imported.user_buf.nr_pages,
+			reg->flags & KBASE_REG_GPU_WR,
+			0, pages, NULL);
+#else
+	pinned_pages = get_user_pages_remote(NULL, mm,
+			address,
+			alloc->imported.user_buf.nr_pages,
+			reg->flags & KBASE_REG_GPU_WR,
+			0, pages, NULL);
+#endif
+
+	if (pinned_pages <= 0)
+		return pinned_pages;
+
+	if (pinned_pages != alloc->imported.user_buf.nr_pages) {
+		for (i = 0; i < pinned_pages; i++)
+			put_page(pages[i]);
+		return -ENOMEM;
+	}
+
+	dev = kctx->kbdev->dev;
+	offset = address & ~PAGE_MASK;
+	local_size = alloc->imported.user_buf.size;
+
+	for (i = 0; i < pinned_pages; i++) {
+		dma_addr_t dma_addr;
+		unsigned long min;
+
+		min = MIN(PAGE_SIZE - offset, local_size);
+		dma_addr = dma_map_page(dev, pages[i],
+				offset, min,
+				DMA_BIDIRECTIONAL);
+		if (dma_mapping_error(dev, dma_addr))
+			goto unwind;
+
+		alloc->imported.user_buf.dma_addrs[i] = dma_addr;
+		pa[i] = page_to_phys(pages[i]);
+
+		local_size -= min;
+		offset = 0;
+	}
+
+	alloc->nents = pinned_pages;
+
+	err = kbase_mmu_insert_pages(kctx, reg->start_pfn, pa,
+			kbase_reg_current_backed_size(reg),
+			reg->flags);
+	if (err == 0)
+		return 0;
+
+	alloc->nents = 0;
+	/* fall through to unwind */
+unwind:
+	while (i--) {
+		dma_unmap_page(kctx->kbdev->dev,
+				alloc->imported.user_buf.dma_addrs[i],
+				PAGE_SIZE, DMA_BIDIRECTIONAL);
+		put_page(pages[i]);
+		pages[i] = NULL;
+	}
+
+	return err;
+}
+
+static void kbase_jd_user_buf_unmap(struct kbase_context *kctx,
+		struct kbase_mem_phy_alloc *alloc, bool writeable)
+{
+	long i;
+	struct page **pages;
+	unsigned long size = alloc->imported.user_buf.size;
+
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF);
+	pages = alloc->imported.user_buf.pages;
+	for (i = 0; i < alloc->imported.user_buf.nr_pages; i++) {
+		unsigned long local_size;
+		dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i];
+
+		local_size = MIN(size, PAGE_SIZE - (dma_addr & ~PAGE_MASK));
+		dma_unmap_page(kctx->kbdev->dev, dma_addr, local_size,
+				DMA_BIDIRECTIONAL);
+		if (writeable)
+			set_page_dirty_lock(pages[i]);
+		put_page(pages[i]);
+		pages[i] = NULL;
+
+		size -= local_size;
+	}
+	alloc->nents = 0;
+}
+
+
+/* Used in place of sg_dma_len(): read the segment's CPU length field directly. */
+#define MALI_SG_DMA_LEN(sg)        ((sg)->length)
+
+#ifdef CONFIG_DMA_SHARED_BUFFER
+static int kbase_jd_umm_map(struct kbase_context *kctx,
+		struct kbase_va_region *reg)
+{
+	struct sg_table *sgt;
+	struct scatterlist *s;
+	int i;
+	phys_addr_t *pa;
+	int err;
+	size_t count = 0;
+	struct kbase_mem_phy_alloc *alloc;
+
+	alloc = reg->gpu_alloc;
+
+	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM);
+	KBASE_DEBUG_ASSERT(NULL == alloc->imported.umm.sgt);
+	sgt = dma_buf_map_attachment(alloc->imported.umm.dma_attachment,
+			DMA_BIDIRECTIONAL);
+
+	if (IS_ERR_OR_NULL(sgt))
+		return -EINVAL;
+
+	/* save for later */
+	alloc->imported.umm.sgt = sgt;
+
+	pa = kbase_get_gpu_phy_pages(reg);
+	KBASE_DEBUG_ASSERT(pa);
+
+	for_each_sg(sgt->sgl, s, sgt->nents, i) {
+		int j;
+		size_t pages = PFN_UP(MALI_SG_DMA_LEN(s));
+
+		WARN_ONCE(MALI_SG_DMA_LEN(s) & (PAGE_SIZE-1),
+		"MALI_SG_DMA_LEN(s)=%u is not a multiple of PAGE_SIZE\n",
+		MALI_SG_DMA_LEN(s));
+
+		WARN_ONCE(sg_dma_address(s) & (PAGE_SIZE-1),
+		"sg_dma_address(s)=%llx is not aligned to PAGE_SIZE\n",
+		(unsigned long long) sg_dma_address(s));
+
+		for (j = 0; (j < pages) && (count < reg->nr_pages); j++,
+				count++)
+			*pa++ = sg_dma_address(s) + (j << PAGE_SHIFT);
+		WARN_ONCE(j < pages,
+		"sg list from dma_buf_map_attachment > dma_buf->size=%zu\n",
+		alloc->imported.umm.dma_buf->size);
+	}
+
+	if (WARN_ONCE(count < reg->nr_pages,
+			"sg list from dma_buf_map_attachment < dma_buf->size=%zu\n",
+			alloc->imported.umm.dma_buf->size)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	/* Update nents as we now have pages to map */
+	alloc->nents = count;
+
+	err = kbase_mmu_insert_pages(kctx, reg->start_pfn,
+			kbase_get_gpu_phy_pages(reg),
+			kbase_reg_current_backed_size(reg),
+			reg->flags | KBASE_REG_GPU_WR | KBASE_REG_GPU_RD);
+
+out:
+	if (err) {
+		dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment,
+				alloc->imported.umm.sgt, DMA_BIDIRECTIONAL);
+		alloc->imported.umm.sgt = NULL;
+	}
+
+	return err;
+}
+
+static void kbase_jd_umm_unmap(struct kbase_context *kctx,
+		struct kbase_mem_phy_alloc *alloc)
+{
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(alloc);
+	KBASE_DEBUG_ASSERT(alloc->imported.umm.dma_attachment);
+	KBASE_DEBUG_ASSERT(alloc->imported.umm.sgt);
+	dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment,
+	    alloc->imported.umm.sgt, DMA_BIDIRECTIONAL);
+	alloc->imported.umm.sgt = NULL;
+	alloc->nents = 0;
+}
+#endif				/* CONFIG_DMA_SHARED_BUFFER */
+
+#if (defined(CONFIG_KDS) && defined(CONFIG_UMP)) \
+		|| defined(CONFIG_DMA_SHARED_BUFFER_USES_KDS)
+static void add_kds_resource(struct kds_resource *kds_res,
+		struct kds_resource **kds_resources, u32 *kds_res_count,
+		unsigned long *kds_access_bitmap, bool exclusive)
+{
+	u32 i;
+
+	for (i = 0; i < *kds_res_count; i++) {
+		/* Duplicate resource, ignore */
+		if (kds_resources[i] == kds_res)
+			return;
+	}
+
+	kds_resources[*kds_res_count] = kds_res;
+	if (exclusive)
+		set_bit(*kds_res_count, kds_access_bitmap);
+	(*kds_res_count)++;
+}
+#endif
+
+struct kbase_mem_phy_alloc *kbase_map_external_resource(
+		struct kbase_context *kctx, struct kbase_va_region *reg,
+		struct mm_struct *locked_mm
+#ifdef CONFIG_KDS
+		, u32 *kds_res_count, struct kds_resource **kds_resources,
+		unsigned long *kds_access_bitmap, bool exclusive
+#endif
+		)
+{
+	int err;
+
+	/* decide what needs to happen for this resource */
+	switch (reg->gpu_alloc->type) {
+	case KBASE_MEM_TYPE_IMPORTED_USER_BUF: {
+		if (reg->gpu_alloc->imported.user_buf.mm != locked_mm)
+			goto exit;
+
+		reg->gpu_alloc->imported.user_buf.current_mapping_usage_count++;
+		if (1 == reg->gpu_alloc->imported.user_buf.current_mapping_usage_count) {
+			err = kbase_jd_user_buf_map(kctx, reg);
+			if (err) {
+				reg->gpu_alloc->imported.user_buf.current_mapping_usage_count--;
+				goto exit;
+			}
+		}
+	}
+	break;
+	case KBASE_MEM_TYPE_IMPORTED_UMP: {
+#if defined(CONFIG_KDS) && defined(CONFIG_UMP)
+		if (kds_res_count) {
+			struct kds_resource *kds_res;
+
+			kds_res = ump_dd_kds_resource_get(
+					reg->gpu_alloc->imported.ump_handle);
+			if (kds_res)
+				add_kds_resource(kds_res, kds_resources,
+						kds_res_count,
+						kds_access_bitmap, exclusive);
+		}
+#endif				/*defined(CONFIG_KDS) && defined(CONFIG_UMP) */
+		break;
+	}
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case KBASE_MEM_TYPE_IMPORTED_UMM: {
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+		if (kds_res_count) {
+			struct kds_resource *kds_res;
+
+			kds_res = get_dma_buf_kds_resource(
+					reg->gpu_alloc->imported.umm.dma_buf);
+			if (kds_res)
+				add_kds_resource(kds_res, kds_resources,
+						kds_res_count,
+						kds_access_bitmap, exclusive);
+		}
+#endif
+		reg->gpu_alloc->imported.umm.current_mapping_usage_count++;
+		if (1 == reg->gpu_alloc->imported.umm.current_mapping_usage_count) {
+			err = kbase_jd_umm_map(kctx, reg);
+			if (err) {
+				reg->gpu_alloc->imported.umm.current_mapping_usage_count--;
+				goto exit;
+			}
+		}
+		break;
+	}
+#endif
+	default:
+		goto exit;
+	}
+
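+	/* Take a reference on the physical allocation; the matching
+	 * kbase_mem_phy_alloc_put() is done in
+	 * kbase_unmap_external_resource(). */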
+	return kbase_mem_phy_alloc_get(reg->gpu_alloc);
+exit:
+	return NULL;
+}
+
+void kbase_unmap_external_resource(struct kbase_context *kctx,
+		struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc)
+{
+	switch (alloc->type) {
+#ifdef CONFIG_DMA_SHARED_BUFFER
+	case KBASE_MEM_TYPE_IMPORTED_UMM: {
+		alloc->imported.umm.current_mapping_usage_count--;
+
+		if (0 == alloc->imported.umm.current_mapping_usage_count) {
+			if (reg && reg->gpu_alloc == alloc)
+				kbase_mmu_teardown_pages(
+						kctx,
+						reg->start_pfn,
+						kbase_reg_current_backed_size(reg));
+
+			kbase_jd_umm_unmap(kctx, alloc);
+		}
+	}
+	break;
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+	case KBASE_MEM_TYPE_IMPORTED_USER_BUF: {
+		alloc->imported.user_buf.current_mapping_usage_count--;
+
+		if (0 == alloc->imported.user_buf.current_mapping_usage_count) {
+			bool writeable = true;
+
+			if (reg && reg->gpu_alloc == alloc)
+				kbase_mmu_teardown_pages(
+						kctx,
+						reg->start_pfn,
+						kbase_reg_current_backed_size(reg));
+
+			if (reg && ((reg->flags & KBASE_REG_GPU_WR) == 0))
+				writeable = false;
+
+			kbase_jd_user_buf_unmap(kctx, alloc, writeable);
+		}
+	}
+	break;
+	default:
+	break;
+	}
+	kbase_mem_phy_alloc_put(alloc);
+}
+
+struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire(
+		struct kbase_context *kctx, u64 gpu_addr)
+{
+	struct kbase_ctx_ext_res_meta *meta = NULL;
+	struct kbase_ctx_ext_res_meta *walker;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/*
+	 * Walk the per context external resource metadata list for the
+	 * metadata which matches the region which is being acquired.
+	 */
+	list_for_each_entry(walker, &kctx->ext_res_meta_head, ext_res_node) {
+		if (walker->gpu_addr == gpu_addr) {
+			meta = walker;
+			break;
+		}
+	}
+
+	/* No metadata exists so create one. */
+	if (!meta) {
+		struct kbase_va_region *reg;
+
+		/* Find the region */
+		reg = kbase_region_tracker_find_region_enclosing_address(
+				kctx, gpu_addr);
+		if (NULL == reg || (reg->flags & KBASE_REG_FREE))
+			goto failed;
+
+		/* Allocate the metadata object */
+		meta = kzalloc(sizeof(*meta), GFP_KERNEL);
+		if (!meta)
+			goto failed;
+
+		/*
+		 * Fill in the metadata object and acquire a reference
+		 * for the physical resource.
+		 */
+		meta->alloc = kbase_map_external_resource(kctx, reg, NULL
+#ifdef CONFIG_KDS
+				, NULL, NULL,
+				NULL, false
+#endif
+				);
+
+		if (!meta->alloc)
+			goto fail_map;
+
+		meta->gpu_addr = reg->start_pfn << PAGE_SHIFT;
+
+		list_add(&meta->ext_res_node, &kctx->ext_res_meta_head);
+	}
+
+	return meta;
+
+fail_map:
+	kfree(meta);
+failed:
+	return NULL;
+}
+
+bool kbase_sticky_resource_release(struct kbase_context *kctx,
+		struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr)
+{
+	struct kbase_ctx_ext_res_meta *walker;
+	struct kbase_va_region *reg;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/* Search for the metadata if one isn't provided. */
+	if (!meta) {
+		/*
+		 * Walk the per context external resource metadata list for the
+		 * metadata which matches the region which is being released.
+		 */
+		list_for_each_entry(walker, &kctx->ext_res_meta_head,
+				ext_res_node) {
+			if (walker->gpu_addr == gpu_addr) {
+				meta = walker;
+				break;
+			}
+		}
+	}
+
+	/* No metadata so just return. */
+	if (!meta)
+		return false;
+
+	/* Drop the physical memory reference and free the metadata. */
+	reg = kbase_region_tracker_find_region_enclosing_address(
+			kctx,
+			meta->gpu_addr);
+
+	kbase_unmap_external_resource(kctx, reg, meta->alloc);
+	list_del(&meta->ext_res_node);
+	kfree(meta);
+
+	return true;
+}
+
+int kbase_sticky_resource_init(struct kbase_context *kctx)
+{
+	INIT_LIST_HEAD(&kctx->ext_res_meta_head);
+
+	return 0;
+}
+
+void kbase_sticky_resource_term(struct kbase_context *kctx)
+{
+	struct kbase_ctx_ext_res_meta *walker;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/*
+	 * Free any sticky resources which haven't been unmapped.
+	 *
+	 * Note:
+	 * We don't care about refcounts at this point as no future
+	 * references to the metadata will be made.
+	 * Region termination would find these if we didn't free them
+	 * here, but it's more efficient to do the cleanup here.
+	 */
+	while (!list_empty(&kctx->ext_res_meta_head)) {
+		walker = list_first_entry(&kctx->ext_res_meta_head,
+				struct kbase_ctx_ext_res_meta, ext_res_node);
+
+		kbase_sticky_resource_release(kctx, walker, 0);
+	}
+}
diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem.h b/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem.h
index 7372e1088bd4..7b2433e868bd 100644
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem.h
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -30,7 +30,9 @@
 #endif
 
 #include <linux/kref.h>
-
+#ifdef CONFIG_KDS
+#include <linux/kds.h>
+#endif				/* CONFIG_KDS */
 #ifdef CONFIG_UMP
 #include <linux/ump.h>
 #endif				/* CONFIG_UMP */
@@ -41,6 +43,8 @@
 #if defined(CONFIG_MALI_GATOR_SUPPORT)
 #include "mali_kbase_gator.h"
 #endif
+/* Required for kbase_mem_evictable_unmake */
+#include "mali_kbase_mem_linux.h"
 
 /* Part of the workaround for uTLB invalid pages is to ensure we grow/shrink tmem by 4 pages at a time */
 #define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316 (2)	/* round to 4 pages */
@@ -112,11 +116,23 @@ struct kbase_mem_phy_alloc {
 	/* kbase_cpu_mappings */
 	struct list_head      mappings;
 
+	/* Node used to store this allocation on the eviction list */
+	struct list_head      evict_node;
+	/* Physical backing size when the pages were evicted */
+	size_t                evicted;
+	/*
+	 * Back reference to the region structure which created this
+	 * allocation, or NULL if it has been freed.
+	 */
+	struct kbase_va_region *reg;
+
 	/* type of buffer */
 	enum kbase_memory_type type;
 
 	unsigned long properties;
 
+	struct list_head       zone_cache;
+
 	/* member in union valid based on @a type */
 	union {
 #ifdef CONFIG_UMP
@@ -143,7 +159,7 @@ struct kbase_mem_phy_alloc {
 			unsigned long nr_pages;
 			struct page **pages;
 			unsigned int current_mapping_usage_count;
-			struct task_struct *owner;
+			struct mm_struct *mm;
 			dma_addr_t *dma_addrs;
 		} user_buf;
 	} imported;
@@ -242,6 +258,8 @@ struct kbase_va_region {
 
 #define KBASE_REG_SECURE            (1ul << 19)
 
+#define KBASE_REG_DONT_NEED         (1ul << 20)
+
 #define KBASE_REG_ZONE_SAME_VA      KBASE_REG_ZONE(0)
 
 /* only used with 32-bit clients */
@@ -276,6 +294,8 @@ struct kbase_va_region {
 	/* non-NULL if this memory object is a kds_resource */
 	struct kds_resource *kds_res;
 
+	/* List head used to store the region in the JIT allocation pool */
+	struct list_head jit_node;
 };
 
 /* Common functions */
@@ -355,6 +375,7 @@ static inline struct kbase_mem_phy_alloc *kbase_alloc_create(size_t nr_pages, en
 	alloc->pages = (void *)(alloc + 1);
 	INIT_LIST_HEAD(&alloc->mappings);
 	alloc->type = type;
+	INIT_LIST_HEAD(&alloc->zone_cache);
 
 	if (type == KBASE_MEM_TYPE_IMPORTED_USER_BUF)
 		alloc->imported.user_buf.dma_addrs =
@@ -378,14 +399,17 @@ static inline int kbase_reg_prepare_native(struct kbase_va_region *reg,
 	else if (!reg->cpu_alloc)
 		return -ENOMEM;
 	reg->cpu_alloc->imported.kctx = kctx;
+	INIT_LIST_HEAD(&reg->cpu_alloc->evict_node);
 	if (kctx->infinite_cache_active && (reg->flags & KBASE_REG_CPU_CACHED)) {
 		reg->gpu_alloc = kbase_alloc_create(reg->nr_pages,
 				KBASE_MEM_TYPE_NATIVE);
 		reg->gpu_alloc->imported.kctx = kctx;
+		INIT_LIST_HEAD(&reg->gpu_alloc->evict_node);
 	} else {
 		reg->gpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
 	}
 
+	INIT_LIST_HEAD(&reg->jit_node);
 	reg->flags &= ~KBASE_REG_FREE;
 	return 0;
 }
@@ -505,11 +529,13 @@ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_pages,
  * @pages:    Pointer to array holding the physical addresses of the pages to
  *            free.
  * @dirty:    Whether any pages may be dirty in the cache.
+ * @reclaimed: Whether the pages were reclaimable and thus should bypass
+ *             the pool and go straight to the kernel.
  *
  * Like kbase_mem_pool_free() but optimized for freeing many pages.
  */
 void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages,
-		phys_addr_t *pages, bool dirty);
+		phys_addr_t *pages, bool dirty, bool reclaimed);
 
 /**
  * kbase_mem_pool_size - Get number of free pages in memory pool
@@ -561,6 +587,7 @@ size_t kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size);
 
 
 int kbase_region_tracker_init(struct kbase_context *kctx);
+int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages);
 void kbase_region_tracker_term(struct kbase_context *kctx);
 
 struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address(struct kbase_context *kctx, u64 gpu_addr);
@@ -591,6 +618,9 @@ void kbase_mmu_term(struct kbase_context *kctx);
 
 phys_addr_t kbase_mmu_alloc_pgd(struct kbase_context *kctx);
 void kbase_mmu_free_pgd(struct kbase_context *kctx);
+int kbase_mmu_insert_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
+				  phys_addr_t *phys, size_t nr,
+				  unsigned long flags);
 int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn,
 				  phys_addr_t *phys, size_t nr,
 				  unsigned long flags);
@@ -623,6 +653,12 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg);
 void kbase_mmu_update(struct kbase_context *kctx);
 
 /**
+ * kbase_mmu_disable() - Disable the MMU for a previously active kbase context.
+ * @kctx:	Kbase context
+ *
+ * Disable the MMU and perform the required cache maintenance to remove all
+ * data belonging to the provided kbase context from the GPU caches.
+ *
  * The caller has the following locking conditions:
  * - It must hold kbase_as::transaction_mutex on kctx's address space
  * - It must hold the kbasep_js_device_data::runpool_irq::lock
@@ -630,11 +666,13 @@ void kbase_mmu_update(struct kbase_context *kctx);
 void kbase_mmu_disable(struct kbase_context *kctx);
 
 /**
- * kbase_mmu_disable_as() - set the MMU in unmapped mode for an address space.
- *
+ * kbase_mmu_disable_as() - Set the MMU to unmapped mode for the specified
+ * address space.
  * @kbdev:	Kbase device
- * @as_nr:	Number of the address space for which the MMU
- *		should be set in unmapped mode.
+ * @as_nr:	The address space number to set to unmapped.
+ *
+ * This function must only be called during reset/power-up and it is used to
+ * ensure the registers are in a known state.
  *
  * The caller must hold kbdev->as[as_nr].transaction_mutex.
  */
@@ -854,4 +892,166 @@ void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle,
 void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle,
 		size_t size, enum dma_data_direction dir);
 
+#ifdef CONFIG_DEBUG_FS
+/**
+ * kbase_jit_debugfs_add - Add per context debugfs entry for JIT.
+ * @kctx: kbase context
+ */
+void kbase_jit_debugfs_add(struct kbase_context *kctx);
+#endif /* CONFIG_DEBUG_FS */
+
+/**
+ * kbase_jit_init - Initialize the JIT memory pool management
+ * @kctx: kbase context
+ *
+ * Returns zero on success or negative error number on failure.
+ */
+int kbase_jit_init(struct kbase_context *kctx);
+
+/**
+ * kbase_jit_allocate - Allocate JIT memory
+ * @kctx: kbase context
+ * @info: JIT allocation information
+ *
+ * Return: JIT allocation on success or NULL on failure.
+ */
+struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
+		struct base_jit_alloc_info *info);
+
+/**
+ * kbase_jit_free - Free a JIT allocation
+ * @kctx: kbase context
+ * @reg: JIT allocation
+ *
+ * Frees a JIT allocation and places it into the free pool for later reuse.
+ */
+void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg);
+
+/**
+ * kbase_jit_backing_lost - Inform JIT that an allocation has lost backing
+ * @reg: JIT allocation
+ */
+void kbase_jit_backing_lost(struct kbase_va_region *reg);
+
+/**
+ * kbase_jit_evict - Evict a JIT allocation from the pool
+ * @kctx: kbase context
+ *
+ * Evict the least recently used JIT allocation from the pool. This can be
+ * required if normal VA allocations are failing due to VA exhaustion.
+ *
+ * Return: True if a JIT allocation was freed, false otherwise.
+ */
+bool kbase_jit_evict(struct kbase_context *kctx);
+
+/**
+ * kbase_jit_term - Terminate the JIT memory pool management
+ * @kctx: kbase context
+ */
+void kbase_jit_term(struct kbase_context *kctx);
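+
+/*
+ * Illustrative JIT lifecycle (sketch only, error handling omitted; the
+ * fields of struct base_jit_alloc_info are defined with the other base_*
+ * types):
+ *
+ *	struct base_jit_alloc_info info = { ... };
+ *	struct kbase_va_region *reg;
+ *
+ *	kbase_jit_init(kctx);
+ *	reg = kbase_jit_allocate(kctx, &info);
+ *	if (reg)
+ *		kbase_jit_free(kctx, reg);   (region returned to the pool)
+ *	kbase_jit_term(kctx);
+ */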
+
+/**
+ * kbase_map_external_resource - Map an external resource to the GPU.
+ * @kctx:              kbase context.
+ * @reg:               The region to map.
+ * @locked_mm:         The mm_struct which has been locked for this operation.
+ * @kds_res_count:     The number of KDS resources.
+ * @kds_resources:     Array of KDS resources.
+ * @kds_access_bitmap: Access bitmap for KDS.
+ * @exclusive:         If the KDS resource requires exclusive access.
+ *
+ * Return: The physical allocation which backs the region on success or NULL
+ * on failure.
+ */
+struct kbase_mem_phy_alloc *kbase_map_external_resource(
+		struct kbase_context *kctx, struct kbase_va_region *reg,
+		struct mm_struct *locked_mm
+#ifdef CONFIG_KDS
+		, u32 *kds_res_count, struct kds_resource **kds_resources,
+		unsigned long *kds_access_bitmap, bool exclusive
+#endif
+		);
+
+/**
+ * kbase_unmap_external_resource - Unmap an external resource from the GPU.
+ * @kctx:  kbase context.
+ * @reg:   The region to unmap or NULL if it has already been released.
+ * @alloc: The physical allocation being unmapped.
+ */
+void kbase_unmap_external_resource(struct kbase_context *kctx,
+		struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc);
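+
+/*
+ * Note: each successful kbase_map_external_resource() call must be balanced
+ * by a kbase_unmap_external_resource() call so that both the mapping usage
+ * count and the reference on the physical allocation are dropped.
+ */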
+
+/**
+ * kbase_sticky_resource_init - Initialize sticky resource management.
+ * @kctx: kbase context
+ *
+ * Returns zero on success or negative error number on failure.
+ */
+int kbase_sticky_resource_init(struct kbase_context *kctx);
+
+/**
+ * kbase_sticky_resource_acquire - Acquire a reference on a sticky resource.
+ * @kctx:     kbase context.
+ * @gpu_addr: The GPU address of the external resource.
+ *
+ * Return: The metadata object which represents the binding between the
+ * external resource and the kbase context on success or NULL on failure.
+ */
+struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire(
+		struct kbase_context *kctx, u64 gpu_addr);
+
+/**
+ * kbase_sticky_resource_release - Release a reference on a sticky resource.
+ * @kctx:     kbase context.
+ * @meta:     Binding metadata.
+ * @gpu_addr: GPU address of the external resource.
+ *
+ * If meta is NULL then gpu_addr will be used to scan the metadata list and
 + * find the matching metadata (if any); otherwise the provided meta will be
+ * used and gpu_addr will be ignored.
+ *
+ * Return: True if the release found the metadata and the reference was dropped.
+ */
+bool kbase_sticky_resource_release(struct kbase_context *kctx,
+		struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr);
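+
+/*
+ * Typical acquire/release pairing (sketch only; both calls must be made
+ * with kctx->reg_lock held, as asserted in the implementations):
+ *
+ *	struct kbase_ctx_ext_res_meta *meta;
+ *
+ *	meta = kbase_sticky_resource_acquire(kctx, gpu_addr);
+ *	if (meta)
+ *		kbase_sticky_resource_release(kctx, meta, gpu_addr);
+ */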
+
+/**
+ * kbase_sticky_resource_term - Terminate sticky resource management.
+ * @kctx: kbase context
+ */
+void kbase_sticky_resource_term(struct kbase_context *kctx);
+
+/**
+ * kbase_zone_cache_update - Update the memory zone cache after new pages have
+ * been added.
+ * @alloc:        The physical memory allocation to build the cache for.
+ * @start_offset: Offset to where the new pages start.
+ *
+ * Updates an existing memory zone cache, updating the counters for the
+ * various zones.
+ * If the memory allocation doesn't already have a zone cache, assume that
+ * it was deliberately not created and do nothing.
+ *
+ * Return: Zero if the cache was updated, negative error code on error.
+ */
+int kbase_zone_cache_update(struct kbase_mem_phy_alloc *alloc,
+		size_t start_offset);
+
+/**
+ * kbase_zone_cache_build - Build the memory zone cache.
+ * @alloc:        The physical memory allocation to build the cache for.
+ *
+ * Create a new zone cache for the provided physical memory allocation if
+ * one doesn't already exist, if one does exist then just return.
+ *
+ * Return: Zero if the zone cache was created, negative error code on error.
+ */
+int kbase_zone_cache_build(struct kbase_mem_phy_alloc *alloc);
+
+/**
+ * kbase_zone_cache_clear - Clear the memory zone cache.
+ * @alloc:        The physical memory allocation to clear the cache on.
+ */
+void kbase_zone_cache_clear(struct kbase_mem_phy_alloc *alloc);
+
 #endif				/* _KBASE_MEM_H_ */
diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem_linux.c b/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem_linux.c
index b359f4d94148..f91d3c916355 100644
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem_linux.c
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem_linux.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -36,15 +36,52 @@
 #ifdef CONFIG_DMA_SHARED_BUFFER
 #include <linux/dma-buf.h>
 #endif				/* defined(CONFIG_DMA_SHARED_BUFFER) */
+#include <linux/shrinker.h>
 
 #include <mali_kbase.h>
 #include <mali_kbase_mem_linux.h>
 #include <mali_kbase_config_defaults.h>
 #include <mali_kbase_hwaccess_time.h>
+#include <mali_kbase_tlstream.h>
 
 static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma);
 static const struct vm_operations_struct kbase_vm_ops;
 
+/**
+ * kbase_mem_shrink_cpu_mapping - Shrink the CPU mapping(s) of an allocation
+ * @kctx:      Context the region belongs to
+ * @reg:       The GPU region
+ * @new_pages: The number of pages after the shrink
+ * @old_pages: The number of pages before the shrink
+ *
+ * Return: 0 on success, -errno on error.
+ *
+ * Shrink (or completely remove) all CPU mappings which reference the shrunk
+ * part of the allocation.
+ *
+ * Note: Caller must be holding the process's mmap_sem lock.
+ */
+static int kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages);
+
+/**
+ * kbase_mem_shrink_gpu_mapping - Shrink the GPU mapping of an allocation
+ * @kctx:      Context the region belongs to
+ * @reg:       The GPU region or NULL if there isn't one
+ * @new_pages: The number of pages after the shrink
+ * @old_pages: The number of pages before the shrink
+ *
+ * Return: 0 on success, negative -errno on error
+ *
+ * Unmap the shrunk pages from the GPU mapping. Note that the size of the region
+ * itself is unmodified as we still need to reserve the VA, only the page tables
+ * will be modified by this function.
+ */
+static int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages);
+
 struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages, u64 commit_pages, u64 extent, u64 *flags, u64 *gpu_va, u16 *va_alignment)
 {
 	int zone;
@@ -77,9 +114,6 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages
 #if defined(CONFIG_64BIT)
 	if (kctx->is_compat)
 		cpu_va_bits = 32;
-	else
-		/* force SAME_VA if a 64-bit client */
-		*flags |= BASE_MEM_SAME_VA;
 #endif
 
 	if (!kbase_check_alloc_flags(*flags)) {
@@ -195,8 +229,9 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages
 		if (*flags & BASE_MEM_PROT_CPU_WR)
 			prot |= PROT_WRITE;
 
-		cpu_addr = vm_mmap(kctx->filp, 0, va_map, prot, MAP_SHARED,
-				cookie);
+		cpu_addr = vm_mmap(kctx->filp, 0, va_map, prot,
+				MAP_SHARED, cookie);
+
 		if (IS_ERR_VALUE(cpu_addr)) {
 			kctx->pending_regions[cookie_nr] = NULL;
 			kctx->cookies |= (1UL << cookie_nr);
@@ -343,12 +378,412 @@ out_unlock:
 	return ret;
 }
 
+/**
+ * kbase_mem_evictable_reclaim_count_objects - Count number of pages in the
+ * Ephemeral memory eviction list.
+ * @s:        Shrinker
+ * @sc:       Shrinker control
+ *
+ * Return: Number of pages which can be freed.
+ */
+static
+unsigned long kbase_mem_evictable_reclaim_count_objects(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	struct kbase_context *kctx;
+	struct kbase_mem_phy_alloc *alloc;
+	unsigned long pages = 0;
+
+	kctx = container_of(s, struct kbase_context, reclaim);
+
+	mutex_lock(&kctx->evict_lock);
+
+	list_for_each_entry(alloc, &kctx->evict_list, evict_node)
+		pages += alloc->nents;
+
+	mutex_unlock(&kctx->evict_lock);
+	return pages;
+}
+
+/**
+ * kbase_mem_evictable_reclaim_scan_objects - Scan the Ephemeral memory eviction
+ * list for pages and try to reclaim them.
+ * @s:        Shrinker
+ * @sc:       Shrinker control
+ *
+ * Return: Number of pages freed (can be less than requested) or -1 if the
+ * shrinker failed to free pages in its pool.
+ *
+ * Note:
+ * This function accesses region structures without taking the region lock,
+ * this is required as the OOM killer can call the shrinker after the region
+ * lock has already been held.
+ * This is safe as we can guarantee that a region on the eviction list will
+ * not be freed (kbase_mem_free_region removes the allocation from the list
+ * before destroying it), or modified by other parts of the driver.
+ * The eviction list itself is guarded by the eviction lock and the MMU updates
+ * are protected by their own lock.
+ */
+static
+unsigned long kbase_mem_evictable_reclaim_scan_objects(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	struct kbase_context *kctx;
+	struct kbase_mem_phy_alloc *alloc;
+	struct kbase_mem_phy_alloc *tmp;
+	unsigned long freed = 0;
+
+	kctx = container_of(s, struct kbase_context, reclaim);
+	mutex_lock(&kctx->evict_lock);
+
+	list_for_each_entry_safe(alloc, tmp, &kctx->evict_list, evict_node) {
+		int err;
+
+		err = kbase_mem_shrink_gpu_mapping(kctx, alloc->reg,
+				0, alloc->nents);
+		if (err != 0) {
+			/*
+			 * Failed to remove GPU mapping, tell the shrinker
+			 * to stop trying to shrink our slab even though we
+			 * have pages in it.
+			 */
+			freed = -1;
+			goto out_unlock;
+		}
+
+		/*
+		 * Update alloc->evicted before freeing the backing so the
+		 * helper can determine that it needs to bypass the accounting
+		 * and memory pool.
+		 */
+		alloc->evicted = alloc->nents;
+
+		kbase_free_phy_pages_helper(alloc, alloc->evicted);
+		freed += alloc->evicted;
+		list_del_init(&alloc->evict_node);
+
+		/*
+		 * Inform the JIT allocator this region has lost backing
+		 * as it might need to free the allocation.
+		 */
+		kbase_jit_backing_lost(alloc->reg);
+
+		/* Enough pages have been freed so stop now */
+		if (freed > sc->nr_to_scan)
+			break;
+	}
+out_unlock:
+	mutex_unlock(&kctx->evict_lock);
+
+	return freed;
+}
+
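+/*
+ * Kernels older than 3.12 expose a single ->shrink() callback rather than
+ * the separate ->count_objects()/->scan_objects() pair, so provide a
+ * wrapper that dispatches to the two helpers above.
+ */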
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+static int kbase_mem_evictable_reclaim_shrink(struct shrinker *s,
+		struct shrink_control *sc)
+{
+	if (sc->nr_to_scan == 0)
+		return kbase_mem_evictable_reclaim_count_objects(s, sc);
+
+	return kbase_mem_evictable_reclaim_scan_objects(s, sc);
+}
+#endif
+
+int kbase_mem_evictable_init(struct kbase_context *kctx)
+{
+	INIT_LIST_HEAD(&kctx->evict_list);
+	mutex_init(&kctx->evict_lock);
+
+	/* Register shrinker */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+	kctx->reclaim.shrink = kbase_mem_evictable_reclaim_shrink;
+#else
+	kctx->reclaim.count_objects = kbase_mem_evictable_reclaim_count_objects;
+	kctx->reclaim.scan_objects = kbase_mem_evictable_reclaim_scan_objects;
+#endif
+	kctx->reclaim.seeks = DEFAULT_SEEKS;
+	/* Kernel versions prior to 3.1:
+	 * struct shrinker does not define batch */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0)
+	kctx->reclaim.batch = 0;
+#endif
+	register_shrinker(&kctx->reclaim);
+	return 0;
+}
+
+void kbase_mem_evictable_deinit(struct kbase_context *kctx)
+{
+	unregister_shrinker(&kctx->reclaim);
+}
+
+struct kbase_mem_zone_cache_entry {
+	/* List head used to link the cache entry to the memory allocation. */
+	struct list_head zone_node;
+	/* The zone the cacheline is for. */
+	struct zone *zone;
+	/* The number of pages in the allocation which belong to this zone. */
+	u64 count;
+};
+
+static int kbase_zone_cache_builder(struct kbase_mem_phy_alloc *alloc,
+		size_t start_offset)
+{
+	struct kbase_mem_zone_cache_entry *cache = NULL;
+	size_t i;
+	int ret = 0;
+
+	for (i = start_offset; i < alloc->nents; i++) {
+		struct page *p = phys_to_page(alloc->pages[i]);
+		struct zone *zone = page_zone(p);
+		bool create = true;
+
+		if (cache && (cache->zone == zone)) {
+			/*
+			 * Fast path check as most of the time adjacent
+			 * pages come from the same zone.
+			 */
+			create = false;
+		} else {
+			/*
+			 * Slow path check, walk all the cache entries to see
+			 * if we already know about this zone.
+			 */
+			list_for_each_entry(cache, &alloc->zone_cache, zone_node) {
+				if (cache->zone == zone) {
+					create = false;
+					break;
+				}
+			}
+		}
+
+		/* This zone wasn't found in the cache, create an entry for it */
+		if (create) {
+			cache = kmalloc(sizeof(*cache), GFP_KERNEL);
+			if (!cache) {
+				ret = -ENOMEM;
+				goto bail;
+			}
+			cache->zone = zone;
+			cache->count = 0;
+			list_add(&cache->zone_node, &alloc->zone_cache);
+		}
+
+		cache->count++;
+	}
+	return 0;
+
+bail:
+	return ret;
+}
+
+int kbase_zone_cache_update(struct kbase_mem_phy_alloc *alloc,
+		size_t start_offset)
+{
+	/*
+	 * Bail if the zone cache is empty; only update the cache if it
+	 * existed in the first place.
+	 */
+	if (list_empty(&alloc->zone_cache))
+		return 0;
+
+	return kbase_zone_cache_builder(alloc, start_offset);
+}
+
+int kbase_zone_cache_build(struct kbase_mem_phy_alloc *alloc)
+{
+	/* Bail if the zone cache already exists */
+	if (!list_empty(&alloc->zone_cache))
+		return 0;
+
+	return kbase_zone_cache_builder(alloc, 0);
+}
+
+void kbase_zone_cache_clear(struct kbase_mem_phy_alloc *alloc)
+{
+	struct kbase_mem_zone_cache_entry *walker;
+
+	while (!list_empty(&alloc->zone_cache)) {
+		walker = list_first_entry(&alloc->zone_cache,
+				struct kbase_mem_zone_cache_entry, zone_node);
+		list_del(&walker->zone_node);
+		kfree(walker);
+	}
+}
+
+/**
+ * kbase_mem_evictable_mark_reclaim - Mark the pages as reclaimable.
+ * @alloc: The physical allocation
+ */
+static void kbase_mem_evictable_mark_reclaim(struct kbase_mem_phy_alloc *alloc)
+{
+	struct kbase_context *kctx = alloc->imported.kctx;
+	struct kbase_mem_zone_cache_entry *zone_cache;
+	int __maybe_unused new_page_count;
+	int err;
+
+	/* Attempt to build a zone cache so the zone stats can be updated in bulk */
+	err = kbase_zone_cache_build(alloc);
+	if (err == 0) {
+		/* Bulk update all the zones */
+		list_for_each_entry(zone_cache, &alloc->zone_cache, zone_node) {
+			zone_page_state_add(zone_cache->count,
+					zone_cache->zone, NR_SLAB_RECLAIMABLE);
+		}
+	} else {
+		/* Fall-back to page by page updates */
+		int i;
+
+		for (i = 0; i < alloc->nents; i++) {
+			struct page *p = phys_to_page(alloc->pages[i]);
+			struct zone *zone = page_zone(p);
+
+			zone_page_state_add(1, zone, NR_SLAB_RECLAIMABLE);
+		}
+	}
+
+	kbase_process_page_usage_dec(kctx, alloc->nents);
+	new_page_count = kbase_atomic_sub_pages(alloc->nents,
+						&kctx->used_pages);
+	kbase_atomic_sub_pages(alloc->nents, &kctx->kbdev->memdev.used_pages);
+
+	kbase_tlstream_aux_pagesalloc(
+			(u32)kctx->id,
+			(u64)new_page_count);
+}
+
+/**
+ * kbase_mem_evictable_unmark_reclaim - Mark the pages as no longer reclaimable.
+ * @alloc: The physical allocation
+ */
+static
+void kbase_mem_evictable_unmark_reclaim(struct kbase_mem_phy_alloc *alloc)
+{
+	struct kbase_context *kctx = alloc->imported.kctx;
+	struct kbase_mem_zone_cache_entry *zone_cache;
+	int __maybe_unused new_page_count;
+	int err;
+
+	new_page_count = kbase_atomic_add_pages(alloc->nents,
+						&kctx->used_pages);
+	kbase_atomic_add_pages(alloc->nents, &kctx->kbdev->memdev.used_pages);
+
+	/* Increase mm counters so that the allocation is accounted for
+	 * against the process and thus is visible to the OOM killer,
+	 * then remove it from the reclaimable accounting. */
+	kbase_process_page_usage_inc(kctx, alloc->nents);
+
+	/* Attempt to build a zone cache so the zone stats can be updated in bulk */
+	err = kbase_zone_cache_build(alloc);
+	if (err == 0) {
+		/* Bulk update all the zones */
+		list_for_each_entry(zone_cache, &alloc->zone_cache, zone_node) {
+			zone_page_state_add(-zone_cache->count,
+					zone_cache->zone, NR_SLAB_RECLAIMABLE);
+		}
+	} else {
+		/* Fall-back to page by page updates */
+		int i;
+
+		for (i = 0; i < alloc->nents; i++) {
+			struct page *p = phys_to_page(alloc->pages[i]);
+			struct zone *zone = page_zone(p);
+
+			zone_page_state_add(-1, zone, NR_SLAB_RECLAIMABLE);
+		}
+	}
+
+	kbase_tlstream_aux_pagesalloc(
+			(u32)kctx->id,
+			(u64)new_page_count);
+}
+
+int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc)
+{
+	struct kbase_context *kctx = gpu_alloc->imported.kctx;
+	int err;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/* This allocation can't already be on a list. */
+	WARN_ON(!list_empty(&gpu_alloc->evict_node));
+
+	/*
+	 * Try to shrink the CPU mappings as required, if we fail then
+	 * fail the process of making this allocation evictable.
+	 */
+	err = kbase_mem_shrink_cpu_mapping(kctx, gpu_alloc->reg,
+			0, gpu_alloc->nents);
+	if (err)
+		return -EINVAL;
+
+	/*
+	 * Add the allocation to the eviction list, after this point the shrink
+	 * can reclaim it.
+	 */
+	mutex_lock(&kctx->evict_lock);
+	list_add(&gpu_alloc->evict_node, &kctx->evict_list);
+	mutex_unlock(&kctx->evict_lock);
+	kbase_mem_evictable_mark_reclaim(gpu_alloc);
+
+	gpu_alloc->reg->flags |= KBASE_REG_DONT_NEED;
+	return 0;
+}
+
+bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *gpu_alloc)
+{
+	struct kbase_context *kctx = gpu_alloc->imported.kctx;
+	int err = 0;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/*
+	 * First remove the allocation from the eviction list as it's no
+	 * longer eligible for eviction.
+	 */
+	mutex_lock(&kctx->evict_lock);
+	list_del_init(&gpu_alloc->evict_node);
+	mutex_unlock(&kctx->evict_lock);
+
+	if (gpu_alloc->evicted == 0) {
+		/*
+		 * The backing is still present, update the VM stats as it's
+		 * in use again.
+		 */
+		kbase_mem_evictable_unmark_reclaim(gpu_alloc);
+	} else {
+		/* If the region is still alive ... */
+		if (gpu_alloc->reg) {
+			/* ... allocate replacement backing ... */
+			err = kbase_alloc_phy_pages_helper(gpu_alloc,
+					gpu_alloc->evicted);
+
+			/*
+			 * ... and grow the mapping back to its
+			 * pre-eviction size.
+			 */
+			if (!err)
+				err = kbase_mem_grow_gpu_mapping(kctx,
+						gpu_alloc->reg,
+						gpu_alloc->evicted, 0);
+
+			gpu_alloc->evicted = 0;
+		}
+	}
+
+	/* If the region is still alive remove the DONT_NEED attribute. */
+	if (gpu_alloc->reg)
+		gpu_alloc->reg->flags &= ~KBASE_REG_DONT_NEED;
+
+	return (err == 0);
+}
+
 int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned int flags, unsigned int mask)
 {
 	struct kbase_va_region *reg;
 	int ret = -EINVAL;
 	unsigned int real_flags = 0;
 	unsigned int prev_flags = 0;
+	bool prev_needed, new_needed;
 
 	KBASE_DEBUG_ASSERT(kctx);
 
@@ -359,11 +794,11 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in
 	flags &= mask;
 
 	/* check for only supported flags */
-	if (flags & ~(BASE_MEM_COHERENT_SYSTEM | BASE_MEM_COHERENT_LOCAL))
+	if (flags & ~(BASE_MEM_FLAGS_MODIFIABLE))
 		goto out;
 
 	/* mask covers bits we don't support? */
-	if (mask & ~(BASE_MEM_COHERENT_SYSTEM | BASE_MEM_COHERENT_LOCAL))
+	if (mask & ~(BASE_MEM_FLAGS_MODIFIABLE))
 		goto out;
 
 	/* convert flags */
@@ -373,6 +808,7 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in
 		real_flags |= KBASE_REG_SHARE_IN;
 
 	/* now we can lock down the context, and find the region */
+	down_write(&current->mm->mmap_sem);
 	kbase_gpu_vm_lock(kctx);
 
 	/* Validate the region */
@@ -380,6 +816,28 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in
 	if (!reg || (reg->flags & KBASE_REG_FREE))
 		goto out_unlock;
 
+	/* Is the region transitioning between not needed and needed? */
+	prev_needed = (KBASE_REG_DONT_NEED & reg->flags) == KBASE_REG_DONT_NEED;
+	new_needed = (BASE_MEM_DONT_NEED & flags) == BASE_MEM_DONT_NEED;
+	if (prev_needed != new_needed) {
+		/* Aliased allocations can't be made ephemeral */
+		if (atomic_read(&reg->cpu_alloc->gpu_mappings) > 1)
+			goto out_unlock;
+
+		if (new_needed) {
+			/* Only native allocations can be marked not needed */
+			if (reg->cpu_alloc->type != KBASE_MEM_TYPE_NATIVE) {
+				ret = -EINVAL;
+				goto out_unlock;
+			}
+			ret = kbase_mem_evictable_make(reg->gpu_alloc);
+			if (ret)
+				goto out_unlock;
+		} else {
+			kbase_mem_evictable_unmake(reg->gpu_alloc);
+		}
+	}
+
 	/* limit to imported memory */
 	if ((reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMP) &&
 	     (reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM))
@@ -422,6 +880,7 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in
 
 out_unlock:
 	kbase_gpu_vm_unlock(kctx);
+	up_write(&current->mm->mmap_sem);
 out:
 	return ret;
 }
@@ -552,6 +1011,7 @@ static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx, in
 	struct kbase_va_region *reg;
 	struct dma_buf *dma_buf;
 	struct dma_buf_attachment *dma_attachment;
+	bool shared_zone = false;
 
 	dma_buf = dma_buf_get(fd);
 	if (IS_ERR_OR_NULL(dma_buf))
@@ -572,15 +1032,23 @@ static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx, in
 	/* ignore SAME_VA */
 	*flags &= ~BASE_MEM_SAME_VA;
 
+	if (*flags & BASE_MEM_IMPORT_SHARED)
+		shared_zone = true;
+
 #ifdef CONFIG_64BIT
 	if (!kctx->is_compat) {
-		/* 64-bit tasks must MMAP anyway, but not expose this address to clients */
+		/*
+		 * 64-bit tasks require us to reserve VA on the CPU that we use
+		 * on the GPU.
+		 */
+		shared_zone = true;
+	}
+#endif
+
+	if (shared_zone) {
 		*flags |= BASE_MEM_NEED_MMAP;
 		reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_SAME_VA);
 	} else {
-#else
-	if (1) {
-#endif
 		reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_CUSTOM_VA);
 	}
 
@@ -618,7 +1086,7 @@ static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx, in
 
 	/* no read or write permission given on import, only on run do we give the right permissions */
 
-	reg->gpu_alloc->type = BASE_MEM_IMPORT_TYPE_UMM;
+	reg->gpu_alloc->type = KBASE_MEM_TYPE_IMPORTED_UMM;
 	reg->gpu_alloc->imported.umm.sgt = NULL;
 	reg->gpu_alloc->imported.umm.dma_buf = dma_buf;
 	reg->gpu_alloc->imported.umm.dma_attachment = dma_attachment;
@@ -647,6 +1115,7 @@ static struct kbase_va_region *kbase_mem_from_user_buffer(
 	struct kbase_va_region *reg;
 	long faulted_pages;
 	int zone = KBASE_REG_ZONE_CUSTOM_VA;
+	bool shared_zone = false;
 
 	*va_pages = (PAGE_ALIGN(address + size) >> PAGE_SHIFT) -
 		PFN_DOWN(address);
@@ -660,14 +1129,24 @@ static struct kbase_va_region *kbase_mem_from_user_buffer(
 	/* SAME_VA generally not supported with imported memory (no known use cases) */
 	*flags &= ~BASE_MEM_SAME_VA;
 
+	if (*flags & BASE_MEM_IMPORT_SHARED)
+		shared_zone = true;
+
 #ifdef CONFIG_64BIT
 	if (!kctx->is_compat) {
-		/* 64-bit tasks must MMAP anyway, but not expose this address to
-		 * clients */
+		/*
+		 * 64-bit tasks require us to reserve VA on the CPU that we use
+		 * on the GPU.
+		 */
+		shared_zone = true;
+	}
+#endif
+
+	if (shared_zone) {
 		*flags |= BASE_MEM_NEED_MMAP;
 		zone = KBASE_REG_ZONE_SAME_VA;
 	}
-#endif
+
 	reg = kbase_alloc_free_region(kctx, 0, *va_pages, zone);
 
 	if (!reg)
@@ -705,8 +1184,13 @@ static struct kbase_va_region *kbase_mem_from_user_buffer(
 	/* We can't really store the page list because that would involve */
 	/* keeping the pages pinned - instead we pin/unpin around the job */
 	/* (as part of the external resources handling code) */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
 	faulted_pages = get_user_pages(current, current->mm, address, *va_pages,
 			reg->flags & KBASE_REG_GPU_WR, 0, NULL, NULL);
+#else
+	faulted_pages = get_user_pages(address, *va_pages,
+			reg->flags & KBASE_REG_GPU_WR, 0, NULL, NULL);
+#endif
 	up_read(&current->mm->mmap_sem);
 
 	if (faulted_pages != *va_pages)
@@ -717,7 +1201,8 @@ static struct kbase_va_region *kbase_mem_from_user_buffer(
 	reg->gpu_alloc->imported.user_buf.nr_pages = faulted_pages;
 	reg->gpu_alloc->imported.user_buf.pages = kmalloc_array(faulted_pages,
 			sizeof(struct page *), GFP_KERNEL);
-	reg->gpu_alloc->imported.user_buf.owner = current;
+	reg->gpu_alloc->imported.user_buf.mm = current->mm;
+	atomic_inc(&current->mm->mm_count);
 
 	if (!reg->gpu_alloc->imported.user_buf.pages)
 		goto no_page_array;
@@ -817,8 +1302,9 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride,
 
 	/* validate and add src handles */
 	for (i = 0; i < nents; i++) {
-		if (ai[i].handle < BASE_MEM_FIRST_FREE_ADDRESS) {
-			if (ai[i].handle != BASE_MEM_WRITE_ALLOC_PAGES_HANDLE)
+		if (ai[i].handle.basep.handle < BASE_MEM_FIRST_FREE_ADDRESS) {
+			if (ai[i].handle.basep.handle !=
+			    BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE)
 				goto bad_handle; /* unsupported magic handle */
 			if (!ai[i].length)
 				goto bad_handle; /* must be > 0 */
@@ -830,13 +1316,17 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride,
 			struct kbase_va_region *aliasing_reg;
 			struct kbase_mem_phy_alloc *alloc;
 
-			aliasing_reg = kbase_region_tracker_find_region_base_address(kctx, (ai[i].handle >> PAGE_SHIFT) << PAGE_SHIFT);
+			aliasing_reg = kbase_region_tracker_find_region_base_address(
+				kctx,
+				(ai[i].handle.basep.handle >> PAGE_SHIFT) << PAGE_SHIFT);
 
 			/* validate found region */
 			if (!aliasing_reg)
 				goto bad_handle; /* Not found */
 			if (aliasing_reg->flags & KBASE_REG_FREE)
 				goto bad_handle; /* Free region */
+			if (aliasing_reg->flags & KBASE_REG_DONT_NEED)
+				goto bad_handle; /* Ephemeral region */
 			if (!aliasing_reg->gpu_alloc)
 				goto bad_handle; /* No alloc */
 			if (aliasing_reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE)
@@ -1058,6 +1548,7 @@ static int zap_range_nolock(struct mm_struct *mm,
 	int err = -EINVAL; /* in case end < start */
 
 	while (start < end) {
+		unsigned long local_start;
 		unsigned long local_end;
 
 		vma = find_vma_intersection(mm, start, end);
@@ -1068,12 +1559,17 @@ static int zap_range_nolock(struct mm_struct *mm,
 		if (vma->vm_ops != vm_ops)
 			goto try_next;
 
+		local_start = vma->vm_start;
+
+		if (start > local_start)
+			local_start = start;
+
 		local_end = vma->vm_end;
 
 		if (end < local_end)
 			local_end = end;
 
-		err = zap_vma_ptes(vma, start, local_end - start);
+		err = zap_vma_ptes(vma, local_start, local_end - local_start);
 		if (unlikely(err))
 			break;
 
@@ -1085,19 +1581,98 @@ try_next:
 	return err;
 }
 
+int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages)
+{
+	phys_addr_t *phy_pages;
+	u64 delta = new_pages - old_pages;
+	int ret = 0;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/* Map the new pages into the GPU */
+	phy_pages = kbase_get_gpu_phy_pages(reg);
+	ret = kbase_mmu_insert_pages(kctx, reg->start_pfn + old_pages,
+			phy_pages + old_pages, delta, reg->flags);
+
+	return ret;
+}
+
+static int kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages)
+{
+	struct kbase_mem_phy_alloc *cpu_alloc = reg->cpu_alloc;
+	struct kbase_cpu_mapping *mapping;
+	int err;
+
+	lockdep_assert_held(&kctx->process_mm->mmap_sem);
+
+	list_for_each_entry(mapping, &cpu_alloc->mappings, mappings_list) {
+		unsigned long mapping_size;
+
+		mapping_size = (mapping->vm_end - mapping->vm_start)
+				>> PAGE_SHIFT;
+
+		/* is this mapping affected? */
+		if ((mapping->page_off + mapping_size) > new_pages) {
+			unsigned long first_bad = 0;
+
+			if (new_pages > mapping->page_off)
+				first_bad = new_pages - mapping->page_off;
+
+			err = zap_range_nolock(current->mm,
+					&kbase_vm_ops,
+					mapping->vm_start +
+					(first_bad << PAGE_SHIFT),
+					mapping->vm_end);
+
+			WARN(err,
+			     "Failed to zap VA range (0x%lx - 0x%lx);\n",
+			     mapping->vm_start +
+			     (first_bad << PAGE_SHIFT),
+			     mapping->vm_end
+			     );
+
+			/* The zap failed, give up and exit */
+			if (err)
+				goto failed;
+		}
+	}
+
+	return 0;
+
+failed:
+	return err;
+}
+
+static int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages)
+{
+	u64 delta = old_pages - new_pages;
+	int ret = 0;
+
+	ret = kbase_mmu_teardown_pages(kctx,
+			reg->start_pfn + new_pages, delta);
+
+	return ret;
+}
+
 int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages, enum base_backing_threshold_status *failure_reason)
 {
 	u64 old_pages;
 	u64 delta;
 	int res = -EINVAL;
 	struct kbase_va_region *reg;
-	phys_addr_t *phy_pages;
+	bool read_locked = false;
 
 	KBASE_DEBUG_ASSERT(kctx);
 	KBASE_DEBUG_ASSERT(failure_reason);
 	KBASE_DEBUG_ASSERT(gpu_addr != 0);
 
-	down_read(&current->mm->mmap_sem);
+	down_write(&current->mm->mmap_sem);
 	kbase_gpu_vm_lock(kctx);
 
 	/* Validate the region */
@@ -1131,6 +1706,11 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages, en
 		*failure_reason = BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE;
 		goto out_unlock;
 	}
+	/* can't grow regions which are ephemeral */
+	if (reg->flags & KBASE_REG_DONT_NEED) {
+		*failure_reason = BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE;
+		goto out_unlock;
+	}
 
 	if (new_pages == reg->gpu_alloc->nents) {
 		/* no change */
@@ -1138,14 +1718,17 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages, en
 		goto out_unlock;
 	}
 
-	phy_pages = kbase_get_gpu_phy_pages(reg);
 	old_pages = kbase_reg_current_backed_size(reg);
-
 	if (new_pages > old_pages) {
-		/* growing */
-		int err;
-
 		delta = new_pages - old_pages;
+
+		/*
+		 * No update to the mm is needed, so downgrade the write lock
+		 * to a read lock so that other readers aren't blocked after
+		 * this point.
+		 */
+		downgrade_write(&current->mm->mmap_sem);
+		read_locked = true;
+
 		/* Allocate some more pages */
 		if (kbase_alloc_phy_pages_helper(reg->cpu_alloc, delta) != 0) {
 			*failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM;
@@ -1160,9 +1743,15 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages, en
 				goto out_unlock;
 			}
 		}
-		err = kbase_mmu_insert_pages(kctx, reg->start_pfn + old_pages,
-				phy_pages + old_pages, delta, reg->flags);
-		if (err) {
+
+		/* No update required for CPU mappings, that's done on fault. */
+
+		/* Update GPU mapping. */
+		res = kbase_mem_grow_gpu_mapping(kctx, reg,
+				new_pages, old_pages);
+
+		/* On error free the new pages */
+		if (res) {
 			kbase_free_phy_pages_helper(reg->cpu_alloc, delta);
 			if (reg->cpu_alloc != reg->gpu_alloc)
 				kbase_free_phy_pages_helper(reg->gpu_alloc,
@@ -1171,60 +1760,35 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages, en
 			goto out_unlock;
 		}
 	} else {
-		/* shrinking */
-		struct kbase_cpu_mapping *mapping;
-		int err;
+		delta = old_pages - new_pages;
 
-		/* first, unmap from any mappings affected */
-		list_for_each_entry(mapping, &reg->cpu_alloc->mappings, mappings_list) {
-			unsigned long mapping_size = (mapping->vm_end - mapping->vm_start) >> PAGE_SHIFT;
-
-			/* is this mapping affected ?*/
-			if ((mapping->page_off + mapping_size) > new_pages) {
-				unsigned long first_bad = 0;
-				int zap_res;
-
-				if (new_pages > mapping->page_off)
-					first_bad = new_pages - mapping->page_off;
-
-				zap_res = zap_range_nolock(current->mm,
-						&kbase_vm_ops,
-						mapping->vm_start +
-						(first_bad << PAGE_SHIFT),
-						mapping->vm_end);
-				WARN(zap_res,
-				     "Failed to zap VA range (0x%lx - 0x%lx);\n",
-				     mapping->vm_start +
-				     (first_bad << PAGE_SHIFT),
-				     mapping->vm_end
-				     );
-			}
+		/* Update all CPU mapping(s) */
+		res = kbase_mem_shrink_cpu_mapping(kctx, reg,
+				new_pages, old_pages);
+		if (res) {
+			*failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM;
+			goto out_unlock;
 		}
 
-		/* Free some pages */
-		delta = old_pages - new_pages;
-		err = kbase_mmu_teardown_pages(kctx, reg->start_pfn + new_pages,
-				delta);
-		if (err) {
+		/* Update the GPU mapping */
+		res = kbase_mem_shrink_gpu_mapping(kctx, reg,
+				new_pages, old_pages);
+		if (res) {
 			*failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM;
 			goto out_unlock;
 		}
-#ifndef CONFIG_MALI_NO_MALI
-		if (kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_6367)) {
-			/* Wait for GPU to flush write buffer before freeing physical pages */
-			kbase_wait_write_flush(kctx);
-		}
-#endif
+
 		kbase_free_phy_pages_helper(reg->cpu_alloc, delta);
 		if (reg->cpu_alloc != reg->gpu_alloc)
 			kbase_free_phy_pages_helper(reg->gpu_alloc, delta);
 	}
 
-	res = 0;
-
 out_unlock:
 	kbase_gpu_vm_unlock(kctx);
-	up_read(&current->mm->mmap_sem);
+	if (read_locked)
+		up_read(&current->mm->mmap_sem);
+	else
+		up_write(&current->mm->mmap_sem);
 
 	return res;
 }
@@ -1296,6 +1860,10 @@ static int kbase_cpu_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	if (map->page_off + rel_pgoff >= map->alloc->nents)
 		goto locked_bad_fault;
 
+	/* Fault on access to DONT_NEED regions */
+	if (map->alloc->reg && (map->alloc->reg->flags & KBASE_REG_DONT_NEED))
+		goto locked_bad_fault;
+
 	/* insert all valid pages from the fault location */
 	for (i = rel_pgoff;
 	     i < MIN((vma->vm_end - vma->vm_start) >> PAGE_SHIFT,
@@ -1663,8 +2231,8 @@ int kbase_mmap(struct file *file, struct vm_area_struct *vma)
 	rcu_read_unlock();
 
 	switch (vma->vm_pgoff) {
-	case PFN_DOWN(BASE_MEM_INVALID_HANDLE):
-	case PFN_DOWN(BASE_MEM_WRITE_ALLOC_PAGES_HANDLE):
+	case PFN_DOWN(BASEP_MEM_INVALID_HANDLE):
+	case PFN_DOWN(BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE):
 		/* Illegal handle for direct map */
 		err = -EINVAL;
 		goto out_unlock;
@@ -1875,8 +2443,8 @@ out:
 
 KBASE_EXPORT_TEST_API(kbase_mmap);
 
-void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size,
-		struct kbase_vmap_struct *map)
+void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size,
+		      unsigned long prot_request, struct kbase_vmap_struct *map)
 {
 	struct kbase_va_region *reg;
 	unsigned long page_index;
@@ -1911,6 +2479,14 @@ void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size,
 	if (page_index + page_count > kbase_reg_current_backed_size(reg))
 		goto out_unlock;
 
+	if (reg->flags & KBASE_REG_DONT_NEED)
+		goto out_unlock;
+
+	/* Check that access permissions can be satisfied.
+	 * Intended only for checking KBASE_REG_{CPU,GPU}_{RD,WR} */
+	if ((reg->flags & prot_request) != prot_request)
+		goto out_unlock;
+
 	page_array = kbase_get_cpu_phy_pages(reg);
 	if (!page_array)
 		goto out_unlock;
@@ -1927,6 +2503,9 @@ void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size,
 		/* Map uncached */
 		prot = pgprot_writecombine(prot);
 	}
+	/* Note: enforcing a RO prot_request onto prot is not done, since:
+	 * - CPU-arch-specific integration required
+	 * - kbase_vmap() requires no access checks to be made/enforced */
 
 	cpu_addr = vmap(pages, page_count, VM_MAP, prot);
 
@@ -1945,6 +2524,12 @@ void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size,
 	map->is_cached = (reg->flags & KBASE_REG_CPU_CACHED) != 0;
 	sync_needed = map->is_cached;
 
+#ifdef CONFIG_MALI_COH_KERN
+	/* kernel can use coherent memory if supported */
+	if (kctx->kbdev->system_coherency == COHERENCY_ACE)
+		sync_needed = false;
+#endif
+
 	if (sync_needed) {
 		/* Sync first page */
 		size_t sz = MIN(((size_t) PAGE_SIZE - offset), size);
@@ -1979,6 +2564,17 @@ out_unlock:
 	kbase_gpu_vm_unlock(kctx);
 	return NULL;
 }
+
+void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size,
+		struct kbase_vmap_struct *map)
+{
+	/* 0 is specified for prot_request to indicate no access checks should
+	 * be made.
+	 *
+	 * As mentioned in kbase_vmap_prot() this means that a kernel-side
+	 * CPU-RO mapping is not enforced to allow this to work */
+	return kbase_vmap_prot(kctx, gpu_addr, size, 0u, map);
+}
 KBASE_EXPORT_TEST_API(kbase_vmap);
 
 void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map)
@@ -1986,6 +2582,11 @@ void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map)
 	void *addr = (void *)((uintptr_t)map->addr & PAGE_MASK);
 	bool sync_needed = map->is_cached;
 	vunmap(addr);
+#ifdef CONFIG_MALI_COH_KERN
+	/* kernel can use coherent memory if supported */
+	if (kctx->kbdev->system_coherency == COHERENCY_ACE)
+		sync_needed = false;
+#endif
 	if (sync_needed) {
 		off_t offset = (uintptr_t)map->addr & ~PAGE_MASK;
 		size_t size = map->size;
diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem_linux.h b/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem_linux.h
index 6a139fd70234..6471747a7dc5 100644
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem_linux.h
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem_linux.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010, 2012-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010, 2012-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -42,6 +42,72 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in
 int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages, enum base_backing_threshold_status *failure_reason);
 int kbase_mmap(struct file *file, struct vm_area_struct *vma);
 
+/**
+ * kbase_mem_evictable_init - Initialize the Ephemeral memory eviction
+ * mechanism.
+ * @kctx: The kbase context to initialize.
+ *
+ * Return: Zero on success or -errno on failure.
+ */
+int kbase_mem_evictable_init(struct kbase_context *kctx);
+
+/**
+ * kbase_mem_evictable_deinit - De-initialize the Ephemeral memory eviction
+ * mechanism.
+ * @kctx: The kbase context to de-initialize.
+ */
+void kbase_mem_evictable_deinit(struct kbase_context *kctx);
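+
+/*
+ * Note: kbase_mem_evictable_init() registers a per-context shrinker; the
+ * matching kbase_mem_evictable_deinit() call unregisters it.
+ */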
+
+/**
+ * kbase_mem_grow_gpu_mapping - Grow the GPU mapping of an allocation
+ * @kctx:      Context the region belongs to
+ * @reg:       The GPU region
+ * @new_pages: The number of pages after the grow
+ * @old_pages: The number of pages before the grow
+ *
+ * Return: 0 on success, -errno on error.
+ *
+ * Expand the GPU mapping to encompass the new physical pages which have
+ * been added to the allocation.
+ *
+ * Note: Caller must be holding the region lock.
+ */
+int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages);
+
+/**
+ * kbase_mem_evictable_make - Make a physical allocation eligible for eviction
+ * @gpu_alloc: The physical allocation to make evictable
+ *
+ * Return: 0 on success, -errno on error.
+ *
+ * Take the provided region and make all the physical pages within it
+ * reclaimable by the kernel, updating the per-process VM stats as well.
+ * Remove any CPU mappings (as these can't be removed in the shrinker callback
+ * as mmap_sem might already be taken) but leave the GPU mapping intact
+ * until the shrinker reclaims the allocation.
+ *
+ * Note: Must be called with the region lock of the containing context.
+ */
+int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc);
+
+/**
+ * kbase_mem_evictable_unmake - Remove a physical allocation's eligibility for
+ * eviction.
+ * @alloc: The physical allocation to remove eviction eligibility from.
+ *
+ * Return: True if the allocation has its backing restored and false if
+ * it does not.
+ *
+ * Make the physical pages in the region no longer reclaimable and update the
+ * per-process stats. If the shrinker has already evicted the memory,
+ * re-allocate it if the region is still alive.
+ *
+ * Note: Must be called with the region lock of the containing context.
+ */
+bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *alloc);
+
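
For reference, the intended call pattern pairs the two functions above under the region lock, typically from the path that toggles a region's "don't need" state. The sketch below is illustrative only: the wrapper name, its dont_need parameter and the -ENOMEM chosen when the backing could not be restored are assumptions, not part of this patch.

	static int example_set_dont_need(struct kbase_context *kctx,
			struct kbase_va_region *reg, bool dont_need)
	{
		int err = 0;

		kbase_gpu_vm_lock(kctx);	/* region lock */
		if (dont_need) {
			/* Pages become reclaimable; CPU mappings are removed,
			 * the GPU mapping stays until the shrinker runs */
			err = kbase_mem_evictable_make(reg->gpu_alloc);
		} else {
			/* False means the shrinker already took the backing
			 * and it could not be restored */
			if (!kbase_mem_evictable_unmake(reg->gpu_alloc))
				err = -ENOMEM;
		}
		kbase_gpu_vm_unlock(kctx);

		return err;
	}
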
 struct kbase_vmap_struct {
 	u64 gpu_addr;
 	struct kbase_mem_phy_alloc *cpu_alloc;
@@ -52,8 +118,83 @@ struct kbase_vmap_struct {
 	size_t size;
 	bool is_cached;
 };
+
+
+/**
+ * kbase_vmap_prot - Map a GPU VA range into the kernel safely, only if the
+ * requested access permissions are supported
+ * @kctx:         Context the VA range belongs to
+ * @gpu_addr:     Start address of VA range
+ * @size:         Size of VA range
+ * @prot_request: Flags indicating how the caller will then access the memory
+ * @map:          Structure to be given to kbase_vunmap() on freeing
+ *
+ * Return: Kernel-accessible CPU pointer to the VA range, or NULL on error
+ *
+ * Map a GPU VA Range into the kernel. The VA range must be contained within a
+ * GPU memory region. Appropriate CPU cache-flushing operations are made as
+ * required, dependent on the CPU mapping for the memory region.
+ *
+ * This is safer than using kmap() on the pages directly,
+ * because the pages here are refcounted to prevent freeing (and hence reuse
+ * elsewhere in the system) until a matching kbase_vunmap() call.
+ *
+ * The flags in @prot_request should use KBASE_REG_{CPU,GPU}_{RD,WR}, to check
+ * whether the region should allow the intended access, and return an error if
+ * disallowed. This is essential for security of imported memory, particularly
+ * a user buf from SHM mapped into the process as RO. In that case, write
+ * access must be checked if the intention is for the kernel to write to the
+ * memory.
+ *
+ * The checks are also there to help catch access errors on memory where
+ * security is not a concern: imported memory that is always RW, and memory
+ * that was allocated and owned by the process attached to @kctx. In this case,
+ * it helps to identify memory that was mapped with the wrong access type.
+ *
+ * Note: KBASE_REG_GPU_{RD,WR} flags are currently supported for legacy cases
+ * where either the security of memory is solely dependent on those flags, or
+ * when userspace code was expecting only the GPU to access the memory (e.g. HW
+ * workarounds).
+ *
+ */
+void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size,
+		      unsigned long prot_request, struct kbase_vmap_struct *map);
+
+/**
+ * kbase_vmap - Map a GPU VA range into the kernel safely
+ * @kctx:     Context the VA range belongs to
+ * @gpu_addr: Start address of VA range
+ * @size:     Size of VA range
+ * @map:      Structure to be given to kbase_vunmap() on freeing
+ *
+ * Return: Kernel-accessible CPU pointer to the VA range, or NULL on error
+ *
+ * Map a GPU VA Range into the kernel. The VA range must be contained within a
+ * GPU memory region. Appropriate CPU cache-flushing operations are made as
+ * required, dependent on the CPU mapping for the memory region.
+ *
+ * This is safer than using kmap() on the pages directly,
+ * because the pages here are refcounted to prevent freeing (and hence reuse
+ * elsewhere in the system) until a matching kbase_vunmap() call.
+ *
+ * kbase_vmap_prot() should be used in preference, since kbase_vmap() makes no
+ * checks to ensure the security of e.g. imported user bufs from RO SHM.
+ */
 void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size,
 		struct kbase_vmap_struct *map);
+
+/**
+ * kbase_vunmap - Unmap a GPU VA range from the kernel
+ * @kctx: Context the VA range belongs to
+ * @map:  Structure describing the mapping from the corresponding kbase_vmap()
+ *        call
+ *
+ * Unmaps a GPU VA range from the kernel, given its @map structure obtained
+ * from kbase_vmap(). Appropriate CPU cache-flushing operations are made as
+ * required, dependent on the CPU mapping for the memory region.
+ *
+ * The reference taken on pages during kbase_vmap() is released.
+ */
 void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map);
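
A typical caller maps a range for a kernel-side write and relies on kbase_vmap_prot() to refuse regions that do not allow it (for example an RO SHM import). A minimal sketch, assuming the usual mali_kbase.h includes; the helper name and the -EINVAL on failure are illustrative:

	static int example_zero_region(struct kbase_context *kctx,
			u64 gpu_addr, size_t size)
	{
		struct kbase_vmap_struct map;
		void *cpu_addr;

		/* Request CPU write access so an RO mapping is refused */
		cpu_addr = kbase_vmap_prot(kctx, gpu_addr, size,
				KBASE_REG_CPU_WR, &map);
		if (!cpu_addr)
			return -EINVAL;

		memset(cpu_addr, 0, size);

		/* Drops the page references taken by kbase_vmap_prot() */
		kbase_vunmap(kctx, &map);

		return 0;
	}
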
 
 /** @brief Allocate memory from kernel space and map it onto the GPU
diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem_pool.c b/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem_pool.c
index 153cd4efac49..957061893b00 100644
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem_pool.c
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem_pool.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -43,6 +43,9 @@ int __init kbase_carveout_mem_reserve(phys_addr_t size)
 		kbase_mem_pool_max_size(pool),	\
 		##__VA_ARGS__)
 
+#define NOT_DIRTY false
+#define NOT_RECLAIMED false
+
 static inline void kbase_mem_pool_lock(struct kbase_mem_pool *pool)
 {
 	spin_lock(&pool->pool_lock);
@@ -261,6 +264,8 @@ static size_t kbase_mem_pool_grow(struct kbase_mem_pool *pool,
 
 	for (i = 0; i < nr_to_grow && !kbase_mem_pool_is_full(pool); i++) {
 		p = kbase_mem_pool_alloc_page(pool);
+		if (!p)
+			break;
 		kbase_mem_pool_add(pool, p);
 	}
 
@@ -505,7 +510,7 @@ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_pages,
 	return 0;
 
 err_rollback:
-	kbase_mem_pool_free_pages(pool, i, pages, false);
+	kbase_mem_pool_free_pages(pool, i, pages, NOT_DIRTY, NOT_RECLAIMED);
 	return err;
 }
 
@@ -548,7 +553,7 @@ static void kbase_mem_pool_add_array(struct kbase_mem_pool *pool,
 }
 
 void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages,
-		phys_addr_t *pages, bool dirty)
+		phys_addr_t *pages, bool dirty, bool reclaimed)
 {
 	struct kbase_mem_pool *next_pool = pool->next_pool;
 	struct page *p;
@@ -558,22 +563,24 @@ void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages,
 
 	pool_dbg(pool, "free_pages(%zu):\n", nr_pages);
 
-	/* Add to this pool */
-	nr_to_pool = kbase_mem_pool_capacity(pool);
-	nr_to_pool = min(nr_pages, nr_to_pool);
+	if (!reclaimed) {
+		/* Add to this pool */
+		nr_to_pool = kbase_mem_pool_capacity(pool);
+		nr_to_pool = min(nr_pages, nr_to_pool);
 
-	kbase_mem_pool_add_array(pool, nr_to_pool, pages, false, dirty);
+		kbase_mem_pool_add_array(pool, nr_to_pool, pages, false, dirty);
 
-	i += nr_to_pool;
+		i += nr_to_pool;
 
-	if (i != nr_pages && next_pool) {
-		/* Spill to next pool (may overspill) */
-		nr_to_pool = kbase_mem_pool_capacity(next_pool);
-		nr_to_pool = min(nr_pages - i, nr_to_pool);
+		if (i != nr_pages && next_pool) {
+			/* Spill to next pool (may overspill) */
+			nr_to_pool = kbase_mem_pool_capacity(next_pool);
+			nr_to_pool = min(nr_pages - i, nr_to_pool);
 
-		kbase_mem_pool_add_array(next_pool, nr_to_pool, pages + i,
-				true, dirty);
-		i += nr_to_pool;
+			kbase_mem_pool_add_array(next_pool, nr_to_pool,
+					pages + i, true, dirty);
+			i += nr_to_pool;
+		}
 	}
 
 	/* Free any remaining pages to kernel */
@@ -582,6 +589,10 @@ void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages,
 			continue;
 
 		p = phys_to_page(pages[i]);
+		if (reclaimed)
+			zone_page_state_add(-1, page_zone(p),
+					NR_SLAB_RECLAIMABLE);
+
 		kbase_mem_pool_free_page(pool, p);
 		pages[i] = 0;
 	}
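
With the extra argument, call sites distinguish an ordinary free, where pages may be recycled through the pool, from a free of backing the shrinker has already counted as reclaimed, where the pool is bypassed and the NR_SLAB_RECLAIMABLE accounting above is corrected. A sketch of the two call shapes using the constants defined at the top of this file (the wrapper and its arguments are illustrative):

	static void example_free_backing(struct kbase_context *kctx,
			size_t nr_pages, phys_addr_t *pages, bool evicted)
	{
		if (!evicted)
			/* Normal path: pages may be returned to the pool */
			kbase_mem_pool_free_pages(&kctx->mem_pool, nr_pages,
					pages, NOT_DIRTY, NOT_RECLAIMED);
		else
			/* Backing already reclaimed by the shrinker: free
			 * straight to the kernel and fix up the stats */
			kbase_mem_pool_free_pages(&kctx->mem_pool, nr_pages,
					pages, NOT_DIRTY, true);
	}
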
diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem_profile_debugfs.c b/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem_profile_debugfs.c
index 0b19d05c46e8..03594102f7ef 100644
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem_profile_debugfs.c
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem_profile_debugfs.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -15,7 +15,7 @@
 
 
 
-#include <mali_kbase_gpu_memory_debugfs.h>
+#include <mali_kbase.h>
 
 #ifdef CONFIG_DEBUG_FS
 
diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem_profile_debugfs.h b/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem_profile_debugfs.h
index 9555197f305c..a1dc2e0b165b 100644
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem_profile_debugfs.h
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_mem_profile_debugfs.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -26,7 +26,6 @@
 #ifndef _KBASE_MEM_PROFILE_DEBUGFS_H
 #define _KBASE_MEM_PROFILE_DEBUGFS_H
 
-#include <mali_kbase.h>
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
 
diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_mmu.c b/drivers/gpu/arm/midgard_for_linux/mali_kbase_mmu.c
index d81ef593e928..48d53723a9b4 100644
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_mmu.c
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_mmu.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -30,9 +30,8 @@
 #if defined(CONFIG_MALI_GATOR_SUPPORT)
 #include <mali_kbase_gator.h>
 #endif
-#if defined(CONFIG_MALI_MIPE_ENABLED)
 #include <mali_kbase_tlstream.h>
-#endif
+#include <mali_kbase_instr_defs.h>
 #include <mali_kbase_debug.h>
 
 #define beenthere(kctx, f, a...)  dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a)
@@ -41,9 +40,31 @@
 #include <mali_kbase_hw.h>
 #include <mali_kbase_mmu_hw.h>
 #include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_time.h>
 
 #define KBASE_MMU_PAGE_ENTRIES 512
 
+/**
+ * kbase_mmu_flush_invalidate() - Flush and invalidate the GPU caches.
+ * @kctx: The KBase context.
+ * @vpfn: The virtual page frame number to start the flush on.
+ * @nr: The number of pages to flush.
+ * @sync: Set if the operation should be synchronous or not.
+ *
+ * Issue a cache flush + invalidate to the GPU caches and invalidate the TLBs.
+ *
+ * If sync is not set then transactions still in flight when the flush is
+ * issued may use the old page tables, and the data they write will not be
+ * written out to memory. In that case this function returns after the flush
+ * has been issued but before all accesses which might affect the flushed
+ * region have completed.
+ *
+ * If sync is set then accesses in the flushed region will be drained
+ * before data is flushed and invalidated through L1, L2 and into memory,
+ * after which point this function will return.
+ */
+static void kbase_mmu_flush_invalidate(struct kbase_context *kctx,
+		u64 vpfn, size_t nr, bool sync);
+
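
Within this file the sync flag follows a simple rule: inserting brand-new mappings uses an asynchronous flush (no GPU access to them can exist yet), while tearing down or updating live mappings, after which pages may be freed or reused, uses a synchronous one. A minimal sketch of that rule; the wrapper and its last parameter are illustrative:

	static void example_flush_after_pgd_change(struct kbase_context *kctx,
			u64 vpfn, size_t nr, bool pages_may_be_freed)
	{
		if (!pages_may_be_freed)
			/* e.g. kbase_mmu_insert_pages() */
			kbase_mmu_flush_invalidate(kctx, vpfn, nr, false);
		else
			/* e.g. kbase_mmu_teardown_pages()/update_pages() */
			kbase_mmu_flush_invalidate(kctx, vpfn, nr, true);
	}
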
 /**
  * kbase_mmu_sync_pgd - sync page directory to memory
  * @kbdev:	Device pointer.
@@ -56,8 +77,12 @@
 static void kbase_mmu_sync_pgd(struct kbase_device *kbdev,
 		dma_addr_t handle, size_t size)
 {
-
-	dma_sync_single_for_device(kbdev->dev, handle, size, DMA_TO_DEVICE);
+	/* If page table is not coherent then ensure the gpu can read
+	 * the pages from memory
+	 */
+	if (kbdev->system_coherency != COHERENCY_ACE)
+		dma_sync_single_for_device(kbdev->dev, handle, size,
+				DMA_TO_DEVICE);
 }
 
 /*
@@ -136,6 +161,18 @@ void page_fault_worker(struct work_struct *data)
 		dev_warn(kbdev->dev, "Access flag unexpectedly set");
 		goto fault_done;
 
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+	case AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT:
+
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+					"Address size fault");
+		goto fault_done;
+
+	case AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT:
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+					"Memory attributes fault");
+		goto fault_done;
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
 
 	default:
 		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
@@ -164,6 +201,13 @@ void page_fault_worker(struct work_struct *data)
 		goto fault_done;
 	}
 
+	if ((region->flags & KBASE_REG_DONT_NEED)) {
+		kbase_gpu_vm_unlock(kctx);
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Don't need memory can't be grown");
+		goto fault_done;
+	}
+
 	/* find the size we need to grow it by */
 	/* we know the result fit in a size_t due to kbase_region_tracker_find_region_enclosing_address
 	 * validating the fault_adress to be within a size_t from the start_pfn */
@@ -233,19 +277,27 @@ void page_fault_worker(struct work_struct *data)
 
 
 	if (grown) {
+		u64 pfn_offset;
 		u32 op;
 
 		/* alloc success */
 		KBASE_DEBUG_ASSERT(kbase_reg_current_backed_size(region) <= region->nr_pages);
 
-		/* AS transaction begin */
-		mutex_lock(&faulting_as->transaction_mutex);
-
 		/* set up the new pages */
-		err = kbase_mmu_insert_pages(kctx, region->start_pfn + kbase_reg_current_backed_size(region) - new_pages, &kbase_get_gpu_phy_pages(region)[kbase_reg_current_backed_size(region) - new_pages], new_pages, region->flags);
+		pfn_offset = kbase_reg_current_backed_size(region) - new_pages;
+		/*
+		 * Note:
+		 * Issuing an MMU operation will unlock the MMU and cause the
+		 * translation to be replayed. If the page insertion fails then
+		 * translation to be replayed. If the page insertion fails
+		 * then, rather than trying to continue, the context should be
+		 * killed, so the no_flush version of insert_pages is used,
+		 * which allows us to unlock the MMU as we see fit.
+		err = kbase_mmu_insert_pages_no_flush(kctx,
+				region->start_pfn + pfn_offset,
+				&kbase_get_gpu_phy_pages(region)[pfn_offset],
+				new_pages, region->flags);
 		if (err) {
-			/* failed to insert pages, handle as a normal PF */
-			mutex_unlock(&faulting_as->transaction_mutex);
 			kbase_free_phy_pages_helper(region->gpu_alloc, new_pages);
 			if (region->gpu_alloc != region->cpu_alloc)
 				kbase_free_phy_pages_helper(region->cpu_alloc,
@@ -259,9 +311,10 @@ void page_fault_worker(struct work_struct *data)
 #if defined(CONFIG_MALI_GATOR_SUPPORT)
 		kbase_trace_mali_page_fault_insert_pages(as_no, new_pages);
 #endif
-#if defined(CONFIG_MALI_MIPE_ENABLED)
 		kbase_tlstream_aux_pagefault(kctx->id, (u64)new_pages);
-#endif
+
+		/* AS transaction begin */
+		mutex_lock(&faulting_as->transaction_mutex);
 
 		/* flush L2 and unlock the VA (resumes the MMU) */
 		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6367))
@@ -324,11 +377,9 @@ phys_addr_t kbase_mmu_alloc_pgd(struct kbase_context *kctx)
 	if (!p)
 		goto sub_pages;
 
-#if defined(CONFIG_MALI_MIPE_ENABLED)
 	kbase_tlstream_aux_pagesalloc(
 			(u32)kctx->id,
 			(u64)new_page_count);
-#endif
 
 	page = kmap(p);
 	if (NULL == page)
@@ -365,7 +416,7 @@ static phys_addr_t mmu_get_next_pgd(struct kbase_context *kctx, phys_addr_t pgd,
 	KBASE_DEBUG_ASSERT(pgd);
 	KBASE_DEBUG_ASSERT(NULL != kctx);
 
-	lockdep_assert_held(&kctx->reg_lock);
+	lockdep_assert_held(&kctx->mmu_lock);
 
 	/*
 	 * Architecture spec defines level-0 as being the top-most.
@@ -406,8 +457,9 @@ static phys_addr_t mmu_get_bottom_pgd(struct kbase_context *kctx, u64 vpfn)
 	phys_addr_t pgd;
 	int l;
 
-	pgd = kctx->pgd;
+	lockdep_assert_held(&kctx->mmu_lock);
 
+	pgd = kctx->pgd;
 	for (l = MIDGARD_MMU_TOPLEVEL; l < MIDGARD_MMU_BOTTOMLEVEL; l++) {
 		pgd = mmu_get_next_pgd(kctx, pgd, vpfn, l);
 		/* Handle failure condition */
@@ -428,7 +480,7 @@ static phys_addr_t mmu_insert_pages_recover_get_next_pgd(struct kbase_context *k
 	KBASE_DEBUG_ASSERT(pgd);
 	KBASE_DEBUG_ASSERT(NULL != kctx);
 
-	lockdep_assert_held(&kctx->reg_lock);
+	lockdep_assert_held(&kctx->mmu_lock);
 
 	/*
 	 * Architecture spec defines level-0 as being the top-most.
@@ -453,6 +505,8 @@ static phys_addr_t mmu_insert_pages_recover_get_bottom_pgd(struct kbase_context
 	phys_addr_t pgd;
 	int l;
 
+	lockdep_assert_held(&kctx->mmu_lock);
+
 	pgd = kctx->pgd;
 
 	for (l = MIDGARD_MMU_TOPLEVEL; l < MIDGARD_MMU_BOTTOMLEVEL; l++) {
@@ -476,7 +530,7 @@ static void mmu_insert_pages_failure_recovery(struct kbase_context *kctx, u64 vp
 	/* 64-bit address range is the max */
 	KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
 
-	lockdep_assert_held(&kctx->reg_lock);
+	lockdep_assert_held(&kctx->mmu_lock);
 
 	mmu_mode = kctx->kbdev->mmu_mode;
 
@@ -524,22 +578,28 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
 	bool recover_required = false;
 	u64 recover_vpfn = vpfn;
 	size_t recover_count = 0;
+	size_t remain = nr;
+	int err;
 
 	KBASE_DEBUG_ASSERT(NULL != kctx);
 	KBASE_DEBUG_ASSERT(0 != vpfn);
 	/* 64-bit address range is the max */
 	KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
 
-	lockdep_assert_held(&kctx->reg_lock);
+	/* Early out if there is nothing to do */
+	if (nr == 0)
+		return 0;
 
-	while (nr) {
+	mutex_lock(&kctx->mmu_lock);
+
+	while (remain) {
 		unsigned int i;
 		unsigned int index = vpfn & 0x1FF;
 		unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
 		struct page *p;
 
-		if (count > nr)
-			count = nr;
+		if (count > remain)
+			count = remain;
 
 		/*
 		 * Repeatedly calling mmu_get_bottom_pte() is clearly
@@ -558,7 +618,8 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
 								  recover_vpfn,
 								  recover_count);
 			}
-			return -EINVAL;
+			err = -EINVAL;
+			goto fail_unlock;
 		}
 
 		p = pfn_to_page(PFN_DOWN(pgd));
@@ -572,7 +633,8 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
 								  recover_vpfn,
 								  recover_count);
 			}
-			return -ENOMEM;
+			err = -ENOMEM;
+			goto fail_unlock;
 		}
 
 		for (i = 0; i < count; i++) {
@@ -584,7 +646,7 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
 		}
 
 		vpfn += count;
-		nr -= count;
+		remain -= count;
 
 		kbase_mmu_sync_pgd(kctx->kbdev,
 				kbase_dma_addr(p) + (index * sizeof(u64)),
@@ -597,13 +659,17 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
 		recover_required = true;
 		recover_count += count;
 	}
+	mutex_unlock(&kctx->mmu_lock);
+	kbase_mmu_flush_invalidate(kctx, vpfn, nr, false);
 	return 0;
+
+fail_unlock:
+	mutex_unlock(&kctx->mmu_lock);
+	kbase_mmu_flush_invalidate(kctx, vpfn, nr, false);
+	return err;
 }
 
-/*
- * Map 'nr' pages pointed to by 'phys' at GPU PFN 'vpfn'
- */
-int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn,
+int kbase_mmu_insert_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
 				  phys_addr_t *phys, size_t nr,
 				  unsigned long flags)
 {
@@ -614,22 +680,28 @@ int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn,
 	bool recover_required = false;
 	u64 recover_vpfn = vpfn;
 	size_t recover_count = 0;
+	size_t remain = nr;
+	int err;
 
 	KBASE_DEBUG_ASSERT(NULL != kctx);
 	KBASE_DEBUG_ASSERT(0 != vpfn);
 	/* 64-bit address range is the max */
 	KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
 
-	lockdep_assert_held(&kctx->reg_lock);
+	/* Early out if there is nothing to do */
+	if (nr == 0)
+		return 0;
+
+	mutex_lock(&kctx->mmu_lock);
 
-	while (nr) {
+	while (remain) {
 		unsigned int i;
 		unsigned int index = vpfn & 0x1FF;
 		unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
 		struct page *p;
 
-		if (count > nr)
-			count = nr;
+		if (count > remain)
+			count = remain;
 
 		/*
 		 * Repeatedly calling mmu_get_bottom_pte() is clearly
@@ -648,7 +720,8 @@ int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn,
 								  recover_vpfn,
 								  recover_count);
 			}
-			return -EINVAL;
+			err = -EINVAL;
+			goto fail_unlock;
 		}
 
 		p = pfn_to_page(PFN_DOWN(pgd));
@@ -662,7 +735,8 @@ int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn,
 								  recover_vpfn,
 								  recover_count);
 			}
-			return -ENOMEM;
+			err = -ENOMEM;
+			goto fail_unlock;
 		}
 
 		for (i = 0; i < count; i++) {
@@ -675,7 +749,7 @@ int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn,
 
 		phys += count;
 		vpfn += count;
-		nr -= count;
+		remain -= count;
 
 		kbase_mmu_sync_pgd(kctx->kbdev,
 				kbase_dma_addr(p) + (index * sizeof(u64)),
@@ -688,81 +762,209 @@ int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn,
 		recover_required = true;
 		recover_count += count;
 	}
+
+	mutex_unlock(&kctx->mmu_lock);
 	return 0;
+
+fail_unlock:
+	mutex_unlock(&kctx->mmu_lock);
+	return err;
+}
+
+/*
+ * Map 'nr' pages pointed to by 'phys' at GPU PFN 'vpfn'
+ */
+int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn,
+				  phys_addr_t *phys, size_t nr,
+				  unsigned long flags)
+{
+	int err;
+
+	err = kbase_mmu_insert_pages_no_flush(kctx, vpfn, phys, nr, flags);
+	kbase_mmu_flush_invalidate(kctx, vpfn, nr, false);
+	return err;
 }
 
 KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages);
 
 /**
- * This function is responsible for validating the MMU PTs
- * triggering reguired flushes.
+ * kbase_mmu_flush_invalidate_noretain() - Flush and invalidate the GPU caches
+ * without retaining the kbase context.
+ * @kctx: The KBase context.
+ * @vpfn: The virtual page frame number to start the flush on.
+ * @nr: The number of pages to flush.
+ * @sync: Set if the operation should be synchronous or not.
  *
- * * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is
- * currently scheduled into the runpool, and so potentially uses a lot of locks.
- * These locks must be taken in the correct order with respect to others
- * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more
- * information.
+ * As per kbase_mmu_flush_invalidate but doesn't retain the kctx or do any
+ * other locking.
  */
-static void kbase_mmu_flush(struct kbase_context *kctx, u64 vpfn, size_t nr)
+static void kbase_mmu_flush_invalidate_noretain(struct kbase_context *kctx,
+		u64 vpfn, size_t nr, bool sync)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	int err;
+	u32 op;
+
+	/* Early out if there is nothing to do */
+	if (nr == 0)
+		return;
+
+	if (sync)
+		op = AS_COMMAND_FLUSH_MEM;
+	else
+		op = AS_COMMAND_FLUSH_PT;
+
+	err = kbase_mmu_hw_do_operation(kbdev,
+				&kbdev->as[kctx->as_nr],
+				kctx, vpfn, nr, op, 0);
+#if KBASE_GPU_RESET_EN
+	if (err) {
+		/* Flush failed to complete, assume the
+		 * GPU has hung and perform a reset to
+		 * recover */
+		dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover\n");
+
+		if (kbase_prepare_to_reset_gpu_locked(kbdev))
+			kbase_reset_gpu_locked(kbdev);
+	}
+#endif /* KBASE_GPU_RESET_EN */
+
+#ifndef CONFIG_MALI_NO_MALI
+	/*
+	 * As this function could be called in interrupt context the sync
+	 * request can't block. Instead log the request and the next flush
+	 * request will pick it up.
+	 */
+	if ((!err) && sync &&
+			kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_6367))
+		atomic_set(&kctx->drain_pending, 1);
+#endif /* !CONFIG_MALI_NO_MALI */
+}
+
+static void kbase_mmu_flush_invalidate(struct kbase_context *kctx,
+		u64 vpfn, size_t nr, bool sync)
 {
 	struct kbase_device *kbdev;
 	bool ctx_is_in_runpool;
+#ifndef CONFIG_MALI_NO_MALI
+	bool drain_pending = false;
 
-	KBASE_DEBUG_ASSERT(NULL != kctx);
+	if (atomic_xchg(&kctx->drain_pending, 0))
+		drain_pending = true;
+#endif /* !CONFIG_MALI_NO_MALI */
 
-	kbdev = kctx->kbdev;
+	/* Early out if there is nothing to do */
+	if (nr == 0)
+		return;
 
-	/* We must flush if we're currently running jobs. At the very least, we need to retain the
-	 * context to ensure it doesn't schedule out whilst we're trying to flush it */
+	kbdev = kctx->kbdev;
 	ctx_is_in_runpool = kbasep_js_runpool_retain_ctx(kbdev, kctx);
 
 	if (ctx_is_in_runpool) {
 		KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
 
-		/* Second level check is to try to only do this when jobs are running. The refcount is
-		 * a heuristic for this. */
-		if (kbdev->js_data.runpool_irq.per_as_data[kctx->as_nr].as_busy_refcount >= 2) {
-			if (!kbase_pm_context_active_handle_suspend(kbdev,
-				KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
-				int ret;
-				u32 op;
-
-				/* AS transaction begin */
-				mutex_lock(&kbdev->as[
-						kctx->as_nr].transaction_mutex);
-
-				if (kbase_hw_has_issue(kbdev,
-						BASE_HW_ISSUE_6367))
-					op = AS_COMMAND_FLUSH;
-				else
-					op = AS_COMMAND_FLUSH_MEM;
-
-				ret = kbase_mmu_hw_do_operation(kbdev,
-							&kbdev->as[kctx->as_nr],
-							kctx, vpfn, nr,
-							op, 0);
+		if (!kbase_pm_context_active_handle_suspend(kbdev,
+			KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
+			int err;
+			u32 op;
+
+			/* AS transaction begin */
+			mutex_lock(&kbdev->as[
+					kctx->as_nr].transaction_mutex);
+
+			if (sync)
+				op = AS_COMMAND_FLUSH_MEM;
+			else
+				op = AS_COMMAND_FLUSH_PT;
+
+			err = kbase_mmu_hw_do_operation(kbdev,
+						&kbdev->as[kctx->as_nr],
+						kctx, vpfn, nr, op, 0);
+
 #if KBASE_GPU_RESET_EN
-				if (ret) {
-					/* Flush failed to complete, assume the
-					 * GPU has hung and perform a reset to
-					 * recover */
-					dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issueing GPU soft-reset to recover\n");
-					if (kbase_prepare_to_reset_gpu(kbdev))
-						kbase_reset_gpu(kbdev);
-				}
+			if (err) {
+				/* Flush failed to complete, assume the
+				 * GPU has hung and perform a reset to
+				 * recover */
+				dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover\n");
+
+				if (kbase_prepare_to_reset_gpu(kbdev))
+					kbase_reset_gpu(kbdev);
+			}
 #endif /* KBASE_GPU_RESET_EN */
 
-				mutex_unlock(&kbdev->as[
-						kctx->as_nr].transaction_mutex);
-				/* AS transaction end */
+			mutex_unlock(&kbdev->as[
+					kctx->as_nr].transaction_mutex);
+			/* AS transaction end */
 
-				kbase_pm_context_idle(kbdev);
+#ifndef CONFIG_MALI_NO_MALI
+			/*
+			 * The transaction lock must be dropped before here
+			 * as kbase_wait_write_flush could take it if
+			 * the GPU was powered down (static analysis doesn't
+			 * know this can't happen).
+			 */
+			drain_pending |= (!err) && sync &&
+					kbase_hw_has_issue(kctx->kbdev,
+							BASE_HW_ISSUE_6367);
+			if (drain_pending) {
+				/* Wait for GPU to flush write buffer */
+				kbase_wait_write_flush(kctx);
 			}
+#endif /* !CONFIG_MALI_NO_MALI */
+
+			kbase_pm_context_idle(kbdev);
 		}
 		kbasep_js_runpool_release_ctx(kbdev, kctx);
 	}
 }
 
+void kbase_mmu_update(struct kbase_context *kctx)
+{
+	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+	/* ASSERT that the context has a valid as_nr, which is only the case
+	 * when it's scheduled in.
+	 *
+	 * as_nr won't change because the caller has the runpool_irq lock */
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+	lockdep_assert_held(&kctx->kbdev->as[kctx->as_nr].transaction_mutex);
+
+	kctx->kbdev->mmu_mode->update(kctx);
+}
+KBASE_EXPORT_TEST_API(kbase_mmu_update);
+
+void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr)
+{
+	lockdep_assert_held(&kbdev->as[as_nr].transaction_mutex);
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+	kbdev->mmu_mode->disable_as(kbdev, as_nr);
+}
+
+void kbase_mmu_disable(struct kbase_context *kctx)
+{
+	/* ASSERT that the context has a valid as_nr, which is only the case
+	 * when it's scheduled in.
+	 *
+	 * as_nr won't change because the caller has the runpool_irq lock */
+	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+
+	lockdep_assert_held(&kctx->kbdev->as[kctx->as_nr].transaction_mutex);
+	lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+
+	/*
+	 * The address space is being disabled; drain all knowledge of it out
+	 * of the caches, as pages and page tables might be freed after this.
+	 *
+	 * The job scheduler code will already be holding the locks and context
+	 * so just do the flush.
+	 */
+	kbase_mmu_flush_invalidate_noretain(kctx, 0, ~0, true);
+
+	kctx->kbdev->mmu_mode->disable_as(kctx->kbdev, kctx->as_nr);
+}
+KBASE_EXPORT_TEST_API(kbase_mmu_disable);
+
 /*
  * We actually only discard the ATE, and not the page table
  * pages. There is a potential DoS here, as we'll leak memory by
@@ -782,17 +984,18 @@ int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr)
 	struct kbase_device *kbdev;
 	size_t requested_nr = nr;
 	struct kbase_mmu_mode const *mmu_mode;
+	int err;
 
 	KBASE_DEBUG_ASSERT(NULL != kctx);
 	beenthere(kctx, "kctx %p vpfn %lx nr %zd", (void *)kctx, (unsigned long)vpfn, nr);
 
-	lockdep_assert_held(&kctx->reg_lock);
-
 	if (0 == nr) {
 		/* early out if nothing to do */
 		return 0;
 	}
 
+	mutex_lock(&kctx->mmu_lock);
+
 	kbdev = kctx->kbdev;
 	mmu_mode = kbdev->mmu_mode;
 
@@ -808,14 +1011,16 @@ int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr)
 		pgd = mmu_get_bottom_pgd(kctx, vpfn);
 		if (!pgd) {
 			dev_warn(kbdev->dev, "kbase_mmu_teardown_pages: mmu_get_bottom_pgd failure\n");
-			return -EINVAL;
+			err = -EINVAL;
+			goto fail_unlock;
 		}
 
 		p = pfn_to_page(PFN_DOWN(pgd));
 		pgd_page = kmap(p);
 		if (!pgd_page) {
 			dev_warn(kbdev->dev, "kbase_mmu_teardown_pages: kmap failure\n");
-			return -ENOMEM;
+			err = -ENOMEM;
+			goto fail_unlock;
 		}
 
 		for (i = 0; i < count; i++)
@@ -831,8 +1036,14 @@ int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr)
 		kunmap(p);
 	}
 
-	kbase_mmu_flush(kctx, vpfn, requested_nr);
+	mutex_unlock(&kctx->mmu_lock);
+	kbase_mmu_flush_invalidate(kctx, vpfn, requested_nr, true);
 	return 0;
+
+fail_unlock:
+	mutex_unlock(&kctx->mmu_lock);
+	kbase_mmu_flush_invalidate(kctx, vpfn, requested_nr, true);
+	return err;
 }
 
 KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages);
@@ -855,12 +1066,17 @@ int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, phys_addr_t *ph
 	u64 *pgd_page;
 	size_t requested_nr = nr;
 	struct kbase_mmu_mode const *mmu_mode;
+	int err;
 
 	KBASE_DEBUG_ASSERT(NULL != kctx);
 	KBASE_DEBUG_ASSERT(0 != vpfn);
 	KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
 
-	lockdep_assert_held(&kctx->reg_lock);
+	/* Early out if there is nothing to do */
+	if (nr == 0)
+		return 0;
+
+	mutex_lock(&kctx->mmu_lock);
 
 	mmu_mode = kctx->kbdev->mmu_mode;
 
@@ -879,14 +1095,16 @@ int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, phys_addr_t *ph
 		pgd = mmu_get_bottom_pgd(kctx, vpfn);
 		if (!pgd) {
 			dev_warn(kctx->kbdev->dev, "mmu_get_bottom_pgd failure\n");
-			return -EINVAL;
+			err = -EINVAL;
+			goto fail_unlock;
 		}
 
 		p = pfn_to_page(PFN_DOWN(pgd));
 		pgd_page = kmap(p);
 		if (!pgd_page) {
 			dev_warn(kctx->kbdev->dev, "kmap failure\n");
-			return -ENOMEM;
+			err = -ENOMEM;
+			goto fail_unlock;
 		}
 
 		for (i = 0; i < count; i++)
@@ -904,9 +1122,14 @@ int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, phys_addr_t *ph
 		kunmap(pfn_to_page(PFN_DOWN(pgd)));
 	}
 
-	kbase_mmu_flush(kctx, vpfn, requested_nr);
-
+	mutex_unlock(&kctx->mmu_lock);
+	kbase_mmu_flush_invalidate(kctx, vpfn, requested_nr, true);
 	return 0;
+
+fail_unlock:
+	mutex_unlock(&kctx->mmu_lock);
+	kbase_mmu_flush_invalidate(kctx, vpfn, requested_nr, true);
+	return err;
 }
 
 /* This is a debug feature only */
@@ -934,7 +1157,7 @@ static void mmu_teardown_level(struct kbase_context *kctx, phys_addr_t pgd, int
 	struct kbase_mmu_mode const *mmu_mode;
 
 	KBASE_DEBUG_ASSERT(NULL != kctx);
-	lockdep_assert_held(&kctx->reg_lock);
+	lockdep_assert_held(&kctx->mmu_lock);
 
 	pgd_page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd)));
 	/* kmap_atomic should NEVER fail. */
@@ -979,6 +1202,8 @@ int kbase_mmu_init(struct kbase_context *kctx)
 	KBASE_DEBUG_ASSERT(NULL != kctx);
 	KBASE_DEBUG_ASSERT(NULL == kctx->mmu_teardown_pages);
 
+	mutex_init(&kctx->mmu_lock);
+
 	/* Preallocate MMU depth of four pages for mmu_teardown_level to use */
 	kctx->mmu_teardown_pages = kmalloc(PAGE_SIZE * 4, GFP_KERNEL);
 
@@ -1004,9 +1229,9 @@ void kbase_mmu_free_pgd(struct kbase_context *kctx)
 	KBASE_DEBUG_ASSERT(NULL != kctx);
 	KBASE_DEBUG_ASSERT(NULL != kctx->mmu_teardown_pages);
 
-	lockdep_assert_held(&kctx->reg_lock);
-
+	mutex_lock(&kctx->mmu_lock);
 	mmu_teardown_level(kctx, kctx->pgd, MIDGARD_MMU_TOPLEVEL, 1, kctx->mmu_teardown_pages);
+	mutex_unlock(&kctx->mmu_lock);
 
 	beenthere(kctx, "pgd %lx", (unsigned long)kctx->pgd);
 	kbase_mem_pool_free(&kctx->mem_pool, phys_to_page(kctx->pgd), true);
@@ -1014,11 +1239,9 @@ void kbase_mmu_free_pgd(struct kbase_context *kctx)
 	new_page_count = kbase_atomic_sub_pages(1, &kctx->used_pages);
 	kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages);
 
-#if defined(CONFIG_MALI_MIPE_ENABLED)
 	kbase_tlstream_aux_pagesalloc(
 			(u32)kctx->id,
 			(u64)new_page_count);
-#endif
 }
 
 KBASE_EXPORT_TEST_API(kbase_mmu_free_pgd);
@@ -1033,7 +1256,7 @@ static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd,
 	struct kbase_mmu_mode const *mmu_mode;
 
 	KBASE_DEBUG_ASSERT(NULL != kctx);
-	lockdep_assert_held(&kctx->reg_lock);
+	lockdep_assert_held(&kctx->mmu_lock);
 
 	mmu_mode = kctx->kbdev->mmu_mode;
 
@@ -1088,13 +1311,13 @@ void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages)
 
 	KBASE_DEBUG_ASSERT(kctx);
 
-	lockdep_assert_held(&kctx->reg_lock);
-
 	if (0 == nr_pages) {
 		/* can't dump in a 0 sized buffer, early out */
 		return NULL;
 	}
 
+	mutex_lock(&kctx->mmu_lock);
+
 	size_left = nr_pages * PAGE_SIZE;
 
 	KBASE_DEBUG_ASSERT(0 != size_left);
@@ -1116,7 +1339,7 @@ void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages)
 			kctx->kbdev->mmu_mode->get_as_setup(kctx, &as_setup);
 			config[0] = as_setup.transtab;
 			config[1] = as_setup.memattr;
-			config[2] = 0;
+			config[2] = as_setup.transcfg;
 			memcpy(buffer, &config, sizeof(config));
 			mmu_dump_buffer += sizeof(config);
 			size_left -= sizeof(config);
@@ -1130,10 +1353,8 @@ void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages)
 				&mmu_dump_buffer,
 				&size_left);
 
-		if (!size) {
-			vfree(kaddr);
-			return NULL;
-		}
+		if (!size)
+			goto fail_free;
 
 		/* Add on the size for the end marker */
 		size += sizeof(u64);
@@ -1144,15 +1365,20 @@ void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages)
 
 		if (size > nr_pages * PAGE_SIZE || size_left < sizeof(u64)) {
 			/* The buffer isn't big enough - free the memory and return failure */
-			vfree(kaddr);
-			return NULL;
+			goto fail_free;
 		}
 
 		/* Add the end marker */
 		memcpy(mmu_dump_buffer, &end_marker, sizeof(u64));
 	}
 
+	mutex_unlock(&kctx->mmu_lock);
 	return kaddr;
+
+fail_free:
+	vfree(kaddr);
+	mutex_unlock(&kctx->mmu_lock);
+	return NULL;
 }
 KBASE_EXPORT_TEST_API(kbase_mmu_dump);
 
@@ -1193,13 +1419,17 @@ void bus_fault_worker(struct work_struct *data)
 #endif /* KBASE_GPU_RESET_EN */
 	/* NOTE: If GPU already powered off for suspend, we don't need to switch to unmapped */
 	if (!kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
+		unsigned long flags;
 
 		/* switch to UNMAPPED mode, will abort all jobs and stop any hw counter dumping */
 		/* AS transaction begin */
 		mutex_lock(&kbdev->as[as_no].transaction_mutex);
 
 		/* Set the MMU into unmapped mode */
-		kbase_mmu_disable_as(kbdev, as_no);
+		spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags);
+		kbase_mmu_disable(kctx);
+		spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock,
+				flags);
 
 		mutex_unlock(&kbdev->as[as_no].transaction_mutex);
 		/* AS transaction end */
@@ -1317,6 +1547,15 @@ const char *kbase_exception_name(struct kbase_device *kbdev, u32 exception_code)
 		e = "TRANSLATION_FAULT";
 		break;
 	case 0xC8:
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+	case 0xC9:
+	case 0xCA:
+	case 0xCB:
+	case 0xCC:
+	case 0xCD:
+	case 0xCE:
+	case 0xCF:
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
 		e = "PERMISSION_FAULT";
 		break;
 	case 0xD0:
@@ -1330,8 +1569,38 @@ const char *kbase_exception_name(struct kbase_device *kbdev, u32 exception_code)
 		e = "TRANSTAB_BUS_FAULT";
 		break;
 	case 0xD8:
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+	case 0xD9:
+	case 0xDA:
+	case 0xDB:
+	case 0xDC:
+	case 0xDD:
+	case 0xDE:
+	case 0xDF:
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
 		e = "ACCESS_FLAG";
 		break;
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+	case 0xE0:
+	case 0xE1:
+	case 0xE2:
+	case 0xE3:
+	case 0xE4:
+	case 0xE5:
+	case 0xE6:
+	case 0xE7:
+		e = "ADDRESS_SIZE_FAULT";
+		break;
+	case 0xE8:
+	case 0xE9:
+	case 0xEA:
+	case 0xEB:
+	case 0xEC:
+	case 0xED:
+	case 0xEE:
+	case 0xEF:
+		e = "MEMORY_ATTRIBUTES_FAULT";
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
 		break;
 	default:
 		e = "UNKNOWN";
@@ -1345,7 +1614,12 @@ static const char *access_type_name(struct kbase_device *kbdev,
 		u32 fault_status)
 {
 	switch (fault_status & AS_FAULTSTATUS_ACCESS_TYPE_MASK) {
+	case AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC:
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+		return "ATOMIC";
+#else
 		return "UNKNOWN";
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
 	case AS_FAULTSTATUS_ACCESS_TYPE_READ:
 		return "READ";
 	case AS_FAULTSTATUS_ACCESS_TYPE_WRITE:
@@ -1441,7 +1715,9 @@ static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx,
 	}
 #endif /* KBASE_GPU_RESET_EN */
 	/* switch to UNMAPPED mode, will abort all jobs and stop any hw counter dumping */
-	kbase_mmu_disable_as(kbdev, as_no);
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+	kbase_mmu_disable(kctx);
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
 
 	mutex_unlock(&as->transaction_mutex);
 	/* AS transaction end */
@@ -1678,8 +1954,15 @@ void kbase_mmu_interrupt_process(struct kbase_device *kbdev, struct kbase_contex
 		 */
 		kbasep_js_clear_submit_allowed(js_devdata, kctx);
 
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+		dev_warn(kbdev->dev,
+				"Bus error in AS%d at VA=0x%016llx, IPA=0x%016llx\n",
+				as->number, as->fault_addr,
+				as->fault_extra_addr);
+#else
 		dev_warn(kbdev->dev, "Bus error in AS%d at 0x%016llx\n",
 				as->number, as->fault_addr);
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
 
 		/*
 		 * We need to switch to UNMAPPED mode - but we do this in a
diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_mmu_mode_lpae.c b/drivers/gpu/arm/midgard_for_linux/mali_kbase_mmu_mode_lpae.c
index 079ef81d06d1..683cabb797db 100644
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_mmu_mode_lpae.c
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_mmu_mode_lpae.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -77,17 +77,25 @@ static void mmu_get_as_setup(struct kbase_context *kctx,
 		(AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY <<
 		(AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY * 8)) |
 		(AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL    <<
-		(AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8)) |
+		(AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8))    |
 		(AS_MEMATTR_LPAE_WRITE_ALLOC           <<
-		(AS_MEMATTR_INDEX_WRITE_ALLOC * 8)) |
+		(AS_MEMATTR_INDEX_WRITE_ALLOC * 8))           |
+		(AS_MEMATTR_LPAE_OUTER_IMPL_DEF        <<
+		(AS_MEMATTR_INDEX_OUTER_IMPL_DEF * 8))        |
+		(AS_MEMATTR_LPAE_OUTER_WA              <<
+		(AS_MEMATTR_INDEX_OUTER_WA * 8))              |
 		0; /* The other indices are unused for now */
 
-	setup->transtab = (u64)kctx->pgd &
-		((0xFFFFFFFFULL << 32) | AS_TRANSTAB_LPAE_ADDR_SPACE_MASK);
-
-	setup->transtab |= AS_TRANSTAB_LPAE_ADRMODE_TABLE;
-	setup->transtab |= AS_TRANSTAB_LPAE_READ_INNER;
+	setup->transtab = ((u64)kctx->pgd &
+		((0xFFFFFFFFULL << 32) | AS_TRANSTAB_LPAE_ADDR_SPACE_MASK)) |
+		AS_TRANSTAB_LPAE_ADRMODE_TABLE |
+		AS_TRANSTAB_LPAE_READ_INNER;
 
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+	setup->transcfg = AS_TRANSCFG_ADRMODE_LEGACY;
+#else
+	setup->transcfg = 0;
+#endif
 }
 
 static void mmu_update(struct kbase_context *kctx)
@@ -109,6 +117,9 @@ static void mmu_disable_as(struct kbase_device *kbdev, int as_nr)
 
 	current_setup->transtab = AS_TRANSTAB_LPAE_ADRMODE_UNMAPPED;
 
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+	current_setup->transcfg = AS_TRANSCFG_ADRMODE_LEGACY;
+#endif
 
 	/* Apply the address space setting */
 	kbase_mmu_hw_configure(kbdev, as, NULL);
diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_platform_fake.c b/drivers/gpu/arm/midgard_for_linux/mali_kbase_platform_fake.c
index 5bbd6d485638..1a44957fe44a 100644
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_platform_fake.c
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_platform_fake.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2014, 2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -23,9 +23,6 @@
 #include <linux/platform_device.h>
 #include <linux/string.h>
 
-#ifdef CONFIG_MACH_MANTA
-#include <plat/devs.h>
-#endif
 
 /*
  * This file is included only for type definitions and functions belonging to
@@ -62,6 +59,7 @@ static void kbasep_config_parse_io_resources(const struct kbase_io_resources *io
 	linux_resources[0].start = io_resources->io_memory_region.start;
 	linux_resources[0].end   = io_resources->io_memory_region.end;
 	linux_resources[0].flags = IORESOURCE_MEM;
+
 	linux_resources[1].start = io_resources->job_irq_number;
 	linux_resources[1].end   = io_resources->job_irq_number;
 	linux_resources[1].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL;
diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_pm.c b/drivers/gpu/arm/midgard_for_linux/mali_kbase_pm.c
index 261441fa145b..97d543464c28 100644
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_pm.c
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_pm.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -21,10 +21,10 @@
  * @file mali_kbase_pm.c
  * Base kernel power management APIs
  */
+
 #include <mali_kbase.h>
 #include <mali_midg_regmap.h>
-#include <mali_kbase_config_defaults.h>
-#include <mali_kbase_instr.h>
+#include <mali_kbase_vinstr.h>
 
 #include <mali_kbase_pm.h>
 
@@ -151,6 +151,10 @@ void kbase_pm_suspend(struct kbase_device *kbdev)
 {
 	KBASE_DEBUG_ASSERT(kbdev);
 
+	/* Suspend vinstr.
+	 * This call will block until vinstr is suspended. */
+	kbase_vinstr_suspend(kbdev->vinstr_ctx);
+
 	mutex_lock(&kbdev->pm.lock);
 	KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev));
 	kbdev->pm.suspending = true;
@@ -164,9 +168,6 @@ void kbase_pm_suspend(struct kbase_device *kbdev)
 	 * the PM active count references */
 	kbasep_js_suspend(kbdev);
 
-	/* Suspend any counter collection that might be happening */
-	kbase_instr_hwcnt_suspend(kbdev);
-
 	/* Wait for the active count to reach zero. This is not the same as
 	 * waiting for a power down, since not all policies power down when this
 	 * reaches zero. */
@@ -186,9 +187,6 @@ void kbase_pm_resume(struct kbase_device *kbdev)
 	/* Initial active call, to power on the GPU/cores if needed */
 	kbase_pm_context_active(kbdev);
 
-	/* Re-enable instrumentation, if it was previously disabled */
-	kbase_instr_hwcnt_resume(kbdev);
-
 	/* Resume any blocked atoms (which may cause contexts to be scheduled in
 	 * and dependent atoms to run) */
 	kbase_resume_suspended_soft_jobs(kbdev);
@@ -200,5 +198,8 @@ void kbase_pm_resume(struct kbase_device *kbdev)
 	/* Matching idle call, to power off the GPU/cores if we didn't actually
 	 * need it and the policy doesn't want it on */
 	kbase_pm_context_idle(kbdev);
+
+	/* Resume vinstr operation */
+	kbase_vinstr_resume(kbdev->vinstr_ctx);
 }
 
diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_regs_history_debugfs.c b/drivers/gpu/arm/midgard_for_linux/mali_kbase_regs_history_debugfs.c
new file mode 100644
index 000000000000..c970650069cd
--- /dev/null
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_regs_history_debugfs.c
@@ -0,0 +1,130 @@
+/*
+ *
+ * (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include "mali_kbase.h"
+
+#include "mali_kbase_regs_history_debugfs.h"
+
+#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI)
+
+#include <linux/debugfs.h>
+
+
+static int regs_history_size_get(void *data, u64 *val)
+{
+	struct kbase_io_history *const h = data;
+
+	*val = h->size;
+
+	return 0;
+}
+
+static int regs_history_size_set(void *data, u64 val)
+{
+	struct kbase_io_history *const h = data;
+
+	return kbase_io_history_resize(h, (u16)val);
+}
+
+
+DEFINE_SIMPLE_ATTRIBUTE(regs_history_size_fops,
+		regs_history_size_get,
+		regs_history_size_set,
+		"%llu\n");
+
+
+/**
+ * regs_history_show - show callback for the register access history file.
+ *
+ * @sfile: The debugfs entry
+ * @data: Data associated with the entry
+ *
+ * This function is called to dump all recent accesses to the GPU registers.
+ *
+ * @return 0 if the data was successfully printed to the debugfs entry file,
+ * failure otherwise
+ */
+static int regs_history_show(struct seq_file *sfile, void *data)
+{
+	struct kbase_io_history *const h = sfile->private;
+	u16 i;
+	size_t iters;
+	unsigned long flags;
+
+	if (!h->enabled) {
+		seq_puts(sfile, "The register access history is disabled\n");
+		goto out;
+	}
+
+	spin_lock_irqsave(&h->lock, flags);
+
+	iters = (h->size > h->count) ? h->count : h->size;
+	seq_printf(sfile, "Last %zu register accesses of %zu total:\n", iters,
+			h->count);
+	for (i = 0; i < iters; ++i) {
+		struct kbase_io_access *io =
+			&h->buf[(h->count - iters + i) % h->size];
+		char const access = (io->addr & 1) ? 'w' : 'r';
+
+		seq_printf(sfile, "%6i: %c: reg 0x%p val %08x\n", i, access,
+				(void *)(io->addr & ~0x1), io->value);
+	}
+
+	spin_unlock_irqrestore(&h->lock, flags);
+
+out:
+	return 0;
+}
+
+
+/**
+ * regs_history_open - open operation for regs_history debugfs file
+ *
+ * @in: &struct inode pointer
+ * @file: &struct file pointer
+ *
+ * @return 0 on success, or a negative error code otherwise
+ */
+static int regs_history_open(struct inode *in, struct file *file)
+{
+	return single_open(file, &regs_history_show, in->i_private);
+}
+
+
+static const struct file_operations regs_history_fops = {
+	.open = &regs_history_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+
+void kbasep_regs_history_debugfs_init(struct kbase_device *kbdev)
+{
+	debugfs_create_bool("regs_history_enabled", S_IRUGO | S_IWUSR,
+			kbdev->mali_debugfs_directory,
+			&kbdev->io_history.enabled);
+	debugfs_create_file("regs_history_size", S_IRUGO | S_IWUSR,
+			kbdev->mali_debugfs_directory,
+			&kbdev->io_history, &regs_history_size_fops);
+	debugfs_create_file("regs_history", S_IRUGO,
+			kbdev->mali_debugfs_directory, &kbdev->io_history,
+			&regs_history_fops);
+}
+
+
+#endif /* CONFIG_DEBUG_FS */
diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_regs_history_debugfs.h b/drivers/gpu/arm/midgard_for_linux/mali_kbase_regs_history_debugfs.h
new file mode 100644
index 000000000000..f10837002330
--- /dev/null
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_regs_history_debugfs.h
@@ -0,0 +1,50 @@
+/*
+ *
+ * (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * Header file for register access history support via debugfs
+ *
+ * This interface is made available via /sys/kernel/debug/mali#/regs_history*.
+ *
+ * Usage:
+ * - regs_history_enabled: whether recording of register accesses is enabled.
+ *   Write 'y' to enable, 'n' to disable.
+ * - regs_history_size: size of the register history buffer, must be > 0
+ * - regs_history: returns information about the most recent register accesses.
+ */
+
+#ifndef _KBASE_REGS_HISTORY_DEBUGFS_H
+#define _KBASE_REGS_HISTORY_DEBUGFS_H
+
+struct kbase_device;
+
+#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI)
+
+/**
+ * kbasep_regs_history_debugfs_init - add debugfs entries for register history
+ *
+ * @kbdev: Pointer to kbase_device containing the register history
+ */
+void kbasep_regs_history_debugfs_init(struct kbase_device *kbdev);
+
+#else /* CONFIG_DEBUG_FS */
+
+#define kbasep_regs_history_debugfs_init CSTD_NOP
+
+#endif /* CONFIG_DEBUG_FS */
+
+#endif  /*_KBASE_REGS_HISTORY_DEBUGFS_H*/
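
The only kernel-side entry point here is the init hook; everything else is driven from userspace through the three debugfs files listed above. The call site below is an assumption (the real hook is wired up outside this hunk); the CSTD_NOP fallback makes the call safe even when debugfs support is not built in:

	/* Hypothetical call site: register the history entries alongside the
	 * device's other debugfs files */
	static void example_device_debugfs_setup(struct kbase_device *kbdev)
	{
		kbasep_regs_history_debugfs_init(kbdev);
	}
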
diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_replay.c b/drivers/gpu/arm/midgard_for_linux/mali_kbase_replay.c
index 71f005e32521..d3a3dbfa5241 100644
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_replay.c
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_replay.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -27,15 +27,11 @@
 #include <mali_kbase_mem_linux.h>
 
 #define JOB_NOT_STARTED 0
-#define JOB_TYPE_MASK      0xfe
-#define JOB_TYPE_NULL      (1 << 1)
-#define JOB_TYPE_VERTEX    (5 << 1)
-#define JOB_TYPE_TILER     (7 << 1)
-#define JOB_TYPE_FUSED     (8 << 1)
-#define JOB_TYPE_FRAGMENT  (9 << 1)
-
-#define JOB_FLAG_DESC_SIZE           (1 << 0)
-#define JOB_FLAG_PERFORM_JOB_BARRIER (1 << 8)
+#define JOB_TYPE_NULL      (1)
+#define JOB_TYPE_VERTEX    (5)
+#define JOB_TYPE_TILER     (7)
+#define JOB_TYPE_FUSED     (8)
+#define JOB_TYPE_FRAGMENT  (9)
 
 #define JOB_HEADER_32_FBD_OFFSET (31*4)
 #define JOB_HEADER_64_FBD_OFFSET (44*4)
@@ -58,17 +54,9 @@
 #define JOB_SOURCE_ID(status)		(((status) >> 16) & 0xFFFF)
 #define JOB_POLYGON_LIST		(0x03)
 
-struct job_head {
-	u32 status;
-	u32 not_complete_index;
-	u64 fault_addr;
-	u16 flags;
-	u16 index;
-	u16 dependencies[2];
-	union {
-		u64 _64;
-		u32 _32;
-	} next;
+struct fragment_job {
+	struct job_descriptor_header header;
+
 	u32 x[2];
 	union {
 		u64 _64;
@@ -77,28 +65,43 @@ struct job_head {
 };
 
 static void dump_job_head(struct kbase_context *kctx, char *head_str,
-		struct job_head *job)
+		struct job_descriptor_header *job)
 {
 #ifdef CONFIG_MALI_DEBUG
 	dev_dbg(kctx->kbdev->dev, "%s\n", head_str);
-	dev_dbg(kctx->kbdev->dev, "addr               = %p\n"
-			"status             = %x\n"
-			"not_complete_index = %x\n"
-			"fault_addr         = %llx\n"
-			"flags              = %x\n"
-			"index              = %x\n"
-			"dependencies       = %x,%x\n",
-			job, job->status, job->not_complete_index,
-			job->fault_addr, job->flags, job->index,
-			job->dependencies[0],
-			job->dependencies[1]);
-
-	if (job->flags & JOB_FLAG_DESC_SIZE)
+	dev_dbg(kctx->kbdev->dev,
+			"addr                  = %p\n"
+			"exception_status      = %x (Source ID: 0x%x Access: 0x%x Exception: 0x%x)\n"
+			"first_incomplete_task = %x\n"
+			"fault_pointer         = %llx\n"
+			"job_descriptor_size   = %x\n"
+			"job_type              = %x\n"
+			"job_barrier           = %x\n"
+			"_reserved_01          = %x\n"
+			"_reserved_02          = %x\n"
+			"_reserved_03          = %x\n"
+			"_reserved_04/05       = %x,%x\n"
+			"job_index             = %x\n"
+			"dependencies          = %x,%x\n",
+			job, job->exception_status,
+			JOB_SOURCE_ID(job->exception_status),
+			(job->exception_status >> 8) & 0x3,
+			job->exception_status  & 0xFF,
+			job->first_incomplete_task,
+			job->fault_pointer, job->job_descriptor_size,
+			job->job_type, job->job_barrier, job->_reserved_01,
+			job->_reserved_02, job->_reserved_03,
+			job->_reserved_04, job->_reserved_05,
+			job->job_index,
+			job->job_dependency_index_1,
+			job->job_dependency_index_2);
+
+	if (job->job_descriptor_size)
 		dev_dbg(kctx->kbdev->dev, "next               = %llx\n",
-				job->next._64);
+				job->next_job._64);
 	else
 		dev_dbg(kctx->kbdev->dev, "next               = %x\n",
-				job->next._32);
+				job->next_job._32);
 #endif
 }
 
@@ -372,77 +375,81 @@ static int kbasep_replay_reset_job(struct kbase_context *kctx,
 		u32 default_weight, u16 hw_job_id_offset,
 		bool first_in_chain, bool fragment_chain)
 {
-	struct job_head *job;
+	struct fragment_job *frag_job;
+	struct job_descriptor_header *job;
 	u64 new_job_header;
 	struct kbase_vmap_struct map;
 
-	job = kbase_vmap(kctx, *job_header, sizeof(*job), &map);
-	if (!job) {
+	frag_job = kbase_vmap(kctx, *job_header, sizeof(*frag_job), &map);
+	if (!frag_job) {
 		dev_err(kctx->kbdev->dev,
 				 "kbasep_replay_parse_jc: failed to map jc\n");
 		return -EINVAL;
 	}
+	job = &frag_job->header;
 
 	dump_job_head(kctx, "Job header:", job);
 
-	if (job->status == JOB_NOT_STARTED && !fragment_chain) {
+	if (job->exception_status == JOB_NOT_STARTED && !fragment_chain) {
 		dev_err(kctx->kbdev->dev, "Job already not started\n");
 		goto out_unmap;
 	}
-	job->status = JOB_NOT_STARTED;
+	job->exception_status = JOB_NOT_STARTED;
 
-	if ((job->flags & JOB_TYPE_MASK) == JOB_TYPE_VERTEX)
-		job->flags = (job->flags & ~JOB_TYPE_MASK) | JOB_TYPE_NULL;
+	if (job->job_type == JOB_TYPE_VERTEX)
+		job->job_type = JOB_TYPE_NULL;
 
-	if ((job->flags & JOB_TYPE_MASK) == JOB_TYPE_FUSED) {
+	if (job->job_type == JOB_TYPE_FUSED) {
 		dev_err(kctx->kbdev->dev, "Fused jobs can not be replayed\n");
 		goto out_unmap;
 	}
 
 	if (first_in_chain)
-		job->flags |= JOB_FLAG_PERFORM_JOB_BARRIER;
+		job->job_barrier = 1;
 
-	if ((job->dependencies[0] + hw_job_id_offset) > JOB_HEADER_ID_MAX ||
-	    (job->dependencies[1] + hw_job_id_offset) > JOB_HEADER_ID_MAX ||
-	    (job->index + hw_job_id_offset) > JOB_HEADER_ID_MAX) {
+	if ((job->job_dependency_index_1 + hw_job_id_offset) >
+			JOB_HEADER_ID_MAX ||
+	    (job->job_dependency_index_2 + hw_job_id_offset) >
+			JOB_HEADER_ID_MAX ||
+	    (job->job_index + hw_job_id_offset) > JOB_HEADER_ID_MAX) {
 		dev_err(kctx->kbdev->dev,
 			     "Job indicies/dependencies out of valid range\n");
 		goto out_unmap;
 	}
 
-	if (job->dependencies[0])
-		job->dependencies[0] += hw_job_id_offset;
-	if (job->dependencies[1])
-		job->dependencies[1] += hw_job_id_offset;
+	if (job->job_dependency_index_1)
+		job->job_dependency_index_1 += hw_job_id_offset;
+	if (job->job_dependency_index_2)
+		job->job_dependency_index_2 += hw_job_id_offset;
 
-	job->index += hw_job_id_offset;
+	job->job_index += hw_job_id_offset;
 
-	if (job->flags & JOB_FLAG_DESC_SIZE) {
-		new_job_header = job->next._64;
-		if (!job->next._64)
-			job->next._64 = prev_jc;
+	if (job->job_descriptor_size) {
+		new_job_header = job->next_job._64;
+		if (!job->next_job._64)
+			job->next_job._64 = prev_jc;
 	} else {
-		new_job_header = job->next._32;
-		if (!job->next._32)
-			job->next._32 = prev_jc;
+		new_job_header = job->next_job._32;
+		if (!job->next_job._32)
+			job->next_job._32 = prev_jc;
 	}
 	dump_job_head(kctx, "Updated to:", job);
 
-	if ((job->flags & JOB_TYPE_MASK) == JOB_TYPE_TILER) {
-		bool job_64 = (job->flags & JOB_FLAG_DESC_SIZE) != 0;
+	if (job->job_type == JOB_TYPE_TILER) {
+		bool job_64 = job->job_descriptor_size != 0;
 
 		if (kbasep_replay_reset_tiler_job(kctx, *job_header,
 				tiler_heap_free, hierarchy_mask,
 				default_weight, job_64) != 0)
 			goto out_unmap;
 
-	} else if ((job->flags & JOB_TYPE_MASK) == JOB_TYPE_FRAGMENT) {
+	} else if (job->job_type == JOB_TYPE_FRAGMENT) {
 		u64 fbd_address;
 
-		if (job->flags & JOB_FLAG_DESC_SIZE)
-			fbd_address = job->fragment_fbd._64;
+		if (job->job_descriptor_size)
+			fbd_address = frag_job->fragment_fbd._64;
 		else
-			fbd_address = (u64)job->fragment_fbd._32;
+			fbd_address = (u64)frag_job->fragment_fbd._32;
 
 		if (fbd_address & FBD_TYPE) {
 			if (kbasep_replay_reset_mfbd(kctx,
@@ -485,7 +492,7 @@ static int kbasep_replay_find_hw_job_id(struct kbase_context *kctx,
 		u64 jc,	u16 *hw_job_id)
 {
 	while (jc) {
-		struct job_head *job;
+		struct job_descriptor_header *job;
 		struct kbase_vmap_struct map;
 
 		dev_dbg(kctx->kbdev->dev,
@@ -498,13 +505,13 @@ static int kbasep_replay_find_hw_job_id(struct kbase_context *kctx,
 			return -EINVAL;
 		}
 
-		if (job->index > *hw_job_id)
-			*hw_job_id = job->index;
+		if (job->job_index > *hw_job_id)
+			*hw_job_id = job->job_index;
 
-		if (job->flags & JOB_FLAG_DESC_SIZE)
-			jc = job->next._64;
+		if (job->job_descriptor_size)
+			jc = job->next_job._64;
 		else
-			jc = job->next._32;
+			jc = job->next_job._32;
 
 		kbase_vunmap(kctx, &map);
 	}
@@ -749,7 +756,7 @@ static int kbasep_replay_parse_payload(struct kbase_context *kctx,
 					      struct base_jd_atom_v2 *t_atom,
 					      struct base_jd_atom_v2 *f_atom)
 {
-	base_jd_replay_payload *payload;
+	base_jd_replay_payload *payload = NULL;
 	u64 next;
 	u64 prev_jc = 0;
 	u16 hw_job_id_offset = 0;
@@ -760,12 +767,27 @@ static int kbasep_replay_parse_payload(struct kbase_context *kctx,
 			replay_atom->jc, sizeof(payload));
 
 	payload = kbase_vmap(kctx, replay_atom->jc, sizeof(*payload), &map);
-
 	if (!payload) {
 		dev_err(kctx->kbdev->dev, "kbasep_replay_parse_payload: failed to map payload into kernel space\n");
 		return -EINVAL;
 	}
 
+#ifdef BASE_LEGACY_UK10_2_SUPPORT
+	if (KBASE_API_VERSION(10, 3) > replay_atom->kctx->api_version) {
+		base_jd_replay_payload_uk10_2 *payload_uk10_2;
+		u16 tiler_core_req;
+		u16 fragment_core_req;
+
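+		/* Payloads from a pre-10.3 (UK 10.2) userspace carry 16-bit
+		 * core requirement fields; read them out and widen them to
+		 * the current 32-bit fields, keeping the low 15 bits only. */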
+		payload_uk10_2 = (base_jd_replay_payload_uk10_2 *) payload;
+		memcpy(&tiler_core_req, &payload_uk10_2->tiler_core_req,
+				sizeof(tiler_core_req));
+		memcpy(&fragment_core_req, &payload_uk10_2->fragment_core_req,
+				sizeof(fragment_core_req));
+		payload->tiler_core_req = (u32)(tiler_core_req & 0x7fff);
+		payload->fragment_core_req = (u32)(fragment_core_req & 0x7fff);
+	}
+#endif /* BASE_LEGACY_UK10_2_SUPPORT */
+
 #ifdef CONFIG_MALI_DEBUG
 	dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_payload: payload=%p\n", payload);
 	dev_dbg(kctx->kbdev->dev, "Payload structure:\n"
@@ -787,18 +809,35 @@ static int kbasep_replay_parse_payload(struct kbase_context *kctx,
 						   payload->fragment_core_req);
 	payload_dump(kctx, payload);
 #endif
-
 	t_atom->core_req = payload->tiler_core_req | BASEP_JD_REQ_EVENT_NEVER;
 	f_atom->core_req = payload->fragment_core_req | BASEP_JD_REQ_EVENT_NEVER;
 
 	/* Sanity check core requirements*/
-	if ((t_atom->core_req & BASEP_JD_REQ_ATOM_TYPE &
-			       ~BASE_JD_REQ_COHERENT_GROUP) != BASE_JD_REQ_T ||
-	    (f_atom->core_req & BASEP_JD_REQ_ATOM_TYPE &
-			      ~BASE_JD_REQ_COHERENT_GROUP) != BASE_JD_REQ_FS ||
+	if ((t_atom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_T ||
+	    (f_atom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_FS ||
 	     t_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES ||
 	     f_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) {
-		dev_err(kctx->kbdev->dev, "Invalid core requirements\n");
+
+		int t_atom_type = t_atom->core_req & BASE_JD_REQ_ATOM_TYPE &
+				~BASE_JD_REQ_COHERENT_GROUP;
+		int f_atom_type = f_atom->core_req & BASE_JD_REQ_ATOM_TYPE &
+				~BASE_JD_REQ_COHERENT_GROUP &
+				~BASE_JD_REQ_FS_AFBC;
+		int t_has_ex_res = t_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES;
+		int f_has_ex_res = f_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES;
+
+		if (t_atom_type != BASE_JD_REQ_T) {
+			dev_err(kctx->kbdev->dev, "Invalid core requirement: Tiler atom not a tiler job. Was: 0x%x Expected: 0x%x\n",
+			    t_atom_type, BASE_JD_REQ_T);
+		}
+		if (f_atom_type != BASE_JD_REQ_FS) {
+			dev_err(kctx->kbdev->dev, "Invalid core requirement: Fragment shader atom not a fragment shader. Was: 0x%x Expected: 0x%x\n",
+			    f_atom_type, BASE_JD_REQ_FS);
+		}
+		if (t_has_ex_res) {
+			dev_err(kctx->kbdev->dev, "Invalid core requirement: Tiler atom has external resources.\n");
+		}
+		if (f_has_ex_res) {
+			dev_err(kctx->kbdev->dev, "Invalid core requirement: Fragment shader atom has external resources.\n");
+		}
+
 		goto out;
 	}
 
@@ -957,7 +996,7 @@ static bool kbase_replay_fault_check(struct kbase_jd_atom *katom)
 	base_jd_replay_payload *payload;
 	u64 job_header;
 	u64 job_loop_detect;
-	struct job_head *job;
+	struct job_descriptor_header *job;
 	struct kbase_vmap_struct job_map;
 	struct kbase_vmap_struct map;
 	bool err = false;
@@ -1012,41 +1051,22 @@ static bool kbase_replay_fault_check(struct kbase_jd_atom *katom)
 		}
 
 
-#ifdef CONFIG_MALI_DEBUG
-		dev_dbg(dev, "\njob_head structure:\n"
-			     "Source ID:0x%x Access:0x%x Exception:0x%x\n"
-			     "at job addr               = %p\n"
-			     "not_complete_index        = 0x%x\n"
-			     "fault_addr                = 0x%llx\n"
-			     "flags                     = 0x%x\n"
-			     "index                     = 0x%x\n"
-			     "dependencies              = 0x%x,0x%x\n",
-			     JOB_SOURCE_ID(job->status),
-			     ((job->status >> 8) & 0x3),
-			     (job->status  & 0xFF),
-			     job,
-			     job->not_complete_index,
-			     job->fault_addr,
-			     job->flags,
-			     job->index,
-			     job->dependencies[0],
-			     job->dependencies[1]);
-#endif
+		dump_job_head(kctx, "\njob_head structure:\n", job);
 
 		/* Replay only when the polygon list reader caused the
 		 * DATA_INVALID_FAULT */
 		if ((BASE_JD_EVENT_DATA_INVALID_FAULT == katom->event_code) &&
-		    (JOB_POLYGON_LIST == JOB_SOURCE_ID(job->status))) {
+		   (JOB_POLYGON_LIST == JOB_SOURCE_ID(job->exception_status))) {
 			err = true;
 			kbase_vunmap(kctx, &job_map);
 			break;
 		}
 
 		/* Move on to next fragment job in the list */
-		if (job->flags & JOB_FLAG_DESC_SIZE)
-			job_header = job->next._64;
+		if (job->job_descriptor_size)
+			job_header = job->next_job._64;
 		else
-			job_header = job->next._32;
+			job_header = job->next_job._32;
 
 		kbase_vunmap(kctx, &job_map);
 
diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_softjobs.c b/drivers/gpu/arm/midgard_for_linux/mali_kbase_softjobs.c
index 108c49d9150a..07b862546f80 100644
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_softjobs.c
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_softjobs.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -19,14 +19,23 @@
 
 #include <mali_kbase.h>
 
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+#include <linux/dma-buf.h>
+#include <asm/cacheflush.h>
+#endif /* defined(CONFIG_DMA_SHARED_BUFFER) */
 #include <linux/dma-mapping.h>
 #ifdef CONFIG_SYNC
 #include "sync.h"
 #include <linux/syscalls.h>
 #include "mali_kbase_sync.h"
 #endif
+#include <mali_base_kernel.h>
 #include <mali_kbase_hwaccess_time.h>
+#include <mali_kbase_mem_linux.h>
 #include <linux/version.h>
+#include <linux/ktime.h>
+#include <linux/pfn.h>
+#include <linux/sched.h>
 
 /* Mask to check cache alignment of data structures */
 #define KBASE_CACHE_ALIGNMENT_MASK		((1<<L1_CACHE_SHIFT)-1)
@@ -38,13 +47,91 @@
  * executed within the driver rather than being handed over to the GPU.
  */
 
+void kbasep_add_waiting_soft_job(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	unsigned long lflags;
+
+	spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+	list_add_tail(&katom->queue, &kctx->waiting_soft_jobs);
+	spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+}
+
+void kbasep_remove_waiting_soft_job(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	unsigned long lflags;
+
+	spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+	list_del(&katom->queue);
+	spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+}
+
+static void kbasep_add_waiting_with_timeout(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+
+	/* Record the start time of this atom so we can cancel it at
+	 * the right time.
+	 */
+	katom->start_timestamp = ktime_get();
+
+	/* Add the atom to the waiting list before the timer is
+	 * (re)started to make sure that it gets processed.
+	 */
+	kbasep_add_waiting_soft_job(katom);
+
+	/* Schedule timeout of this atom after a period if it is not active */
+	if (!timer_pending(&kctx->soft_job_timeout)) {
+		int timeout_ms = atomic_read(
+				&kctx->kbdev->js_data.soft_job_timeout_ms);
+		mod_timer(&kctx->soft_job_timeout,
+			  jiffies + msecs_to_jiffies(timeout_ms));
+	}
+}
+
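+/* A soft event's state is a single byte at GPU virtual address @evt; the
+ * helpers below map that byte into kernel space to read or update it. */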
+static int kbasep_read_soft_event_status(
+		struct kbase_context *kctx, u64 evt, unsigned char *status)
+{
+	unsigned char *mapped_evt;
+	struct kbase_vmap_struct map;
+
+	mapped_evt = kbase_vmap(kctx, evt, sizeof(*mapped_evt), &map);
+	if (!mapped_evt)
+		return -EFAULT;
+
+	*status = *mapped_evt;
+
+	kbase_vunmap(kctx, &map);
+
+	return 0;
+}
+
+static int kbasep_write_soft_event_status(
+		struct kbase_context *kctx, u64 evt, unsigned char new_status)
+{
+	unsigned char *mapped_evt;
+	struct kbase_vmap_struct map;
+
+	if ((new_status != BASE_JD_SOFT_EVENT_SET) &&
+	    (new_status != BASE_JD_SOFT_EVENT_RESET))
+		return -EINVAL;
+
+	mapped_evt = kbase_vmap(kctx, evt, sizeof(*mapped_evt), &map);
+	if (!mapped_evt)
+		return -EFAULT;
+
+	*mapped_evt = new_status;
+
+	kbase_vunmap(kctx, &map);
+
+	return 0;
+}
+
 static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom)
 {
-	struct kbase_va_region *reg;
-	phys_addr_t addr = 0;
-	u64 pfn;
-	u32 offset;
-	char *page;
+	struct kbase_vmap_struct map;
+	void *user_result;
 	struct timespec ts;
 	struct base_dump_cpu_gpu_counters data;
 	u64 system_time;
@@ -63,11 +150,16 @@ static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom)
 		struct kbasep_js_device_data *js_devdata = &kctx->kbdev->js_data;
 
 		/* We're suspended - queue this on the list of suspended jobs
-		 * Use dep_item[1], because dep_item[0] is in use for 'waiting_soft_jobs' */
+		 * Use dep_item[1], because dep_item[0] was previously in use
+		 * for 'waiting_soft_jobs'.
+		 */
 		mutex_lock(&js_devdata->runpool_mutex);
 		list_add_tail(&katom->dep_item[1], &js_devdata->suspended_soft_jobs_list);
 		mutex_unlock(&js_devdata->runpool_mutex);
 
+		/* Also add this to the list of waiting soft jobs */
+		kbasep_add_waiting_soft_job(katom);
+
 		return pm_active_err;
 	}
 
@@ -81,44 +173,20 @@ static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom)
 	data.system_time = system_time;
 	data.cycle_counter = cycle_counter;
 
-	pfn = jc >> PAGE_SHIFT;
-	offset = jc & ~PAGE_MASK;
-
 	/* Assume this atom will be cancelled until we know otherwise */
 	katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
-	if (offset > 0x1000 - sizeof(data)) {
-		/* Wouldn't fit in the page */
-		return 0;
-	}
-
-	kbase_gpu_vm_lock(kctx);
-	reg = kbase_region_tracker_find_region_enclosing_address(kctx, jc);
-	if (reg &&
-	    (reg->flags & KBASE_REG_GPU_WR) &&
-	    reg->cpu_alloc && reg->cpu_alloc->pages)
-		addr = reg->cpu_alloc->pages[pfn - reg->start_pfn];
-
-	kbase_gpu_vm_unlock(kctx);
-	if (!addr)
-		return 0;
 
-	page = kmap(pfn_to_page(PFN_DOWN(addr)));
-	if (!page)
+	/* GPU_WR access is checked on the range used to return the result to
+	 * userspace for the following reasons:
+	 * - security: this is how imported user buffers are checked.
+	 * - the userspace DDK is guaranteed to assume the region was mapped
+	 *   as GPU_WR. */
+	user_result = kbase_vmap_prot(kctx, jc, sizeof(data), KBASE_REG_GPU_WR,
+			&map);
+	if (!user_result)
 		return 0;
 
-	kbase_sync_single_for_cpu(katom->kctx->kbdev,
-			kbase_dma_addr(pfn_to_page(PFN_DOWN(addr))) +
-			offset, sizeof(data),
-			DMA_BIDIRECTIONAL);
+	memcpy(user_result, &data, sizeof(data));
 
-	memcpy(page + offset, &data, sizeof(data));
-
-	kbase_sync_single_for_device(katom->kctx->kbdev,
-			kbase_dma_addr(pfn_to_page(PFN_DOWN(addr))) +
-			offset, sizeof(data),
-			DMA_BIDIRECTIONAL);
-
-	kunmap(pfn_to_page(PFN_DOWN(addr)));
+	kbase_vunmap(kctx, &map);
 
 	/* Atom was fine - mark it as done */
 	katom->event_code = BASE_JD_EVENT_DONE;
@@ -128,22 +196,6 @@ static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom)
 
 #ifdef CONFIG_SYNC
 
-/* Complete an atom that has returned '1' from kbase_process_soft_job (i.e. has waited)
- *
- * @param katom     The atom to complete
- */
-static void complete_soft_job(struct kbase_jd_atom *katom)
-{
-	struct kbase_context *kctx = katom->kctx;
-
-	mutex_lock(&kctx->jctx.lock);
-	list_del(&katom->dep_item[0]);
-	kbase_finish_soft_job(katom);
-	if (jd_done_nolock(katom, NULL))
-		kbase_js_sched_all(kctx->kbdev);
-	mutex_unlock(&kctx->jctx.lock);
-}
-
 static enum base_jd_event_code kbase_fence_trigger(struct kbase_jd_atom *katom, int result)
 {
 	struct sync_pt *pt;
@@ -185,7 +237,12 @@ static void kbase_fence_wait_worker(struct work_struct *data)
 	katom = container_of(data, struct kbase_jd_atom, work);
 	kctx = katom->kctx;
 
-	complete_soft_job(katom);
+	mutex_lock(&kctx->jctx.lock);
+	kbasep_remove_waiting_soft_job(katom);
+	kbase_finish_soft_job(katom);
+	if (jd_done_nolock(katom, NULL))
+		kbase_js_sched_all(kctx->kbdev);
+	mutex_unlock(&kctx->jctx.lock);
 }
 
 static void kbase_fence_wait_callback(struct sync_fence *fence, struct sync_fence_waiter *waiter)
@@ -202,11 +259,7 @@ static void kbase_fence_wait_callback(struct sync_fence *fence, struct sync_fenc
 	/* Propagate the fence status to the atom.
 	 * If negative then cancel this atom and its dependencies.
 	 */
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
-	if (fence->status < 0)
-#else
-	if (atomic_read(&fence->status) < 0)
-#endif
+	if (kbase_fence_get_status(fence) < 0)
 		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
 
 	/* To prevent a potential deadlock we schedule the work onto the job_done_wq workqueue
@@ -234,18 +287,25 @@ static int kbase_fence_wait(struct kbase_jd_atom *katom)
 	if (ret == 1) {
 		/* Already signalled */
 		return 0;
-	} else if (ret < 0) {
-		goto cancel_atom;
 	}
-	return 1;
 
- cancel_atom:
-	katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
-	/* We should cause the dependant jobs in the bag to be failed,
-	 * to do this we schedule the work queue to complete this job */
-	KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work));
-	INIT_WORK(&katom->work, kbase_fence_wait_worker);
-	queue_work(katom->kctx->jctx.job_done_wq, &katom->work);
+	if (ret < 0) {
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		/* We should cause the dependent jobs in the bag to be failed;
+		 * to do this we schedule the work queue to complete this job */
+		KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work));
+		INIT_WORK(&katom->work, kbase_fence_wait_worker);
+		queue_work(katom->kctx->jctx.job_done_wq, &katom->work);
+	}
+
+#ifdef CONFIG_MALI_FENCE_DEBUG
+	/* The timeout code will add this job to the list of waiting soft jobs.
+	 */
+	kbasep_add_waiting_with_timeout(katom);
+#else
+	kbasep_add_waiting_soft_job(katom);
+#endif
+
 	return 1;
 }
 
@@ -276,6 +336,7 @@ static void kbase_fence_cancel_wait(struct kbase_jd_atom *katom)
 finish_softjob:
 	katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
 
+	kbasep_remove_waiting_soft_job(katom);
 	kbase_finish_soft_job(katom);
 
 	if (jd_done_nolock(katom, NULL))
@@ -283,9 +344,959 @@ finish_softjob:
 }
 #endif /* CONFIG_SYNC */
 
+static void kbasep_soft_event_complete_job(struct work_struct *work)
+{
+	struct kbase_jd_atom *katom = container_of(work, struct kbase_jd_atom,
+			work);
+	struct kbase_context *kctx = katom->kctx;
+	int resched;
+
+	mutex_lock(&kctx->jctx.lock);
+	resched = jd_done_nolock(katom, NULL);
+	mutex_unlock(&kctx->jctx.lock);
+
+	if (resched)
+		kbase_js_sched_all(kctx->kbdev);
+}
+
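+/* Complete any SOFT_EVENT_WAIT atoms that are waiting on the soft event at
+ * GPU address @evt, and stop the soft job timeout timer if no waiting jobs
+ * that need it remain on the list. */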
+void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt)
+{
+	int cancel_timer = 1;
+	struct list_head *entry, *tmp;
+	unsigned long lflags;
+
+	spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+	list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) {
+		struct kbase_jd_atom *katom = list_entry(
+				entry, struct kbase_jd_atom, queue);
+
+		switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+		case BASE_JD_REQ_SOFT_EVENT_WAIT:
+			if (katom->jc == evt) {
+				list_del(&katom->queue);
+
+				katom->event_code = BASE_JD_EVENT_DONE;
+				INIT_WORK(&katom->work,
+					  kbasep_soft_event_complete_job);
+				queue_work(kctx->jctx.job_done_wq,
+					   &katom->work);
+			} else {
+				/* There are still other waiting jobs, so we
+				 * cannot cancel the timer yet.
+				 */
+				cancel_timer = 0;
+			}
+			break;
+#ifdef CONFIG_MALI_FENCE_DEBUG
+		case BASE_JD_REQ_SOFT_FENCE_WAIT:
+			/* Keep the timer running if fence debug is enabled and
+			 * there are waiting fence jobs.
+			 */
+			cancel_timer = 0;
+			break;
+#endif
+		}
+	}
+
+	if (cancel_timer)
+		del_timer(&kctx->soft_job_timeout);
+	spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+}
+
+#ifdef CONFIG_MALI_FENCE_DEBUG
+static char *kbase_fence_debug_status_string(int status)
+{
+	if (status == 0)
+		return "signaled";
+	else if (status > 0)
+		return "active";
+	else
+		return "error";
+}
+
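+/* Recursively walk the dependencies of @katom and warn about any
+ * fence-trigger atoms that have not yet completed, as they may be what is
+ * blocking the fence wait. */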
+static void kbase_fence_debug_check_atom(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct device *dev = kctx->kbdev->dev;
+	int i;
+
+	for (i = 0; i < 2; i++) {
+		struct kbase_jd_atom *dep;
+
+		list_for_each_entry(dep, &katom->dep_head[i], dep_item[i]) {
+			if (dep->status == KBASE_JD_ATOM_STATE_UNUSED ||
+			    dep->status == KBASE_JD_ATOM_STATE_COMPLETED)
+				continue;
+
+			if ((dep->core_req & BASE_JD_REQ_SOFT_JOB_TYPE)
+					== BASE_JD_REQ_SOFT_FENCE_TRIGGER) {
+				struct sync_fence *fence = dep->fence;
+				int status = kbase_fence_get_status(fence);
+
+				/* Found blocked trigger fence. */
+				dev_warn(dev,
+					 "\tVictim trigger atom %d fence [%p] %s: %s\n",
+					 kbase_jd_atom_id(kctx, dep),
+					 fence, fence->name,
+					 kbase_fence_debug_status_string(status));
+			}
+
+			kbase_fence_debug_check_atom(dep);
+		}
+	}
+}
+
+static void kbase_fence_debug_wait_timeout(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct device *dev = katom->kctx->kbdev->dev;
+	struct sync_fence *fence = katom->fence;
+	int timeout_ms = atomic_read(&kctx->kbdev->js_data.soft_job_timeout_ms);
+	int status = kbase_fence_get_status(fence);
+	unsigned long lflags;
+
+	spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+
+	dev_warn(dev, "ctx %d_%d: Atom %d still waiting for fence [%p] after %dms\n",
+		 kctx->tgid, kctx->id,
+		 kbase_jd_atom_id(kctx, katom),
+		 fence, timeout_ms);
+	dev_warn(dev, "\tGuilty fence [%p] %s: %s\n",
+		 fence, fence->name,
+		 kbase_fence_debug_status_string(status));
+
+	/* Search for blocked trigger atoms */
+	kbase_fence_debug_check_atom(katom);
+
+	spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+
+	/* Dump out the full state of all the Android sync fences.
+	 * The function sync_dump() isn't exported to modules, so force
+	 * sync_fence_wait() to time out to trigger sync_dump().
+	 */
+	sync_fence_wait(fence, 1);
+}
+
+struct kbase_fence_debug_work {
+	struct kbase_jd_atom *katom;
+	struct work_struct work;
+};
+
+static void kbase_fence_debug_wait_timeout_worker(struct work_struct *work)
+{
+	struct kbase_fence_debug_work *w = container_of(work,
+			struct kbase_fence_debug_work, work);
+	struct kbase_jd_atom *katom = w->katom;
+	struct kbase_context *kctx = katom->kctx;
+
+	mutex_lock(&kctx->jctx.lock);
+	kbase_fence_debug_wait_timeout(katom);
+	mutex_unlock(&kctx->jctx.lock);
+
+	kfree(w);
+}
+
+static void kbase_fence_debug_timeout(struct kbase_jd_atom *katom)
+{
+	struct kbase_fence_debug_work *work;
+	struct kbase_context *kctx = katom->kctx;
+
+	/* Enqueue fence debug worker. Use job_done_wq to get
+	 * debug print ordered with job completion.
+	 */
+	work = kzalloc(sizeof(struct kbase_fence_debug_work), GFP_ATOMIC);
+	/* Ignore allocation failure. */
+	if (work) {
+		work->katom = katom;
+		INIT_WORK(&work->work, kbase_fence_debug_wait_timeout_worker);
+		queue_work(kctx->jctx.job_done_wq, &work->work);
+	}
+}
+#endif /* CONFIG_MALI_FENCE_DEBUG */
+
+void kbasep_soft_job_timeout_worker(unsigned long data)
+{
+	struct kbase_context *kctx = (struct kbase_context *)data;
+	u32 timeout_ms = (u32)atomic_read(
+			&kctx->kbdev->js_data.soft_job_timeout_ms);
+	struct timer_list *timer = &kctx->soft_job_timeout;
+	ktime_t cur_time = ktime_get();
+	bool restarting = false;
+	unsigned long lflags;
+	struct list_head *entry, *tmp;
+
+	spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+	list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) {
+		struct kbase_jd_atom *katom = list_entry(entry,
+				struct kbase_jd_atom, queue);
+		s64 elapsed_time = ktime_to_ms(ktime_sub(cur_time,
+					katom->start_timestamp));
+
+		if (elapsed_time < (s64)timeout_ms) {
+			restarting = true;
+			continue;
+		}
+
+		switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+		case BASE_JD_REQ_SOFT_EVENT_WAIT:
+			/* Take it out of the list to ensure that it
+			 * will be cancelled in all cases
+			 */
+			list_del(&katom->queue);
+
+			katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+			INIT_WORK(&katom->work, kbasep_soft_event_complete_job);
+			queue_work(kctx->jctx.job_done_wq, &katom->work);
+			break;
+#ifdef CONFIG_MALI_FENCE_DEBUG
+		case BASE_JD_REQ_SOFT_FENCE_WAIT:
+			kbase_fence_debug_timeout(katom);
+			break;
+#endif
+		}
+	}
+
+	if (restarting)
+		mod_timer(timer, jiffies + msecs_to_jiffies(timeout_ms));
+	spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+}
+
+static int kbasep_soft_event_wait(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	unsigned char status;
+
+	/* The status of this soft-job is stored in jc */
+	if (kbasep_read_soft_event_status(kctx, katom->jc, &status)) {
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		return 0;
+	}
+
+	if (status == BASE_JD_SOFT_EVENT_SET)
+		return 0; /* Event already set, nothing to do */
+
+	kbasep_add_waiting_with_timeout(katom);
+
+	return 1;
+}
+
+static void kbasep_soft_event_update_locked(struct kbase_jd_atom *katom,
+				     unsigned char new_status)
+{
+	/* Complete jobs waiting on the same event */
+	struct kbase_context *kctx = katom->kctx;
+
+	if (kbasep_write_soft_event_status(kctx, katom->jc, new_status) != 0) {
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		return;
+	}
+
+	if (new_status == BASE_JD_SOFT_EVENT_SET)
+		kbasep_complete_triggered_soft_events(kctx, katom->jc);
+}
+
+/**
+ * kbase_soft_event_update() - Update soft event state
+ * @kctx: Pointer to context
+ * @event: Event to update
+ * @new_status: New status value of event
+ *
+ * Update the event, and wake up any atoms waiting for the event.
+ *
+ * Return: 0 on success, a negative error code on failure.
+ */
+int kbase_soft_event_update(struct kbase_context *kctx,
+			     u64 event,
+			     unsigned char new_status)
+{
+	int err = 0;
+
+	mutex_lock(&kctx->jctx.lock);
+
+	if (kbasep_write_soft_event_status(kctx, event, new_status)) {
+		err = -ENOENT;
+		goto out;
+	}
+
+	if (new_status == BASE_JD_SOFT_EVENT_SET)
+		kbasep_complete_triggered_soft_events(kctx, event);
+
+out:
+	mutex_unlock(&kctx->jctx.lock);
+
+	return err;
+}
+
+static void kbasep_soft_event_cancel_job(struct kbase_jd_atom *katom)
+{
+	katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+	if (jd_done_nolock(katom, NULL))
+		kbase_js_sched_all(katom->kctx->kbdev);
+}
+
+struct kbase_debug_copy_buffer {
+	size_t size;
+	struct page **pages;
+	int nr_pages;
+	size_t offset;
+	/* To find memory region */
+	u64 gpu_addr;
+
+	struct page **extres_pages;
+	int nr_extres_pages;
+};
+
+static inline void free_user_buffer(struct kbase_debug_copy_buffer *buffer)
+{
+	struct page **pages = buffer->extres_pages;
+	int nr_pages = buffer->nr_extres_pages;
+
+	if (pages) {
+		int i;
+
+		for (i = 0; i < nr_pages; i++) {
+			struct page *pg = pages[i];
+
+			if (pg)
+				put_page(pg);
+		}
+		kfree(pages);
+	}
+}
+
+static void kbase_debug_copy_finish(struct kbase_jd_atom *katom)
+{
+	struct kbase_debug_copy_buffer *buffers =
+			(struct kbase_debug_copy_buffer *)(uintptr_t)katom->jc;
+	unsigned int i;
+	unsigned int nr = katom->nr_extres;
+
+	if (!buffers)
+		return;
+
+	kbase_gpu_vm_lock(katom->kctx);
+	for (i = 0; i < nr; i++) {
+		int p;
+		struct kbase_va_region *reg;
+
+		reg = kbase_region_tracker_find_region_enclosing_address(
+				katom->kctx, buffers[i].gpu_addr);
+
+		if (!buffers[i].pages)
+			break;
+		for (p = 0; p < buffers[i].nr_pages; p++) {
+			struct page *pg = buffers[i].pages[p];
+
+			if (pg)
+				put_page(pg);
+		}
+		kfree(buffers[i].pages);
+		if (reg && reg->gpu_alloc) {
+			switch (reg->gpu_alloc->type) {
+			case KBASE_MEM_TYPE_IMPORTED_USER_BUF:
+			{
+				free_user_buffer(&buffers[i]);
+				break;
+			}
+			default:
+				/* Nothing to be done. */
+				break;
+			}
+			kbase_mem_phy_alloc_put(reg->gpu_alloc);
+		}
+	}
+	kbase_gpu_vm_unlock(katom->kctx);
+	kfree(buffers);
+
+	katom->jc = 0;
+}
+
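+/* Parse the userspace array of debug copy descriptors: pin the destination
+ * user pages for each buffer and take a reference on the external resource
+ * to be copied from, storing the resulting descriptors in katom->jc. */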
+static int kbase_debug_copy_prepare(struct kbase_jd_atom *katom)
+{
+	struct kbase_debug_copy_buffer *buffers;
+	struct base_jd_debug_copy_buffer *user_buffers = NULL;
+	unsigned int i;
+	unsigned int nr = katom->nr_extres;
+	int ret = 0;
+	void __user *user_structs = (void __user *)(uintptr_t)katom->jc;
+
+	if (!user_structs)
+		return -EINVAL;
+
+	buffers = kcalloc(nr, sizeof(*buffers), GFP_KERNEL);
+	if (!buffers) {
+		ret = -ENOMEM;
+		katom->jc = 0;
+		goto out_cleanup;
+	}
+	katom->jc = (u64)(uintptr_t)buffers;
+
+	user_buffers = kmalloc_array(nr, sizeof(*user_buffers), GFP_KERNEL);
+
+	if (!user_buffers) {
+		ret = -ENOMEM;
+		goto out_cleanup;
+	}
+
+	ret = copy_from_user(user_buffers, user_structs,
+			sizeof(*user_buffers)*nr);
+	if (ret)
+		goto out_cleanup;
+
+	for (i = 0; i < nr; i++) {
+		u64 addr = user_buffers[i].address;
+		u64 page_addr = addr & PAGE_MASK;
+		u64 end_page_addr = addr + user_buffers[i].size - 1;
+		u64 last_page_addr = end_page_addr & PAGE_MASK;
+		int nr_pages = (last_page_addr-page_addr)/PAGE_SIZE+1;
+		int pinned_pages;
+		struct kbase_va_region *reg;
+		struct base_external_resource user_extres;
+
+		if (!addr)
+			continue;
+
+		buffers[i].nr_pages = nr_pages;
+		buffers[i].offset = addr & ~PAGE_MASK;
+		if (buffers[i].offset >= PAGE_SIZE) {
+			ret = -EINVAL;
+			goto out_cleanup;
+		}
+		buffers[i].size = user_buffers[i].size;
+
+		buffers[i].pages = kcalloc(nr_pages, sizeof(struct page *),
+				GFP_KERNEL);
+		if (!buffers[i].pages) {
+			ret = -ENOMEM;
+			goto out_cleanup;
+		}
+
+		pinned_pages = get_user_pages_fast(page_addr,
+					nr_pages,
+					1, /* Write */
+					buffers[i].pages);
+		if (pinned_pages < 0) {
+			ret = pinned_pages;
+			goto out_cleanup;
+		}
+		if (pinned_pages != nr_pages) {
+			ret = -EINVAL;
+			goto out_cleanup;
+		}
+
+		user_extres = user_buffers[i].extres;
+		if (user_extres.ext_resource == 0ULL) {
+			ret = -EINVAL;
+			goto out_cleanup;
+		}
+
+		buffers[i].gpu_addr = user_extres.ext_resource &
+			~BASE_EXT_RES_ACCESS_EXCLUSIVE;
+		kbase_gpu_vm_lock(katom->kctx);
+		reg = kbase_region_tracker_find_region_enclosing_address(
+				katom->kctx, buffers[i].gpu_addr);
+
+		if (NULL == reg || NULL == reg->cpu_alloc ||
+				(reg->flags & KBASE_REG_FREE)) {
+			ret = -EINVAL;
+			goto out_unlock;
+		}
+		kbase_mem_phy_alloc_get(reg->gpu_alloc);
+
+		buffers[i].nr_extres_pages = reg->nr_pages;
+		if (reg->nr_pages*PAGE_SIZE != buffers[i].size)
+			dev_warn(katom->kctx->kbdev->dev, "Copy buffer is not of same size as the external resource to copy.\n");
+
+		switch (reg->gpu_alloc->type) {
+		case KBASE_MEM_TYPE_IMPORTED_USER_BUF:
+		{
+			struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc;
+			unsigned long nr_pages =
+				alloc->imported.user_buf.nr_pages;
+
+			if (alloc->imported.user_buf.mm != current->mm) {
+				ret = -EINVAL;
+				goto out_unlock;
+			}
+			buffers[i].extres_pages = kcalloc(nr_pages,
+					sizeof(struct page *), GFP_KERNEL);
+			if (!buffers[i].extres_pages) {
+				ret = -ENOMEM;
+				goto out_unlock;
+			}
+
+			ret = get_user_pages_fast(
+					alloc->imported.user_buf.address,
+					nr_pages, 0,
+					buffers[i].extres_pages);
+			if (ret != nr_pages)
+				goto out_unlock;
+			ret = 0;
+			break;
+		}
+		case KBASE_MEM_TYPE_IMPORTED_UMP:
+		{
+			dev_warn(katom->kctx->kbdev->dev,
+					"UMP is not supported for debug_copy jobs\n");
+			ret = -EINVAL;
+			goto out_unlock;
+		}
+		default:
+			/* Nothing to be done. */
+			break;
+		}
+		kbase_gpu_vm_unlock(katom->kctx);
+	}
+	kfree(user_buffers);
+
+	return ret;
+
+out_unlock:
+	kbase_gpu_vm_unlock(katom->kctx);
+
+out_cleanup:
+	kfree(buffers);
+	kfree(user_buffers);
+
+	/* Frees the memory allocated for the kbase_debug_copy_buffer structs,
+	 * including members, and sets jc to 0 */
+	kbase_debug_copy_finish(katom);
+	return ret;
+}
+
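+/* Copy the contents of one external resource page into the destination user
+ * pages, handling the case where the copy straddles two destination pages
+ * because of the buffer offset. */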
+static void kbase_mem_copy_from_extres_page(struct kbase_context *kctx,
+		void *extres_page, struct page **pages, unsigned int nr_pages,
+		unsigned int *target_page_nr, size_t offset, size_t *to_copy)
+{
+	void *target_page = kmap(pages[*target_page_nr]);
+	size_t chunk = PAGE_SIZE-offset;
+
+	if (!target_page) {
+		*target_page_nr += 1;
+		dev_warn(kctx->kbdev->dev, "kmap failed in debug_copy job.");
+		return;
+	}
+
+	chunk = min(chunk, *to_copy);
+
+	memcpy(target_page + offset, extres_page, chunk);
+	*to_copy -= chunk;
+
+	kunmap(pages[*target_page_nr]);
+
+	*target_page_nr += 1;
+	if (*target_page_nr >= nr_pages)
+		return;
+
+	target_page = kmap(pages[*target_page_nr]);
+	if (!target_page) {
+		*target_page_nr += 1;
+		dev_warn(kctx->kbdev->dev, "kmap failed in debug_copy job.");
+		return;
+	}
+
+	KBASE_DEBUG_ASSERT(target_page);
+
+	chunk = min(offset, *to_copy);
+	memcpy(target_page, extres_page + PAGE_SIZE-offset, chunk);
+	*to_copy -= chunk;
+
+	kunmap(pages[*target_page_nr]);
+}
+
+static int kbase_mem_copy_from_extres(struct kbase_context *kctx,
+		struct kbase_debug_copy_buffer *buf_data)
+{
+	unsigned int i;
+	unsigned int target_page_nr = 0;
+	struct kbase_va_region *reg;
+	struct page **pages = buf_data->pages;
+	u64 offset = buf_data->offset;
+	size_t extres_size = buf_data->nr_extres_pages*PAGE_SIZE;
+	size_t to_copy = min(extres_size, buf_data->size);
+	int ret = 0;
+
+	KBASE_DEBUG_ASSERT(pages != NULL);
+
+	kbase_gpu_vm_lock(kctx);
+	reg = kbase_region_tracker_find_region_enclosing_address(
+			kctx, buf_data->gpu_addr);
+
+	if (!reg) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
+	switch (reg->gpu_alloc->type) {
+	case KBASE_MEM_TYPE_IMPORTED_USER_BUF:
+	{
+		for (i = 0; i < buf_data->nr_extres_pages; i++) {
+			struct page *pg = buf_data->extres_pages[i];
+			void *extres_page = kmap(pg);
+
+			if (extres_page)
+				kbase_mem_copy_from_extres_page(kctx,
+						extres_page, pages,
+						buf_data->nr_pages,
+						&target_page_nr,
+						offset, &to_copy);
+
+			kunmap(pg);
+			if (target_page_nr >= buf_data->nr_pages)
+				break;
+		}
+		break;
+	}
+#ifdef CONFIG_DMA_SHARED_BUFFER
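+	/* For dma-buf imports the page-by-page copy is bracketed by
+	 * dma_buf_begin_cpu_access()/dma_buf_end_cpu_access() so that the CPU
+	 * reads coherent data from the exporter. */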
+	case KBASE_MEM_TYPE_IMPORTED_UMM: {
+		struct dma_buf *dma_buf = reg->gpu_alloc->imported.umm.dma_buf;
+
+		KBASE_DEBUG_ASSERT(dma_buf != NULL);
+		KBASE_DEBUG_ASSERT(dma_buf->size ==
+				   buf_data->nr_extres_pages * PAGE_SIZE);
+
+		ret = dma_buf_begin_cpu_access(dma_buf,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+				0, buf_data->nr_extres_pages*PAGE_SIZE,
+#endif
+				DMA_FROM_DEVICE);
+		if (ret)
+			goto out_unlock;
+
+		for (i = 0; i < buf_data->nr_extres_pages; i++) {
+
+			void *extres_page = dma_buf_kmap(dma_buf, i);
+
+			if (extres_page)
+				kbase_mem_copy_from_extres_page(kctx,
+						extres_page, pages,
+						buf_data->nr_pages,
+						&target_page_nr,
+						offset, &to_copy);
+
+			dma_buf_kunmap(dma_buf, i, extres_page);
+			if (target_page_nr >= buf_data->nr_pages)
+				break;
+		}
+		dma_buf_end_cpu_access(dma_buf,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+				0, buf_data->nr_extres_pages*PAGE_SIZE,
+#endif
+				DMA_FROM_DEVICE);
+		break;
+	}
+#endif
+	default:
+		ret = -EINVAL;
+	}
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return ret;
+
+}
+
+static int kbase_debug_copy(struct kbase_jd_atom *katom)
+{
+	struct kbase_debug_copy_buffer *buffers =
+			(struct kbase_debug_copy_buffer *)(uintptr_t)katom->jc;
+	unsigned int i;
+
+	for (i = 0; i < katom->nr_extres; i++) {
+		int res = kbase_mem_copy_from_extres(katom->kctx, &buffers[i]);
+
+		if (res)
+			return res;
+	}
+
+	return 0;
+}
+
+static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom)
+{
+	__user void *data = (__user void *)(uintptr_t) katom->jc;
+	struct base_jit_alloc_info *info;
+	int ret;
+
+	/* Fail the job if there is no info structure */
+	if (!data) {
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	/* Copy the information for safe access and future storage */
+	info = kzalloc(sizeof(*info), GFP_KERNEL);
+	if (!info) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	if (copy_from_user(info, data, sizeof(*info)) != 0) {
+		ret = -EINVAL;
+		goto free_info;
+	}
+
+	/* If the ID is zero then fail the job */
+	if (info->id == 0) {
+		ret = -EINVAL;
+		goto free_info;
+	}
+
+	/* Sanity check that the PA fits within the VA */
+	if (info->va_pages < info->commit_pages) {
+		ret = -EINVAL;
+		goto free_info;
+	}
+
+	/* Ensure the GPU address is correctly aligned */
+	if ((info->gpu_alloc_addr & 0x7) != 0) {
+		ret = -EINVAL;
+		goto free_info;
+	}
+
+	/* Replace the user pointer with our kernel allocated info structure */
+	katom->jc = (u64)(uintptr_t) info;
+
+	/*
+	 * Note:
+	 * The provided info->gpu_alloc_addr isn't validated here as
+	 * userland can cache allocations which means that even
+	 * though the region is valid it doesn't represent the
+	 * same thing it used to.
+	 *
+	 * Complete validation of va_pages, commit_pages and extent
+	 * isn't done here as it will be done during the call to
+	 * kbase_mem_alloc.
+	 */
+	return 0;
+
+free_info:
+	kfree(info);
+fail:
+	katom->jc = 0;
+	return ret;
+}
+
+static void kbase_jit_allocate_process(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct base_jit_alloc_info *info;
+	struct kbase_va_region *reg;
+	struct kbase_vmap_struct mapping;
+	u64 *ptr;
+
+	info = (struct base_jit_alloc_info *) (uintptr_t) katom->jc;
+
+	/* The JIT ID is still in use so fail the allocation */
+	if (kctx->jit_alloc[info->id]) {
+		katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED;
+		return;
+	}
+
+	/*
+	 * Mark the allocation so we know it's in use even if the
+	 * allocation itself fails.
+	 */
+	kctx->jit_alloc[info->id] = (struct kbase_va_region *) -1;
+
+	/* Create a JIT allocation */
+	reg = kbase_jit_allocate(kctx, info);
+	if (!reg) {
+		katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED;
+		return;
+	}
+
+	/*
+	 * Write the address of the JIT allocation to the user provided
+	 * GPU allocation.
+	 */
+	ptr = kbase_vmap(kctx, info->gpu_alloc_addr, sizeof(*ptr),
+			&mapping);
+	if (!ptr) {
+		/*
+		 * Leave the allocation "live" as the JIT free job will be
+		 * submitted anyway.
+		 */
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		return;
+	}
+
+	*ptr = reg->start_pfn << PAGE_SHIFT;
+	kbase_vunmap(kctx, &mapping);
+
+	katom->event_code = BASE_JD_EVENT_DONE;
+
+	/*
+	 * Bind it to the user provided ID. Do this last so we can check for
+	 * the JIT free racing this JIT alloc job.
+	 */
+	kctx->jit_alloc[info->id] = reg;
+}
+
+static void kbase_jit_allocate_finish(struct kbase_jd_atom *katom)
+{
+	struct base_jit_alloc_info *info;
+
+	info = (struct base_jit_alloc_info *) (uintptr_t) katom->jc;
+	/* Free the info structure */
+	kfree(info);
+}
+
+static void kbase_jit_free_process(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	u8 id = (u8) katom->jc;
+
+	/*
+	 * If the ID is zero or it is not in use yet then fail the job.
+	 */
+	if ((id == 0) || (kctx->jit_alloc[id] == NULL)) {
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		return;
+	}
+
+	/*
+	 * If the ID is valid but the allocation request failed, still succeed
+	 * this soft job but don't try to free the allocation.
+	 */
+	if (kctx->jit_alloc[id] != (struct kbase_va_region *) -1)
+		kbase_jit_free(kctx, kctx->jit_alloc[id]);
+
+	kctx->jit_alloc[id] = NULL;
+}
+
+static int kbase_ext_res_prepare(struct kbase_jd_atom *katom)
+{
+	__user struct base_external_resource_list *user_ext_res;
+	struct base_external_resource_list *ext_res;
+	u64 count = 0;
+	size_t copy_size;
+	int ret;
+
+	user_ext_res = (__user struct base_external_resource_list *)
+			(uintptr_t) katom->jc;
+
+	/* Fail the job if there is no info structure */
+	if (!user_ext_res) {
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	if (copy_from_user(&count, &user_ext_res->count, sizeof(u64)) != 0) {
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	/* Is the number of external resources in range? */
+	if (!count || count > BASE_EXT_RES_COUNT_MAX) {
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	/* Copy the information for safe access and future storage */
+	copy_size = sizeof(*ext_res);
+	copy_size += sizeof(struct base_external_resource) * (count - 1);
+	ext_res = kzalloc(copy_size, GFP_KERNEL);
+	if (!ext_res) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	if (copy_from_user(ext_res, user_ext_res, copy_size) != 0) {
+		ret = -EINVAL;
+		goto free_info;
+	}
+
+	/*
+	 * Overwrite the count with the first value in case it was changed
+	 * after the fact.
+	 */
+	ext_res->count = count;
+
+	/*
+	 * Replace the user pointer with our kernel allocated
+	 * ext_res structure.
+	 */
+	katom->jc = (u64)(uintptr_t) ext_res;
+
+	return 0;
+
+free_info:
+	kfree(ext_res);
+fail:
+	return ret;
+}
+
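+/* Map or unmap the external resources listed in the atom's payload by
+ * acquiring or releasing sticky resources on the context; on a failed map,
+ * any resources already acquired are released again. */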
+static void kbase_ext_res_process(struct kbase_jd_atom *katom, bool map)
+{
+	struct base_external_resource_list *ext_res;
+	int i;
+	bool failed = false;
+
+	ext_res = (struct base_external_resource_list *) (uintptr_t) katom->jc;
+	if (!ext_res)
+		goto failed_jc;
+
+	kbase_gpu_vm_lock(katom->kctx);
+
+	for (i = 0; i < ext_res->count; i++) {
+		u64 gpu_addr;
+
+		gpu_addr = ext_res->ext_res[i].ext_resource &
+				~BASE_EXT_RES_ACCESS_EXCLUSIVE;
+		if (map) {
+			if (!kbase_sticky_resource_acquire(katom->kctx,
+					gpu_addr))
+				goto failed_loop;
+		} else
+			if (!kbase_sticky_resource_release(katom->kctx, NULL,
+					gpu_addr))
+				failed = true;
+	}
+
+	/*
+	 * In the unmap case we continue unmapping the remaining resources
+	 * even if one of them fails, but we always report failure if _any_
+	 * unmap request fails.
+	 */
+	if (failed)
+		katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+	else
+		katom->event_code = BASE_JD_EVENT_DONE;
+
+	kbase_gpu_vm_unlock(katom->kctx);
+
+	return;
+
+failed_loop:
+	while (--i >= 0) {
+		u64 gpu_addr;
+
+		gpu_addr = ext_res->ext_res[i].ext_resource &
+				~BASE_EXT_RES_ACCESS_EXCLUSIVE;
+
+		kbase_sticky_resource_release(katom->kctx, NULL, gpu_addr);
+	}
+
+	katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+	kbase_gpu_vm_unlock(katom->kctx);
+
+failed_jc:
+	return;
+}
+
+static void kbase_ext_res_finish(struct kbase_jd_atom *katom)
+{
+	struct base_external_resource_list *ext_res;
+
+	ext_res = (struct base_external_resource_list *) (uintptr_t) katom->jc;
+	/* Free the info structure */
+	kfree(ext_res);
+}
+
 int kbase_process_soft_job(struct kbase_jd_atom *katom)
 {
-	switch (katom->core_req & BASEP_JD_REQ_ATOM_TYPE) {
+	switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
 	case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME:
 		return kbase_dump_cpu_gpu_time(katom);
 #ifdef CONFIG_SYNC
@@ -301,6 +1312,34 @@ int kbase_process_soft_job(struct kbase_jd_atom *katom)
 #endif				/* CONFIG_SYNC */
 	case BASE_JD_REQ_SOFT_REPLAY:
 		return kbase_replay_process(katom);
+	case BASE_JD_REQ_SOFT_EVENT_WAIT:
+		return kbasep_soft_event_wait(katom);
+	case BASE_JD_REQ_SOFT_EVENT_SET:
+		kbasep_soft_event_update_locked(katom, BASE_JD_SOFT_EVENT_SET);
+		break;
+	case BASE_JD_REQ_SOFT_EVENT_RESET:
+		kbasep_soft_event_update_locked(katom, BASE_JD_SOFT_EVENT_RESET);
+		break;
+	case BASE_JD_REQ_SOFT_DEBUG_COPY:
+	{
+		int res = kbase_debug_copy(katom);
+
+		if (res)
+			katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+		break;
+	}
+	case BASE_JD_REQ_SOFT_JIT_ALLOC:
+		kbase_jit_allocate_process(katom);
+		break;
+	case BASE_JD_REQ_SOFT_JIT_FREE:
+		kbase_jit_free_process(katom);
+		break;
+	case BASE_JD_REQ_SOFT_EXT_RES_MAP:
+		kbase_ext_res_process(katom, true);
+		break;
+	case BASE_JD_REQ_SOFT_EXT_RES_UNMAP:
+		kbase_ext_res_process(katom, false);
+		break;
 	}
 
 	/* Atom is complete */
@@ -309,12 +1348,15 @@ int kbase_process_soft_job(struct kbase_jd_atom *katom)
 
 void kbase_cancel_soft_job(struct kbase_jd_atom *katom)
 {
-	switch (katom->core_req & BASEP_JD_REQ_ATOM_TYPE) {
+	switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
 #ifdef CONFIG_SYNC
 	case BASE_JD_REQ_SOFT_FENCE_WAIT:
 		kbase_fence_cancel_wait(katom);
 		break;
 #endif
+	case BASE_JD_REQ_SOFT_EVENT_WAIT:
+		kbasep_soft_event_cancel_job(katom);
+		break;
 	default:
 		/* This soft-job doesn't support cancellation! */
 		KBASE_DEBUG_ASSERT(0);
@@ -323,7 +1365,7 @@ void kbase_cancel_soft_job(struct kbase_jd_atom *katom)
 
 int kbase_prepare_soft_job(struct kbase_jd_atom *katom)
 {
-	switch (katom->core_req & BASEP_JD_REQ_ATOM_TYPE) {
+	switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
 	case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME:
 		{
 			if (0 != (katom->jc & KBASE_CACHE_ALIGNMENT_MASK))
@@ -372,8 +1414,23 @@ int kbase_prepare_soft_job(struct kbase_jd_atom *katom)
 		}
 		break;
 #endif				/* CONFIG_SYNC */
+	case BASE_JD_REQ_SOFT_JIT_ALLOC:
+		return kbase_jit_allocate_prepare(katom);
 	case BASE_JD_REQ_SOFT_REPLAY:
+	case BASE_JD_REQ_SOFT_JIT_FREE:
 		break;
+	case BASE_JD_REQ_SOFT_EVENT_WAIT:
+	case BASE_JD_REQ_SOFT_EVENT_SET:
+	case BASE_JD_REQ_SOFT_EVENT_RESET:
+		if (katom->jc == 0)
+			return -EINVAL;
+		break;
+	case BASE_JD_REQ_SOFT_DEBUG_COPY:
+		return kbase_debug_copy_prepare(katom);
+	case BASE_JD_REQ_SOFT_EXT_RES_MAP:
+		return kbase_ext_res_prepare(katom);
+	case BASE_JD_REQ_SOFT_EXT_RES_UNMAP:
+		return kbase_ext_res_prepare(katom);
 	default:
 		/* Unsupported soft-job */
 		return -EINVAL;
@@ -383,7 +1440,7 @@ int kbase_prepare_soft_job(struct kbase_jd_atom *katom)
 
 void kbase_finish_soft_job(struct kbase_jd_atom *katom)
 {
-	switch (katom->core_req & BASEP_JD_REQ_ATOM_TYPE) {
+	switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
 	case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME:
 		/* Nothing to do */
 		break;
@@ -405,6 +1462,19 @@ void kbase_finish_soft_job(struct kbase_jd_atom *katom)
 		}
 		break;
 #endif				/* CONFIG_SYNC */
+
+	case BASE_JD_REQ_SOFT_DEBUG_COPY:
+		kbase_debug_copy_finish(katom);
+		break;
+	case BASE_JD_REQ_SOFT_JIT_ALLOC:
+		kbase_jit_allocate_finish(katom);
+		break;
+	case BASE_JD_REQ_SOFT_EXT_RES_MAP:
+		kbase_ext_res_finish(katom);
+		break;
+	case BASE_JD_REQ_SOFT_EXT_RES_UNMAP:
+		kbase_ext_res_finish(katom);
+		break;
 	}
 }
 
@@ -439,18 +1509,15 @@ void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev)
 		/* Remove from the global list */
 		list_del(&katom_iter->dep_item[1]);
 		/* Remove from the context's list of waiting soft jobs */
-		list_del(&katom_iter->dep_item[0]);
+		kbasep_remove_waiting_soft_job(katom_iter);
 
 		if (kbase_process_soft_job(katom_iter) == 0) {
 			kbase_finish_soft_job(katom_iter);
 			resched |= jd_done_nolock(katom_iter, NULL);
 		} else {
-			/* The job has not completed */
 			KBASE_DEBUG_ASSERT((katom_iter->core_req &
-					BASEP_JD_REQ_ATOM_TYPE)
+					BASE_JD_REQ_SOFT_JOB_TYPE)
 					!= BASE_JD_REQ_SOFT_REPLAY);
-			list_add_tail(&katom_iter->dep_item[0],
-					&kctx->waiting_soft_jobs);
 		}
 
 		mutex_unlock(&kctx->jctx.lock);
diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_sync.h b/drivers/gpu/arm/midgard_for_linux/mali_kbase_sync.h
index 6d8e34d3c3ae..820bddc8c8b1 100644
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_sync.h
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_sync.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -36,6 +36,15 @@ static inline struct sync_timeline *sync_pt_parent(struct sync_pt *pt)
 }
 #endif
 
+static inline int kbase_fence_get_status(struct sync_fence *fence)
+{
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+	return fence->status;
+#else
+	return atomic_read(&fence->status);
+#endif
+}
+
 /*
  * Create a stream object.
  * Built on top of timeline object.
diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_sync_user.c b/drivers/gpu/arm/midgard_for_linux/mali_kbase_sync_user.c
index ddd0847a69c5..b9baa913a693 100644
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_sync_user.c
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_sync_user.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -33,7 +33,6 @@
 #include <linux/version.h>
 #include <linux/uaccess.h>
 #include <mali_kbase_sync.h>
-#include <mali_base_kernel_sync.h>
 
 static int kbase_stream_close(struct inode *inode, struct file *file)
 {
diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_tlstream.c b/drivers/gpu/arm/midgard_for_linux/mali_kbase_tlstream.c
index 0994ddd7c95f..ad88b7b69018 100644
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_tlstream.c
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_tlstream.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -41,9 +41,6 @@
 /* The number of nanoseconds in a second. */
 #define NSECS_IN_SEC       1000000000ull /* ns */
 
-/* The number of nanoseconds to wait before autoflushing the stream. */
-#define AUTOFLUSH_TIMEOUT  (2ull * NSECS_IN_SEC) /* ns */
-
 /* The period of autoflush checker execution in milliseconds. */
 #define AUTOFLUSH_INTERVAL 1000 /* ms */
 
@@ -142,6 +139,8 @@ enum tl_msg_id_obj {
 	KBASE_TL_RET_ATOM_AS,
 	KBASE_TL_NRET_ATOM_AS,
 	KBASE_TL_DEP_ATOM_ATOM,
+	KBASE_TL_NDEP_ATOM_ATOM,
+	KBASE_TL_RDEP_ATOM_ATOM,
 	KBASE_TL_ATTRIB_ATOM_CONFIG,
 	KBASE_TL_ATTRIB_AS_CONFIG,
 
@@ -152,7 +151,9 @@ enum tl_msg_id_obj {
 /* Message ids of trace events that are recorded in the auxiliary stream. */
 enum tl_msg_id_aux {
 	KBASE_AUX_PM_STATE,
+	KBASE_AUX_ISSUE_JOB_SOFTSTOP,
 	KBASE_AUX_JOB_SOFTSTOP,
+	KBASE_AUX_JOB_SOFTSTOP_EX,
 	KBASE_AUX_PAGEFAULT,
 	KBASE_AUX_PAGESALLOC
 };
@@ -166,7 +167,7 @@ enum tl_msg_id_aux {
  * @wbi: write buffer index
  * @rbi: read buffer index
  * @numbered: if non-zero stream's packets are sequentially numbered
- * @last_write_time: timestamp indicating last write
+ * @autoflush_counter: counter tracking stream's autoflush state
  *
  * This structure holds information needed to construct proper packets in the
  * timeline stream. Each message in sequence must bear timestamp that is greater
@@ -177,6 +178,11 @@ enum tl_msg_id_aux {
  * Each packet in timeline body stream has sequence number embedded (this value
  * must increment monotonically and is used by packets receiver to discover
  * buffer overflows.
+ * The autoflush counter is set to a negative number when there is no data
+ * pending for flush, and is reset to zero on every update of the buffer. The
+ * autoflush timer increments the counter by one on every expiry. If there is
+ * no activity on the buffer during two consecutive timer expiries, the stream
+ * buffer is flushed.
  */
 struct tl_stream {
 	spinlock_t lock;
@@ -190,7 +196,7 @@ struct tl_stream {
 	atomic_t rbi;
 
 	int      numbered;
-	u64      last_write_time;
+	atomic_t autoflush_counter;
 };
 
 /**
@@ -241,9 +247,6 @@ static atomic_t autoflush_timer_active;
  * streams at any given time. */
 static DEFINE_MUTEX(tl_reader_lock);
 
-/* Indicator of whether the timeline stream file descriptor is already used. */
-static atomic_t tlstream_busy = {0};
-
 /* Timeline stream event queue. */
 static DECLARE_WAIT_QUEUE_HEAD(tl_event_queue);
 
@@ -269,8 +272,8 @@ static const struct tp_desc tp_desc_obj[] = {
 		KBASE_TL_NEW_CTX,
 		__stringify(KBASE_TL_NEW_CTX),
 		"object ctx is created",
-		"@pI",
-		"ctx,ctx_nr"
+		"@pII",
+		"ctx,ctx_nr,tgid"
 	},
 	{
 		KBASE_TL_NEW_GPU,
@@ -405,6 +408,20 @@ static const struct tp_desc tp_desc_obj[] = {
 		"@pp",
 		"atom1,atom2"
 	},
+	{
+		KBASE_TL_NDEP_ATOM_ATOM,
+		__stringify(KBASE_TL_NDEP_ATOM_ATOM),
+		"atom2 no longer depends on atom1",
+		"@pp",
+		"atom1,atom2"
+	},
+	{
+		KBASE_TL_RDEP_ATOM_ATOM,
+		__stringify(KBASE_TL_RDEP_ATOM_ATOM),
+		"resolved dependency of atom2 depending on atom1",
+		"@pp",
+		"atom1,atom2"
+	},
 	{
 		KBASE_TL_ATTRIB_ATOM_CONFIG,
 		__stringify(KBASE_TL_ATTRIB_ATOM_CONFIG),
@@ -437,6 +454,13 @@ static const struct tp_desc tp_desc_aux[] = {
 		"@IL",
 		"core_type,core_state_bitset"
 	},
+	{
+		KBASE_AUX_ISSUE_JOB_SOFTSTOP,
+		__stringify(KBASE_AUX_ISSUE_JOB_SOFTSTOP),
+		"Issuing job soft stop",
+		"@p",
+		"atom"
+	},
 	{
 		KBASE_AUX_JOB_SOFTSTOP,
 		__stringify(KBASE_AUX_JOB_SOFTSTOP),
@@ -444,6 +468,13 @@ static const struct tp_desc tp_desc_aux[] = {
 		"@I",
 		"tag_id"
 	},
+	{
+		KBASE_AUX_JOB_SOFTSTOP_EX,
+		__stringify(KBASE_AUX_JOB_SOFTSTOP_EX),
+		"Job soft stop, more details",
+		"@pI",
+		"atom,job_type"
+	},
 	{
 		KBASE_AUX_PAGEFAULT,
 		__stringify(KBASE_AUX_PAGEFAULT),
@@ -470,6 +501,11 @@ static atomic_t tlstream_bytes_generated = {0};
 
 /*****************************************************************************/
 
+/* Indicator of whether the timeline stream file descriptor is used. */
+atomic_t kbase_tlstream_enabled = {0};
+
+/*****************************************************************************/
+
 /**
  * kbasep_tlstream_get_timestamp - return timestamp
  *
@@ -773,6 +809,9 @@ static size_t kbasep_tlstream_msgbuf_submit(
 	unsigned int rb_idx_raw = atomic_read(&stream->rbi);
 	unsigned int wb_idx = wb_idx_raw % PACKET_COUNT;
 
+	/* Set stream as flushed. */
+	atomic_set(&stream->autoflush_counter, -1);
+
 	kbasep_tlstream_packet_header_update(
 			stream->buffer[wb_idx].data,
 			wb_size - PACKET_HEADER_SIZE);
@@ -828,7 +867,7 @@ static size_t kbasep_tlstream_msgbuf_submit(
 static char *kbasep_tlstream_msgbuf_acquire(
 		enum tl_stream_type stream_type,
 		size_t              msg_size,
-		unsigned long       *flags)
+		unsigned long       *flags) __acquires(&stream->lock)
 {
 	struct tl_stream *stream;
 	unsigned int     wb_idx_raw;
@@ -875,14 +914,16 @@ static char *kbasep_tlstream_msgbuf_acquire(
  */
 static void kbasep_tlstream_msgbuf_release(
 		enum tl_stream_type stream_type,
-		unsigned long       flags)
+		unsigned long       flags) __releases(&stream->lock)
 {
 	struct tl_stream *stream;
 
 	KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type);
 
 	stream = tl_stream[stream_type];
-	stream->last_write_time = kbasep_tlstream_get_timestamp();
+
+	/* Mark stream as containing unflushed data. */
+	atomic_set(&stream->autoflush_counter, 0);
 
 	spin_unlock_irqrestore(&stream->lock, flags);
 }
@@ -944,6 +985,22 @@ static void kbasep_tlstream_autoflush_timer_callback(unsigned long data)
 		size_t           wb_size;
 		size_t           min_size = PACKET_HEADER_SIZE;
 
+		int af_cnt = atomic_read(&stream->autoflush_counter);
+
+		/* Check if stream contains unflushed data. */
+		if (0 > af_cnt)
+			continue;
+
+		/* Check if stream should be flushed now. */
+		if (af_cnt != atomic_cmpxchg(
+					&stream->autoflush_counter,
+					af_cnt,
+					af_cnt + 1))
+			continue;
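+		/* The counter has just been incremented from af_cnt to
+		 * af_cnt + 1. If af_cnt was 0 the data only became pending
+		 * since the last expiry, so give it one more interval before
+		 * flushing. */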
+		if (!af_cnt)
+			continue;
+
+		/* Autoflush this stream. */
 		if (stream->numbered)
 			min_size += PACKET_NUMBER_SIZE;
 
@@ -954,16 +1011,11 @@ static void kbasep_tlstream_autoflush_timer_callback(unsigned long data)
 		wb_size    = atomic_read(&stream->buffer[wb_idx].size);
 
 		if (wb_size > min_size) {
-			u64 timestamp = kbasep_tlstream_get_timestamp();
-
-			if (timestamp - stream->last_write_time
-					> AUTOFLUSH_TIMEOUT) {
-				wb_size = kbasep_tlstream_msgbuf_submit(
-						stream, wb_idx_raw, wb_size);
-				wb_idx = (wb_idx_raw + 1) % PACKET_COUNT;
-				atomic_set(&stream->buffer[wb_idx].size,
-						wb_size);
-			}
+			wb_size = kbasep_tlstream_msgbuf_submit(
+					stream, wb_idx_raw, wb_size);
+			wb_idx = (wb_idx_raw + 1) % PACKET_COUNT;
+			atomic_set(&stream->buffer[wb_idx].size,
+					wb_size);
 		}
 		spin_unlock_irqrestore(&stream->lock, flags);
 	}
@@ -1044,7 +1096,7 @@ static ssize_t kbasep_tlstream_read(
 
 	while (copy_len < size) {
 		enum tl_stream_type stype;
-		unsigned int        rb_idx_raw;
+		unsigned int        rb_idx_raw = 0;
 		unsigned int        rb_idx;
 		size_t              rb_size;
 
@@ -1134,7 +1186,12 @@ static int kbasep_tlstream_release(struct inode *inode, struct file *filp)
 	KBASE_DEBUG_ASSERT(filp);
 	CSTD_UNUSED(inode);
 	CSTD_UNUSED(filp);
-	atomic_set(&tlstream_busy, 0);
+
+	/* Stop autoflush timer before releasing access to streams. */
+	atomic_set(&autoflush_timer_active, 0);
+	del_timer_sync(&autoflush_timer);
+
+	atomic_set(&kbase_tlstream_enabled, 0);
 	return 0;
 }
 
@@ -1223,7 +1280,6 @@ static void kbasep_tlstream_timeline_header(
 int kbase_tlstream_init(void)
 {
 	enum tl_stream_type i;
-	int                 rcode;
 
 	/* Prepare stream structures. */
 	for (i = 0; i < TL_STREAM_TYPE_COUNT; i++) {
@@ -1241,14 +1297,10 @@ int kbase_tlstream_init(void)
 	}
 
 	/* Initialize autoflush timer. */
-	atomic_set(&autoflush_timer_active, 1);
+	atomic_set(&autoflush_timer_active, 0);
 	setup_timer(&autoflush_timer,
 			kbasep_tlstream_autoflush_timer_callback,
 			0);
-	rcode = mod_timer(
-			&autoflush_timer,
-			jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL));
-	CSTD_UNUSED(rcode);
 
 	return 0;
 }
@@ -1257,9 +1309,6 @@ void kbase_tlstream_term(void)
 {
 	enum tl_stream_type i;
 
-	atomic_set(&autoflush_timer_active, 0);
-	del_timer_sync(&autoflush_timer);
-
 	for (i = 0; i < TL_STREAM_TYPE_COUNT; i++) {
 		kbasep_timeline_stream_term(tl_stream[i]);
 		kfree(tl_stream[i]);
@@ -1268,14 +1317,16 @@ void kbase_tlstream_term(void)
 
 int kbase_tlstream_acquire(struct kbase_context *kctx, int *fd)
 {
-	if (0 == atomic_cmpxchg(&tlstream_busy, 0, 1)) {
+	if (0 == atomic_cmpxchg(&kbase_tlstream_enabled, 0, 1)) {
+		int rcode;
+
 		*fd = anon_inode_getfd(
 				"[mali_tlstream]",
 				&kbasep_tlstream_fops,
 				kctx,
 				O_RDONLY | O_CLOEXEC);
 		if (0 > *fd) {
-			atomic_set(&tlstream_busy, 0);
+			atomic_set(&kbase_tlstream_enabled, 0);
 			return *fd;
 		}
 
@@ -1294,6 +1345,14 @@ int kbase_tlstream_acquire(struct kbase_context *kctx, int *fd)
 				TL_STREAM_TYPE_AUX_HEADER,
 				tp_desc_aux,
 				ARRAY_SIZE(tp_desc_aux));
+
+		/* Start autoflush timer. */
+		atomic_set(&autoflush_timer_active, 1);
+		rcode = mod_timer(
+				&autoflush_timer,
+				jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL));
+		CSTD_UNUSED(rcode);
+
 	} else {
 		*fd = -EBUSY;
 	}
@@ -1329,11 +1388,12 @@ void kbase_tlstream_stats(u32 *bytes_collected, u32 *bytes_generated)
 
 /*****************************************************************************/
 
-void kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr)
+void __kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr, u32 tgid)
 {
 	const u32     msg_id = KBASE_TL_NEW_CTX;
 	const size_t  msg_size =
-		sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(nr);
+		sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(nr) +
+		sizeof(tgid);
 	unsigned long flags;
 	char          *buffer;
 	size_t        pos = 0;
@@ -1349,12 +1409,15 @@ void kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr)
 			buffer, pos, &context, sizeof(context));
 	pos = kbasep_tlstream_write_bytes(
 			buffer, pos, &nr, sizeof(nr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &tgid, sizeof(tgid));
+
 	KBASE_DEBUG_ASSERT(msg_size == pos);
 
 	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
 }
 
-void kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count)
+void __kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count)
 {
 	const u32     msg_id = KBASE_TL_NEW_GPU;
 	const size_t  msg_size =
@@ -1382,7 +1445,7 @@ void kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count)
 	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
 }
 
-void kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn)
+void __kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn)
 {
 	const u32     msg_id = KBASE_TL_NEW_LPU;
 	const size_t  msg_size =
@@ -1410,7 +1473,7 @@ void kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn)
 	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
 }
 
-void kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu)
+void __kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu)
 {
 	const u32     msg_id = KBASE_TL_LIFELINK_LPU_GPU;
 	const size_t  msg_size =
@@ -1435,7 +1498,7 @@ void kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu)
 	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
 }
 
-void kbase_tlstream_tl_summary_new_as(void *as, u32 nr)
+void __kbase_tlstream_tl_summary_new_as(void *as, u32 nr)
 {
 	const u32     msg_id = KBASE_TL_NEW_AS;
 	const size_t  msg_size =
@@ -1460,7 +1523,7 @@ void kbase_tlstream_tl_summary_new_as(void *as, u32 nr)
 	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
 }
 
-void kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu)
+void __kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu)
 {
 	const u32     msg_id = KBASE_TL_LIFELINK_AS_GPU;
 	const size_t  msg_size =
@@ -1487,11 +1550,12 @@ void kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu)
 
 /*****************************************************************************/
 
-void kbase_tlstream_tl_new_ctx(void *context, u32 nr)
+void __kbase_tlstream_tl_new_ctx(void *context, u32 nr, u32 tgid)
 {
 	const u32     msg_id = KBASE_TL_NEW_CTX;
 	const size_t  msg_size =
-		sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(nr);
+		sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(nr) +
+		sizeof(tgid);
 	unsigned long flags;
 	char          *buffer;
 	size_t        pos = 0;
@@ -1507,12 +1571,14 @@ void kbase_tlstream_tl_new_ctx(void *context, u32 nr)
 			buffer, pos, &context, sizeof(context));
 	pos = kbasep_tlstream_write_bytes(
 			buffer, pos, &nr, sizeof(nr));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &tgid, sizeof(tgid));
 	KBASE_DEBUG_ASSERT(msg_size == pos);
 
 	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
 }
 
-void kbase_tlstream_tl_new_atom(void *atom, u32 nr)
+void __kbase_tlstream_tl_new_atom(void *atom, u32 nr)
 {
 	const u32     msg_id = KBASE_TL_NEW_ATOM;
 	const size_t  msg_size =
@@ -1537,7 +1603,7 @@ void kbase_tlstream_tl_new_atom(void *atom, u32 nr)
 	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
 }
 
-void kbase_tlstream_tl_del_ctx(void *context)
+void __kbase_tlstream_tl_del_ctx(void *context)
 {
 	const u32     msg_id = KBASE_TL_DEL_CTX;
 	const size_t  msg_size =
@@ -1560,7 +1626,7 @@ void kbase_tlstream_tl_del_ctx(void *context)
 	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
 }
 
-void kbase_tlstream_tl_del_atom(void *atom)
+void __kbase_tlstream_tl_del_atom(void *atom)
 {
 	const u32     msg_id = KBASE_TL_DEL_ATOM;
 	const size_t  msg_size =
@@ -1583,7 +1649,7 @@ void kbase_tlstream_tl_del_atom(void *atom)
 	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
 }
 
-void kbase_tlstream_tl_ret_ctx_lpu(void *context, void *lpu)
+void __kbase_tlstream_tl_ret_ctx_lpu(void *context, void *lpu)
 {
 	const u32     msg_id = KBASE_TL_RET_CTX_LPU;
 	const size_t  msg_size =
@@ -1608,7 +1674,7 @@ void kbase_tlstream_tl_ret_ctx_lpu(void *context, void *lpu)
 	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
 }
 
-void kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context)
+void __kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context)
 {
 	const u32     msg_id = KBASE_TL_RET_ATOM_CTX;
 	const size_t  msg_size =
@@ -1633,7 +1699,7 @@ void kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context)
 	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
 }
 
-void kbase_tlstream_tl_ret_atom_lpu(
+void __kbase_tlstream_tl_ret_atom_lpu(
 		void *atom, void *lpu, const char *attrib_match_list)
 {
 	const u32     msg_id = KBASE_TL_RET_ATOM_LPU;
@@ -1664,7 +1730,7 @@ void kbase_tlstream_tl_ret_atom_lpu(
 	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
 }
 
-void kbase_tlstream_tl_nret_ctx_lpu(void *context, void *lpu)
+void __kbase_tlstream_tl_nret_ctx_lpu(void *context, void *lpu)
 {
 	const u32     msg_id = KBASE_TL_NRET_CTX_LPU;
 	const size_t  msg_size =
@@ -1689,7 +1755,7 @@ void kbase_tlstream_tl_nret_ctx_lpu(void *context, void *lpu)
 	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
 }
 
-void kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context)
+void __kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context)
 {
 	const u32     msg_id = KBASE_TL_NRET_ATOM_CTX;
 	const size_t  msg_size =
@@ -1714,7 +1780,7 @@ void kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context)
 	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
 }
 
-void kbase_tlstream_tl_dep_atom_atom(void *atom1, void *atom2)
+void __kbase_tlstream_tl_dep_atom_atom(void *atom1, void *atom2)
 {
 	const u32     msg_id = KBASE_TL_DEP_ATOM_ATOM;
 	const size_t  msg_size =
@@ -1739,7 +1805,57 @@ void kbase_tlstream_tl_dep_atom_atom(void *atom1, void *atom2)
 	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
 }
 
-void kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu)
+void __kbase_tlstream_tl_ndep_atom_atom(void *atom1, void *atom2)
+{
+	const u32     msg_id = KBASE_TL_NDEP_ATOM_ATOM;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom1) + sizeof(atom2);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom1, sizeof(atom1));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom2, sizeof(atom2));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_rdep_atom_atom(void *atom1, void *atom2)
+{
+	const u32     msg_id = KBASE_TL_RDEP_ATOM_ATOM;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(atom1) + sizeof(atom2);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_OBJ,
+			msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom1, sizeof(atom1));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &atom2, sizeof(atom2));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu)
 {
 	const u32     msg_id = KBASE_TL_NRET_ATOM_LPU;
 	const size_t  msg_size =
@@ -1764,7 +1880,7 @@ void kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu)
 	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
 }
 
-void kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx)
+void __kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx)
 {
 	const u32     msg_id = KBASE_TL_RET_AS_CTX;
 	const size_t  msg_size =
@@ -1789,7 +1905,7 @@ void kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx)
 	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
 }
 
-void kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx)
+void __kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx)
 {
 	const u32     msg_id = KBASE_TL_NRET_AS_CTX;
 	const size_t  msg_size =
@@ -1814,7 +1930,7 @@ void kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx)
 	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
 }
 
-void kbase_tlstream_tl_ret_atom_as(void *atom, void *as)
+void __kbase_tlstream_tl_ret_atom_as(void *atom, void *as)
 {
 	const u32     msg_id = KBASE_TL_RET_ATOM_AS;
 	const size_t  msg_size =
@@ -1839,7 +1955,7 @@ void kbase_tlstream_tl_ret_atom_as(void *atom, void *as)
 	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
 }
 
-void kbase_tlstream_tl_nret_atom_as(void *atom, void *as)
+void __kbase_tlstream_tl_nret_atom_as(void *atom, void *as)
 {
 	const u32     msg_id = KBASE_TL_NRET_ATOM_AS;
 	const size_t  msg_size =
@@ -1864,7 +1980,7 @@ void kbase_tlstream_tl_nret_atom_as(void *atom, void *as)
 	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
 }
 
-void kbase_tlstream_tl_attrib_atom_config(
+void __kbase_tlstream_tl_attrib_atom_config(
 		void *atom, u64 jd, u64 affinity, u32 config)
 {
 	const u32     msg_id = KBASE_TL_ATTRIB_ATOM_CONFIG;
@@ -1895,7 +2011,7 @@ void kbase_tlstream_tl_attrib_atom_config(
 	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
 }
 
-void kbase_tlstream_tl_attrib_as_config(
+void __kbase_tlstream_tl_attrib_as_config(
 		void *as, u64 transtab, u64 memattr, u64 transcfg)
 {
 	const u32     msg_id = KBASE_TL_ATTRIB_AS_CONFIG;
@@ -1926,7 +2042,7 @@ void kbase_tlstream_tl_attrib_as_config(
 	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
 }
 
-void kbase_tlstream_jd_gpu_soft_reset(void *gpu)
+void __kbase_tlstream_jd_gpu_soft_reset(void *gpu)
 {
 	const u32     msg_id = KBASE_JD_GPU_SOFT_RESET;
 	const size_t  msg_size =
@@ -1951,7 +2067,7 @@ void kbase_tlstream_jd_gpu_soft_reset(void *gpu)
 
 /*****************************************************************************/
 
-void kbase_tlstream_aux_pm_state(u32 core_type, u64 state)
+void __kbase_tlstream_aux_pm_state(u32 core_type, u64 state)
 {
 	const u32     msg_id = KBASE_AUX_PM_STATE;
 	const size_t  msg_size =
@@ -1976,7 +2092,28 @@ void kbase_tlstream_aux_pm_state(u32 core_type, u64 state)
 	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
 }
 
-void kbase_tlstream_aux_job_softstop(u32 js_id)
+void __kbase_tlstream_aux_issue_job_softstop(void *katom)
+{
+	const u32     msg_id = KBASE_AUX_ISSUE_JOB_SOFTSTOP;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(katom);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX, msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &katom, sizeof(katom));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
+void __kbase_tlstream_aux_job_softstop(u32 js_id)
 {
 	const u32     msg_id = KBASE_AUX_JOB_SOFTSTOP;
 	const size_t  msg_size =
@@ -1998,7 +2135,66 @@ void kbase_tlstream_aux_job_softstop(u32 js_id)
 	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
 }
 
-void kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change)
+/**
+ * __kbase_tlstream_aux_job_softstop_ex_record - record the trace point
+ * @katom: the atom that has been soft-stopped
+ * @job_type: the job type
+ */
+static void __kbase_tlstream_aux_job_softstop_ex_record(
+		void *katom, u32 job_type)
+{
+	const u32     msg_id = KBASE_AUX_JOB_SOFTSTOP_EX;
+	const size_t  msg_size =
+		sizeof(msg_id) + sizeof(u64) + sizeof(katom) + sizeof(job_type);
+	unsigned long flags;
+	char          *buffer;
+	size_t        pos = 0;
+
+	buffer = kbasep_tlstream_msgbuf_acquire(
+			TL_STREAM_TYPE_AUX, msg_size, &flags);
+	KBASE_DEBUG_ASSERT(buffer);
+
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_tlstream_write_timestamp(buffer, pos);
+	pos = kbasep_tlstream_write_bytes(buffer, pos, &katom, sizeof(katom));
+	pos = kbasep_tlstream_write_bytes(
+			buffer, pos, &job_type, sizeof(job_type));
+	KBASE_DEBUG_ASSERT(msg_size == pos);
+
+	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
+void __kbase_tlstream_aux_job_softstop_ex(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	u64 jd = katom->jc;
+
+	while (jd != 0) {
+		struct job_descriptor_header *job;
+		struct kbase_vmap_struct map;
+
+		job = kbase_vmap(kctx, jd, sizeof(*job), &map);
+		if (!job) {
+			dev_err(kctx->kbdev->dev,
+				"__kbase_tlstream_aux_job_softstop_ex: failed to map job descriptor 0x%llx for atom 0x%p\n",
+				jd, (void *)katom);
+			break;
+		}
+		if (job->exception_status != BASE_JD_EVENT_STOPPED) {
+			kbase_vunmap(kctx, &map);
+			break;
+		}
+
+		__kbase_tlstream_aux_job_softstop_ex_record(
+				katom, job->job_type);
+
+		jd = job->job_descriptor_size ?
+			job->next_job._64 : job->next_job._32;
+		kbase_vunmap(kctx, &map);
+	}
+}
+
+void __kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change)
 {
 	const u32     msg_id = KBASE_AUX_PAGEFAULT;
 	const size_t  msg_size =
@@ -2023,7 +2219,7 @@ void kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change)
 	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
 }
 
-void kbase_tlstream_aux_pagesalloc(u32 ctx_nr, u64 page_count)
+void __kbase_tlstream_aux_pagesalloc(u32 ctx_nr, u64 page_count)
 {
 	const u32     msg_id = KBASE_AUX_PAGESALLOC;
 	const size_t  msg_size =
diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_tlstream.h b/drivers/gpu/arm/midgard_for_linux/mali_kbase_tlstream.h
index eccd469a86e8..22a0d96f9a72 100644
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_tlstream.h
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_tlstream.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -102,18 +102,67 @@ void kbase_tlstream_stats(u32 *bytes_collected, u32 *bytes_generated);
 
 /*****************************************************************************/
 
+void __kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr, u32 tgid);
+void __kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count);
+void __kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn);
+void __kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu);
+void __kbase_tlstream_tl_summary_new_as(void *as, u32 nr);
+void __kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu);
+void __kbase_tlstream_tl_new_ctx(void *context, u32 nr, u32 tgid);
+void __kbase_tlstream_tl_new_atom(void *atom, u32 nr);
+void __kbase_tlstream_tl_del_ctx(void *context);
+void __kbase_tlstream_tl_del_atom(void *atom);
+void __kbase_tlstream_tl_ret_ctx_lpu(void *context, void *lpu);
+void __kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context);
+void __kbase_tlstream_tl_ret_atom_lpu(
+		void *atom, void *lpu, const char *attrib_match_list);
+void __kbase_tlstream_tl_nret_ctx_lpu(void *context, void *lpu);
+void __kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context);
+void __kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu);
+void __kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx);
+void __kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx);
+void __kbase_tlstream_tl_ret_atom_as(void *atom, void *as);
+void __kbase_tlstream_tl_nret_atom_as(void *atom, void *as);
+void __kbase_tlstream_tl_dep_atom_atom(void *atom1, void *atom2);
+void __kbase_tlstream_tl_ndep_atom_atom(void *atom1, void *atom2);
+void __kbase_tlstream_tl_rdep_atom_atom(void *atom1, void *atom2);
+void __kbase_tlstream_tl_attrib_atom_config(
+		void *atom, u64 jd, u64 affinity, u32 config);
+void __kbase_tlstream_tl_attrib_as_config(
+		void *as, u64 transtab, u64 memattr, u64 transcfg);
+void __kbase_tlstream_jd_gpu_soft_reset(void *gpu);
+void __kbase_tlstream_aux_pm_state(u32 core_type, u64 state);
+void __kbase_tlstream_aux_issue_job_softstop(void *katom);
+void __kbase_tlstream_aux_job_softstop(u32 js_id);
+void __kbase_tlstream_aux_job_softstop_ex(struct kbase_jd_atom *katom);
+void __kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change);
+void __kbase_tlstream_aux_pagesalloc(u32 ctx_nr, u64 page_count);
+
+extern atomic_t kbase_tlstream_enabled;
+
+#define __TRACE_IF_ENABLED(trace_name, ...)                         \
+	do {                                                        \
+		int enabled = atomic_read(&kbase_tlstream_enabled); \
+		if (enabled)                                        \
+			__kbase_tlstream_##trace_name(__VA_ARGS__); \
+	} while (0)
+
+/*****************************************************************************/
+
 /**
  * kbase_tlstream_tl_summary_new_ctx - create context object in timeline
  *                                     summary
  * @context: name of the context object
  * @nr:      context number
+ * @tgid:    thread group ID
  *
  * Function emits a timeline message informing about context creation. Context
  * is created with context number (its attribute), that can be used to link
  * kbase context with userspace context.
  * This message is directed to timeline summary stream.
  */
-void kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr);
+#define kbase_tlstream_tl_summary_new_ctx(context, nr, tgid) \
+	__TRACE_IF_ENABLED(tl_summary_new_ctx, context, nr, tgid)
 
 /**
  * kbase_tlstream_tl_summary_new_gpu - create GPU object in timeline summary
@@ -125,7 +174,8 @@ void kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr);
  * created with two attributes: id and core count.
  * This message is directed to timeline summary stream.
  */
-void kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count);
+#define kbase_tlstream_tl_summary_new_gpu(gpu, id, core_count) \
+	__TRACE_IF_ENABLED(tl_summary_new_gpu, gpu, id, core_count)
 
 /**
  * kbase_tlstream_tl_summary_new_lpu - create LPU object in timeline summary
@@ -138,7 +188,8 @@ void kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count);
  * and function bearing information about this LPU abilities.
  * This message is directed to timeline summary stream.
  */
-void kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn);
+#define kbase_tlstream_tl_summary_new_lpu(lpu, nr, fn) \
+	__TRACE_IF_ENABLED(tl_summary_new_lpu, lpu, nr, fn)
 
 /**
  * kbase_tlstream_tl_summary_lifelink_lpu_gpu - lifelink LPU object to GPU
@@ -149,7 +200,8 @@ void kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn);
  * along with GPU object.
  * This message is directed to timeline summary stream.
  */
-void kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu);
+#define kbase_tlstream_tl_summary_lifelink_lpu_gpu(lpu, gpu) \
+	__TRACE_IF_ENABLED(tl_summary_lifelink_lpu_gpu, lpu, gpu)
 
 /**
  * kbase_tlstream_tl_summary_new_as - create address space object in timeline summary
@@ -161,7 +213,8 @@ void kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu);
  * address space.
  * This message is directed to timeline summary stream.
  */
-void kbase_tlstream_tl_summary_new_as(void *as, u32 nr);
+#define kbase_tlstream_tl_summary_new_as(as, nr) \
+	__TRACE_IF_ENABLED(tl_summary_new_as, as, nr)
 
 /**
  * kbase_tlstream_tl_summary_lifelink_as_gpu - lifelink address space object to GPU
@@ -172,18 +225,21 @@ void kbase_tlstream_tl_summary_new_as(void *as, u32 nr);
  * shall be deleted along with GPU object.
  * This message is directed to timeline summary stream.
  */
-void kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu);
+#define kbase_tlstream_tl_summary_lifelink_as_gpu(as, gpu) \
+	__TRACE_IF_ENABLED(tl_summary_lifelink_as_gpu, as, gpu)
 
 /**
  * kbase_tlstream_tl_new_ctx - create context object in timeline
  * @context: name of the context object
  * @nr:      context number
+ * @tgid:    thread group ID
  *
  * Function emits a timeline message informing about context creation. Context
  * is created with context number (its attribute), that can be used to link
  * kbase context with userspace context.
  */
-void kbase_tlstream_tl_new_ctx(void *context, u32 nr);
+#define kbase_tlstream_tl_new_ctx(context, nr, tgid) \
+	__TRACE_IF_ENABLED(tl_new_ctx, context, nr, tgid)
 
 /**
  * kbase_tlstream_tl_new_atom - create atom object in timeline
@@ -194,7 +250,8 @@ void kbase_tlstream_tl_new_ctx(void *context, u32 nr);
  * created with atom number (its attribute) that links it with actual work
  * bucket id understood by hardware.
  */
-void kbase_tlstream_tl_new_atom(void *atom, u32 nr);
+#define kbase_tlstream_tl_new_atom(atom, nr) \
+	__TRACE_IF_ENABLED(tl_new_atom, atom, nr)
 
 /**
  * kbase_tlstream_tl_del_ctx - destroy context object in timeline
@@ -203,7 +260,8 @@ void kbase_tlstream_tl_new_atom(void *atom, u32 nr);
  * Function emits a timeline message informing that context object ceased to
  * exist.
  */
-void kbase_tlstream_tl_del_ctx(void *context);
+#define kbase_tlstream_tl_del_ctx(context) \
+	__TRACE_IF_ENABLED(tl_del_ctx, context)
 
 /**
  * kbase_tlstream_tl_del_atom - destroy atom object in timeline
@@ -212,7 +270,8 @@ void kbase_tlstream_tl_del_ctx(void *context);
  * Function emits a timeline message informing that atom object ceased to
  * exist.
  */
-void kbase_tlstream_tl_del_atom(void *atom);
+#define kbase_tlstream_tl_del_atom(atom) \
+	__TRACE_IF_ENABLED(tl_del_atom, atom)
 
 /**
  * kbase_tlstream_tl_ret_ctx_lpu - retain context by LPU
@@ -222,7 +281,8 @@ void kbase_tlstream_tl_del_atom(void *atom);
  * Function emits a timeline message informing that context is being held
  * by LPU and must not be deleted unless it is released.
  */
-void kbase_tlstream_tl_ret_ctx_lpu(void *context, void *lpu);
+#define kbase_tlstream_tl_ret_ctx_lpu(context, lpu) \
+	__TRACE_IF_ENABLED(tl_ret_ctx_lpu, context, lpu)
 
 /**
  * kbase_tlstream_tl_ret_atom_ctx - retain atom by context
@@ -232,7 +292,8 @@ void kbase_tlstream_tl_ret_ctx_lpu(void *context, void *lpu);
  * Function emits a timeline message informing that atom object is being held
  * by context and must not be deleted unless it is released.
  */
-void kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context);
+#define kbase_tlstream_tl_ret_atom_ctx(atom, context) \
+	__TRACE_IF_ENABLED(tl_ret_atom_ctx, atom, context)
 
 /**
  * kbase_tlstream_tl_ret_atom_lpu - retain atom by LPU
@@ -243,8 +304,8 @@ void kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context);
  * Function emits a timeline message informing that atom object is being held
  * by LPU and must not be deleted unless it is released.
  */
-void kbase_tlstream_tl_ret_atom_lpu(
-		void *atom, void *lpu, const char *attrib_match_list);
+#define kbase_tlstream_tl_ret_atom_lpu(atom, lpu, attrib_match_list) \
+	__TRACE_IF_ENABLED(tl_ret_atom_lpu, atom, lpu, attrib_match_list)
 
 /**
  * kbase_tlstream_tl_nret_ctx_lpu - release context by LPU
@@ -254,7 +315,8 @@ void kbase_tlstream_tl_ret_atom_lpu(
  * Function emits a timeline message informing that context is being released
  * by LPU object.
  */
-void kbase_tlstream_tl_nret_ctx_lpu(void *context, void *lpu);
+#define kbase_tlstream_tl_nret_ctx_lpu(context, lpu) \
+	__TRACE_IF_ENABLED(tl_nret_ctx_lpu, context, lpu)
 
 /**
  * kbase_tlstream_tl_nret_atom_ctx - release atom by context
@@ -264,7 +326,8 @@ void kbase_tlstream_tl_nret_ctx_lpu(void *context, void *lpu);
  * Function emits a timeline message informing that atom object is being
  * released by context.
  */
-void kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context);
+#define kbase_tlstream_tl_nret_atom_ctx(atom, context) \
+	__TRACE_IF_ENABLED(tl_nret_atom_ctx, atom, context)
 
 /**
  * kbase_tlstream_tl_nret_atom_lpu - release atom by LPU
@@ -274,7 +337,8 @@ void kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context);
  * Function emits a timeline message informing that atom object is being
  * released by LPU.
  */
-void kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu);
+#define kbase_tlstream_tl_nret_atom_lpu(atom, lpu) \
+	__TRACE_IF_ENABLED(tl_nret_atom_lpu, atom, lpu)
 
 /**
  * kbase_tlstream_tl_ret_as_ctx - lifelink address space object to context
@@ -284,7 +348,8 @@ void kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu);
  * Function emits a timeline message informing that address space object
  * is being held by the context object.
  */
-void kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx);
+#define kbase_tlstream_tl_ret_as_ctx(as, ctx) \
+	__TRACE_IF_ENABLED(tl_ret_as_ctx, as, ctx)
 
 /**
  * kbase_tlstream_tl_nret_as_ctx - release address space by context
@@ -294,7 +359,8 @@ void kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx);
  * Function emits a timeline message informing that address space object
  * is being released by atom.
  */
-void kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx);
+#define kbase_tlstream_tl_nret_as_ctx(as, ctx) \
+	__TRACE_IF_ENABLED(tl_nret_as_ctx, as, ctx)
 
 /**
  * kbase_tlstream_tl_ret_atom_as - retain atom by address space
@@ -304,7 +370,8 @@ void kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx);
  * Function emits a timeline message informing that atom object is being held
  * by address space and must not be deleted unless it is released.
  */
-void kbase_tlstream_tl_ret_atom_as(void *atom, void *as);
+#define kbase_tlstream_tl_ret_atom_as(atom, as) \
+	__TRACE_IF_ENABLED(tl_ret_atom_as, atom, as)
 
 /**
  * kbase_tlstream_tl_nret_atom_as - release atom by address space
@@ -314,7 +381,8 @@ void kbase_tlstream_tl_ret_atom_as(void *atom, void *as);
  * Function emits a timeline message informing that atom object is being
  * released by address space.
  */
-void kbase_tlstream_tl_nret_atom_as(void *atom, void *as);
+#define kbase_tlstream_tl_nret_atom_as(atom, as) \
+	__TRACE_IF_ENABLED(tl_nret_atom_as, atom, as)
 
 /**
  * kbase_tlstream_tl_dep_atom_atom - parent atom depends on child atom
@@ -324,7 +392,30 @@ void kbase_tlstream_tl_nret_atom_as(void *atom, void *as);
  * Function emits a timeline message informing that parent atom waits for
  * child atom object to be completed before start its execution.
  */
-void kbase_tlstream_tl_dep_atom_atom(void *atom1, void *atom2);
+#define kbase_tlstream_tl_dep_atom_atom(atom1, atom2) \
+	__TRACE_IF_ENABLED(tl_dep_atom_atom, atom1, atom2)
+
+/**
+ * kbase_tlstream_tl_ndep_atom_atom - dependency between atoms resolved
+ * @atom1: name of the child atom object
+ * @atom2: name of the parent atom object that depended on child atom
+ *
+ * Function emits a timeline message informing that parent atom execution
+ * dependency on child atom has been resolved.
+ */
+#define kbase_tlstream_tl_ndep_atom_atom(atom1, atom2) \
+	__TRACE_IF_ENABLED(tl_ndep_atom_atom, atom1, atom2)
+
+/**
+ * kbase_tlstream_tl_rdep_atom_atom - information about already resolved dependency between atoms
+ * @atom1: name of the child atom object
+ * @atom2: name of the parent atom object that depended on child atom
+ *
+ * Function emits a timeline message informing that parent atom execution
+ * dependency on child atom has been resolved.
+ */
+#define kbase_tlstream_tl_rdep_atom_atom(atom1, atom2) \
+	__TRACE_IF_ENABLED(tl_rdep_atom_atom, atom1, atom2)
 
 /**
  * kbase_tlstream_tl_attrib_atom_config - atom job slot attributes
@@ -335,8 +426,8 @@ void kbase_tlstream_tl_dep_atom_atom(void *atom1, void *atom2);
  *
  * Function emits a timeline message containing atom attributes.
  */
-void kbase_tlstream_tl_attrib_atom_config(
-		void *atom, u64 jd, u64 affinity, u32 config);
+#define kbase_tlstream_tl_attrib_atom_config(atom, jd, affinity, config) \
+	__TRACE_IF_ENABLED(tl_attrib_atom_config, atom, jd, affinity, config)
 
 /**
  * kbase_tlstream_tl_attrib_as_config - address space attributes
@@ -347,8 +438,8 @@ void kbase_tlstream_tl_attrib_atom_config(
  *
  * Function emits a timeline message containing address space attributes.
  */
-void kbase_tlstream_tl_attrib_as_config(
-		void *as, u64 transtab, u64 memattr, u64 transcfg);
+#define kbase_tlstream_tl_attrib_as_config(as, transtab, memattr, transcfg) \
+	__TRACE_IF_ENABLED(tl_attrib_as_config, as, transtab, memattr, transcfg)
 
 /**
  * kbase_tlstream_jd_gpu_soft_reset - The GPU is being soft reset
@@ -357,20 +448,44 @@ void kbase_tlstream_tl_attrib_as_config(
  * This imperative tracepoint is specific to job dumping.
  * Function emits a timeline message indicating GPU soft reset.
  */
-void kbase_tlstream_jd_gpu_soft_reset(void *gpu);
+#define kbase_tlstream_jd_gpu_soft_reset(gpu) \
+	__TRACE_IF_ENABLED(jd_gpu_soft_reset, gpu)
 
 /**
  * kbase_tlstream_aux_pm_state - timeline message: power management state
  * @core_type: core type (shader, tiler, l2 cache, l3 cache)
  * @state:     64bits bitmask reporting power state of the cores (1-ON, 0-OFF)
  */
-void kbase_tlstream_aux_pm_state(u32 core_type, u64 state);
+#define kbase_tlstream_aux_pm_state(core_type, state) \
+	__TRACE_IF_ENABLED(aux_pm_state, core_type, state)
+
+/**
+ * kbase_tlstream_aux_issue_job_softstop - a soft-stop command is being issued
+ * @katom: the atom that is being soft-stopped
+ */
+#define kbase_tlstream_aux_issue_job_softstop(katom) \
+	__TRACE_IF_ENABLED(aux_issue_job_softstop, katom)
 
 /**
  * kbase_tlstream_aux_job_softstop - soft job stop occurred
  * @js_id: job slot id
  */
-void kbase_tlstream_aux_job_softstop(u32 js_id);
+#define kbase_tlstream_aux_job_softstop(js_id) \
+	__TRACE_IF_ENABLED(aux_job_softstop, js_id)
+
+/**
+ * kbase_tlstream_aux_job_softstop_ex - extra info about soft-stopped atom
+ * @katom: the atom that has been soft-stopped
+ *
+ * This trace point adds more details about the soft-stopped atom. These details
+ * can't be safely collected inside the interrupt handler, so we're doing it
+ * inside a worker.
+ *
+ * Note: this is not the same information that is recorded in the trace point;
+ * refer to __kbase_tlstream_aux_job_softstop_ex() for more details.
+ */
+#define kbase_tlstream_aux_job_softstop_ex(katom) \
+	__TRACE_IF_ENABLED(aux_job_softstop_ex, katom)
 
 /**
  * kbase_tlstream_aux_pagefault - timeline message: MMU page fault event
@@ -378,7 +493,8 @@ void kbase_tlstream_aux_job_softstop(u32 js_id);
  * @ctx_nr:            kernel context number
  * @page_count_change: number of pages to be added
  */
-void kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change);
+#define kbase_tlstream_aux_pagefault(ctx_nr, page_count_change) \
+	__TRACE_IF_ENABLED(aux_pagefault, ctx_nr, page_count_change)
 
 /**
  * kbase_tlstream_aux_pagesalloc - timeline message: total number of allocated
@@ -386,7 +502,8 @@ void kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change);
  * @ctx_nr:     kernel context number
  * @page_count: number of pages used by the context
  */
-void kbase_tlstream_aux_pagesalloc(u32 ctx_nr, u64 page_count);
+#define kbase_tlstream_aux_pagesalloc(ctx_nr, page_count) \
+	__TRACE_IF_ENABLED(aux_pagesalloc, ctx_nr, page_count)
 
 #endif /* _KBASE_TLSTREAM_H */
 
diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_trace_timeline.c b/drivers/gpu/arm/midgard_for_linux/mali_kbase_trace_timeline.c
index aac9858875ad..a606ae810656 100644
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_trace_timeline.c
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_trace_timeline.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -102,7 +102,7 @@ static const struct file_operations kbasep_trace_timeline_debugfs_fops = {
 	.open = kbasep_trace_timeline_debugfs_open,
 	.read = seq_read,
 	.llseek = seq_lseek,
-	.release = seq_release_private,
+	.release = seq_release,
 };
 
 void kbasep_trace_timeline_debugfs_init(struct kbase_device *kbdev)
diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_trace_timeline.h b/drivers/gpu/arm/midgard_for_linux/mali_kbase_trace_timeline.h
index d92caf054804..22a36494e72e 100644
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_trace_timeline.h
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_trace_timeline.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -213,7 +213,6 @@ void kbasep_trace_timeline_debugfs_init(struct kbase_device *kbdev);
 				js, _producerof_atom_number_completed);      \
 	} while (0)
 
-
 /** Trace beginning/end of a call to kbase_pm_check_transitions_nolock from a
  * certin caller */
 #define KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code)                      \
@@ -319,7 +318,6 @@ void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev);
 
 #define KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, count) CSTD_NOP()
 
-
 static inline void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx,
 		struct kbase_jd_atom *katom, int js)
 {
diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_uku.h b/drivers/gpu/arm/midgard_for_linux/mali_kbase_uku.h
index 39514685b752..e880d9663d0e 100644
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_uku.h
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_uku.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2008-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2008-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -45,9 +45,20 @@
  * 10.1:
  * - Do mmap in kernel for SAME_VA memory allocations rather then
  *   calling back into the kernel as a 2nd stage of the allocation request.
+ *
+ * 10.2:
+ * - Add KBASE_FUNC_MEM_JIT_INIT which allows clients to request a custom VA
+ *   region for use with JIT (ignored on 32-bit platforms)
+ *
+ * 10.3:
+ * - base_jd_core_req typedef-ed to u32 (instead of to u16)
+ * - two flags added: BASE_JD_REQ_SKIP_CACHE_STAT / _END
+ *
+ * 10.4:
+ * - Removed KBASE_FUNC_EXT_BUFFER_LOCK used only in internal tests
  */
 #define BASE_UK_VERSION_MAJOR 10
-#define BASE_UK_VERSION_MINOR 1
+#define BASE_UK_VERSION_MINOR 4
 
 struct kbase_uk_mem_alloc {
 	union uk_header header;
@@ -292,16 +303,6 @@ struct kbase_uk_model_control_params {
 };
 #endif				/* SUPPORT_MALI_NO_MALI */
 
-#define KBASE_MAXIMUM_EXT_RESOURCES       255
-
-struct kbase_uk_ext_buff_kds_data {
-	union uk_header header;
-	union kbase_pointer external_resource;
-	union kbase_pointer file_descriptor;
-	u32 num_res;		/* limited to KBASE_MAXIMUM_EXT_RESOURCES */
-	u32 padding;
-};
-
 #ifdef BASE_LEGACY_UK8_SUPPORT
 struct kbase_uk_keep_gpu_powered {
 	union uk_header header;
@@ -327,8 +328,8 @@ struct kbase_uk_context_id {
 	int id;
 };
 
-#if (defined(MALI_KTLSTREAM_ENABLED) && MALI_KTLSTREAM_ENABLED) || \
-	defined(CONFIG_MALI_MIPE_ENABLED)
+#if (defined(MALI_MIPE_ENABLED) && MALI_MIPE_ENABLED) || \
+	!defined(MALI_MIPE_ENABLED)
 /**
  * struct kbase_uk_tlstream_acquire - User/Kernel space data exchange structure
  * @header: UK structure header
@@ -396,7 +397,7 @@ struct kbase_uk_tlstream_stats {
 	u32 bytes_generated;
 };
 #endif /* MALI_UNIT_TEST */
-#endif /* MALI_KTLSTREAM_ENABLED */
+#endif /* MALI_MIPE_ENABLED */
 
 /**
  * struct struct kbase_uk_prfcnt_value for the KBASE_FUNC_SET_PRFCNT_VALUES ioctl
@@ -411,6 +412,38 @@ struct kbase_uk_prfcnt_values {
 	u32 size;
 };
 
+/**
+ * struct kbase_uk_soft_event_update - User/Kernel space data exchange structure
+ * @header:     UK structure header
+ * @evt:        the GPU address containing the event
+ * @new_status: the new event status, must be either BASE_JD_SOFT_EVENT_SET or
+ *              BASE_JD_SOFT_EVENT_RESET
+ * @flags:      reserved for future use, must be set to 0
+ *
+ * This structure is used to update the status of a software event. If the
+ * event's status is set to BASE_JD_SOFT_EVENT_SET, any job currently waiting
+ * on this event will complete.
+ */
+struct kbase_uk_soft_event_update {
+	union uk_header header;
+	/* IN */
+	u64 evt;
+	u32 new_status;
+	u32 flags;
+};
+
+/**
+ * struct kbase_uk_mem_jit_init - User/Kernel space data exchange structure
+ * @header:     UK structure header
+ * @va_pages:   Number of virtual pages required for JIT
+ *
+ * This structure is used when requesting initialization of JIT.
+ */
+struct kbase_uk_mem_jit_init {
+	union uk_header header;
+	/* IN */
+	u64 va_pages;
+};
 
 enum kbase_uk_function_id {
 	KBASE_FUNC_MEM_ALLOC = (UK_FUNC_ID + 0),
@@ -438,7 +471,6 @@ enum kbase_uk_function_id {
 	KBASE_FUNC_FIND_CPU_OFFSET = (UK_FUNC_ID + 15),
 
 	KBASE_FUNC_GET_VERSION = (UK_FUNC_ID + 16),
-	KBASE_FUNC_EXT_BUFFER_LOCK = (UK_FUNC_ID + 17),
 	KBASE_FUNC_SET_FLAGS = (UK_FUNC_ID + 18),
 
 	KBASE_FUNC_SET_TEST_DATA = (UK_FUNC_ID + 19),
@@ -463,15 +495,15 @@ enum kbase_uk_function_id {
 
 	KBASE_FUNC_GET_CONTEXT_ID = (UK_FUNC_ID + 31),
 
-#if (defined(MALI_KTLSTREAM_ENABLED) && MALI_KTLSTREAM_ENABLED) || \
-	defined(CONFIG_MALI_MIPE_ENABLED)
+#if (defined(MALI_MIPE_ENABLED) && MALI_MIPE_ENABLED) || \
+	!defined(MALI_MIPE_ENABLED)
 	KBASE_FUNC_TLSTREAM_ACQUIRE = (UK_FUNC_ID + 32),
 #if MALI_UNIT_TEST
 	KBASE_FUNC_TLSTREAM_TEST = (UK_FUNC_ID + 33),
 	KBASE_FUNC_TLSTREAM_STATS = (UK_FUNC_ID + 34),
 #endif /* MALI_UNIT_TEST */
 	KBASE_FUNC_TLSTREAM_FLUSH = (UK_FUNC_ID + 35),
-#endif /* MALI_KTLSTREAM_ENABLED */
+#endif /* MALI_MIPE_ENABLED */
 
 	KBASE_FUNC_HWCNT_READER_SETUP = (UK_FUNC_ID + 36),
 
@@ -479,6 +511,10 @@ enum kbase_uk_function_id {
 	KBASE_FUNC_SET_PRFCNT_VALUES = (UK_FUNC_ID + 37),
 #endif
 
+	KBASE_FUNC_SOFT_EVENT_UPDATE = (UK_FUNC_ID + 38),
+
+	KBASE_FUNC_MEM_JIT_INIT = (UK_FUNC_ID + 39),
+
 	KBASE_FUNC_MAX
 };
 
diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_vinstr.c b/drivers/gpu/arm/midgard_for_linux/mali_kbase_vinstr.c
index d3d27e2958d7..bd6095f77480 100644
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_vinstr.c
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_vinstr.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -28,8 +28,10 @@
 #include <linux/wait.h>
 
 #include <mali_kbase.h>
+#include <mali_kbase_hwaccess_instr.h>
 #include <mali_kbase_hwcnt_reader.h>
 #include <mali_kbase_mem_linux.h>
+#include <mali_kbase_tlstream.h>
 
 /*****************************************************************************/
 
@@ -61,6 +63,14 @@ enum {
 	JM_HWCNT_BM
 };
 
+enum vinstr_state {
+	VINSTR_IDLE,
+	VINSTR_DUMPING,
+	VINSTR_SUSPENDING,
+	VINSTR_SUSPENDED,
+	VINSTR_RESUMING
+};
+
 /**
  * struct kbase_vinstr_context - vinstr context per device
  * @lock:              protects the entire vinstr context
@@ -74,7 +84,12 @@ enum {
  *                     with hardware
  * @reprogram:         when true, reprogram hwcnt block with the new set of
  *                     counters
- * @suspended:         when true, the context has been suspended
+ * @state:             vinstr state
+ * @state_lock:        protects information about vinstr state
+ * @suspend_waitq:     notification queue to trigger state re-validation
+ * @suspend_cnt:       reference counter of vinstr's suspend state
+ * @suspend_work:      worker to execute on entering suspended state
+ * @resume_work:       worker to execute on leaving suspended state
  * @nclients:          number of attached clients, pending or otherwise
  * @waiting_clients:   head of list of clients being periodically sampled
  * @idle_clients:      head of list of clients being idle
@@ -94,7 +109,13 @@ struct kbase_vinstr_context {
 	size_t                   dump_size;
 	u32                      bitmap[4];
 	bool                     reprogram;
-	bool                     suspended;
+
+	enum vinstr_state        state;
+	struct spinlock          state_lock;
+	wait_queue_head_t        suspend_waitq;
+	unsigned int             suspend_cnt;
+	struct work_struct       suspend_work;
+	struct work_struct       resume_work;
 
 	u32                      nclients;
 	struct list_head         waiting_clients;
@@ -189,7 +210,10 @@ static const struct file_operations vinstr_client_fops = {
 
 static int enable_hwcnt(struct kbase_vinstr_context *vinstr_ctx)
 {
+	struct kbase_context *kctx = vinstr_ctx->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
 	struct kbase_uk_hwcnt_setup setup;
+	int err;
 
 	setup.dump_buffer = vinstr_ctx->gpu_va;
 	setup.jm_bm       = vinstr_ctx->bitmap[JM_HWCNT_BM];
@@ -197,12 +221,46 @@ static int enable_hwcnt(struct kbase_vinstr_context *vinstr_ctx)
 	setup.shader_bm   = vinstr_ctx->bitmap[SHADER_HWCNT_BM];
 	setup.mmu_l2_bm   = vinstr_ctx->bitmap[MMU_L2_HWCNT_BM];
 
-	return kbase_instr_hwcnt_enable(vinstr_ctx->kctx, &setup);
+	/* Mark the context as active so the GPU is kept turned on */
+	/* A suspend won't happen here, because we're in a syscall from a
+	 * userspace thread. */
+	kbase_pm_context_active(kbdev);
+
+	/* Schedule the context in */
+	kbasep_js_schedule_privileged_ctx(kbdev, kctx);
+	err = kbase_instr_hwcnt_enable_internal(kbdev, kctx, &setup);
+	if (err) {
+		/* Release the context. This had its own Power Manager Active
+		 * reference */
+		kbasep_js_release_privileged_ctx(kbdev, kctx);
+
+		/* Also release our Power Manager Active reference */
+		kbase_pm_context_idle(kbdev);
+	}
+
+	return err;
 }
 
 static void disable_hwcnt(struct kbase_vinstr_context *vinstr_ctx)
 {
-	kbase_instr_hwcnt_disable(vinstr_ctx->kctx);
+	struct kbase_context *kctx = vinstr_ctx->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+	int err;
+
+	err = kbase_instr_hwcnt_disable_internal(kctx);
+	if (err) {
+		dev_warn(kbdev->dev, "Failed to disable HW counters (ctx:%p)",
+				kctx);
+		return;
+	}
+
+	/* Release the context. This had its own Power Manager Active reference. */
+	kbasep_js_release_privileged_ctx(kbdev, kctx);
+
+	/* Also release our Power Manager Active reference. */
+	kbase_pm_context_idle(kbdev);
+
+	dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p", kctx);
 }
 
 static int reprogram_hwcnt(struct kbase_vinstr_context *vinstr_ctx)
@@ -309,6 +367,10 @@ static void kbasep_vinstr_unmap_kernel_dump_buffer(
  */
 static int kbasep_vinstr_create_kctx(struct kbase_vinstr_context *vinstr_ctx)
 {
+	struct kbase_device *kbdev = vinstr_ctx->kbdev;
+	struct kbasep_kctx_list_element *element;
+	unsigned long flags;
+	bool enable_backend = false;
 	int err;
 
 	vinstr_ctx->kctx = kbase_create_context(vinstr_ctx->kbdev, true);
@@ -324,10 +386,48 @@ static int kbasep_vinstr_create_kctx(struct kbase_vinstr_context *vinstr_ctx)
 		return err;
 	}
 
-	err = enable_hwcnt(vinstr_ctx);
+	/* Add kernel context to list of contexts associated with device. */
+	element = kzalloc(sizeof(*element), GFP_KERNEL);
+	if (element) {
+		element->kctx = vinstr_ctx->kctx;
+		mutex_lock(&kbdev->kctx_list_lock);
+		list_add(&element->link, &kbdev->kctx_list);
+
+		/* Inform timeline client about new context.
+		 * Do this while holding the lock to avoid the tracepoint
+		 * being created in both the body and summary streams. */
+		kbase_tlstream_tl_new_ctx(
+				vinstr_ctx->kctx,
+				(u32)(vinstr_ctx->kctx->id),
+				(u32)(vinstr_ctx->kctx->tgid));
+
+		mutex_unlock(&kbdev->kctx_list_lock);
+	} else {
+		/* Don't treat this as a fail - just warn about it. */
+		dev_warn(kbdev->dev,
+				"couldn't add kctx to kctx_list\n");
+	}
+
+	/* Don't enable hardware counters if vinstr is suspended.
+	 * Note that the vinstr resume code runs under the vinstr context
+	 * lock; the lower layer will be enabled as needed on resume. */
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	if (VINSTR_IDLE == vinstr_ctx->state)
+		enable_backend = true;
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+	if (enable_backend)
+		err = enable_hwcnt(vinstr_ctx);
+
 	if (err) {
 		kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx);
 		kbase_destroy_context(vinstr_ctx->kctx);
+		if (element) {
+			mutex_lock(&kbdev->kctx_list_lock);
+			list_del(&element->link);
+			kfree(element);
+			mutex_unlock(&kbdev->kctx_list_lock);
+		}
+		kbase_tlstream_tl_del_ctx(vinstr_ctx->kctx);
 		vinstr_ctx->kctx = NULL;
 		return err;
 	}
@@ -340,6 +440,13 @@ static int kbasep_vinstr_create_kctx(struct kbase_vinstr_context *vinstr_ctx)
 		disable_hwcnt(vinstr_ctx);
 		kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx);
 		kbase_destroy_context(vinstr_ctx->kctx);
+		if (element) {
+			mutex_lock(&kbdev->kctx_list_lock);
+			list_del(&element->link);
+			kfree(element);
+			mutex_unlock(&kbdev->kctx_list_lock);
+		}
+		kbase_tlstream_tl_del_ctx(vinstr_ctx->kctx);
 		vinstr_ctx->kctx = NULL;
 		return -EFAULT;
 	}
@@ -353,11 +460,34 @@ static int kbasep_vinstr_create_kctx(struct kbase_vinstr_context *vinstr_ctx)
  */
 static void kbasep_vinstr_destroy_kctx(struct kbase_vinstr_context *vinstr_ctx)
 {
+	struct kbase_device             *kbdev = vinstr_ctx->kbdev;
+	struct kbasep_kctx_list_element *element;
+	struct kbasep_kctx_list_element *tmp;
+	bool                            found = false;
+
 	/* Release hw counters dumping resources. */
 	vinstr_ctx->thread = NULL;
 	disable_hwcnt(vinstr_ctx);
 	kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx);
 	kbase_destroy_context(vinstr_ctx->kctx);
+
+	/* Remove kernel context from the device's contexts list. */
+	mutex_lock(&kbdev->kctx_list_lock);
+	list_for_each_entry_safe(element, tmp, &kbdev->kctx_list, link) {
+		if (element->kctx == vinstr_ctx->kctx) {
+			list_del(&element->link);
+			kfree(element);
+			found = true;
+		}
+	}
+	mutex_unlock(&kbdev->kctx_list_lock);
+
+	if (!found)
+		dev_warn(kbdev->dev, "kctx not in kctx_list\n");
+
+	/* Inform timeline client about context destruction. */
+	kbase_tlstream_tl_del_ctx(vinstr_ctx->kctx);
+
 	vinstr_ctx->kctx = NULL;
 }
 
@@ -379,9 +509,10 @@ static struct kbase_vinstr_client *kbasep_vinstr_attach_client(
 	struct kbase_vinstr_client *cli;
 
 	KBASE_DEBUG_ASSERT(vinstr_ctx);
-	KBASE_DEBUG_ASSERT(buffer_count >= 0);
-	KBASE_DEBUG_ASSERT(buffer_count <= MAX_BUFFER_COUNT);
-	KBASE_DEBUG_ASSERT(!(buffer_count & (buffer_count - 1)));
+
+	if (buffer_count > MAX_BUFFER_COUNT
+	    || (buffer_count & (buffer_count - 1)))
+		return NULL;
 
 	cli = kzalloc(sizeof(*cli), GFP_KERNEL);
 	if (!cli)
@@ -435,7 +566,7 @@ static struct kbase_vinstr_client *kbasep_vinstr_attach_client(
 
 		/* Allocate required number of dumping buffers. */
 		cli->dump_buffers = (char *)__get_free_pages(
-				GFP_KERNEL,
+				GFP_KERNEL | __GFP_ZERO,
 				get_order(cli->dump_size * cli->buffer_count));
 		if (!cli->dump_buffers)
 			goto error;
@@ -802,6 +933,7 @@ static void kbasep_vinstr_add_dump_request(
 static int kbasep_vinstr_collect_and_accumulate(
 		struct kbase_vinstr_context *vinstr_ctx, u64 *timestamp)
 {
+	unsigned long flags;
 	int rcode;
 
 #ifdef CONFIG_MALI_NO_MALI
@@ -809,6 +941,15 @@ static int kbasep_vinstr_collect_and_accumulate(
 	gpu_model_set_dummy_prfcnt_base_cpu(vinstr_ctx->cpu_va);
 #endif
 
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	if (VINSTR_IDLE != vinstr_ctx->state) {
+		spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+		return -EAGAIN;
+	} else {
+		vinstr_ctx->state = VINSTR_DUMPING;
+	}
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
 	/* Request HW counters dump.
 	 * Disable preemption to make dump timestamp more accurate. */
 	preempt_disable();
@@ -820,6 +961,21 @@ static int kbasep_vinstr_collect_and_accumulate(
 		rcode = kbase_instr_hwcnt_wait_for_dump(vinstr_ctx->kctx);
 	WARN_ON(rcode);
 
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	switch (vinstr_ctx->state)
+	{
+	case VINSTR_SUSPENDING:
+		schedule_work(&vinstr_ctx->suspend_work);
+		break;
+	case VINSTR_DUMPING:
+		vinstr_ctx->state = VINSTR_IDLE;
+		wake_up_all(&vinstr_ctx->suspend_waitq);
+		break;
+	default:
+		break;
+	}
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
 	/* Accumulate values of collected counters. */
 	if (!rcode)
 		accum_clients(vinstr_ctx);
@@ -907,6 +1063,20 @@ static int kbasep_vinstr_fill_dump_buffer_kernel(
 static void kbasep_vinstr_reprogram(
 		struct kbase_vinstr_context *vinstr_ctx)
 {
+	unsigned long flags;
+	bool suspended = false;
+
+	/* Don't enable hardware counters if vinstr is suspended. */
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	if (VINSTR_IDLE != vinstr_ctx->state)
+		suspended = true;
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+	if (suspended)
+		return;
+
+	/* The change to the suspended state is done while holding the vinstr
+	 * context lock, so the code below will not re-enable instrumentation. */
+
 	if (vinstr_ctx->reprogram) {
 		struct kbase_vinstr_client *iter;
 
@@ -1011,6 +1181,7 @@ static int kbasep_vinstr_service_task(void *data)
 	while (!kthread_should_stop()) {
 		struct kbase_vinstr_client *cli = NULL;
 		struct kbase_vinstr_client *tmp;
+		int                        rcode;
 
 		u64              timestamp = kbasep_vinstr_get_timestamp();
 		u64              dump_time = 0;
@@ -1053,7 +1224,8 @@ static int kbasep_vinstr_service_task(void *data)
 			continue;
 		}
 
-		kbasep_vinstr_collect_and_accumulate(vinstr_ctx, &timestamp);
+		rcode = kbasep_vinstr_collect_and_accumulate(vinstr_ctx,
+				&timestamp);
 
 		INIT_LIST_HEAD(&expired_requests);
 
@@ -1082,10 +1254,11 @@ static int kbasep_vinstr_service_task(void *data)
 			/* Expect only periodically sampled clients. */
 			BUG_ON(0 == cli->dump_interval);
 
-			kbasep_vinstr_update_client(
-					cli,
-					timestamp,
-					BASE_HWCNT_READER_EVENT_PERIODIC);
+			if (!rcode)
+				kbasep_vinstr_update_client(
+						cli,
+						timestamp,
+						BASE_HWCNT_READER_EVENT_PERIODIC);
 
 			/* Set new dumping time. Drop missed probing times. */
 			do {
@@ -1214,11 +1387,6 @@ static long kbasep_vinstr_hwcnt_reader_ioctl_set_interval(
 
 	mutex_lock(&vinstr_ctx->lock);
 
-	if (vinstr_ctx->suspended) {
-		mutex_unlock(&vinstr_ctx->lock);
-		return -EBUSY;
-	}
-
 	list_del(&cli->list);
 
 	cli->dump_interval = interval;
@@ -1455,7 +1623,8 @@ static int kbasep_vinstr_hwcnt_reader_mmap(struct file *filp,
 		struct vm_area_struct *vma)
 {
 	struct kbase_vinstr_client *cli;
-	size_t                     size;
+	unsigned long size, addr, pfn, offset;
+	unsigned long vm_size = vma->vm_end - vma->vm_start;
 
 	KBASE_DEBUG_ASSERT(filp);
 	KBASE_DEBUG_ASSERT(vma);
@@ -1464,14 +1633,24 @@ static int kbasep_vinstr_hwcnt_reader_mmap(struct file *filp,
 	KBASE_DEBUG_ASSERT(cli);
 
 	size = cli->buffer_count * cli->dump_size;
-	if (vma->vm_end - vma->vm_start > size)
-		return -ENOMEM;
+
+	if (vma->vm_pgoff > (size >> PAGE_SHIFT))
+		return -EINVAL;
+	if (vm_size > size)
+		return -EINVAL;
+
+	offset = vma->vm_pgoff << PAGE_SHIFT;
+	if ((vm_size + offset) > size)
+		return -EINVAL;
+
+	addr = __pa((unsigned long)cli->dump_buffers + offset);
+	pfn = addr >> PAGE_SHIFT;
 
 	return remap_pfn_range(
 			vma,
 			vma->vm_start,
-			__pa((unsigned long)cli->dump_buffers) >> PAGE_SHIFT,
-			size,
+			pfn,
+			vm_size,
 			vma->vm_page_prot);
 }
 
@@ -1498,6 +1677,84 @@ static int kbasep_vinstr_hwcnt_reader_release(struct inode *inode,
 
 /*****************************************************************************/
 
+/**
+ * kbasep_vinstr_kick_scheduler - trigger scheduler cycle
+ * @kbdev: pointer to kbase device structure
+ */
+static void kbasep_vinstr_kick_scheduler(struct kbase_device *kbdev)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	unsigned long flags;
+
+	down(&js_devdata->schedule_sem);
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+	kbase_jm_kick_all(kbdev);
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+	up(&js_devdata->schedule_sem);
+}
+
+/**
+ * kbasep_vinstr_suspend_worker - worker suspending vinstr module
+ * @data: pointer to work structure
+ */
+static void kbasep_vinstr_suspend_worker(struct work_struct *data)
+{
+	struct kbase_vinstr_context *vinstr_ctx;
+	unsigned long flags;
+
+	vinstr_ctx = container_of(data, struct kbase_vinstr_context,
+			suspend_work);
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	if (vinstr_ctx->kctx)
+		disable_hwcnt(vinstr_ctx);
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	vinstr_ctx->state = VINSTR_SUSPENDED;
+	wake_up_all(&vinstr_ctx->suspend_waitq);
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+	mutex_unlock(&vinstr_ctx->lock);
+
+	/* Kick the GPU scheduler to allow entering protected mode.
+	 * This must happen after vinstr has been suspended. */
+	kbasep_vinstr_kick_scheduler(vinstr_ctx->kbdev);
+}
+
+/**
+ * kbasep_vinstr_resume_worker - worker resuming vinstr module
+ * @data: pointer to work structure
+ */
+static void kbasep_vinstr_resume_worker(struct work_struct *data)
+{
+	struct kbase_vinstr_context *vinstr_ctx;
+	unsigned long flags;
+
+	vinstr_ctx = container_of(data, struct kbase_vinstr_context,
+			resume_work);
+
+	mutex_lock(&vinstr_ctx->lock);
+
+	if (vinstr_ctx->kctx)
+		enable_hwcnt(vinstr_ctx);
+
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	vinstr_ctx->state = VINSTR_IDLE;
+	wake_up_all(&vinstr_ctx->suspend_waitq);
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
+
+	mutex_unlock(&vinstr_ctx->lock);
+
+	/* Kick the GPU scheduler to allow entering protected mode.
+	 * Note that the scheduler state machine might have requested re-entry
+	 * to protected mode before vinstr was resumed.
+	 * This must happen after vinstr has been released. */
+	kbasep_vinstr_kick_scheduler(vinstr_ctx->kbdev);
+}
+
+/*****************************************************************************/
+
 struct kbase_vinstr_context *kbase_vinstr_init(struct kbase_device *kbdev)
 {
 	struct kbase_vinstr_context *vinstr_ctx;
@@ -1509,8 +1766,14 @@ struct kbase_vinstr_context *kbase_vinstr_init(struct kbase_device *kbdev)
 	INIT_LIST_HEAD(&vinstr_ctx->idle_clients);
 	INIT_LIST_HEAD(&vinstr_ctx->waiting_clients);
 	mutex_init(&vinstr_ctx->lock);
+	spin_lock_init(&vinstr_ctx->state_lock);
 	vinstr_ctx->kbdev = kbdev;
 	vinstr_ctx->thread = NULL;
+	vinstr_ctx->state = VINSTR_IDLE;
+	vinstr_ctx->suspend_cnt = 0;
+	INIT_WORK(&vinstr_ctx->suspend_work, kbasep_vinstr_suspend_worker);
+	INIT_WORK(&vinstr_ctx->resume_work, kbasep_vinstr_resume_worker);
+	init_waitqueue_head(&vinstr_ctx->suspend_waitq);
 
 	atomic_set(&vinstr_ctx->request_pending, 0);
 	init_waitqueue_head(&vinstr_ctx->waitq);
@@ -1526,6 +1789,10 @@ void kbase_vinstr_term(struct kbase_vinstr_context *vinstr_ctx)
 	if (vinstr_ctx->thread)
 		kthread_stop(vinstr_ctx->thread);
 
+	/* Wait for workers. */
+	flush_work(&vinstr_ctx->suspend_work);
+	flush_work(&vinstr_ctx->resume_work);
+
 	while (1) {
 		struct list_head *list = &vinstr_ctx->idle_clients;
 
@@ -1658,11 +1925,6 @@ int kbase_vinstr_hwc_dump(struct kbase_vinstr_client *cli,
 
 	mutex_lock(&vinstr_ctx->lock);
 
-	if (vinstr_ctx->suspended) {
-		rcode = -EBUSY;
-		goto exit;
-	}
-
 	if (event_mask & cli->event_mask) {
 		rcode = kbasep_vinstr_collect_and_accumulate(
 				vinstr_ctx,
@@ -1698,11 +1960,6 @@ int kbase_vinstr_hwc_clear(struct kbase_vinstr_client *cli)
 
 	mutex_lock(&vinstr_ctx->lock);
 
-	if (vinstr_ctx->suspended) {
-		rcode = -EBUSY;
-		goto exit;
-	}
-
 	rcode = kbasep_vinstr_collect_and_accumulate(vinstr_ctx, &unused);
 	if (rcode)
 		goto exit;
@@ -1719,40 +1976,66 @@ exit:
 	return rcode;
 }
 
-void kbase_vinstr_hwc_suspend(struct kbase_vinstr_context *vinstr_ctx)
+int kbase_vinstr_try_suspend(struct kbase_vinstr_context *vinstr_ctx)
 {
-	u64 unused;
+	unsigned long flags;
+	int ret = -EAGAIN;
 
 	KBASE_DEBUG_ASSERT(vinstr_ctx);
 
-	mutex_lock(&vinstr_ctx->lock);
-	if (!vinstr_ctx->nclients || vinstr_ctx->suspended) {
-		mutex_unlock(&vinstr_ctx->lock);
-		return;
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	switch (vinstr_ctx->state) {
+	case VINSTR_SUSPENDED:
+		vinstr_ctx->suspend_cnt++;
+		/* overflow shall not happen */
+		BUG_ON(0 == vinstr_ctx->suspend_cnt);
+		ret = 0;
+		break;
+
+	case VINSTR_IDLE:
+		vinstr_ctx->state = VINSTR_SUSPENDING;
+		schedule_work(&vinstr_ctx->suspend_work);
+		break;
+
+	case VINSTR_DUMPING:
+		vinstr_ctx->state = VINSTR_SUSPENDING;
+		break;
+
+	case VINSTR_SUSPENDING:
+		/* fall through */
+	case VINSTR_RESUMING:
+		break;
+
+	default:
+		BUG();
+		break;
 	}
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
 
-	kbasep_vinstr_collect_and_accumulate(vinstr_ctx, &unused);
-	vinstr_ctx->suspended = true;
-	vinstr_ctx->suspended_clients = vinstr_ctx->waiting_clients;
-	INIT_LIST_HEAD(&vinstr_ctx->waiting_clients);
-	mutex_unlock(&vinstr_ctx->lock);
+	return ret;
 }
 
-void kbase_vinstr_hwc_resume(struct kbase_vinstr_context *vinstr_ctx)
+void kbase_vinstr_suspend(struct kbase_vinstr_context *vinstr_ctx)
 {
+	wait_event(vinstr_ctx->suspend_waitq,
+			(0 == kbase_vinstr_try_suspend(vinstr_ctx)));
+}
+
+void kbase_vinstr_resume(struct kbase_vinstr_context *vinstr_ctx)
+{
+	unsigned long flags;
+
 	KBASE_DEBUG_ASSERT(vinstr_ctx);
 
-	mutex_lock(&vinstr_ctx->lock);
-	if (!vinstr_ctx->nclients || !vinstr_ctx->suspended) {
-		mutex_unlock(&vinstr_ctx->lock);
-		return;
+	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	BUG_ON(VINSTR_SUSPENDING == vinstr_ctx->state);
+	if (VINSTR_SUSPENDED == vinstr_ctx->state) {
+		BUG_ON(0 == vinstr_ctx->suspend_cnt);
+		vinstr_ctx->suspend_cnt--;
+		if (0 == vinstr_ctx->suspend_cnt) {
+			vinstr_ctx->state = VINSTR_RESUMING;
+			schedule_work(&vinstr_ctx->resume_work);
+		}
 	}
-
-	vinstr_ctx->suspended = false;
-	vinstr_ctx->waiting_clients = vinstr_ctx->suspended_clients;
-	vinstr_ctx->reprogram = true;
-	kbasep_vinstr_reprogram(vinstr_ctx);
-	atomic_set(&vinstr_ctx->request_pending, 1);
-	wake_up_all(&vinstr_ctx->waitq);
-	mutex_unlock(&vinstr_ctx->lock);
+	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
 }
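
The suspend/resume pair above is meant to bracket work that must not race with
hardware counter collection. A minimal usage sketch (illustrative only, not
part of the patch; the do_gpu_reset() helper is hypothetical, and the
vinstr_ctx field access assumes the kbase_device layout used elsewhere in this
driver):

static void example_reset_with_vinstr_paused(struct kbase_device *kbdev)
{
	/* Blocks until vinstr has reached the VINSTR_SUSPENDED state. */
	kbase_vinstr_suspend(kbdev->vinstr_ctx);

	do_gpu_reset(kbdev);	/* hypothetical helper */

	/* Drops the suspend count; schedules resume_work when it hits zero. */
	kbase_vinstr_resume(kbdev->vinstr_ctx);
}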
diff --git a/drivers/gpu/arm/midgard_for_linux/mali_kbase_vinstr.h b/drivers/gpu/arm/midgard_for_linux/mali_kbase_vinstr.h
index d32462aec653..6207d25aef06 100644
--- a/drivers/gpu/arm/midgard_for_linux/mali_kbase_vinstr.h
+++ b/drivers/gpu/arm/midgard_for_linux/mali_kbase_vinstr.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -103,18 +103,39 @@ int kbase_vinstr_hwc_dump(
 int kbase_vinstr_hwc_clear(struct kbase_vinstr_client *cli);
 
 /**
- * kbase_vinstr_hwc_suspend - suspends hardware counter collection for
- *                            a given kbase context
+ * kbase_vinstr_try_suspend - try suspending operation of a given vinstr context
  * @vinstr_ctx: vinstr context
+ *
+ * Return: 0 on success, or -EAGAIN if the state change is still in progress
+ *
+ * Warning: This API call is non-generic. It is meant to be used only by
+ *          the job scheduler state machine.
+ *
+ * This function initiates the vinstr switch to the suspended state. Once it
+ * has been called, vinstr enters the suspending state. If the function
+ * returns a non-zero value, the state switch is not yet complete and the
+ * function must be called again. On the state switch vinstr will trigger a
+ * job scheduler state machine cycle.
+ */
+int kbase_vinstr_try_suspend(struct kbase_vinstr_context *vinstr_ctx);
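
As the kernel-doc above says, kbase_vinstr_try_suspend() is intended for the
job scheduler state machine, which cannot block. A hedged sketch of the retry
pattern the contract implies (the requeue_scheduler_work() helper is
hypothetical):

static void example_scheduler_step(struct kbase_device *kbdev)
{
	if (kbase_vinstr_try_suspend(kbdev->vinstr_ctx)) {
		/* Suspend still in progress; vinstr kicks the scheduler
		 * again once it settles, so simply defer and retry. */
		requeue_scheduler_work(kbdev);	/* hypothetical */
		return;
	}

	/* vinstr is now suspended; safe to proceed, e.g. into
	 * protected mode. */
}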
+
+/**
+ * kbase_vinstr_suspend - suspends operation of a given vinstr context
+ * @vinstr_ctx: vinstr context
+ *
+ * This function initiates the vinstr switch to the suspended state, then
+ * blocks until the operation is complete.
  */
-void kbase_vinstr_hwc_suspend(struct kbase_vinstr_context *vinstr_ctx);
+void kbase_vinstr_suspend(struct kbase_vinstr_context *vinstr_ctx);
 
 /**
- * kbase_vinstr_hwc_resume - resumes hardware counter collection for
- *                            a given kbase context
+ * kbase_vinstr_resume - resumes operation of a given vinstr context
  * @vinstr_ctx: vinstr context
+ *
+ * This function can only be called if it was preceded by a successful call
+ * to kbase_vinstr_suspend.
  */
-void kbase_vinstr_hwc_resume(struct kbase_vinstr_context *vinstr_ctx);
+void kbase_vinstr_resume(struct kbase_vinstr_context *vinstr_ctx);
 
 /**
  * kbase_vinstr_dump_size - Return required size of dump buffer
@@ -126,7 +147,7 @@ size_t kbase_vinstr_dump_size(struct kbase_device *kbdev);
 
 /**
  * kbase_vinstr_detach_client - Detach a client from the vinstr core
- * @cli: Pointer to vinstr client
+ * @cli: pointer to vinstr client
  */
 void kbase_vinstr_detach_client(struct kbase_vinstr_client *cli);
 
diff --git a/drivers/gpu/arm/midgard_for_linux/mali_linux_trace.h b/drivers/gpu/arm/midgard_for_linux/mali_linux_trace.h
index fc3cf32ba4d2..2be06a552768 100644
--- a/drivers/gpu/arm/midgard_for_linux/mali_linux_trace.h
+++ b/drivers/gpu/arm/midgard_for_linux/mali_linux_trace.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -15,19 +15,15 @@
 
 
 
-
-
 #if !defined(_TRACE_MALI_H) || defined(TRACE_HEADER_MULTI_READ)
 #define _TRACE_MALI_H
 
-#include <linux/stringify.h>
-#include <linux/tracepoint.h>
-
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM mali
-#define TRACE_SYSTEM_STRING __stringify(TRACE_SYSTEM)
 #define TRACE_INCLUDE_FILE mali_linux_trace
 
+#include <linux/tracepoint.h>
+
 #define MALI_JOB_SLOTS_EVENT_CHANGED
 
 /**
@@ -183,24 +179,6 @@ TRACE_EVENT(mali_total_alloc_pages_change,
 	TP_printk("event=%lld", __entry->event_id)
 );
 
-/**
- * mali_sw_counter - not currently used
- * @event_id: counter id
- */
-TRACE_EVENT(mali_sw_counter,
-	TP_PROTO(unsigned int event_id, signed long long value),
-	TP_ARGS(event_id, value),
-	TP_STRUCT__entry(
-		__field(int, event_id)
-		__field(long long, value)
-	),
-	TP_fast_assign(
-		__entry->event_id = event_id;
-		__entry->value = value;
-	),
-	TP_printk("event %d = %lld", __entry->event_id, __entry->value)
-);
-
 #endif				/*  _TRACE_MALI_H */
 
 #undef TRACE_INCLUDE_PATH
diff --git a/drivers/gpu/arm/midgard_for_linux/mali_midg_regmap.h b/drivers/gpu/arm/midgard_for_linux/mali_midg_regmap.h
index 778104b57fa9..de6c206f3c71 100644
--- a/drivers/gpu/arm/midgard_for_linux/mali_midg_regmap.h
+++ b/drivers/gpu/arm/midgard_for_linux/mali_midg_regmap.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -58,7 +58,7 @@
 
 #define GPU_COMMAND             0x030	/* (WO) */
 #define GPU_STATUS              0x034	/* (RO) */
-
+#define LATEST_FLUSH            0x038	/* (RO) */
 
 #define GROUPS_L2_COHERENT      (1 << 0)	/* Cores groups are l2 coherent */
 
@@ -169,6 +169,8 @@
 #define L2_PWRACTIVE_LO         0x260	/* (RO) Level 2 cache active bitmap, low word */
 #define L2_PWRACTIVE_HI         0x264	/* (RO) Level 2 cache active bitmap, high word */
 
+#define COHERENCY_FEATURES      0x300	/* (RO) Coherency features present */
+#define COHERENCY_ENABLE        0x304	/* (RW) Coherency enable */
 
 #define JM_CONFIG               0xF00   /* (RW) Job Manager configuration register (Implementation specific register) */
 #define SHADER_CONFIG           0xF04	/* (RW) Shader core configuration settings (Implementation specific register) */
@@ -212,6 +214,8 @@
 #define JS_AFFINITY_LO         0x10	/* (RO) Core affinity mask for job slot n, low word */
 #define JS_AFFINITY_HI         0x14	/* (RO) Core affinity mask for job slot n, high word */
 #define JS_CONFIG              0x18	/* (RO) Configuration settings for job slot n */
+#define JS_XAFFINITY           0x1C	/* (RO) Extended affinity mask for job
+					   slot n */
 
 #define JS_COMMAND             0x20	/* (WO) Command register for job slot n */
 #define JS_STATUS              0x24	/* (RO) Status register for job slot n */
@@ -222,9 +226,12 @@
 #define JS_AFFINITY_NEXT_LO    0x50	/* (RW) Next core affinity mask for job slot n, low word */
 #define JS_AFFINITY_NEXT_HI    0x54	/* (RW) Next core affinity mask for job slot n, high word */
 #define JS_CONFIG_NEXT         0x58	/* (RW) Next configuration settings for job slot n */
+#define JS_XAFFINITY_NEXT      0x5C	/* (RW) Next extended affinity mask for
+					   job slot n */
 
 #define JS_COMMAND_NEXT        0x60	/* (RW) Next command register for job slot n */
 
+#define JS_FLUSH_ID_NEXT       0x70	/* (RW) Next job slot n cache flush ID */
 
 #define MEMORY_MANAGEMENT_BASE  0x2000
 #define MMU_REG(r)              (MEMORY_MANAGEMENT_BASE + (r))
@@ -266,6 +273,14 @@
 #define AS_STATUS              0x28	/* (RO) Status flags for address space n */
 
 
+/* (RW) Translation table configuration for address space n, low word */
+#define AS_TRANSCFG_LO         0x30
+/* (RW) Translation table configuration for address space n, high word */
+#define AS_TRANSCFG_HI         0x34
+/* (RO) Secondary fault address for address space n, low word */
+#define AS_FAULTEXTRA_LO       0x38
+/* (RO) Secondary fault address for address space n, high word */
+#define AS_FAULTEXTRA_HI       0x3C
 
 /* End Register Offsets */
 
@@ -293,6 +308,11 @@
 
 #define AS_TRANSTAB_LPAE_ADRMODE_MASK      0x00000003
 
+/*
+ * Begin AARCH64 MMU TRANSTAB register values
+ */
+#define MMU_HW_OUTA_BITS 40
+#define AS_TRANSTAB_BASE_MASK ((1ULL << MMU_HW_OUTA_BITS) - (1ULL << 4))
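
For reference, the new AArch64 base mask keeps bits [39:4] of the page table
base address: (1ULL << 40) - (1ULL << 4) evaluates to 0x000000FFFFFFFFF0. A
small illustrative helper (the name and pgd_phys parameter are made up for
this example):

static inline u64 example_transtab_base(u64 pgd_phys)
{
	/* Keep bits [39:4]; bits [3:0] are not part of the base field. */
	return pgd_phys & AS_TRANSTAB_BASE_MASK;
}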
 
 /*
  * Begin MMU STATUS register values
@@ -305,12 +325,38 @@
 #define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT      (0x2<<3)
 #define AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG             (0x3<<3)
 
+#define AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT      (0x4<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT (0x5<<3)
 
 #define AS_FAULTSTATUS_ACCESS_TYPE_MASK                  (0x3<<8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC                (0x0<<8)
 #define AS_FAULTSTATUS_ACCESS_TYPE_EX                    (0x1<<8)
 #define AS_FAULTSTATUS_ACCESS_TYPE_READ                  (0x2<<8)
 #define AS_FAULTSTATUS_ACCESS_TYPE_WRITE                 (0x3<<8)
 
+/*
+ * Begin MMU TRANSCFG register values
+ */
+
+#define AS_TRANSCFG_ADRMODE_LEGACY      0
+#define AS_TRANSCFG_ADRMODE_UNMAPPED    1
+#define AS_TRANSCFG_ADRMODE_IDENTITY    2
+#define AS_TRANSCFG_ADRMODE_AARCH64_4K  6
+#define AS_TRANSCFG_ADRMODE_AARCH64_64K 8
+
+#define AS_TRANSCFG_ADRMODE_MASK        0xF
+
+
+/*
+ * Begin TRANSCFG register values
+ */
+#define AS_TRANSCFG_PTW_MEMATTR_MASK (3 << 24)
+#define AS_TRANSCFG_PTW_MEMATTR_NON_CACHEABLE (1 << 24)
+#define AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK (2 << 24)
+
+#define AS_TRANSCFG_PTW_SH_MASK ((3 << 28))
+#define AS_TRANSCFG_PTW_SH_OS (2 << 28)
+#define AS_TRANSCFG_PTW_SH_IS (3 << 28)
 
 /*
  * Begin Command Values
@@ -348,8 +394,15 @@
 #define JS_CONFIG_END_FLUSH_NO_ACTION          JS_CONFIG_START_FLUSH_NO_ACTION
 #define JS_CONFIG_END_FLUSH_CLEAN              (1u << 12)
 #define JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE   (3u << 12)
+#define JS_CONFIG_ENABLE_FLUSH_REDUCTION       (1u << 14)
+#define JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK     (1u << 15)
 #define JS_CONFIG_THREAD_PRI(n)                ((n) << 16)
 
+/* JS_XAFFINITY register values */
+#define JS_XAFFINITY_XAFFINITY_ENABLE (1u << 0)
+#define JS_XAFFINITY_TILER_ENABLE     (1u << 8)
+#define JS_XAFFINITY_CACHE_ENABLE     (1u << 16)
+
 /* JS_STATUS register values */
 
 /* NOTE: Please keep this values in sync with enum base_jd_event_code in mali_base_kernel.h.
@@ -400,19 +453,35 @@
 #define GPU_COMMAND_CYCLE_COUNT_STOP   0x06	/* Stops the cycle counter, and system timestamp propagation */
 #define GPU_COMMAND_CLEAN_CACHES       0x07	/* Clean all caches */
 #define GPU_COMMAND_CLEAN_INV_CACHES   0x08	/* Clean and invalidate all caches */
+#define GPU_COMMAND_SET_PROTECTED_MODE 0x09	/* Places the GPU in protected mode */
 
 /* End Command Values */
 
 /* GPU_STATUS values */
 #define GPU_STATUS_PRFCNT_ACTIVE           (1 << 2)	/* Set if the performance counters are active. */
+#define GPU_STATUS_PROTECTED_MODE_ACTIVE   (1 << 7)	/* Set if protected mode is active */
 
 /* PRFCNT_CONFIG register values */
-#define PRFCNT_CONFIG_AS_SHIFT    4	/* address space bitmap starts from bit 4 of the register */
+#define PRFCNT_CONFIG_MODE_SHIFT      0 /* Counter mode position. */
+#define PRFCNT_CONFIG_AS_SHIFT        4 /* Address space bitmap position. */
+#define PRFCNT_CONFIG_SETSELECT_SHIFT 8 /* Set select position. */
+
 #define PRFCNT_CONFIG_MODE_OFF    0	/* The performance counters are disabled. */
 #define PRFCNT_CONFIG_MODE_MANUAL 1	/* The performance counters are enabled, but are only written out when a PRFCNT_SAMPLE command is issued using the GPU_COMMAND register. */
 #define PRFCNT_CONFIG_MODE_TILE   2	/* The performance counters are enabled, and are written out each time a tile finishes rendering. */
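
The three shift macros describe how a PRFCNT_CONFIG value is assembled. A
hedged example of composing one (as_nr and set_select are placeholders, not
taken from this patch):

	/* as_nr and set_select are placeholders for this illustration. */
	u32 prfcnt_config = (as_nr << PRFCNT_CONFIG_AS_SHIFT) |
			    (set_select << PRFCNT_CONFIG_SETSELECT_SHIFT) |
			    (PRFCNT_CONFIG_MODE_MANUAL << PRFCNT_CONFIG_MODE_SHIFT);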
 
 /* AS<n>_MEMATTR values: */
+/* Use GPU implementation-defined caching policy. */
+#define AS_MEMATTR_IMPL_DEF_CACHE_POLICY 0x88ull
+/* The attribute set to force all resources to be cached. */
+#define AS_MEMATTR_FORCE_TO_CACHE_ALL    0x8Full
+/* Inner write-alloc cache setup, no outer caching */
+#define AS_MEMATTR_WRITE_ALLOC           0x8Dull
+
+/* Set to implementation defined, outer caching */
+#define AS_MEMATTR_AARCH64_OUTER_IMPL_DEF 0x88ull
+/* Set to write back memory, outer caching */
+#define AS_MEMATTR_AARCH64_OUTER_WA       0x8Dull
 
 /* Use GPU implementation-defined  caching policy. */
 #define AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY 0x48ull
@@ -457,6 +526,8 @@
 /* End JS<n>_FEATURES register */
 
 /* L2_MMU_CONFIG register */
+#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT       (23)
+#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY             (0x1 << L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT)
 #define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT        (24)
 #define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS              (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
 #define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_OCTANT       (0x1 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
@@ -505,5 +576,4 @@
 
 /* End TILER_CONFIG register */
 
-
 #endif /* _MIDGARD_REGMAP_H_ */
diff --git a/drivers/gpu/arm/midgard_for_linux/mali_timeline.h b/drivers/gpu/arm/midgard_for_linux/mali_timeline.h
index c3563723cb63..bd5f6614b6bb 100644
--- a/drivers/gpu/arm/midgard_for_linux/mali_timeline.h
+++ b/drivers/gpu/arm/midgard_for_linux/mali_timeline.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -386,7 +386,6 @@ TRACE_EVENT(mali_timeline_context_active,
 			__entry->count)
 );
 
-
 #endif /* _MALI_TIMELINE_H */
 
 #undef TRACE_INCLUDE_PATH
diff --git a/drivers/gpu/arm/midgard_for_linux/platform/devicetree/mali_kbase_config_platform.h b/drivers/gpu/arm/midgard_for_linux/platform/devicetree/mali_kbase_config_platform.h
index d4813f7f8a35..34f6d57382e2 100644
--- a/drivers/gpu/arm/midgard_for_linux/platform/devicetree/mali_kbase_config_platform.h
+++ b/drivers/gpu/arm/midgard_for_linux/platform/devicetree/mali_kbase_config_platform.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -73,8 +73,8 @@
 extern struct kbase_pm_callback_conf pm_callbacks;
 
 /**
- * Secure mode switch
+ * Protected mode switch
  *
- * Attached value: pointer to @ref kbase_secure_ops
+ * Attached value: pointer to @ref kbase_protected_ops
  */
-#define SECURE_CALLBACKS (NULL)
+#define PROTECTED_CALLBACKS (NULL)
diff --git a/drivers/gpu/arm/midgard_for_linux/platform/juno_soc/mali_kbase_config_juno_soc.c b/drivers/gpu/arm/midgard_for_linux/platform/juno_soc/mali_kbase_config_juno_soc.c
index 3baf3d96d41a..c65481810927 100644
--- a/drivers/gpu/arm/midgard_for_linux/platform/juno_soc/mali_kbase_config_juno_soc.c
+++ b/drivers/gpu/arm/midgard_for_linux/platform/juno_soc/mali_kbase_config_juno_soc.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -66,48 +66,76 @@ struct kbase_pm_callback_conf pm_callbacks = {
 };
 
 /*
- * Juno Secure Mode integration
+ * Juno Protected Mode integration
  */
 
 /* SMC Function Numbers */
-#define JUNO_SMC_SECURE_ENABLE_FUNC  0xff06
-#define JUNO_SMC_SECURE_DISABLE_FUNC 0xff07
+#define JUNO_SMC_PROTECTED_ENTER_FUNC  0xff06
+#define JUNO_SMC_PROTECTED_RESET_FUNC 0xff07
 
-static int juno_secure_mode_enable(struct kbase_device *kbdev)
+static int juno_protected_mode_enter(struct kbase_device *kbdev)
 {
-	u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	/* T62X in SoC detected */
+	u64 ret = kbase_invoke_smc(SMC_OEN_SIP,
+		JUNO_SMC_PROTECTED_ENTER_FUNC, false,
+		0, 0, 0);
+	return ret;
+}
 
-	if (gpu_id == GPU_ID_MAKE(GPU_ID_PI_T62X, 0, 1, 0) &&
-			kbdev->reg_start == 0x2d000000) {
-		/* T62X in SoC detected */
-		u64 ret = kbase_invoke_smc(SMC_OEN_SIP,
-			JUNO_SMC_SECURE_ENABLE_FUNC, false,
-			0, 0, 0);
-		return ret;
-	}
-
-	return -EINVAL; /* Not supported */
+/* TODO: Remove these externs, the reset should be done by the firmware */
+extern void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value,
+						struct kbase_context *kctx);
+
+extern u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset,
+						struct kbase_context *kctx);
+
+static int juno_protected_mode_reset(struct kbase_device *kbdev)
+{
+
+	/* T62X in SoC detected */
+	u64 ret = kbase_invoke_smc(SMC_OEN_SIP,
+		JUNO_SMC_PROTECTED_RESET_FUNC, false,
+		0, 0, 0);
+
+	/* TODO: Remove this reset, it should be done by the firmware */
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+						GPU_COMMAND_HARD_RESET, NULL);
+
+	while ((kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL)
+			& RESET_COMPLETED) != RESET_COMPLETED)
+		;
+
+	return ret;
 }
 
-static int juno_secure_mode_disable(struct kbase_device *kbdev)
+static bool juno_protected_mode_supported(struct kbase_device *kbdev)
 {
 	u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
 
+	/*
+	 * Protected mode is only supported for the built-in GPU
+	 * _and_ only if the right firmware is running.
+	 *
+	 * Given that at init time the GPU is not powered up, the
+	 * juno_protected_mode_reset function can't be used as it
+	 * needs to access GPU registers.
+	 * However, although we don't want the GPU to boot into
+	 * protected mode, we know a GPU reset will be done after
+	 * this function is called, so even though we put the GPU
+	 * into protected mode here it will have left protected mode
+	 * before the driver is ready to run work.
+	 */
 	if (gpu_id == GPU_ID_MAKE(GPU_ID_PI_T62X, 0, 1, 0) &&
-			kbdev->reg_start == 0x2d000000) {
-		/* T62X in SoC detected */
-		u64 ret = kbase_invoke_smc(SMC_OEN_SIP,
-			JUNO_SMC_SECURE_DISABLE_FUNC, false,
-			0, 0, 0);
-		return ret;
-	}
-
-	return -EINVAL; /* Not supported */
+			(kbdev->reg_start == 0x2d000000))
+		return juno_protected_mode_enter(kbdev) == 0;
+
+	return false;
 }
 
-struct kbase_secure_ops juno_secure_ops = {
-	.secure_mode_enable = juno_secure_mode_enable,
-	.secure_mode_disable = juno_secure_mode_disable,
+struct kbase_protected_ops juno_protected_ops = {
+	.protected_mode_enter = juno_protected_mode_enter,
+	.protected_mode_reset = juno_protected_mode_reset,
+	.protected_mode_supported = juno_protected_mode_supported,
 };
 
 static struct kbase_platform_config versatile_platform_config = {
diff --git a/drivers/gpu/arm/midgard_for_linux/platform/juno_soc/mali_kbase_config_platform.h b/drivers/gpu/arm/midgard_for_linux/platform/juno_soc/mali_kbase_config_platform.h
index 5fc6d9e1c312..ab29e9d9c0a6 100644
--- a/drivers/gpu/arm/midgard_for_linux/platform/juno_soc/mali_kbase_config_platform.h
+++ b/drivers/gpu/arm/midgard_for_linux/platform/juno_soc/mali_kbase_config_platform.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -71,14 +71,14 @@
 #define PLATFORM_FUNCS (NULL)
 
 /**
- * Secure mode switch
+ * Protected mode switch
  *
- * Attached value: pointer to @ref kbase_secure_ops
+ * Attached value: pointer to @ref kbase_protected_ops
  */
-#define SECURE_CALLBACKS (&juno_secure_ops)
+#define PROTECTED_CALLBACKS (&juno_protected_ops)
 
 extern struct kbase_pm_callback_conf pm_callbacks;
 #ifdef CONFIG_DEVFREQ_THERMAL
 extern struct devfreq_cooling_ops juno_model_ops;
 #endif
-extern struct kbase_secure_ops juno_secure_ops;
+extern struct kbase_protected_ops juno_protected_ops;
diff --git a/drivers/gpu/arm/midgard_for_linux/platform/vexpress/mali_kbase_config_platform.h b/drivers/gpu/arm/midgard_for_linux/platform/vexpress/mali_kbase_config_platform.h
index 6384586371d0..dc4471beae67 100644
--- a/drivers/gpu/arm/midgard_for_linux/platform/vexpress/mali_kbase_config_platform.h
+++ b/drivers/gpu/arm/midgard_for_linux/platform/vexpress/mali_kbase_config_platform.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -24,7 +24,7 @@
  * Attached value: number in kHz
  * Default value: NA
  */
-#define GPU_FREQ_KHZ_MAX (5000)
+#define GPU_FREQ_KHZ_MAX kbase_get_platform_max_freq()
 /**
  * Minimum frequency GPU will be clocked at. Given in kHz.
  * This must be specified as there is no default value.
@@ -32,16 +32,7 @@
  * Attached value: number in kHz
  * Default value: NA
  */
-#define GPU_FREQ_KHZ_MIN (5000)
-
-/**
- * Values used for determining the GPU frequency based on the LogicTile type
- * Used by the function kbase_get_platform_logic_tile_type
- */
-#define VE_VIRTEX6_GPU_FREQ_MIN 5000
-#define VE_VIRTEX6_GPU_FREQ_MAX 5000
-#define VE_VIRTEX7_GPU_FREQ_MIN 40000
-#define VE_VIRTEX7_GPU_FREQ_MAX 40000
+#define GPU_FREQ_KHZ_MIN kbase_get_platform_min_freq()
 
 /**
  * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock
@@ -82,10 +73,10 @@
 #define PLATFORM_FUNCS (NULL)
 
 /**
- * Secure mode switch
+ * Protected mode switch
  *
- * Attached value: pointer to @ref kbase_secure_ops
+ * Attached value: pointer to @ref kbase_protected_ops
  */
-#define SECURE_CALLBACKS (NULL)
+#define PROTECTED_CALLBACKS (NULL)
 
 extern struct kbase_pm_callback_conf pm_callbacks;
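
With this change the vexpress frequency bounds are no longer compile-time
constants; the macros now resolve at runtime from the detected LogicTile.
Illustrative only:

	u32 gpu_max_khz = GPU_FREQ_KHZ_MAX;	/* kbase_get_platform_max_freq() */
	u32 gpu_min_khz = GPU_FREQ_KHZ_MIN;	/* kbase_get_platform_min_freq() */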
diff --git a/drivers/gpu/arm/midgard_for_linux/platform/vexpress/mali_kbase_config_vexpress.c b/drivers/gpu/arm/midgard_for_linux/platform/vexpress/mali_kbase_config_vexpress.c
index 687b1a8c0431..15ce2bc5eea5 100644
--- a/drivers/gpu/arm/midgard_for_linux/platform/vexpress/mali_kbase_config_vexpress.c
+++ b/drivers/gpu/arm/midgard_for_linux/platform/vexpress/mali_kbase_config_vexpress.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
diff --git a/drivers/gpu/arm/midgard_for_linux/platform/vexpress/mali_kbase_cpu_vexpress.c b/drivers/gpu/arm/midgard_for_linux/platform/vexpress/mali_kbase_cpu_vexpress.c
index 9bc51f1e2da8..4665f98cbbe4 100644
--- a/drivers/gpu/arm/midgard_for_linux/platform/vexpress/mali_kbase_cpu_vexpress.c
+++ b/drivers/gpu/arm/midgard_for_linux/platform/vexpress/mali_kbase_cpu_vexpress.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -52,6 +52,18 @@
 
 #define IS_SINGLE_BIT_SET(val, pos) (val&(1<<pos))
 
+/**
+ * Values used for determining the GPU frequency based on the LogicTile type
+ * Used by the function kbase_get_platform_logic_tile_type
+ */
+#define VE_VIRTEX6_GPU_FREQ_MIN 5000
+#define VE_VIRTEX6_GPU_FREQ_MAX 5000
+#define VE_VIRTEX7_GPU_FREQ_MIN 40000
+#define VE_VIRTEX7_GPU_FREQ_MAX 40000
+#define VE_DEFAULT_GPU_FREQ_MIN 5000
+#define VE_DEFAULT_GPU_FREQ_MAX 5000
+
+
 #define CPU_CLOCK_SPEED_UNDEFINED (0)
 
 static u32 cpu_clock_speed = CPU_CLOCK_SPEED_UNDEFINED;
@@ -197,14 +209,71 @@ syscfg_reg_map_failed:
 	return err;
 }
 
-u32 kbase_get_platform_logic_tile_type(void)
+/**
+ * kbase_get_platform_logic_tile_type -  determines which LogicTile type
+ * is used by Versatile Express
+ *
+ * When platform_config build parameter is specified as vexpress, i.e.,
+ * platform_config=vexpress, the GPU frequency may vary depending on the
+ * particular platform. The GPU frequency depends on the LogicTile type.
+ *
+ * This function determines which LogicTile type is used by the platform by
+ * reading the HBI value of the daughterboard which holds the LogicTile:
+ *
+ * 0x217 HBI0217 Virtex-6
+ * 0x192 HBI0192 Virtex-5
+ * 0x247 HBI0247 Virtex-7
+ *
+ * Return: HBI value of the logic tile daughterboard, zero if not accessible
+ */
+static u32 kbase_get_platform_logic_tile_type(void)
 {
 	void __iomem *syscfg_reg = NULL;
 	u32 sys_procid1 = 0;
 
 	syscfg_reg = ioremap(VE_MOTHERBOARD_PERIPHERALS_SMB_CS7 + VE_SYS_PROC_ID1_OFFSET, 4);
-
-	sys_procid1 = (NULL != syscfg_reg) ? readl(syscfg_reg) : 0;
+	if (NULL != syscfg_reg) {
+		sys_procid1 = readl(syscfg_reg);
+		iounmap(syscfg_reg);
+	}
 
 	return sys_procid1 & VE_LOGIC_TILE_HBI_MASK;
 }
+
+u32 kbase_get_platform_min_freq(void)
+{
+	u32 ve_logic_tile = kbase_get_platform_logic_tile_type();
+
+	switch (ve_logic_tile) {
+	case 0x217:
+		/* Virtex 6, HBI0217 */
+		return VE_VIRTEX6_GPU_FREQ_MIN;
+	case 0x247:
+		/* Virtex 7, HBI0247 */
+		return VE_VIRTEX7_GPU_FREQ_MIN;
+	default:
+		/* All other logic tiles, i.e. Virtex 5 HBI0192,
+		 * or an unsuccessful read from the platform -
+		 * fall back to a default value. */
+		return VE_DEFAULT_GPU_FREQ_MIN;
+	}
+}
+
+u32 kbase_get_platform_max_freq(void)
+{
+	u32 ve_logic_tile = kbase_get_platform_logic_tile_type();
+
+	switch (ve_logic_tile) {
+	case 0x217:
+		/* Virtex 6, HBI0217 */
+		return VE_VIRTEX6_GPU_FREQ_MAX;
+	case 0x247:
+		/* Virtex 7, HBI0247 */
+		return VE_VIRTEX7_GPU_FREQ_MAX;
+	default:
+		/* All other logic tiles, i.e. Virtex 5 HBI0192,
+		 * or an unsuccessful read from the platform -
+		 * fall back to a default value. */
+		return VE_DEFAULT_GPU_FREQ_MAX;
+	}
+}
diff --git a/drivers/gpu/arm/midgard_for_linux/platform/vexpress/mali_kbase_cpu_vexpress.h b/drivers/gpu/arm/midgard_for_linux/platform/vexpress/mali_kbase_cpu_vexpress.h
index ef9bfd721618..da865698133a 100644
--- a/drivers/gpu/arm/midgard_for_linux/platform/vexpress/mali_kbase_cpu_vexpress.h
+++ b/drivers/gpu/arm/midgard_for_linux/platform/vexpress/mali_kbase_cpu_vexpress.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2013, 2015-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -26,23 +26,13 @@
 int kbase_get_vexpress_cpu_clock_speed(u32 *cpu_clock);
 
 /**
- * kbase_get_platform_logic_tile_type -  determines which LogicTile type 
- * is used by Versatile Express
- *
- * When platform_config build parameter is specified as vexpress, i.e.,
- * platform_config=vexpress, GPU frequency may vary dependent on the
- * particular platform. The GPU frequency depends on the LogicTile type.
- *
- * This function is called by kbase_common_device_init to determine
- * which LogicTile type is used by the platform by reading the HBI value
- * of the daughterboard which holds the LogicTile:
- *
- * 0x192 HBI0192 Virtex-5
- * 0x217 HBI0217 Virtex-6
- * 0x247 HBI0247 Virtex-7
- *
- * Return: HBI value of the logic tile daughterboard, zero if not accessible
+ * Get the minimum GPU frequency for the attached logic tile
+ */
+u32 kbase_get_platform_min_freq(void);
+
+/**
+ * Get the maximum GPU frequency for the attached logic tile
  */
-u32 kbase_get_platform_logic_tile_type(void);
+u32 kbase_get_platform_max_freq(void);
 
 #endif				/* _KBASE_CPU_VEXPRESS_H_ */
diff --git a/drivers/gpu/arm/midgard_for_linux/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h b/drivers/gpu/arm/midgard_for_linux/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h
index 2b91d72bd93c..b0490ca331dd 100644
--- a/drivers/gpu/arm/midgard_for_linux/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h
+++ b/drivers/gpu/arm/midgard_for_linux/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -71,10 +71,10 @@
 #define PLATFORM_FUNCS (NULL)
 
 /**
- * Secure mode switch
+ * Protected mode switch
  *
- * Attached value: pointer to @ref kbase_secure_ops
+ * Attached value: pointer to @ref kbase_protected_ops
  */
-#define SECURE_CALLBACKS (NULL)
+#define PROTECTED_CALLBACKS (NULL)
 
 extern struct kbase_pm_callback_conf pm_callbacks;
diff --git a/drivers/gpu/arm/midgard_for_linux/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h b/drivers/gpu/arm/midgard_for_linux/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h
index d269c2591490..22ffccb3aaed 100644
--- a/drivers/gpu/arm/midgard_for_linux/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h
+++ b/drivers/gpu/arm/midgard_for_linux/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -73,10 +73,10 @@
 #define PLATFORM_FUNCS (NULL)
 
 /**
- * Secure mode switch
+ * Protected mode switch
  *
- * Attached value: pointer to @ref kbase_secure_ops
+ * Attached value: pointer to @ref kbase_protected_ops
  */
-#define SECURE_CALLBACKS (NULL)
+#define PROTECTED_CALLBACKS (NULL)
 
 extern struct kbase_pm_callback_conf pm_callbacks;
diff --git a/drivers/gpu/arm/midgard_for_linux/sconscript b/drivers/gpu/arm/midgard_for_linux/sconscript
index 7e41a438ac53..c36e92dd0d7a 100644
--- a/drivers/gpu/arm/midgard_for_linux/sconscript
+++ b/drivers/gpu/arm/midgard_for_linux/sconscript
@@ -1,5 +1,5 @@
 #
-# (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
@@ -19,14 +19,12 @@ import re
 import sys
 Import('env')
 
+
 if Glob('tests/sconscript'):
 	SConscript( 'tests/sconscript' )
 
 mock_test = 0
 
-if env['v'] != '1':
-	env['MAKECOMSTR'] = '[MAKE] ${SOURCE.dir}'
-
 # Fake platform is a transient solution for GPL drivers running in kernel that does not provide configuration via platform data.
 # For such kernels fake_platform_device should be set to 1. For kernels providing platform data fake_platform_device should be set to 0.
 if env['platform_config']=='devicetree':
@@ -46,8 +44,7 @@ kbase_src = [Glob('#kernel/drivers/gpu/arm/midgard/*.c'),
              Glob('#kernel/drivers/gpu/arm/midgard/K*'))
              ]
 
-kbase_src += [Glob('#kernel/drivers/gpu/arm/midgard/internal/*/*.c'),
-              Glob('#kernel/drivers/gpu/arm/midgard/internal/*/*/*.c')]
+kbase_src += [Glob('#kernel/drivers/gpu/arm/midgard/internal/*/*.c')]
 
 if Glob('#kernel/drivers/gpu/arm/midgard/tests/internal/src/mock') and env['unit'] == '1':
 	kbase_src += [Glob('#kernel/drivers/gpu/arm/midgard/tests/internal/src/mock/*.c')]
@@ -125,10 +122,6 @@ if env['os'] != 'android':
 if int(env['ump']) == 1:
 	env.Depends('$STATIC_LIB_PATH/mali_kbase.ko', '$STATIC_LIB_PATH/ump.ko')
 
-if Glob('internal/sconsfrag'):
-	execfile('internal/sconsfrag')
-	get_internal(env)
-
-env.ProgTarget('kbase', cmd)
+env.KernelObjTarget('kbase', cmd)
 
 env.AppendUnique(BASE=['cutils_linked_list'])
-- 
2.34.1