From 88d444dfb9e5776c0234e37eeea5bf1a37f8a71f Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Fri, 13 Jul 2012 12:06:26 +0100 Subject: [PATCH] ARM: vexpress: add TC2 CPU idle PM The TC2 test-chip integrates power management circuitry and firmware that allow voltage to be removed from both (A7 and A15) clusters when they are idle or, more generally, when the system is forced into shutdown mode. All CPUs in a cluster share the same voltage source, so they cannot be shut down independently. In order to take advantage of the TC2 power management capabilities, this patch adds a multi-cluster aware CPU idle driver. It is based on the coupled C-state concept provided by this code: http://lists.infradead.org/pipermail/linux-arm-kernel/2012-April/097084.html CPUs that are part of the same cluster are coupled using the mask provided by the MPIDR at boot. Once all CPUs hit the coupled barrier, the primary CPU in the cluster (the one with MPIDR[7:0] == 0) waits for the secondaries to clean their L1 caches and enter wfi. It then cleans all cache levels, exits cluster coherency and starts the procedure to shut down its cluster. All wake-up IRQ sources are enabled by default. Deep shutdown states for clusters are not enabled by default. To enable them: A15 cluster echo 0 > /sys/kernel/debug/idle_debug/enable_idle A7 cluster echo 1 > /sys/kernel/debug/idle_debug/enable_idle Tested thoroughly using lookbusy to modulate system load and trigger idle state entry/exit. 
--- arch/arm/mach-vexpress/Kconfig | 14 ++ arch/arm/mach-vexpress/Makefile | 1 + arch/arm/mach-vexpress/cpuidle-tc2.c | 288 +++++++++++++++++++++++++++ arch/arm/mach-vexpress/hotplug-asm.S | 28 +++ arch/arm/mach-vexpress/tc2-sleep.S | 76 +++++++ 5 files changed, 407 insertions(+) create mode 100644 arch/arm/mach-vexpress/cpuidle-tc2.c create mode 100644 arch/arm/mach-vexpress/hotplug-asm.S create mode 100644 arch/arm/mach-vexpress/tc2-sleep.S diff --git a/arch/arm/mach-vexpress/Kconfig b/arch/arm/mach-vexpress/Kconfig index 5907e10c37fd..6733f4719317 100644 --- a/arch/arm/mach-vexpress/Kconfig +++ b/arch/arm/mach-vexpress/Kconfig @@ -54,6 +54,20 @@ config ARCH_VEXPRESS_CORTEX_A5_A9_ERRATA build a working kernel, you must also enable relevant core tile support or Flattened Device Tree based support options. +config ARCH_VEXPRESS_TC2_PM + bool "Power Management Support for TC2 test-chip (EXPERIMENTAL)" + depends on CPU_IDLE && PM + select ARM_CPU_SUSPEND + select ARCH_NEEDS_CPU_IDLE_COUPLED + select ARM_SPC + select ARM_CCI + help + Provides code that enables CPU idle power management on the + TC2 testchip. It enables the CPU idle driver so that the kernel + can enter cluster power down states provided by the power + controller. Code is built on top of coupled C-state idle code + since all CPUs need to be idle to enter cluster shutdown. 
+ config ARCH_VEXPRESS_CA9X4 bool "Versatile Express Cortex-A9x4 tile" diff --git a/arch/arm/mach-vexpress/Makefile b/arch/arm/mach-vexpress/Makefile index 42703e8b4d3b..1f57882b46c7 100644 --- a/arch/arm/mach-vexpress/Makefile +++ b/arch/arm/mach-vexpress/Makefile @@ -8,3 +8,4 @@ obj-y := v2m.o obj-$(CONFIG_ARCH_VEXPRESS_CA9X4) += ct-ca9x4.o obj-$(CONFIG_SMP) += platsmp.o obj-$(CONFIG_HOTPLUG_CPU) += hotplug.o +obj-$(CONFIG_ARCH_VEXPRESS_TC2_PM) += cpuidle-tc2.o hotplug-asm.o tc2-sleep.o diff --git a/arch/arm/mach-vexpress/cpuidle-tc2.c b/arch/arm/mach-vexpress/cpuidle-tc2.c new file mode 100644 index 000000000000..de66243a7ef3 --- /dev/null +++ b/arch/arm/mach-vexpress/cpuidle-tc2.c @@ -0,0 +1,288 @@ +/* + * TC2 CPU idle driver. + * + * Copyright (C) 2012 ARM Ltd. + * Author: Lorenzo Pieralisi + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +/* + * tc2_cpuidle_simple_enter - enter the plain WFI idle state + * @dev: cpuidle device; its last_residency field is updated + * @drv: cpuidle driver (not used here) + * @index: index of the state being entered + * + * Measures the time spent in cpu_do_idle() with ktime_get(), re-enables + * local interrupts, and stores the elapsed microseconds, clamped to + * INT_MAX, in dev->last_residency. Returns @index unchanged. + */ +static int tc2_cpuidle_simple_enter(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) +{ + ktime_t time_start, time_end; + s64 diff; + + time_start = ktime_get(); + + cpu_do_idle(); + + time_end = ktime_get(); + + local_irq_enable(); + + diff = ktime_to_us(ktime_sub(time_end, time_start)); + if (diff > INT_MAX) + diff = INT_MAX; + + dev->last_residency = (int) diff; + + return index; +} + +static int tc2_enter_coupled(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int idx); +/* + * Idle state table, copied into tc2_idle_driver by tc2_idle_init(). + * State 0 (WFI) uses tc2_cpuidle_simple_enter(); state 1 (C1) uses + * tc2_enter_coupled() and carries CPUIDLE_FLAG_COUPLED. + */ +static struct cpuidle_state tc2_cpuidle_set[] __initdata = { + [0] = { + .enter = tc2_cpuidle_simple_enter, + .exit_latency = 1, + .target_residency = 1, + .power_usage = UINT_MAX, + .flags = CPUIDLE_FLAG_TIME_VALID, + .name = "WFI", + .desc = "ARM WFI", + }, + [1] = { + .enter = tc2_enter_coupled, + 
.exit_latency = 300, + .target_residency = 1000, + .flags = CPUIDLE_FLAG_TIME_VALID | + CPUIDLE_FLAG_COUPLED, + .name = "C1", + .desc = "ARM power down", + }, +}; + +struct cpuidle_driver tc2_idle_driver = { + .name = "tc2_idle", + .owner = THIS_MODULE, + .safe_state_index = 0 +}; + +static DEFINE_PER_CPU(struct cpuidle_device, tc2_idle_dev); +/* + * cluster_mask holds one bit per cluster id (MPIDR bits [11:8] as + * extracted below), not per CPU: a set bit allows that cluster to run + * the power-down sequence in tc2_enter_coupled(). + * NOTE(review): the cluster id is masked with 0xf but abort_barrier has + * only NR_CLUSTERS entries and no range check is visible - confirm that + * the id can never exceed NR_CLUSTERS - 1 on this platform. + */ +#define NR_CLUSTERS 2 +static cpumask_t cluster_mask = CPU_MASK_NONE; + +extern void disable_clean_inv_dcache(int); +static atomic_t abort_barrier[NR_CLUSTERS]; + +extern void tc2_cpu_resume(void); +extern void disable_snoops(void); +/* + * tc2_coupled_finisher - cpu_suspend() finisher for the coupled state + * @arg: the struct cpuidle_device pointer, passed as unsigned long + * + * All CPUs first meet in cpuidle_coupled_parallel_barrier() using the + * per-cluster abort_barrier. A CPU whose MPIDR[3:0] is non-zero then + * calls disable_clean_inv_dcache(0) and executes wfi. The CPU whose + * MPIDR[3:0] is zero polls vexpress_spc_wfi_cpustat() until the number + * of set bits equals cpumask_weight(topology_core_cpumask(cpu)) - 1, + * then calls vexpress_spc_powerdown_enable(), + * disable_clean_inv_dcache(1), disable_cci() and disable_snoops(). + * + * NOTE(review): if the wfi in the non-zero branch ever completes, + * execution falls through into the polling loop - confirm this cannot + * happen. + */ +static int notrace tc2_coupled_finisher(unsigned long arg) +{ + unsigned int mpidr = read_cpuid_mpidr(); + unsigned int cpu = smp_processor_id(); + unsigned int cluster = (mpidr >> 8) & 0xf; + unsigned int weight = cpumask_weight(topology_core_cpumask(cpu)); + u8 wfi_weight = 0; + + cpuidle_coupled_parallel_barrier((struct cpuidle_device *)arg, + &abort_barrier[cluster]); + if (mpidr & 0xf) { + disable_clean_inv_dcache(0); + wfi(); + /* not reached */ + } + + while (wfi_weight != (weight - 1)) { + wfi_weight = vexpress_spc_wfi_cpustat(cluster); + wfi_weight = hweight8(wfi_weight); + } + + vexpress_spc_powerdown_enable(cluster, 1); + disable_clean_inv_dcache(1); + disable_cci(cluster); + disable_snoops(); + return 1; +} + +/* + * tc2_enter_coupled - Programs CPU to enter the specified state + * @dev: cpuidle device + * @drv: cpuidle driver (not used here) + * @idx: state index + * + * Called from the CPUidle framework to program the device to the + * specified target state selected by the governor. + * + * If the current cluster is not set in cluster_mask, only the coupled + * barrier is executed. Otherwise cpu_suspend() is run with + * tc2_coupled_finisher() between cpu_pm_enter()/cpu_pm_exit() and the + * clockevents broadcast enter/exit notifications; a non-zero return from + * cpu_suspend() hits BUG(). The elapsed time in microseconds is stored + * in dev->last_residency and @idx is returned. 
+ */ +static int tc2_enter_coupled(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int idx) +{ + struct timespec ts_preidle, ts_postidle, ts_idle; + int ret; + int cluster = (read_cpuid_mpidr() >> 8) & 0xf; + /* Used to keep track of the total time in idle */ + getnstimeofday(&ts_preidle); + + if (!cpu_isset(cluster, cluster_mask)) { + cpuidle_coupled_parallel_barrier(dev, + &abort_barrier[cluster]); + goto shallow_out; + } + + BUG_ON(!irqs_disabled()); + + cpu_pm_enter(); + + clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu); + + ret = cpu_suspend((unsigned long) dev, tc2_coupled_finisher); + + if (ret) + BUG(); + + clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu); + + cpu_pm_exit(); + +shallow_out: + getnstimeofday(&ts_postidle); + ts_idle = timespec_sub(ts_postidle, ts_preidle); + + dev->last_residency = ts_idle.tv_nsec / NSEC_PER_USEC + + ts_idle.tv_sec * USEC_PER_SEC; + return idx; +} +/* + * seq_file show callback for the enable_mask debugfs file: formats the + * first NR_CLUSTERS bits of cluster_mask with bitmap_scnlistprintf() and + * prints the result followed by a newline. Always returns 0. + */ +static int idle_mask_show(struct seq_file *f, void *p) +{ + char buf[256]; + bitmap_scnlistprintf(buf, 256, cpumask_bits(&cluster_mask), + NR_CLUSTERS); + + seq_printf(f, "%s\n", buf); + + return 0; +} +/* open handler: attaches idle_mask_show() through single_open(), passing inode->i_private */ +static int idle_mask_open(struct inode *inode, struct file *file) +{ + return single_open(file, idle_mask_show, inode->i_private); +} +/* File operations used for the enable_mask debugfs file (see tc2_idle_init()). */ +static const struct file_operations cpuidle_fops = { + .open = idle_mask_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; +/* + * idle_debug_set - write handler behind idle_debug_fops + * @data: unused + * @val: cluster id to enable (0 .. NR_CLUSTERS - 1), or 0xff to clear + * every bit in cluster_mask + * + * Any other value logs a warning and returns -EINVAL. The mask is changed + * between cpuidle_pause_and_lock() and cpuidle_resume_and_unlock(). + * Returns 0 on success. There is no value that clears a single cluster. + * NOTE(review): idle_debug_fops is defined with a NULL getter while the + * enable_idle file is created with S_IRUGO | S_IWGRP - confirm the + * intended permissions. + */ +static int idle_debug_set(void *data, u64 val) +{ + if (val >= (unsigned)NR_CLUSTERS && val != 0xff) { + pr_warning("Wrong parameter passed\n"); + return -EINVAL; + } + cpuidle_pause_and_lock(); + if (val == 0xff) + cpumask_clear(&cluster_mask); + else + cpumask_set_cpu(val, &cluster_mask); + + cpuidle_resume_and_unlock(); + return 0; +} +DEFINE_SIMPLE_ATTRIBUTE(idle_debug_fops, NULL, idle_debug_set, "%llu\n"); + +/* + * tc2_idle_init + * + * Registers the TC2 specific cpuidle driver with the cpuidle + * framework with the valid set of states. + * + * Copies tc2_cpuidle_set into the driver, registers the driver and one + * cpuidle device per online CPU (coupled with the CPUs in its + * topology_core_cpumask()), creates the idle_debug debugfs directory with + * the enable_idle and enable_mask files, enables the wake-up interrupts, + * passes the physical address of tc2_cpu_resume to vexpress_flags_set() + * and finally sets every cluster bit in cluster_mask. + * + * Returns 0, or -EIO if a device registration fails. + * + * NOTE(review): the result of cpuidle_register_driver() is ignored, the + * driver stays registered on the -EIO path, the per-CPU message is + * printed with pr_err() before the device is registered, and the early + * return 0 on a debugfs failure skips the wake-up, resume-address and + * cluster_mask setup - confirm these are intended. 
+ */ +int __init tc2_idle_init(void) +{ + struct cpuidle_device *dev; + int i, cpu_id; + struct dentry *idle_debug, *file_debug; + struct cpuidle_driver *drv = &tc2_idle_driver; + + drv->state_count = (sizeof(tc2_cpuidle_set) / + sizeof(struct cpuidle_state)); + + for (i = 0; i < drv->state_count; i++) { + memcpy(&drv->states[i], &tc2_cpuidle_set[i], + sizeof(struct cpuidle_state)); + } + + cpuidle_register_driver(drv); + + for_each_cpu(cpu_id, cpu_online_mask) { + pr_err("CPUidle for CPU%d registered\n", cpu_id); + dev = &per_cpu(tc2_idle_dev, cpu_id); + dev->cpu = cpu_id; + dev->safe_state_index = 0; + + cpumask_copy(&dev->coupled_cpus, + topology_core_cpumask(cpu_id)); + dev->state_count = drv->state_count; + + if (cpuidle_register_device(dev)) { + printk(KERN_ERR "%s: Cpuidle register device failed\n", + __func__); + return -EIO; + } + } + + idle_debug = debugfs_create_dir("idle_debug", NULL); + + if (IS_ERR_OR_NULL(idle_debug)) { + printk(KERN_INFO "Error in creating idle debugfs directory\n"); + return 0; + } + + file_debug = debugfs_create_file("enable_idle", S_IRUGO | S_IWGRP, + idle_debug, NULL, &idle_debug_fops); + + if (IS_ERR_OR_NULL(file_debug)) { + printk(KERN_INFO "Error in creating enable_idle file\n"); + return 0; + } + + file_debug = debugfs_create_file("enable_mask", S_IRUGO | S_IWGRP, + idle_debug, NULL, &cpuidle_fops); + + if (IS_ERR_OR_NULL(file_debug)) + printk(KERN_INFO "Error in creating enable_mask file\n"); + + /* enable all wake-up IRQs by default */ + vexpress_spc_set_wake_intr(0x7ff); + vexpress_flags_set(virt_to_phys(tc2_cpu_resume)); + + /* + * Enable idle by default for all possible clusters. + * This must be done after all other setup to prevent the + * possibility of clusters being powered down before they + * are fully configured. 
+ */ + for (i = 0; i < NR_CLUSTERS; i++) + cpumask_set_cpu(i, &cluster_mask); + + return 0; +} + +late_initcall(tc2_idle_init); diff --git a/arch/arm/mach-vexpress/hotplug-asm.S b/arch/arm/mach-vexpress/hotplug-asm.S new file mode 100644 index 000000000000..f63472edcc44 --- /dev/null +++ b/arch/arm/mach-vexpress/hotplug-asm.S @@ -0,0 +1,28 @@ +#include +#include +/* + * disable_clean_inv_dcache(r0) + * + * Clears the C bit (0x4) in CP15 c1, c0, 0, then calls + * v7_flush_dcache_louis when r0 is zero or v7_flush_dcache_all when it is + * non-zero, executes clrex, clears the SMP bit (0x40) in CP15 c1, c0, 1 + * and returns to the caller. + */ + .text +ENTRY(disable_clean_inv_dcache) + ARM( stmfd sp!, {r4-r5, r7, r9-r11, lr} ) + THUMB( stmfd sp!, {r4-r7, r9-r11, lr} ) + + mrc p15, 0, r3, c1, c0, 0 + bic r3, #4 @ clear C bit + mcr p15, 0, r3, c1, c0, 0 + dsb + isb + mov r12, r0 + cmp r12, #0 + bleq v7_flush_dcache_louis + cmp r12, #0 + blne v7_flush_dcache_all + clrex + mrc p15, 0, r3, c1, c0, 1 + bic r3, #0x40 @ clear SMP bit + mcr p15, 0, r3, c1, c0, 1 + isb + dsb + ARM( ldmfd sp!, {r4-r5, r7, r9-r11, lr} ) + THUMB( ldmfd sp!, {r4-r7, r9-r11, lr} ) + mov pc, lr +ENDPROC(disable_clean_inv_dcache) diff --git a/arch/arm/mach-vexpress/tc2-sleep.S b/arch/arm/mach-vexpress/tc2-sleep.S new file mode 100644 index 000000000000..9bf8348fcc8d --- /dev/null +++ b/arch/arm/mach-vexpress/tc2-sleep.S @@ -0,0 +1,76 @@ +#include + +#define SPC_PHYS_BASE 0x7FFF0000 +#define A15_CONF 0x400 +/* + * tc2_cpu_resume - resume entry point; its physical address is handed to + * vexpress_flags_set() by tc2_idle_init(). + * + * Compares bits [15:8] of the MPIDR with the low three bits of the word at + * SPC_PHYS_BASE + A15_CONF to pick the first (equal) or second (not equal) + * four-word row of the value table, clears the row's snoop bits in the SCC + * snoop control register, writes 3 to the row's CCI address, waits until + * bit 0 of the register at cci_ctrl reads as zero, writes 0 to the row's + * power-down enable register and branches to cpu_resume. + */ +ENTRY(tc2_cpu_resume) + mrc p15, 0, r0, c0, c0, 5 + ands r0, r0, #0xff00 + ldr r1, =SPC_PHYS_BASE + mov r2, #A15_CONF + add r1, r1, r2 + ldr r1, [r1] + and r1, r1, #0x7 + cmp r1, r0, lsr #8 + adr r0, value + addne r0, r0, #16 + ldmia r0, {r1, r2, r3, r4} @ CCI address, SCC snoop control & val + mvn r3, r3 @ undo actions done at shutdown + ldr r0, [r2] + and r5, r0, r3 + str r5, [r2] + mov r0, #3 @ enable CCI for the cluster + str r0, [r1] + adr r1, cci_ctrl + ldr r1, [r1] +loop: + ldr r0, [r1] + ands r0, r0, #1 + bne loop + mov r0, #0 @ disable power down enable + str r0, [r4] + b cpu_resume +ENDPROC(tc2_cpu_resume) +/* + * disable_snoops - last step of the cluster power-down sequence (called + * from tc2_coupled_finisher()). + * + * Uses the base address loaded through scc_ptr to read the word at offset + * A15_CONF, picks the first or second (offset, bits) pair of vvalue with + * the same MPIDR comparison as tc2_cpu_resume, ORs the bits into the + * register at base + offset after a dsb/isb, and executes wfi. + * + * NOTE(review): no return instruction follows the wfi, so if it completes + * execution continues into the cci_ctrl data words - confirm. + */ +ENTRY(disable_snoops) + mrc p15, 0, r0, c0, c0, 5 + ands r0, r0, #0xff00 + ldr r1, scc_ptr + ldr r1, [r1] + mov r2, #A15_CONF + add r1, r1, r2 + ldr r1, [r1] 
+ and r1, r1, #0x7 + cmp r1, r0, lsr #8 + adr r0, vvalue + addne r0, r0, #8 + ldmia r0, {r2, r3} @ CCI address, SCC snoop control & val + ldr r1, scc_ptr + ldr r1, [r1] + add r2, r1, r2 + ldr r0, [r2] + orr r0, r0, r3 + dsb + isb + str r0, [r2] + wfi +ENDPROC(disable_snoops) +/* + * Literal data. cci_ctrl: address polled by tc2_cpu_resume. value: two + * rows of four words (CCI address, snoop control register address, snoop + * bits, power-down enable register address) indexed by tc2_cpu_resume. + * vvalue: two (register offset, bits) pairs used by disable_snoops. + * scc_ptr: address of the vscc symbol, which is not defined in this + * patch. + */ +cci_ctrl: + .long 0x2c09000c +value: + .long 0x2c094000 + .long 0x7fff0404 + .long 0x180 + .long 0x7fff0b30 + .long 0x2c095000 + .long 0x7fff0504 + .long 0x2000 + .long 0x7fff0b34 +vvalue: + .long 0x404 + .long 0x180 + .long 0x504 + .long 0x2000 +scc_ptr: + .long vscc -- 2.34.1