diff options
| author | Meizu OpenSource <patchwork@meizu.com> | 2016-08-15 10:19:42 +0800 |
|---|---|---|
| committer | Meizu OpenSource <patchwork@meizu.com> | 2016-08-15 10:19:42 +0800 |
| commit | d2e1446d81725c351dc73a03b397ce043fb18452 (patch) | |
| tree | 4dbc616b7f92aea39cd697a9084205ddb805e344 /arch/arm64/kernel | |
first commit
Diffstat (limited to 'arch/arm64/kernel')
54 files changed, 15388 insertions, 0 deletions
diff --git a/arch/arm64/kernel/.gitignore b/arch/arm64/kernel/.gitignore new file mode 100644 index 000000000..c5f676c3c --- /dev/null +++ b/arch/arm64/kernel/.gitignore @@ -0,0 +1 @@ +vmlinux.lds diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile new file mode 100644 index 000000000..60d874a7e --- /dev/null +++ b/arch/arm64/kernel/Makefile @@ -0,0 +1,39 @@ +# +# Makefile for the linux kernel. +# + +CPPFLAGS_vmlinux.lds := -DTEXT_OFFSET=$(TEXT_OFFSET) +AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET) + +CFLAGS_REMOVE_ftrace.o = -pg +CFLAGS_REMOVE_insn.o = -pg +CFLAGS_REMOVE_return_address.o = -pg + +# Object file lists. +arm64-obj-y := cputable.o debug-monitors.o entry.o irq.o fpsimd.o \ + entry-fpsimd.o process.o ptrace.o setup.o signal.o \ + sys.o stacktrace.o time.o traps.o io.o vdso.o \ + hyp-stub.o psci.o opcodes.o cpu_ops.o insn.o return_address.o + +arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \ + sys_compat.o +arm64-obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o +arm64-obj-$(CONFIG_MODULES) += arm64ksyms.o module.o +arm64-obj-$(CONFIG_SMP) += smp.o smp_spin_table.o +arm64-obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o +arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT)+= hw_breakpoint.o +arm64-obj-$(CONFIG_ARM64_CPU_SUSPEND) += sleep.o suspend.o +arm64-obj-$(CONFIG_EARLY_PRINTK) += early_printk.o +arm64-obj-$(CONFIG_JUMP_LABEL) += jump_label.o + +obj-$(CONFIG_SWP_EMULATE) += swp_emulate.o +arm64-obj-$(CONFIG_ARM_CPU_TOPOLOGY) += topology.o +arm64-obj-$(CONFIG_KGDB) += kgdb.o +obj-y += $(arm64-obj-y) vdso/ +obj-m += $(arm64-obj-m) +head-y := head.o +extra-y := $(head-y) vmlinux.lds + +# vDSO - this must be built first to generate the symbol offsets +$(call objectify,$(arm64-obj-y)): $(obj)/vdso/vdso-offsets.h +$(obj)/vdso/vdso-offsets.h: $(obj)/vdso diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c new file mode 100644 index 000000000..ef9b63dc0 --- /dev/null +++ b/arch/arm64/kernel/arm64ksyms.c @@ -0,0 
+1,64 @@ +/* + * Based on arch/arm/kernel/armksyms.c + * + * Copyright (C) 2000 Russell King + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/export.h> +#include <linux/sched.h> +#include <linux/string.h> +#include <linux/cryptohash.h> +#include <linux/delay.h> +#include <linux/in6.h> +#include <linux/syscalls.h> +#include <linux/uaccess.h> +#include <linux/io.h> + +#include <asm/checksum.h> + + /* user mem (segment) */ +EXPORT_SYMBOL(__strnlen_user); +EXPORT_SYMBOL(__strncpy_from_user); + +EXPORT_SYMBOL(copy_page); +EXPORT_SYMBOL(clear_page); + +EXPORT_SYMBOL(__copy_from_user); +EXPORT_SYMBOL(__copy_to_user); +EXPORT_SYMBOL(__clear_user); + + /* physical memory */ +EXPORT_SYMBOL(memstart_addr); + + /* string / mem functions */ +EXPORT_SYMBOL(strchr); +EXPORT_SYMBOL(strrchr); +EXPORT_SYMBOL(memset); +EXPORT_SYMBOL(memcpy); +EXPORT_SYMBOL(memmove); +EXPORT_SYMBOL(memchr); + + /* atomic bitops */ +EXPORT_SYMBOL(set_bit); +EXPORT_SYMBOL(test_and_set_bit); +EXPORT_SYMBOL(clear_bit); +EXPORT_SYMBOL(test_and_clear_bit); +EXPORT_SYMBOL(change_bit); +EXPORT_SYMBOL(test_and_change_bit); + +#ifdef CONFIG_FUNCTION_TRACER +EXPORT_SYMBOL(_mcount); +#endif diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c new file mode 100644 index 000000000..09e4f45f8 --- /dev/null +++ b/arch/arm64/kernel/asm-offsets.c @@ -0,0 +1,157 @@ +/* + * Based on 
/*
 * Offset/constant table for arch/arm64.
 *
 * Every DEFINE(sym, val) below publishes @val (a structure member offset,
 * a structure size, or an existing C constant) under the name @sym, and
 * BLANK() separates the groups.  NOTE(review): DEFINE()/BLANK() are not
 * defined in this file; presumably they come from <linux/kbuild.h> and the
 * result is consumed by assembly sources -- confirm against the build rules.
 *
 * Always returns 0.
 */
int main(void)
{
	/* struct task_struct */
	DEFINE(TSK_ACTIVE_MM, offsetof(struct task_struct, active_mm));
	BLANK();
	/* struct thread_info */
	DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
	DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count));
	DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit));
	DEFINE(TI_TASK, offsetof(struct thread_info, task));
	DEFINE(TI_EXEC_DOMAIN, offsetof(struct thread_info, exec_domain));
	DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
	DEFINE(TI_CPU_EXCP, offsetof(struct thread_info, cpu_excp));
	DEFINE(TI_REGS_ON_EXCP, offsetof(struct thread_info, regs_on_excp));
	BLANK();
	DEFINE(THREAD_CPU_CONTEXT, offsetof(struct task_struct, thread.cpu_context));
	BLANK();
	/* struct pt_regs: selected regs[] slots, then the named members */
	DEFINE(S_X0, offsetof(struct pt_regs, regs[0]));
	DEFINE(S_X1, offsetof(struct pt_regs, regs[1]));
	DEFINE(S_X2, offsetof(struct pt_regs, regs[2]));
	DEFINE(S_X3, offsetof(struct pt_regs, regs[3]));
	DEFINE(S_X4, offsetof(struct pt_regs, regs[4]));
	DEFINE(S_X5, offsetof(struct pt_regs, regs[5]));
	DEFINE(S_X6, offsetof(struct pt_regs, regs[6]));
	DEFINE(S_X7, offsetof(struct pt_regs, regs[7]));
	DEFINE(S_X15, offsetof(struct pt_regs, regs[15]));
	DEFINE(S_X16, offsetof(struct pt_regs, regs[16]));
	DEFINE(S_X29, offsetof(struct pt_regs, regs[29]));
	/* S_LR is simply regs[30] */
	DEFINE(S_LR, offsetof(struct pt_regs, regs[30]));
	DEFINE(S_SP, offsetof(struct pt_regs, sp));
#ifdef CONFIG_COMPAT
	DEFINE(S_COMPAT_SP, offsetof(struct pt_regs, compat_sp));
#endif
	DEFINE(S_PSTATE, offsetof(struct pt_regs, pstate));
	DEFINE(S_PC, offsetof(struct pt_regs, pc));
	DEFINE(S_ORIG_X0, offsetof(struct pt_regs, orig_x0));
	DEFINE(S_SYSCALLNO, offsetof(struct pt_regs, syscallno));
	/* S_FRAME_SIZE is the size of the whole struct pt_regs */
	DEFINE(S_FRAME_SIZE, sizeof(struct pt_regs));
	BLANK();
	/* struct mm_struct / struct vm_area_struct */
	DEFINE(MM_CONTEXT_ID, offsetof(struct mm_struct, context.id));
	BLANK();
	DEFINE(VMA_VM_MM, offsetof(struct vm_area_struct, vm_mm));
	DEFINE(VMA_VM_FLAGS, offsetof(struct vm_area_struct, vm_flags));
	BLANK();
	/* plain C constants re-published under the same (or a similar) name */
	DEFINE(VM_EXEC, VM_EXEC);
	BLANK();
	DEFINE(PAGE_SZ, PAGE_SIZE);
	BLANK();
	/* struct cpu_info */
	DEFINE(CPU_INFO_SZ, sizeof(struct cpu_info));
	DEFINE(CPU_INFO_SETUP, offsetof(struct cpu_info, cpu_setup));
	BLANK();
	DEFINE(DMA_BIDIRECTIONAL, DMA_BIDIRECTIONAL);
	DEFINE(DMA_TO_DEVICE, DMA_TO_DEVICE);
	DEFINE(DMA_FROM_DEVICE, DMA_FROM_DEVICE);
	BLANK();
	/* clock ids and resolutions */
	DEFINE(CLOCK_REALTIME, CLOCK_REALTIME);
	DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC);
	/* note: CLOCK_REALTIME_RES is given the value of MONOTONIC_RES_NSEC */
	DEFINE(CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC);
	DEFINE(CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE);
	DEFINE(CLOCK_MONOTONIC_COARSE,CLOCK_MONOTONIC_COARSE);
	DEFINE(CLOCK_COARSE_RES, LOW_RES_NSEC);
	DEFINE(NSEC_PER_SEC, NSEC_PER_SEC);
	BLANK();
	/* struct vdso_data */
	DEFINE(VDSO_CS_CYCLE_LAST, offsetof(struct vdso_data, cs_cycle_last));
	DEFINE(VDSO_XTIME_CLK_SEC, offsetof(struct vdso_data, xtime_clock_sec));
	DEFINE(VDSO_XTIME_CLK_NSEC, offsetof(struct vdso_data, xtime_clock_nsec));
	DEFINE(VDSO_XTIME_CRS_SEC, offsetof(struct vdso_data, xtime_coarse_sec));
	DEFINE(VDSO_XTIME_CRS_NSEC, offsetof(struct vdso_data, xtime_coarse_nsec));
	DEFINE(VDSO_WTM_CLK_SEC, offsetof(struct vdso_data, wtm_clock_sec));
	DEFINE(VDSO_WTM_CLK_NSEC, offsetof(struct vdso_data, wtm_clock_nsec));
	DEFINE(VDSO_TB_SEQ_COUNT, offsetof(struct vdso_data, tb_seq_count));
	DEFINE(VDSO_CS_MULT, offsetof(struct vdso_data, cs_mult));
	DEFINE(VDSO_CS_SHIFT, offsetof(struct vdso_data, cs_shift));
	DEFINE(VDSO_TZ_MINWEST, offsetof(struct vdso_data, tz_minuteswest));
	DEFINE(VDSO_TZ_DSTTIME, offsetof(struct vdso_data, tz_dsttime));
	DEFINE(VDSO_USE_SYSCALL, offsetof(struct vdso_data, use_syscall));
	BLANK();
	/* struct timeval / struct timespec / struct timezone */
	DEFINE(TVAL_TV_SEC, offsetof(struct timeval, tv_sec));
	DEFINE(TVAL_TV_USEC, offsetof(struct timeval, tv_usec));
	DEFINE(TSPEC_TV_SEC, offsetof(struct timespec, tv_sec));
	DEFINE(TSPEC_TV_NSEC, offsetof(struct timespec, tv_nsec));
	BLANK();
	DEFINE(TZ_MINWEST, offsetof(struct timezone, tz_minuteswest));
	DEFINE(TZ_DSTTIME, offsetof(struct timezone, tz_dsttime));
	BLANK();
#ifdef CONFIG_KVM_ARM_HOST
	/* KVM structures; only emitted when CONFIG_KVM_ARM_HOST is set */
	DEFINE(VCPU_CONTEXT, offsetof(struct kvm_vcpu, arch.ctxt));
	DEFINE(CPU_GP_REGS, offsetof(struct kvm_cpu_context, gp_regs));
	DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_regs, regs));
	DEFINE(CPU_FP_REGS, offsetof(struct kvm_regs, fp_regs));
	DEFINE(CPU_SP_EL1, offsetof(struct kvm_regs, sp_el1));
	DEFINE(CPU_ELR_EL1, offsetof(struct kvm_regs, elr_el1));
	DEFINE(CPU_SPSR, offsetof(struct kvm_regs, spsr));
	DEFINE(CPU_SYSREGS, offsetof(struct kvm_cpu_context, sys_regs));
	DEFINE(VCPU_ESR_EL2, offsetof(struct kvm_vcpu, arch.fault.esr_el2));
	DEFINE(VCPU_FAR_EL2, offsetof(struct kvm_vcpu, arch.fault.far_el2));
	DEFINE(VCPU_HPFAR_EL2, offsetof(struct kvm_vcpu, arch.fault.hpfar_el2));
	DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2));
	DEFINE(VCPU_IRQ_LINES, offsetof(struct kvm_vcpu, arch.irq_lines));
	DEFINE(VCPU_HOST_CONTEXT, offsetof(struct kvm_vcpu, arch.host_cpu_context));
	DEFINE(VCPU_TIMER_CNTV_CTL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl));
	DEFINE(VCPU_TIMER_CNTV_CVAL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_cval));
	DEFINE(KVM_TIMER_CNTVOFF, offsetof(struct kvm, arch.timer.cntvoff));
	DEFINE(KVM_TIMER_ENABLED, offsetof(struct kvm, arch.timer.enabled));
	DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm));
	DEFINE(VCPU_VGIC_CPU, offsetof(struct kvm_vcpu, arch.vgic_cpu));
	DEFINE(VGIC_CPU_HCR, offsetof(struct vgic_cpu, vgic_hcr));
	DEFINE(VGIC_CPU_VMCR, offsetof(struct vgic_cpu, vgic_vmcr));
	DEFINE(VGIC_CPU_MISR, offsetof(struct vgic_cpu, vgic_misr));
	DEFINE(VGIC_CPU_EISR, offsetof(struct vgic_cpu, vgic_eisr));
	DEFINE(VGIC_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_elrsr));
	DEFINE(VGIC_CPU_APR, offsetof(struct vgic_cpu, vgic_apr));
	DEFINE(VGIC_CPU_LR, offsetof(struct vgic_cpu, vgic_lr));
	DEFINE(VGIC_CPU_NR_LR, offsetof(struct vgic_cpu, nr_lr));
	DEFINE(KVM_VTTBR, offsetof(struct kvm, arch.vttbr));
	DEFINE(KVM_VGIC_VCTRL, offsetof(struct kvm, arch.vgic.vctrl_base));
#endif
#ifdef CONFIG_ARM64_CPU_SUSPEND
	/* suspend structures; only emitted when CONFIG_ARM64_CPU_SUSPEND is set */
	DEFINE(CPU_SUSPEND_SZ, sizeof(struct cpu_suspend_ctx));
	DEFINE(CPU_CTX_SP, offsetof(struct cpu_suspend_ctx, sp));
	DEFINE(MPIDR_HASH_MASK, offsetof(struct mpidr_hash, mask));
	DEFINE(MPIDR_HASH_SHIFTS, offsetof(struct mpidr_hash, shift_aff));
	DEFINE(SLEEP_SAVE_SP_SZ, sizeof(struct sleep_save_sp));
	DEFINE(SLEEP_SAVE_SP_PHYS, offsetof(struct sleep_save_sp, save_ptr_stash_phys));
	DEFINE(SLEEP_SAVE_SP_VIRT, offsetof(struct sleep_save_sp, save_ptr_stash));
#endif
	return 0;
}
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <asm/cpu_ops.h> +#include <asm/smp_plat.h> +#include <linux/errno.h> +#include <linux/of.h> +#include <linux/string.h> + +extern const struct cpu_operations smp_spin_table_ops; +extern const struct cpu_operations cpu_psci_ops; +#ifdef CONFIG_MTK_PSCI +extern const struct cpu_operations mt_cpu_psci_ops; +#endif + +const struct cpu_operations *cpu_ops[NR_CPUS]; + +static const struct cpu_operations *supported_cpu_ops[] __initconst = { +#ifdef CONFIG_SMP + &smp_spin_table_ops, + &cpu_psci_ops, +#ifdef CONFIG_MTK_PSCI + &mt_cpu_psci_ops, +#endif +#endif + NULL, +}; + +static const struct cpu_operations * __init cpu_get_ops(const char *name) +{ + const struct cpu_operations **ops = supported_cpu_ops; + + while (*ops) { + if (!strcmp(name, (*ops)->name)) + return *ops; + + ops++; + } + + return NULL; +} + +/* + * Read a cpu's enable method from the device tree and record it in cpu_ops. + */ +int __init cpu_read_ops(struct device_node *dn, int cpu) +{ + const char *enable_method = of_get_property(dn, "enable-method", NULL); + if (!enable_method) { + /* + * The boot CPU may not have an enable method (e.g. when + * spin-table is used for secondaries). Don't warn spuriously. 
+ */ + if (cpu != 0) + pr_err("%s: missing enable-method property\n", + dn->full_name); + return -ENOENT; + } + + cpu_ops[cpu] = cpu_get_ops(enable_method); + if (!cpu_ops[cpu]) { + pr_err("%s: invalid enable-method property: %s\n", + dn->full_name, enable_method); + return -EOPNOTSUPP; + } + + return 0; +} + +void __init cpu_read_bootcpu_ops(void) +{ + struct device_node *dn = NULL; + u64 mpidr = cpu_logical_map(0); + + while ((dn = of_find_node_by_type(dn, "cpu"))) { + u64 hwid; + const __be32 *prop; + + prop = of_get_property(dn, "reg", NULL); + if (!prop) + continue; + + hwid = of_read_number(prop, of_n_addr_cells(dn)); + if (hwid == mpidr) { + cpu_read_ops(dn, 0); + of_node_put(dn); + return; + } + } +} diff --git a/arch/arm64/kernel/cputable.c b/arch/arm64/kernel/cputable.c new file mode 100644 index 000000000..fd3993cb0 --- /dev/null +++ b/arch/arm64/kernel/cputable.c @@ -0,0 +1,33 @@ +/* + * arch/arm64/kernel/cputable.c + * + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
/* Low-level stepping controls. */
#define DBG_MDSCR_SS		(1 << 0)
#define DBG_SPSR_SS		(1 << 21)

/* MDSCR_EL1 enabling bits */
#define DBG_MDSCR_KDE		(1 << 13)
#define DBG_MDSCR_MDE		(1 << 15)
/*
 * Every bit except KDE and MDE.  The whole expansion is parenthesised so the
 * macro behaves as a single operand wherever it is used.
 */
#define DBG_MDSCR_MASK		(~(DBG_MDSCR_KDE | DBG_MDSCR_MDE))
+ */ +static void mdscr_write(u32 mdscr) +{ + unsigned long flags; + local_dbg_save(flags); + asm volatile("msr mdscr_el1, %0" :: "r" (mdscr)); + local_dbg_restore(flags); +} + +static u32 mdscr_read(void) +{ + u32 mdscr; + asm volatile("mrs %0, mdscr_el1" : "=r" (mdscr)); + return mdscr; +} + +/* + * Allow root to disable self-hosted debug from userspace. + * This is useful if you want to connect an external JTAG debugger. + */ +static u32 debug_enabled = 1; + +static int create_debug_debugfs_entry(void) +{ + debugfs_create_bool("debug_enabled", 0644, NULL, &debug_enabled); + return 0; +} +fs_initcall(create_debug_debugfs_entry); + +static int __init early_debug_disable(char *buf) +{ + debug_enabled = 0; + return 0; +} + +early_param("nodebugmon", early_debug_disable); + +/* + * Keep track of debug users on each core. + * The ref counts are per-cpu so we use a local_t type. + */ +static DEFINE_PER_CPU(local_t, mde_ref_count); +static DEFINE_PER_CPU(local_t, kde_ref_count); + +void enable_debug_monitors(enum debug_el el) +{ + u32 mdscr, enable = 0; + + WARN_ON(preemptible()); + + if (local_inc_return(&__get_cpu_var(mde_ref_count)) == 1) + enable = DBG_MDSCR_MDE; + + if (el == DBG_ACTIVE_EL1 && + local_inc_return(&__get_cpu_var(kde_ref_count)) == 1) + enable |= DBG_MDSCR_KDE; + + if (enable && debug_enabled) { + mdscr = mdscr_read(); + mdscr |= enable; + mdscr_write(mdscr); + } +} + +void disable_debug_monitors(enum debug_el el) +{ + u32 mdscr, disable = 0; + + WARN_ON(preemptible()); + + if (local_dec_and_test(&__get_cpu_var(mde_ref_count))) + disable = ~DBG_MDSCR_MDE; + + if (el == DBG_ACTIVE_EL1 && + local_dec_and_test(&__get_cpu_var(kde_ref_count))) + disable &= ~DBG_MDSCR_KDE; + + if (disable) { + mdscr = mdscr_read(); + mdscr &= disable; + mdscr_write(mdscr); + } +} + +/* + * OS lock clearing. 
+ */ +static void clear_os_lock(void *unused) +{ + asm volatile("msr oslar_el1, %0" : : "r" (0)); +} + +static int __cpuinit os_lock_notify(struct notifier_block *self, + unsigned long action, void *data) +{ + int cpu = (unsigned long)data; + if (action == CPU_ONLINE) + smp_call_function_single(cpu, clear_os_lock, NULL, 1); + return NOTIFY_OK; +} + +static struct notifier_block __cpuinitdata os_lock_nb = { + .notifier_call = os_lock_notify, +}; + +static int __cpuinit debug_monitors_init(void) +{ + /* Clear the OS lock. */ + on_each_cpu(clear_os_lock, NULL, 1); + isb(); + local_dbg_enable(); + + /* Register hotplug handler. */ + register_cpu_notifier(&os_lock_nb); + return 0; +} +postcore_initcall(debug_monitors_init); + +/* + * Single step API and exception handling. + */ +static void set_regs_spsr_ss(struct pt_regs *regs) +{ + unsigned long spsr; + + spsr = regs->pstate; + spsr &= ~DBG_SPSR_SS; + spsr |= DBG_SPSR_SS; + regs->pstate = spsr; +} + +static void clear_regs_spsr_ss(struct pt_regs *regs) +{ + unsigned long spsr; + + spsr = regs->pstate; + spsr &= ~DBG_SPSR_SS; + regs->pstate = spsr; +} + +/* EL1 Single Step Handler hooks */ +static LIST_HEAD(step_hook); +DEFINE_RWLOCK(step_hook_lock); + +void register_step_hook(struct step_hook *hook) +{ + write_lock(&step_hook_lock); + list_add(&hook->node, &step_hook); + write_unlock(&step_hook_lock); +} + +void unregister_step_hook(struct step_hook *hook) +{ + write_lock(&step_hook_lock); + list_del(&hook->node); + write_unlock(&step_hook_lock); +} + +/* + * Call registered single step handers + * There is no Syndrome info to check for determining the handler. + * So we call all the registered handlers, until the right handler is + * found which returns zero. 
+ */ +static int call_step_hook(struct pt_regs *regs, unsigned int esr) +{ + struct step_hook *hook; + int retval = DBG_HOOK_ERROR; + + read_lock(&step_hook_lock); + + list_for_each_entry(hook, &step_hook, node) { + retval = hook->fn(regs, esr); + if (retval == DBG_HOOK_HANDLED) + break; + } + + read_unlock(&step_hook_lock); + + return retval; +} + +static int single_step_handler(unsigned long addr, unsigned int esr, + struct pt_regs *regs) +{ + siginfo_t info; + + /* + * If we are stepping a pending breakpoint, call the hw_breakpoint + * handler first. + */ + if (!reinstall_suspended_bps(regs)) + return 0; + + if (user_mode(regs)) { + info.si_signo = SIGTRAP; + info.si_errno = 0; + info.si_code = TRAP_HWBKPT; + info.si_addr = (void __user *)instruction_pointer(regs); + force_sig_info(SIGTRAP, &info, current); + + /* + * ptrace will disable single step unless explicitly + * asked to re-enable it. For other clients, it makes + * sense to leave it enabled (i.e. rewind the controls + * to the active-not-pending state). + */ + user_rewind_single_step(current); + } else { + if (call_step_hook(regs, esr) == DBG_HOOK_HANDLED) + return 0; + + pr_warning("Unexpected kernel single-step exception at EL1\n"); + /* + * Re-enable stepping since we know that we will be + * returning to regs. + */ + set_regs_spsr_ss(regs); + } + + return 0; +} + +/* + * Breakpoint handler is re-entrant as another breakpoint can + * hit within breakpoint handler, especically in kprobes. + * Use reader/writer locks instead of plain spinlock. 
+ */ +static LIST_HEAD(break_hook); +DEFINE_RWLOCK(break_hook_lock); + +void register_break_hook(struct break_hook *hook) +{ + write_lock(&break_hook_lock); + list_add(&hook->node, &break_hook); + write_unlock(&break_hook_lock); +} + +void unregister_break_hook(struct break_hook *hook) +{ + write_lock(&break_hook_lock); + list_del(&hook->node); + write_unlock(&break_hook_lock); +} + +static int call_break_hook(struct pt_regs *regs, unsigned int esr) +{ + struct break_hook *hook; + int (*fn)(struct pt_regs *regs, unsigned int esr) = NULL; + + read_lock(&break_hook_lock); + list_for_each_entry(hook, &break_hook, node) + if ((esr & hook->esr_mask) == hook->esr_val) + fn = hook->fn; + read_unlock(&break_hook_lock); + + return fn ? fn(regs, esr) : DBG_HOOK_ERROR; +} + +static int brk_handler(unsigned long addr, unsigned int esr, + struct pt_regs *regs) +{ + siginfo_t info; + + if (user_mode(regs)) { + info = (siginfo_t) { + .si_signo = SIGTRAP, + .si_errno = 0, + .si_code = TRAP_BRKPT, + .si_addr = (void __user *)instruction_pointer(regs), + }; + + force_sig_info(SIGTRAP, &info, current); + } else if (call_break_hook(regs, esr) != DBG_HOOK_HANDLED) { + pr_warning("Unexpected kernel BRK exception at EL1\n"); + return -EFAULT; + } + + return 0; +} + +int aarch32_break_handler(struct pt_regs *regs) +{ + siginfo_t info; + unsigned int instr; + bool bp = false; + void __user *pc = (void __user *)instruction_pointer(regs); + + if (!compat_user_mode(regs)) + return -EFAULT; + + if (compat_thumb_mode(regs)) { + /* get 16-bit Thumb instruction */ + get_user(instr, (u16 __user *)pc); + if (instr == AARCH32_BREAK_THUMB2_LO) { + /* get second half of 32-bit Thumb-2 instruction */ + get_user(instr, (u16 __user *)(pc + 2)); + bp = instr == AARCH32_BREAK_THUMB2_HI; + } else { + bp = instr == AARCH32_BREAK_THUMB; + } + } else { + /* 32-bit ARM instruction */ + get_user(instr, (u32 __user *)pc); + bp = (instr & ~0xf0000000) == AARCH32_BREAK_ARM; + } + + if (!bp) + return -EFAULT; + + 
info = (siginfo_t) { + .si_signo = SIGTRAP, + .si_errno = 0, + .si_code = TRAP_BRKPT, + .si_addr = pc, + }; + + force_sig_info(SIGTRAP, &info, current); + return 0; +} + +static int __init debug_traps_init(void) +{ + hook_debug_fault_code(DBG_ESR_EVT_HWSS, single_step_handler, SIGTRAP, + TRAP_HWBKPT, "single-step handler"); + hook_debug_fault_code(DBG_ESR_EVT_BRK, brk_handler, SIGTRAP, + TRAP_BRKPT, "ptrace BRK handler"); + return 0; +} +arch_initcall(debug_traps_init); + +/* Re-enable single step for syscall restarting. */ +void user_rewind_single_step(struct task_struct *task) +{ + /* + * If single step is active for this thread, then set SPSR.SS + * to 1 to avoid returning to the active-pending state. + */ + if (test_ti_thread_flag(task_thread_info(task), TIF_SINGLESTEP)) + set_regs_spsr_ss(task_pt_regs(task)); +} + +void user_fastforward_single_step(struct task_struct *task) +{ + if (test_ti_thread_flag(task_thread_info(task), TIF_SINGLESTEP)) + clear_regs_spsr_ss(task_pt_regs(task)); +} + +/* Kernel API */ +void kernel_enable_single_step(struct pt_regs *regs) +{ + WARN_ON(!irqs_disabled()); + set_regs_spsr_ss(regs); + mdscr_write(mdscr_read() | DBG_MDSCR_SS); + enable_debug_monitors(DBG_ACTIVE_EL1); +} + +void kernel_disable_single_step(void) +{ + WARN_ON(!irqs_disabled()); + mdscr_write(mdscr_read() & ~DBG_MDSCR_SS); + disable_debug_monitors(DBG_ACTIVE_EL1); +} + +int kernel_active_single_step(void) +{ + WARN_ON(!irqs_disabled()); + return mdscr_read() & DBG_MDSCR_SS; +} + +/* ptrace API */ +void user_enable_single_step(struct task_struct *task) +{ + set_ti_thread_flag(task_thread_info(task), TIF_SINGLESTEP); + set_regs_spsr_ss(task_pt_regs(task)); +} + +void user_disable_single_step(struct task_struct *task) +{ + clear_ti_thread_flag(task_thread_info(task), TIF_SINGLESTEP); +} diff --git a/arch/arm64/kernel/early_printk.c b/arch/arm64/kernel/early_printk.c new file mode 100644 index 000000000..fbb6e1843 --- /dev/null +++ b/arch/arm64/kernel/early_printk.c @@ 
-0,0 +1,154 @@ +/* + * Earlyprintk support. + * + * Copyright (C) 2012 ARM Ltd. + * Author: Catalin Marinas <catalin.marinas@arm.com> + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#include <linux/kernel.h> +#include <linux/console.h> +#include <linux/init.h> +#include <linux/string.h> +#include <linux/mm.h> +#include <linux/io.h> + +#include <linux/amba/serial.h> +#include <linux/serial_reg.h> + +static void __iomem *early_base; +static void (*printch)(char ch); + +/* + * PL011 single character TX. + */ +static void pl011_printch(char ch) +{ + while (readl_relaxed(early_base + UART01x_FR) & UART01x_FR_TXFF) + ; + writeb_relaxed(ch, early_base + UART01x_DR); + while (readl_relaxed(early_base + UART01x_FR) & UART01x_FR_BUSY) + ; +} + +/* + * Semihosting-based debug console + */ +static void smh_printch(char ch) +{ + asm volatile("mov x1, %0\n" + "mov x0, #3\n" + "hlt 0xf000\n" + : : "r" (&ch) : "x0", "x1", "memory"); +} + +/* + * 8250/16550 (8-bit aligned registers) single character TX. + */ +static void uart8250_8bit_printch(char ch) +{ + while (!(readb_relaxed(early_base + UART_LSR) & UART_LSR_THRE)) + ; + writeb_relaxed(ch, early_base + UART_TX); +} + +/* + * 8250/16550 (32-bit aligned registers) single character TX. 
/*
 * Console write callback: emit @n characters starting at @s through the
 * selected printch() routine, sending '\r' before every '\n'.
 * @con is not used.
 */
static void early_write(struct console *con, const char *s, unsigned n)
{
	const char *end = s + n;

	for (; s != end; s++) {
		if (*s == '\n')
			printch('\r');
		printch(*s);
	}
}
+ */ +static int __init setup_early_printk(char *buf) +{ + const struct earlycon_match *match = earlycon_match; + phys_addr_t paddr = 0; + + if (!buf) { + pr_warning("No earlyprintk arguments passed.\n"); + return 0; + } + + while (match->name) { + size_t len = strlen(match->name); + if (!strncmp(buf, match->name, len)) { + buf += len; + break; + } + match++; + } + if (!match->name) { + pr_warning("Unknown earlyprintk arguments: %s\n", buf); + return 0; + } + + /* I/O address */ + if (!strncmp(buf, ",0x", 3)) { + char *e; + paddr = simple_strtoul(buf + 1, &e, 16); + buf = e; + } + /* no options parsing yet */ + + if (paddr) + early_base = early_io_map(paddr, EARLYCON_IOBASE); + + printch = match->printch; + early_console = &early_console_dev; + register_console(&early_console_dev); + + return 0; +} + +early_param("earlyprintk", setup_early_printk); diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S new file mode 100644 index 000000000..d358ccacf --- /dev/null +++ b/arch/arm64/kernel/entry-fpsimd.S @@ -0,0 +1,67 @@ +/* + * FP/SIMD state saving and restoring + * + * Copyright (C) 2012 ARM Ltd. + * Author: Catalin Marinas <catalin.marinas@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/linkage.h> + +#include <asm/assembler.h> +#include <asm/fpsimdmacros.h> + +/* + * Save the FP registers. 
/*
 * Save the bottom n FP registers.
 *
 * x0 - pointer to struct fpsimd_partial_state
 *
 * The ENDPROC name must match the ENTRY name; it previously named
 * fpsimd_load_partial_state, which attached the end-of-procedure
 * annotation to the wrong symbol.
 */
ENTRY(fpsimd_save_partial_state)
	fpsimd_save_partial x0, 1, 8, 9
	ret
ENDPROC(fpsimd_save_partial_state)
+ * (or CONFIG_FRAME_POINTER, this is forced on arm64)
+ *
+ * stack layout after mcount_enter in _mcount():
+ *
+ * current sp/fp =>  0:+-----+
+ * in _mcount()        | x29 | -> instrumented function's fp
+ *                     +-----+
+ *                     | x30 | -> _mcount()'s lr (= instrumented function's pc)
+ * old sp       => +16:+-----+
+ * when instrumented   |     |
+ * function calls      | ... |
+ * _mcount()           |     |
+ *                     |     |
+ * instrumented => +xx:+-----+
+ * function's fp       | x29 | -> parent's fp
+ *                     +-----+
+ *                     | x30 | -> instrumented function's lr (= parent's pc)
+ *                     +-----+
+ *                     | ... |
+ */
+
+	.macro mcount_enter
+	stp	x29, x30, [sp, #-16]!
+	mov	x29, sp
+	.endm
+
+	.macro mcount_exit
+	ldp	x29, x30, [sp], #16
+	ret
+	.endm
+
+	.macro mcount_adjust_addr rd, rn
+	sub	\rd, \rn, #AARCH64_INSN_SIZE
+	.endm
+
+	/* for instrumented function's parent */
+	.macro mcount_get_parent_fp reg
+	ldr	\reg, [x29]
+	ldr	\reg, [\reg]
+	.endm
+
+	/* for instrumented function */
+	.macro mcount_get_pc0 reg
+	mcount_adjust_addr	\reg, x30
+	.endm
+
+	.macro mcount_get_pc reg
+	ldr	\reg, [x29, #8]
+	mcount_adjust_addr	\reg, \reg
+	.endm
+
+	.macro mcount_get_lr reg
+	ldr	\reg, [x29]
+	ldr	\reg, [\reg, #8]
+	mcount_adjust_addr	\reg, \reg
+	.endm
+
+	.macro mcount_get_lr_addr reg
+	ldr	\reg, [x29]
+	add	\reg, \reg, #8
+	.endm
+
+#ifndef CONFIG_DYNAMIC_FTRACE
+/*
+ * void _mcount(unsigned long return_address)
+ * @return_address: return address to instrumented function
+ *
+ * This function makes calls, if enabled, to:
+ *     - tracer function to probe instrumented function's entry,
+ *     - ftrace_graph_caller to set up an exit hook
+ */
+ENTRY(_mcount)
+#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
+	ldr	x0, =ftrace_trace_stop
+	ldr	x0, [x0]		// if ftrace_trace_stop
+	cbnz	x0, ftrace_stub		//   return; (ftrace_stub is a bare ret, lr untouched)
+#endif
+	mcount_enter
+
+	ldr	x0, =ftrace_trace_function
+	ldr	x2, [x0]
+	adr	x0, ftrace_stub
+	cmp	x0, x2			// if (ftrace_trace_function
+	b.eq	skip_ftrace_call	//     != ftrace_stub) {
+
+	mcount_get_pc	x0		//       function's pc
+	mcount_get_lr	x1		//       function's lr (= parent's pc)
+	blr	x2			//   (*ftrace_trace_function)(pc, lr);
+
+#ifndef CONFIG_FUNCTION_GRAPH_TRACER
+skip_ftrace_call:			//   return;
+	mcount_exit			// }
+#else
+	mcount_exit			//   return;
+					// }
+skip_ftrace_call:
+	ldr	x1, =ftrace_graph_return
+	ldr	x2, [x1]		//   if ((ftrace_graph_return
+	cmp	x0, x2			//        != ftrace_stub)
+	b.ne	ftrace_graph_caller
+
+	ldr	x1, =ftrace_graph_entry	//     || (ftrace_graph_entry
+	ldr	x2, [x1]		//        != ftrace_graph_entry_stub))
+	ldr	x0, =ftrace_graph_entry_stub
+	cmp	x0, x2
+	b.ne	ftrace_graph_caller	//     ftrace_graph_caller();
+
+	mcount_exit
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+ENDPROC(_mcount)
+
+#else /* CONFIG_DYNAMIC_FTRACE */
+/*
+ * _mcount() is used to build the kernel with -pg option, but all the branch
+ * instructions to _mcount() are replaced to NOP initially at kernel start up,
+ * and later on, NOP to branch to ftrace_caller() when enabled or branch to
+ * NOP when disabled per-function base.
+ */
+ENTRY(_mcount)
+	ret
+ENDPROC(_mcount)
+
+/*
+ * void ftrace_caller(unsigned long return_address)
+ * @return_address: return address to instrumented function
+ *
+ * This function is a counterpart of _mcount() in 'static' ftrace, and
+ * makes calls to:
+ *     - tracer function to probe instrumented function's entry,
+ *     - ftrace_graph_caller to set up an exit hook
+ */
+ENTRY(ftrace_caller)
+	mcount_enter
+
+	mcount_get_pc0	x0		//     function's pc
+	mcount_get_lr	x1		//     function's lr
+
+	.global ftrace_call
+ftrace_call:				// tracer(pc, lr);
+	nop				// This will be replaced with "bl xxx"
+					// where xxx can be any kind of tracer.
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	.global ftrace_graph_call
+ftrace_graph_call:			// ftrace_graph_caller();
+	nop				// If enabled, this will be replaced
+					// "b ftrace_graph_caller"
+#endif
+
+	mcount_exit
+ENDPROC(ftrace_caller)
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
+ENTRY(ftrace_stub)
+	ret
+ENDPROC(ftrace_stub)
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+/*
+ * void ftrace_graph_caller(void)
+ *
+ * Called from _mcount() or ftrace_caller() when function_graph tracer is
+ * selected.
+ * This function w/ prepare_ftrace_return() fakes link register's value on
+ * the call stack in order to intercept instrumented function's return path
+ * and run return_to_handler() later on its exit.
+ */
+ENTRY(ftrace_graph_caller)
+	mcount_get_lr_addr	  x0	//     pointer to function's saved lr
+	mcount_get_pc		  x1	//     function's pc
+	mcount_get_parent_fp	  x2	//     parent's fp
+	bl	prepare_ftrace_return	// prepare_ftrace_return(&lr, pc, fp)
+
+	mcount_exit
+ENDPROC(ftrace_graph_caller)
+
+/*
+ * void return_to_handler(void)
+ *
+ * Run ftrace_return_to_handler() before going back to parent.
+ * @fp is checked against the value passed by ftrace_graph_caller()
+ * only when CONFIG_FUNCTION_GRAPH_FP_TEST is enabled.
+ */
+ENTRY(return_to_handler)
+	str	x0, [sp, #-16]!
+	mov	x0, x29			//     parent's fp
+	bl	ftrace_return_to_handler// addr = ftrace_return_to_handler(fp);
+	mov	x30, x0			// restore the original return address
+	ldr	x0, [sp], #16
+	ret
+END(return_to_handler)
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
new file mode 100644
index 000000000..e9fa68252
--- /dev/null
+++ b/arch/arm64/kernel/entry.S
@@ -0,0 +1,836 @@
+/*
+ * Low-level exception handling code
+ *
+ * Copyright (C) 2012 ARM Ltd.
+ * Authors: Catalin Marinas <catalin.marinas@arm.com> + * Will Deacon <will.deacon@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/init.h> +#include <linux/linkage.h> + +#include <asm/assembler.h> +#include <asm/asm-offsets.h> +#include <asm/errno.h> +#include <asm/esr.h> +#include <asm/ptrace.h> +#include <asm/thread_info.h> +#include <asm/unistd.h> + +/* + * Bad Abort numbers + *----------------- + */ +#define BAD_SYNC 0 +#define BAD_IRQ 1 +#define BAD_FIQ 2 +#define BAD_ERROR 3 + + .macro kernel_entry, el, regsize = 64 + sub sp, sp, #S_FRAME_SIZE - S_LR // room for LR, SP, SPSR, ELR + .if \regsize == 32 + mov w0, w0 // zero upper 32 bits of x0 + .endif + push x28, x29 + push x26, x27 + push x24, x25 + push x22, x23 + push x20, x21 + push x18, x19 + push x16, x17 + push x14, x15 + push x12, x13 + push x10, x11 + push x8, x9 + push x6, x7 + push x4, x5 + push x2, x3 + push x0, x1 + .if \el == 0 + mrs x21, sp_el0 + .else + add x21, sp, #S_FRAME_SIZE + .endif + mrs x22, elr_el1 + mrs x23, spsr_el1 + stp lr, x21, [sp, #S_LR] + stp x22, x23, [sp, #S_PC] + + /* + * Set syscallno to -1 by default (overridden later if real syscall). 
+ */ + .if \el == 0 + mvn x21, xzr + str x21, [sp, #S_SYSCALLNO] + .endif + + /* + * Registers that may be useful after this macro is invoked: + * + * x21 - aborted SP + * x22 - aborted PC + * x23 - aborted PSTATE + */ + .endm + +#ifdef CONFIG_MTK_COMPAT + .macro kernel_entry_compat + sub sp, sp, #S_FRAME_SIZE - S_X16 // room for LR, SP, SPSR, ELR + mov w0, w0 // zero upper 32 bits of x0 + + stp x14, x15, [sp, #-16]! + stp x12, x13, [sp, #-16]! + stp x10, x11, [sp, #-16]! + stp x8, x9, [sp, #-16]! + stp x6, x7, [sp, #-16]! + stp x4, x5, [sp, #-16]! + stp x2, x3, [sp, #-16]! + stp x0, x1, [sp, #-16]! + + mrs x21, sp_el0 + mrs x22, elr_el1 + mrs x23, spsr_el1 + stp lr, x21, [sp, #S_LR] + stp x22, x23, [sp, #S_PC] + + /* + * Set syscallno to -1 by default (overridden later if real syscall). + */ + mvn x21, xzr + str x21, [sp, #S_SYSCALLNO] + + /* + * Registers that may be useful after this macro is invoked: + * + * x21 - aborted SP + * x22 - aborted PC + * x23 - aborted PSTATE + */ + .endm +#endif + + .macro kernel_exit, el, ret = 0 + ldp x21, x22, [sp, #S_PC] // load ELR, SPSR + .if \el == 0 + ldr x23, [sp, #S_SP] // load return stack pointer + .endif + .if \ret + ldr x1, [sp, #S_X1] // preserve x0 (syscall return) + add sp, sp, S_X2 + .else + pop x0, x1 + .endif + pop x2, x3 // load the rest of the registers + pop x4, x5 + pop x6, x7 + pop x8, x9 + msr elr_el1, x21 // set up the return data + msr spsr_el1, x22 + .if \el == 0 + msr sp_el0, x23 +#ifdef CONFIG_ARM64_ERRATUM_845719 + tbz x22, #4, 1f +#ifdef CONFIG_PID_IN_CONTEXTIDR + mrs x29, contextidr_el1 + msr contextidr_el1, x29 +#else + msr contextidr_el1, xzr +#endif +1: +#endif + .endif + pop x10, x11 + pop x12, x13 + pop x14, x15 + pop x16, x17 + pop x18, x19 + pop x20, x21 + pop x22, x23 + pop x24, x25 + pop x26, x27 + pop x28, x29 + ldr lr, [sp], #S_FRAME_SIZE - S_LR // load LR and restore SP + eret // return to kernel + .endm + +#ifdef CONFIG_MTK_COMPAT + .macro kernel_exit_compat, ret = 0 + ldp x21, x22, 
[sp, #S_PC] // load ELR, SPSR + ldr x23, [sp, #S_SP] // load return stack pointer + .if \ret + ldr x1, [sp, #S_X1] // preserve x0 (syscall return) + add sp, sp, S_X2 + .else + ldp x0, x1, [sp], #16 + .endif + ldp x2, x3, [sp], #16 // load the rest of the registers + ldp x4, x5, [sp], #16 + ldp x6, x7, [sp], #16 + ldp x8, x9, [sp], #16 + msr elr_el1, x21 // set up the return data + msr spsr_el1, x22 + msr sp_el0, x23 + ldp x10, x11, [sp], #16 + ldp x12, x13, [sp], #16 + ldp x14, x15, [sp], #16 + tbnz x22, #4, 1f + + ldp x16, x17, [sp], #16 + ldp x18, x19, [sp], #16 + ldp x20, x21, [sp], #16 + ldp x22, x23, [sp], #16 + ldp x24, x25, [sp], #16 + ldp x26, x27, [sp], #16 + ldp x28, x29, [sp], #16 + ldr lr, [sp], #S_FRAME_SIZE - S_LR // load LR and restore SP + eret // return to kernel + // could not run here + +1: +#ifdef CONFIG_ARM64_ERRATUM_845719 +#ifdef CONFIG_PID_IN_CONTEXTIDR + mrs x29, contextidr_el1 + msr contextidr_el1, x29 +#else + msr contextidr_el1, xzr +#endif +#endif + add sp, sp, #S_X29-S_X15 + ldr lr, [sp], #S_FRAME_SIZE - S_LR // load LR and restore SP + eret // return to kernel + .endm +#endif + + .macro get_thread_info, rd + mov \rd, sp + and \rd, \rd, #~(THREAD_SIZE - 1) // top of stack + .endm + +/* + * These are the registers used in the syscall handler, and allow us to + * have in theory up to 7 arguments to a function - x0 to x6. + * + * x7 is reserved for the system call number in 32-bit mode. + */ +sc_nr .req x25 // number of system calls +scno .req x26 // syscall number +stbl .req x27 // syscall table pointer +tsk .req x28 // current thread_info + +/* + * Interrupt handling. + */ + .macro irq_handler + ldr x1, handle_arch_irq + mov x0, sp + blr x1 + .endm + + .text + +/* + * Exception vectors. 
+ */
+
+	.align	11
+ENTRY(vectors)
+	ventry	el1_sync_invalid		// Synchronous EL1t
+	ventry	el1_irq_invalid			// IRQ EL1t
+	ventry	el1_fiq_invalid			// FIQ EL1t
+	ventry	el1_error_invalid		// Error EL1t
+
+	ventry	el1_sync			// Synchronous EL1h
+	ventry	el1_irq				// IRQ EL1h
+	ventry	el1_fiq_invalid			// FIQ EL1h
+	ventry	el1_error_invalid		// Error EL1h
+
+	ventry	el0_sync			// Synchronous 64-bit EL0
+	ventry	el0_irq				// IRQ 64-bit EL0
+	ventry	el0_fiq_invalid			// FIQ 64-bit EL0
+	ventry	el0_error_invalid		// Error 64-bit EL0
+
+#ifdef CONFIG_COMPAT
+	ventry	el0_sync_compat			// Synchronous 32-bit EL0
+	ventry	el0_irq_compat			// IRQ 32-bit EL0
+	ventry	el0_fiq_invalid_compat		// FIQ 32-bit EL0
+	ventry	el0_error_invalid_compat	// Error 32-bit EL0
+#else
+	ventry	el0_sync_invalid		// Synchronous 32-bit EL0
+	ventry	el0_irq_invalid			// IRQ 32-bit EL0
+	ventry	el0_fiq_invalid			// FIQ 32-bit EL0
+	ventry	el0_error_invalid		// Error 32-bit EL0
+#endif
+END(vectors)
+
+/*
+ * Invalid mode handlers
+ */
+	.macro	inv_entry, el, reason, regsize = 64
+	kernel_entry \el, \regsize	// pass the EL number, not the bare symbol 'el'
+	mov	x0, sp
+	mov	x1, #\reason
+	mrs	x2, esr_el1
+	b	bad_mode
+	.endm
+
+el0_sync_invalid:
+	inv_entry 0, BAD_SYNC
+ENDPROC(el0_sync_invalid)
+
+el0_irq_invalid:
+	inv_entry 0, BAD_IRQ
+ENDPROC(el0_irq_invalid)
+
+el0_fiq_invalid:
+	inv_entry 0, BAD_FIQ
+ENDPROC(el0_fiq_invalid)
+
+el0_error_invalid:
+	inv_entry 0, BAD_ERROR
+ENDPROC(el0_error_invalid)
+
+#ifdef CONFIG_COMPAT
+el0_fiq_invalid_compat:
+	inv_entry 0, BAD_FIQ, 32
+ENDPROC(el0_fiq_invalid_compat)
+
+el0_error_invalid_compat:
+	inv_entry 0, BAD_ERROR, 32
+ENDPROC(el0_error_invalid_compat)
+#endif
+
+el1_sync_invalid:
+	inv_entry 1, BAD_SYNC
+ENDPROC(el1_sync_invalid)
+
+el1_irq_invalid:
+	inv_entry 1, BAD_IRQ
+ENDPROC(el1_irq_invalid)
+
+el1_fiq_invalid:
+	inv_entry 1, BAD_FIQ
+ENDPROC(el1_fiq_invalid)
+
+el1_error_invalid:
+	inv_entry 1, BAD_ERROR
+ENDPROC(el1_error_invalid)
+
+/*
+ * EL1 mode handlers.
+ */ + .align 6 +el1_sync: + kernel_entry 1 + mov x0, sp + and x20, x0, #0xffffffffffffc000 + ldr w4, [x20, #TI_CPU_EXCP] + add w4, w4, #0x1 + str w4, [x20, #TI_CPU_EXCP] + cmp w4, #0x1 + b.ne el1_sync_nest + str x0, [x20, #TI_REGS_ON_EXCP] +el1_sync_nest: + cmp w4, #0x2 + b.lt el1_sync_nest_skip + bl aee_stop_nested_panic +el1_sync_nest_skip: + mrs x1, esr_el1 // read the syndrome register + lsr x24, x1, #ESR_EL1_EC_SHIFT // exception class + cmp x24, #ESR_EL1_EC_DABT_EL1 // data abort in EL1 + b.eq el1_da + cmp x24, #ESR_EL1_EC_SYS64 // configurable trap + b.eq el1_undef + cmp x24, #ESR_EL1_EC_SP_ALIGN // stack alignment exception + b.eq el1_sp_pc + cmp x24, #ESR_EL1_EC_PC_ALIGN // pc alignment exception + b.eq el1_sp_pc + cmp x24, #ESR_EL1_EC_UNKNOWN // unknown exception in EL1 + b.eq el1_undef + cmp x24, #ESR_EL1_EC_BREAKPT_EL1 // debug exception in EL1 + b.ge el1_dbg + b el1_inv +el1_da: + /* + * Data abort handling + */ + mrs x0, far_el1 + enable_dbg_if_not_stepping x2 + // re-enable interrupts if they were enabled in the aborted context + tbnz x23, #7, 1f // PSR_I_BIT + enable_irq +1: + mov x2, sp // struct pt_regs + bl do_mem_abort + mov x5, sp + and x20, x5, #0xffffffffffffc000 + ldr w4, [x20, #TI_CPU_EXCP] + sub w4, w4, #0x1 + str w4, [x20, #TI_CPU_EXCP] + + // disable interrupts before pulling preserved data off the stack + disable_irq + kernel_exit 1 +el1_sp_pc: + /* + * Stack or PC alignment exception handling + */ + mrs x0, far_el1 + mov x2, sp + b do_sp_pc_abort +el1_undef: + /* + * Undefined instruction + */ + mov x0, sp + bl do_undefinstr + kernel_exit 1 +el1_dbg: + /* + * Debug exception handling + */ + cmp x24, #ESR_EL1_EC_BRK64 // if BRK64 + cinc x24, x24, eq // set bit '0' + tbz x24, #0, el1_inv // EL1 only + mrs x0, far_el1 + mov x2, sp // struct pt_regs + bl do_debug_exception + mov x5, sp + and x20, x5, #0xffffffffffffc000 + ldr w4, [x20, #TI_CPU_EXCP] + sub w4, w4, #0x1 + str w4, [x20, #TI_CPU_EXCP] + + kernel_exit 1 +el1_inv: + // TODO: 
add support for undefined instructions in kernel mode + mov x0, sp + mov x1, #BAD_SYNC + mrs x2, esr_el1 + b bad_mode +ENDPROC(el1_sync) + + .align 6 +el1_irq: + kernel_entry 1 + enable_dbg_if_not_stepping x0 +#ifdef CONFIG_TRACE_IRQFLAGS + bl trace_hardirqs_off +#endif + bl MT_trace_hardirqs_off +#ifdef CONFIG_PREEMPT + get_thread_info tsk + ldr w24, [tsk, #TI_PREEMPT] // get preempt count + add w0, w24, #1 // increment it + str w0, [tsk, #TI_PREEMPT] +#endif + irq_handler +#ifdef CONFIG_PREEMPT + str w24, [tsk, #TI_PREEMPT] // restore preempt count + cbnz w24, 1f // preempt count != 0 + ldr x0, [tsk, #TI_FLAGS] // get flags + tbz x0, #TIF_NEED_RESCHED, 1f // needs rescheduling? + bl el1_preempt +1: +#endif + bl MT_trace_hardirqs_on +#ifdef CONFIG_TRACE_IRQFLAGS + bl trace_hardirqs_on +#endif + kernel_exit 1 +ENDPROC(el1_irq) + +#ifdef CONFIG_PREEMPT +el1_preempt: + mov x24, lr +1: enable_dbg + bl preempt_schedule_irq // irq en/disable is done inside + ldr x0, [tsk, #TI_FLAGS] // get new tasks TI_FLAGS + tbnz x0, #TIF_NEED_RESCHED, 1b // needs rescheduling? + ret x24 +#endif + +/* + * EL0 mode handlers. 
+ */ + .align 6 +el0_sync: + kernel_entry 0 + mrs x25, esr_el1 // read the syndrome register + lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class + cmp x24, #ESR_EL1_EC_SVC64 // SVC in 64-bit state + b.eq el0_svc + adr lr, ret_from_exception + cmp x24, #ESR_EL1_EC_DABT_EL0 // data abort in EL0 + b.eq el0_da + cmp x24, #ESR_EL1_EC_IABT_EL0 // instruction abort in EL0 + b.eq el0_ia + cmp x24, #ESR_EL1_EC_FP_ASIMD // FP/ASIMD access + b.eq el0_fpsimd_acc + cmp x24, #ESR_EL1_EC_FP_EXC64 // FP/ASIMD exception + b.eq el0_fpsimd_exc + cmp x24, #ESR_EL1_EC_SYS64 // configurable trap + b.eq el0_undef + cmp x24, #ESR_EL1_EC_SP_ALIGN // stack alignment exception + b.eq el0_sp_pc + cmp x24, #ESR_EL1_EC_PC_ALIGN // pc alignment exception + b.eq el0_sp_pc + cmp x24, #ESR_EL1_EC_UNKNOWN // unknown exception in EL0 + b.eq el0_undef + cmp x24, #ESR_EL1_EC_BREAKPT_EL0 // debug exception in EL0 + b.ge el0_dbg + b el0_inv + +#ifdef CONFIG_COMPAT + .align 6 +el0_sync_compat: +#ifdef CONFIG_MTK_COMPAT + kernel_entry_compat +#else + kernel_entry 0, 32 +#endif + mrs x25, esr_el1 // read the syndrome register + lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class + cmp x24, #ESR_EL1_EC_SVC32 // SVC in 32-bit state + b.eq el0_svc_compat + adr lr, ret_from_exception + cmp x24, #ESR_EL1_EC_DABT_EL0 // data abort in EL0 + b.eq el0_da + cmp x24, #ESR_EL1_EC_IABT_EL0 // instruction abort in EL0 + b.eq el0_ia + cmp x24, #ESR_EL1_EC_FP_ASIMD // FP/ASIMD access + b.eq el0_fpsimd_acc + cmp x24, #ESR_EL1_EC_FP_EXC32 // FP/ASIMD exception + b.eq el0_fpsimd_exc + cmp x24, #ESR_EL1_EC_UNKNOWN // unknown exception in EL0 + b.eq el0_undef + cmp x24, #ESR_EL1_EC_CP15_32 // CP15 MRC/MCR trap + b.eq el0_undef + cmp x24, #ESR_EL1_EC_CP15_64 // CP15 MRRC/MCRR trap + b.eq el0_undef + cmp x24, #ESR_EL1_EC_CP14_MR // CP14 MRC/MCR trap + b.eq el0_undef + cmp x24, #ESR_EL1_EC_CP14_LS // CP14 LDC/STC trap + b.eq el0_undef + cmp x24, #ESR_EL1_EC_CP14_64 // CP14 MRRC/MCRR trap + b.eq el0_undef + cmp x24, 
#ESR_EL1_EC_BREAKPT_EL0 // debug exception in EL0 + b.ge el0_dbg + b el0_inv +el0_svc_compat: + /* + * AArch32 syscall handling + */ + adr stbl, compat_sys_call_table // load compat syscall table pointer + uxtw scno, w7 // syscall number in w7 (r7) + mov sc_nr, #__NR_compat_syscalls + b el0_svc_naked + + .align 6 +el0_irq_compat: +#ifdef CONFIG_MTK_COMPAT + kernel_entry_compat +#else + kernel_entry 0, 32 +#endif + b el0_irq_naked +#endif + +el0_da: + /* + * Data abort handling + */ + mrs x0, far_el1 + bic x0, x0, #(0xff << 56) + disable_step x1 + isb + enable_dbg + // enable interrupts before calling the main handler + enable_irq + mov x1, x25 + mov x2, sp + b do_mem_abort +el0_ia: + /* + * Instruction abort handling + */ + mrs x0, far_el1 + disable_step x1 + isb + enable_dbg + // enable interrupts before calling the main handler + enable_irq + orr x1, x25, #1 << 24 // use reserved ISS bit for instruction aborts + mov x2, sp + b do_mem_abort +el0_fpsimd_acc: + /* + * Floating Point or Advanced SIMD access + */ + mov x0, x25 + mov x1, sp + b do_fpsimd_acc +el0_fpsimd_exc: + /* + * Floating Point or Advanced SIMD exception + */ + mov x0, x25 + mov x1, sp + b do_fpsimd_exc +el0_sp_pc: + /* + * Stack or PC alignment exception handling + */ + mrs x0, far_el1 + disable_step x1 + isb + enable_dbg + // enable interrupts before calling the main handler + enable_irq + mov x1, x25 + mov x2, sp + b do_sp_pc_abort +el0_undef: + /* + * Undefined instruction + */ + mov x0, sp + b do_undefinstr +el0_dbg: + /* + * Debug exception handling + */ + tbnz x24, #0, el0_inv // EL0 only + mrs x0, far_el1 + disable_step x1 + mov x1, x25 + mov x2, sp + b do_debug_exception +el0_inv: + mov x0, sp + mov x1, #BAD_SYNC + mrs x2, esr_el1 + b bad_mode +ENDPROC(el0_sync) + + .align 6 +el0_irq: + kernel_entry 0 +el0_irq_naked: + disable_step x1 + isb + enable_dbg +#ifdef CONFIG_TRACE_IRQFLAGS + bl trace_hardirqs_off +#endif + bl MT_trace_hardirqs_off + get_thread_info tsk +#ifdef CONFIG_PREEMPT + 
ldr w24, [tsk, #TI_PREEMPT] // get preempt count + add w23, w24, #1 // increment it + str w23, [tsk, #TI_PREEMPT] +#endif + irq_handler +#ifdef CONFIG_PREEMPT + ldr w0, [tsk, #TI_PREEMPT] + str w24, [tsk, #TI_PREEMPT] + cmp w0, w23 + b.eq 1f + mov x1, #0 + str x1, [x1] // BUG +1: +#endif + bl MT_trace_hardirqs_on +#ifdef CONFIG_TRACE_IRQFLAGS + bl trace_hardirqs_on +#endif + b ret_to_user +ENDPROC(el0_irq) + +/* + * This is the return code to user mode for abort handlers + */ +ret_from_exception: + get_thread_info tsk + b ret_to_user +ENDPROC(ret_from_exception) + +/* + * Register switch for AArch64. The callee-saved registers need to be saved + * and restored. On entry: + * x0 = previous task_struct (must be preserved across the switch) + * x1 = next task_struct + * Previous and next are guaranteed not to be the same. + * + */ +ENTRY(cpu_switch_to) + add x8, x0, #THREAD_CPU_CONTEXT + mov x9, sp + stp x19, x20, [x8], #16 // store callee-saved registers + stp x21, x22, [x8], #16 + stp x23, x24, [x8], #16 + stp x25, x26, [x8], #16 + stp x27, x28, [x8], #16 + stp x29, x9, [x8], #16 + str lr, [x8] + add x8, x1, #THREAD_CPU_CONTEXT + ldp x19, x20, [x8], #16 // restore callee-saved registers + ldp x21, x22, [x8], #16 + ldp x23, x24, [x8], #16 + ldp x25, x26, [x8], #16 + ldp x27, x28, [x8], #16 + ldp x29, x9, [x8], #16 + ldr lr, [x8] + mov sp, x9 + ret +ENDPROC(cpu_switch_to) + +/* + * This is the fast syscall return path. We do as little as possible here, + * and this includes saving x0 back into the kernel stack. + */ +ret_fast_syscall: + disable_irq // disable interrupts + ldr x1, [tsk, #TI_FLAGS] + and x2, x1, #_TIF_WORK_MASK + cbnz x2, fast_work_pending + tbz x1, #TIF_SINGLESTEP, fast_exit + disable_dbg + enable_step x2 +fast_exit: +#ifdef CONFIG_MTK_COMPAT + kernel_exit_compat ret = 1 +#else + kernel_exit 0, ret = 1 +#endif + +/* + * Ok, we need to do extra processing, enter the slow path. 
+ */ +fast_work_pending: + str x0, [sp, #S_X0] // returned x0 +work_pending: + tbnz x1, #TIF_NEED_RESCHED, work_resched + /* TIF_SIGPENDING, TIF_NOTIFY_RESUME or TIF_FOREIGN_FPSTATE case */ + ldr x2, [sp, #S_PSTATE] + mov x0, sp // 'regs' + tst x2, #PSR_MODE_MASK // user mode regs? + b.ne no_work_pending // returning to kernel + enable_irq // enable interrupts for do_notify_resume() + bl do_notify_resume + b ret_to_user +work_resched: + enable_dbg + bl schedule + +/* + * "slow" syscall return path. + */ +ret_to_user: + disable_irq // disable interrupts + ldr x1, [tsk, #TI_FLAGS] + and x2, x1, #_TIF_WORK_MASK + cbnz x2, work_pending + tbz x1, #TIF_SINGLESTEP, no_work_pending + disable_dbg + enable_step x2 +no_work_pending: +#ifdef CONFIG_MTK_COMPAT + kernel_exit_compat ret = 0 +#else + kernel_exit 0, ret = 0 +#endif +ENDPROC(ret_to_user) + +/* + * This is how we return from a fork. + */ +ENTRY(ret_from_fork) + bl schedule_tail + cbz x19, 1f // not a kernel thread + mov x0, x20 + blr x19 +1: get_thread_info tsk + b ret_to_user +ENDPROC(ret_from_fork) + +/* + * SVC handler. + */ + .align 6 +el0_svc: + adrp stbl, sys_call_table // load syscall table pointer + uxtw scno, w8 // syscall number in w8 + mov sc_nr, #__NR_syscalls +el0_svc_naked: // compat entry point + stp x0, scno, [sp, #S_ORIG_X0] // save the original x0 and syscall number + disable_step x16 + isb + enable_dbg + enable_irq + + get_thread_info tsk + ldr x16, [tsk, #TI_FLAGS] // check for syscall hooks + tst x16, #_TIF_SYSCALL_WORK + b.ne __sys_trace + adr lr, ret_fast_syscall // return address + cmp scno, sc_nr // check upper syscall limit + b.hs ni_sys + ldr x16, [stbl, scno, lsl #3] // address in the syscall table + br x16 // call sys_* routine +ni_sys: + mov x0, sp + b do_ni_syscall +ENDPROC(el0_svc) + + /* + * This is the really slow path. We're going to be doing context + * switches, and waiting for our parent to respond. 
+ */ +__sys_trace: + mov x0, sp + bl syscall_trace_enter + adr lr, __sys_trace_return // return address + cmp w0, #RET_SKIP_SYSCALL_TRACE // skip syscall and tracing? + b.eq ret_to_user + cmp w0, #RET_SKIP_SYSCALL // skip syscall? + b.eq __sys_trace_return_skipped + uxtw scno, w0 // syscall number (possibly new) + mov x1, sp // pointer to regs + cmp scno, sc_nr // check upper syscall limit + b.hs ni_sys + ldp x0, x1, [sp] // restore the syscall args + ldp x2, x3, [sp, #S_X2] + ldp x4, x5, [sp, #S_X4] + ldp x6, x7, [sp, #S_X6] + ldr x16, [stbl, scno, lsl #3] // address in the syscall table + br x16 // call sys_* routine + +__sys_trace_return: + str x0, [sp] // save returned x0 +__sys_trace_return_skipped: // x0 already in regs[0] + mov x0, sp + bl syscall_trace_exit + b ret_to_user + +/* + * Special system call wrappers. + */ +ENTRY(sys_rt_sigreturn_wrapper) + mov x0, sp + b sys_rt_sigreturn +ENDPROC(sys_rt_sigreturn_wrapper) + +ENTRY(handle_arch_irq) + .quad 0 diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c new file mode 100644 index 000000000..5aa531247 --- /dev/null +++ b/arch/arm64/kernel/fpsimd.c @@ -0,0 +1,342 @@ +/* + * FP/SIMD context switching and fault handling + * + * Copyright (C) 2012 ARM Ltd. + * Author: Catalin Marinas <catalin.marinas@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/sched.h> +#include <linux/signal.h> +#include <linux/hardirq.h> +#include <linux/cpu.h> + + +#include <asm/fpsimd.h> +#include <asm/cputype.h> + +#define FPEXC_IOF (1 << 0) +#define FPEXC_DZF (1 << 1) +#define FPEXC_OFF (1 << 2) +#define FPEXC_UFF (1 << 3) +#define FPEXC_IXF (1 << 4) +#define FPEXC_IDF (1 << 7) + +/* + * In order to reduce the number of times the FPSIMD state is needlessly saved + * and restored, we need to keep track of two things: + * (a) for each task, we need to remember which CPU was the last one to have + * the task's FPSIMD state loaded into its FPSIMD registers; + * (b) for each CPU, we need to remember which task's userland FPSIMD state has + * been loaded into its FPSIMD registers most recently, or whether it has + * been used to perform kernel mode NEON in the meantime. + * + * For (a), we add a 'cpu' field to struct fpsimd_state, which gets updated to + * the id of the current CPU everytime the state is loaded onto a CPU. For (b), + * we add the per-cpu variable 'fpsimd_last_state' (below), which contains the + * address of the userland FPSIMD state of the task that was loaded onto the CPU + * the most recently, or NULL if kernel mode NEON has been performed after that. + * + * With this in place, we no longer have to restore the next FPSIMD state right + * when switching between tasks. Instead, we can defer this check to userland + * resume, at which time we verify whether the CPU's fpsimd_last_state and the + * task's fpsimd_state.cpu are still mutually in sync. If this is the case, we + * can omit the FPSIMD restore. + * + * As an optimization, we use the thread_info flag TIF_FOREIGN_FPSTATE to + * indicate whether or not the userland FPSIMD state of the current task is + * present in the registers. The flag is set unless the FPSIMD registers of this + * CPU currently contain the most recent userland FPSIMD state of the current + * task. 
+ * + * For a certain task, the sequence may look something like this: + * - the task gets scheduled in; if both the task's fpsimd_state.cpu field + * contains the id of the current CPU, and the CPU's fpsimd_last_state per-cpu + * variable points to the task's fpsimd_state, the TIF_FOREIGN_FPSTATE flag is + * cleared, otherwise it is set; + * + * - the task returns to userland; if TIF_FOREIGN_FPSTATE is set, the task's + * userland FPSIMD state is copied from memory to the registers, the task's + * fpsimd_state.cpu field is set to the id of the current CPU, the current + * CPU's fpsimd_last_state pointer is set to this task's fpsimd_state and the + * TIF_FOREIGN_FPSTATE flag is cleared; + * + * - the task executes an ordinary syscall; upon return to userland, the + * TIF_FOREIGN_FPSTATE flag will still be cleared, so no FPSIMD state is + * restored; + * + * - the task executes a syscall which executes some NEON instructions; this is + * preceded by a call to kernel_neon_begin(), which copies the task's FPSIMD + * register contents to memory, clears the fpsimd_last_state per-cpu variable + * and sets the TIF_FOREIGN_FPSTATE flag; + * + * - the task gets preempted after kernel_neon_end() is called; as we have not + * returned from the 2nd syscall yet, TIF_FOREIGN_FPSTATE is still set so + * whatever is in the FPSIMD registers is not saved to memory, but discarded. + */ +static DEFINE_PER_CPU(struct fpsimd_state *, fpsimd_last_state); + +/* + * Trapped FP/ASIMD access. + */ +void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs) +{ + /* TODO: implement lazy context saving/restoring */ + WARN_ON(1); +} + +/* + * Raise a SIGFPE for the current process. 
+ */ +void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs) +{ + siginfo_t info; + unsigned int si_code = 0; + + if (esr & FPEXC_IOF) + si_code = FPE_FLTINV; + else if (esr & FPEXC_DZF) + si_code = FPE_FLTDIV; + else if (esr & FPEXC_OFF) + si_code = FPE_FLTOVF; + else if (esr & FPEXC_UFF) + si_code = FPE_FLTUND; + else if (esr & FPEXC_IXF) + si_code = FPE_FLTRES; + + memset(&info, 0, sizeof(info)); + info.si_signo = SIGFPE; + info.si_code = si_code; + info.si_addr = (void __user *)instruction_pointer(regs); + + send_sig_info(SIGFPE, &info, current); +} + +void fpsimd_thread_switch(struct task_struct *next) +{ + /* + * Save the current FPSIMD state to memory, but only if whatever is in + * the registers is in fact the most recent userland FPSIMD state of + * 'current'. + */ + if (current->mm && !test_thread_flag(TIF_FOREIGN_FPSTATE)) + fpsimd_save_state(¤t->thread.fpsimd_state); + + if (next->mm) { + /* + * If we are switching to a task whose most recent userland + * FPSIMD state is already in the registers of *this* cpu, + * we can skip loading the state from memory. Otherwise, set + * the TIF_FOREIGN_FPSTATE flag so the state will be loaded + * upon the next return to userland. 
+ */ + struct fpsimd_state *st = &next->thread.fpsimd_state; + + if (__this_cpu_read(fpsimd_last_state) == st + && st->cpu == smp_processor_id()) + clear_ti_thread_flag(task_thread_info(next), + TIF_FOREIGN_FPSTATE); + else + set_ti_thread_flag(task_thread_info(next), + TIF_FOREIGN_FPSTATE); + } +} + +void fpsimd_flush_thread(void) +{ + preempt_disable(); + memset(¤t->thread.fpsimd_state, 0, sizeof(struct fpsimd_state)); + set_thread_flag(TIF_FOREIGN_FPSTATE); + preempt_enable(); +} + +/* + * Save the userland FPSIMD state of 'current' to memory, but only if the state + * currently held in the registers does in fact belong to 'current' + */ +void fpsimd_preserve_current_state(void) +{ + preempt_disable(); + if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) + fpsimd_save_state(¤t->thread.fpsimd_state); + preempt_enable(); +} + +/* + * Load the userland FPSIMD state of 'current' from memory, but only if the + * FPSIMD state already held in the registers is /not/ the most recent FPSIMD + * state of 'current' + */ +void fpsimd_restore_current_state(void) +{ + preempt_disable(); + if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) { + struct fpsimd_state *st = ¤t->thread.fpsimd_state; + + fpsimd_load_state(st); + this_cpu_write(fpsimd_last_state, st); + st->cpu = smp_processor_id(); + } + preempt_enable(); +} + +/* + * Load an updated userland FPSIMD state for 'current' from memory and set the + * flag that indicates that the FPSIMD register contents are the most recent + * FPSIMD state of 'current' + */ +void fpsimd_update_current_state(struct fpsimd_state *state) +{ + preempt_disable(); + fpsimd_load_state(state); + if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) { + struct fpsimd_state *st = ¤t->thread.fpsimd_state; + + this_cpu_write(fpsimd_last_state, st); + st->cpu = smp_processor_id(); + } + preempt_enable(); +} + +/* + * Invalidate live CPU copies of task t's FPSIMD state + */ +void fpsimd_flush_task_state(struct task_struct *t) +{ + t->thread.fpsimd_state.cpu 
= NR_CPUS; +} + +#ifdef CONFIG_KERNEL_MODE_NEON + +static DEFINE_PER_CPU(struct fpsimd_partial_state, hardirq_fpsimdstate); +static DEFINE_PER_CPU(struct fpsimd_partial_state, softirq_fpsimdstate); + +/* + * Kernel-side NEON support functions + */ +void kernel_neon_begin_partial(u32 num_regs) +{ + if (in_interrupt()) { + struct fpsimd_partial_state *s = this_cpu_ptr( + in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate); + + BUG_ON(num_regs > 32); + fpsimd_save_partial_state(s, roundup(num_regs, 2)); + } else { + /* + * Save the userland FPSIMD state if we have one and if we + * haven't done so already. Clear fpsimd_last_state to indicate + * that there is no longer userland FPSIMD state in the + * registers. + */ + preempt_disable(); + if (current->mm && + !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE)) + fpsimd_save_state(&current->thread.fpsimd_state); + this_cpu_write(fpsimd_last_state, NULL); + } +} +EXPORT_SYMBOL(kernel_neon_begin_partial); + +void kernel_neon_end(void) +{ + if (in_interrupt()) { + struct fpsimd_partial_state *s = this_cpu_ptr( + in_irq() ? 
&hardirq_fpsimdstate : &softirq_fpsimdstate); + fpsimd_load_partial_state(s); + } else { + preempt_enable(); + } +} +EXPORT_SYMBOL(kernel_neon_end); + +#endif /* CONFIG_KERNEL_MODE_NEON */ + +#ifdef CONFIG_CPU_PM +static int fpsimd_cpu_pm_notifier(struct notifier_block *self, + unsigned long cmd, void *v) +{ + switch (cmd) { + case CPU_PM_ENTER: + if (current->mm && !test_thread_flag(TIF_FOREIGN_FPSTATE)) + fpsimd_save_state(&current->thread.fpsimd_state); + this_cpu_write(fpsimd_last_state, NULL); + break; + case CPU_PM_EXIT: + if (current->mm) + set_thread_flag(TIF_FOREIGN_FPSTATE); + break; + case CPU_PM_ENTER_FAILED: + default: + return NOTIFY_DONE; + } + return NOTIFY_OK; +} + +static struct notifier_block fpsimd_cpu_pm_notifier_block = { + .notifier_call = fpsimd_cpu_pm_notifier, +}; + +static void fpsimd_pm_init(void) +{ + cpu_pm_register_notifier(&fpsimd_cpu_pm_notifier_block); +} + +#else +static inline void fpsimd_pm_init(void) { } +#endif /* CONFIG_CPU_PM */ + +#ifdef CONFIG_MEDIATEK_SOLUTION +static int fpsimd_hotplug(struct notifier_block *b, unsigned long action, void *hcpu) +{ + if (action == CPU_DYING || action == CPU_DYING_FROZEN) { + if (current->mm && !test_thread_flag(TIF_FOREIGN_FPSTATE)) + fpsimd_save_state(&current->thread.fpsimd_state); + this_cpu_write(fpsimd_last_state, NULL); + } else if (action == CPU_STARTING || action == CPU_STARTING_FROZEN){ + if (current->mm) + set_thread_flag(TIF_FOREIGN_FPSTATE); + } + + return NOTIFY_OK; +} +#endif + +/* + * FP/SIMD support code initialisation. 
+ */ +static int __init fpsimd_init(void) +{ + u64 pfr = read_cpuid(ID_AA64PFR0_EL1); + + if (pfr & (0xf << 16)) { + pr_notice("Floating-point is not implemented\n"); + return 0; + } + elf_hwcap |= HWCAP_FP; + + if (pfr & (0xf << 20)) + pr_notice("Advanced SIMD is not implemented\n"); + else + elf_hwcap |= HWCAP_ASIMD; + +#ifdef CONFIG_MEDIATEK_SOLUTION + hotcpu_notifier(fpsimd_hotplug, 0); +#endif + + return 0; +} +late_initcall(fpsimd_init); diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c new file mode 100644 index 000000000..7924d73b6 --- /dev/null +++ b/arch/arm64/kernel/ftrace.c @@ -0,0 +1,176 @@ +/* + * arch/arm64/kernel/ftrace.c + * + * Copyright (C) 2013 Linaro Limited + * Author: AKASHI Takahiro <takahiro.akashi@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/ftrace.h> +#include <linux/swab.h> +#include <linux/uaccess.h> + +#include <asm/cacheflush.h> +#include <asm/ftrace.h> +#include <asm/insn.h> + +#ifdef CONFIG_DYNAMIC_FTRACE +/* + * Replace a single instruction, which may be a branch or NOP. + * If @validate == true, a replaced instruction is checked against 'old'. + */ +static int ftrace_modify_code(unsigned long pc, u32 old, u32 new, + bool validate) +{ + u32 replaced; + + /* + * Note: + * Due to modules and __init, code can disappear and change, + * we need to protect against faulting as well as code changing. + * We do this by aarch64_insn_*() which use the probe_kernel_*(). + * + * No lock is held here because all the modifications are run + * through stop_machine(). 
+ */ + if (validate) { + if (aarch64_insn_read((void *)pc, &replaced)) + return -EFAULT; + + if (replaced != old) + return -EINVAL; + } + if (aarch64_insn_patch_text_nosync((void *)pc, new)) + return -EPERM; + + return 0; +} + +/* + * Replace tracer function in ftrace_caller() + */ +int ftrace_update_ftrace_func(ftrace_func_t func) +{ + unsigned long pc; + u32 new; + + pc = (unsigned long)&ftrace_call; + new = aarch64_insn_gen_branch_imm(pc, (unsigned long)func, true); + + return ftrace_modify_code(pc, 0, new, false); +} + +/* + * Turn on the call to ftrace_caller() in instrumented function + */ +int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned long pc = rec->ip; + u32 old, new; + + old = aarch64_insn_gen_nop(); + new = aarch64_insn_gen_branch_imm(pc, addr, true); + + return ftrace_modify_code(pc, old, new, true); +} + +/* + * Turn off the call to ftrace_caller() in instrumented function + */ +int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, + unsigned long addr) +{ + unsigned long pc = rec->ip; + u32 old, new; + + old = aarch64_insn_gen_branch_imm(pc, addr, true); + new = aarch64_insn_gen_nop(); + + return ftrace_modify_code(pc, old, new, true); +} + +int __init ftrace_dyn_arch_init(void) +{ + return 0; +} +#endif /* CONFIG_DYNAMIC_FTRACE */ + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +/* + * function_graph tracer expects ftrace_return_to_handler() to be called + * on the way back to parent. For this purpose, this function is called + * in _mcount() or ftrace_caller() to replace return address (*parent) on + * the call stack to return_to_handler. + * + * Note that @frame_pointer is used only for sanity check later. 
+ */ +void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, + unsigned long frame_pointer) +{ + unsigned long return_hooker = (unsigned long)&return_to_handler; + unsigned long old; + struct ftrace_graph_ent trace; + int err; + + if (unlikely(atomic_read(&current->tracing_graph_pause))) + return; + + /* + * Note: + * No protection against faulting at *parent, which may be seen + * on other archs. It's unlikely on AArch64. + */ + old = *parent; + *parent = return_hooker; + + trace.func = self_addr; + trace.depth = current->curr_ret_stack + 1; + + /* Only trace if the calling function expects to */ + if (!ftrace_graph_entry(&trace)) { + *parent = old; + return; + } + + err = ftrace_push_return_trace(old, self_addr, &trace.depth, + frame_pointer); + if (err == -EBUSY) { + *parent = old; + return; + } +} + +#ifdef CONFIG_DYNAMIC_FTRACE +/* + * Turn on/off the call to ftrace_graph_caller() in ftrace_caller() + * depending on @enable. + */ +static int ftrace_modify_graph_caller(bool enable) +{ + unsigned long pc = (unsigned long)&ftrace_graph_call; + u32 branch, nop; + + branch = aarch64_insn_gen_branch_imm(pc, + (unsigned long)ftrace_graph_caller, false); + nop = aarch64_insn_gen_nop(); + + if (enable) + return ftrace_modify_code(pc, nop, branch, true); + else + return ftrace_modify_code(pc, branch, nop, true); +} + +int ftrace_enable_ftrace_graph_caller(void) +{ + return ftrace_modify_graph_caller(true); +} + +int ftrace_disable_ftrace_graph_caller(void) +{ + return ftrace_modify_graph_caller(false); +} +#endif /* CONFIG_DYNAMIC_FTRACE */ +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S new file mode 100644 index 000000000..f9c8478b8 --- /dev/null +++ b/arch/arm64/kernel/head.S @@ -0,0 +1,585 @@ +/* + * Low-level CPU initialisation + * Based on arch/arm/kernel/head.S + * + * Copyright (C) 1994-2002 Russell King + * Copyright (C) 2003-2012 ARM Ltd. 
+ * Authors: Catalin Marinas <catalin.marinas@arm.com> + * Will Deacon <will.deacon@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/linkage.h> +#include <linux/init.h> + +#include <asm/assembler.h> +#include <asm/ptrace.h> +#include <asm/asm-offsets.h> +#include <asm/cputype.h> +#include <asm/memory.h> +#include <asm/thread_info.h> +#include <asm/pgtable-hwdef.h> +#include <asm/pgtable.h> +#include <asm/page.h> +#include <asm/virt.h> + +/* + * swapper_pg_dir is the virtual address of the initial page table. We place + * the page tables 3 * PAGE_SIZE below KERNEL_RAM_VADDR. The idmap_pg_dir has + * 2 pages and is placed below swapper_pg_dir. 
+ */ +#define KERNEL_RAM_VADDR (PAGE_OFFSET + TEXT_OFFSET) + +#if (KERNEL_RAM_VADDR & 0xfffff) != 0x80000 +#error KERNEL_RAM_VADDR must start at 0xXXX80000 +#endif + +#define SWAPPER_DIR_SIZE (3 * PAGE_SIZE) +#define IDMAP_DIR_SIZE (2 * PAGE_SIZE) + + .globl swapper_pg_dir + .equ swapper_pg_dir, KERNEL_RAM_VADDR - SWAPPER_DIR_SIZE + + .globl idmap_pg_dir + .equ idmap_pg_dir, swapper_pg_dir - IDMAP_DIR_SIZE + + .macro pgtbl, ttb0, ttb1, phys + add \ttb1, \phys, #TEXT_OFFSET - SWAPPER_DIR_SIZE + sub \ttb0, \ttb1, #IDMAP_DIR_SIZE + .endm + +#ifdef CONFIG_ARM64_64K_PAGES +#define BLOCK_SHIFT PAGE_SHIFT +#define BLOCK_SIZE PAGE_SIZE +#else +#define BLOCK_SHIFT SECTION_SHIFT +#define BLOCK_SIZE SECTION_SIZE +#endif + +#define KERNEL_START KERNEL_RAM_VADDR +#define KERNEL_END _end + +/* + * Initial memory map attributes. + */ +#ifndef CONFIG_SMP +#define PTE_FLAGS PTE_TYPE_PAGE | PTE_AF +#define PMD_FLAGS PMD_TYPE_SECT | PMD_SECT_AF +#else +#define PTE_FLAGS PTE_TYPE_PAGE | PTE_AF | PTE_SHARED +#define PMD_FLAGS PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S +#endif + +#ifdef CONFIG_ARM64_64K_PAGES +#define MM_MMUFLAGS PTE_ATTRINDX(MT_NORMAL) | PTE_FLAGS +#else +#define MM_MMUFLAGS PMD_ATTRINDX(MT_NORMAL) | PMD_FLAGS +#endif + +/* + * Kernel startup entry point. + * --------------------------- + * + * The requirements are: + * MMU = off, D-cache = off, I-cache = on or off, + * x0 = physical address to the FDT blob. + * + * This code is mostly position independent so you call this at + * __pa(PAGE_OFFSET + TEXT_OFFSET). + * + * Note that the callee-saved registers are used for storing variables + * that are useful before the MMU is enabled. The allocations are described + * in the entry routines. + */ + __HEAD + + /* + * DO NOT MODIFY. Image header expected by Linux boot-loaders. 
+ */ + b stext // branch to kernel start, magic + .long 0 // reserved + .quad TEXT_OFFSET // Image load offset from start of RAM + .quad 0 // reserved + .quad 0 // reserved + +ENTRY(stext) + mov x21, x0 // x21=FDT + bl __calc_phys_offset // x24=PHYS_OFFSET, x28=PHYS_OFFSET-PAGE_OFFSET + bl el2_setup // Drop to EL1 + mrs x22, midr_el1 // x22=cpuid + mov x0, x22 + bl lookup_processor_type + mov x23, x0 // x23=current cpu_table + cbz x23, __error_p // invalid processor (x23=0)? + bl __vet_fdt + bl __create_page_tables // x25=TTBR0, x26=TTBR1 + /* + * The following calls CPU specific code in a position independent + * manner. See arch/arm64/mm/proc.S for details. x23 = base of + * cpu_info structure selected by lookup_processor_type above. + * On return, the CPU will be ready for the MMU to be turned on and + * the TCR will have been set. + */ + ldr x27, __switch_data // address to jump to after + // MMU has been enabled + adr lr, __enable_mmu // return (PIC) address + ldr x12, [x23, #CPU_INFO_SETUP] + add x12, x12, x28 // __virt_to_phys + br x12 // initialise processor +ENDPROC(stext) + +/* + * If we're fortunate enough to boot at EL2, ensure that the world is + * sane before dropping to EL1. + */ +ENTRY(el2_setup) + mrs x0, CurrentEL + cmp x0, #PSR_MODE_EL2t + ccmp x0, #PSR_MODE_EL2h, #0x4, ne + ldr x0, =__boot_cpu_mode // Compute __boot_cpu_mode + add x0, x0, x28 + b.eq 1f + str wzr, [x0] // Remember we don't have EL2... + ret + + /* Hyp configuration. */ +1: ldr w1, =BOOT_CPU_MODE_EL2 + str w1, [x0, #4] // This CPU has EL2 + mov x0, #(1 << 31) // 64-bit EL1 + msr hcr_el2, x0 + + /* Generic timers. */ + mrs x0, cnthctl_el2 + orr x0, x0, #3 // Enable EL1 physical timers + msr cnthctl_el2, x0 + msr cntvoff_el2, xzr // Clear virtual offset + + /* Populate ID registers. 
*/ + mrs x0, midr_el1 + mrs x1, mpidr_el1 + msr vpidr_el2, x0 + msr vmpidr_el2, x1 + + /* sctlr_el1 */ + mov x0, #0x0800 // Set/clear RES{1,0} bits + movk x0, #0x30d0, lsl #16 + msr sctlr_el1, x0 + + /* Coprocessor traps. */ + mov x0, #0x33ff + msr cptr_el2, x0 // Disable copro. traps to EL2 + +#ifdef CONFIG_COMPAT + msr hstr_el2, xzr // Disable CP15 traps to EL2 +#endif + + /* Stage-2 translation */ + msr vttbr_el2, xzr + + /* Hypervisor stub */ + adr x0, __hyp_stub_vectors + msr vbar_el2, x0 + + /* spsr */ + mov x0, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\ + PSR_MODE_EL1h) + msr spsr_el2, x0 + msr elr_el2, lr + eret +ENDPROC(el2_setup) + +/* + * We need to find out the CPU boot mode long after boot, so we need to + * store it in a writable variable. + * + * This is not in .bss, because we set it sufficiently early that the boot-time + * zeroing of .bss would clobber it. + */ + .pushsection .data +ENTRY(__boot_cpu_mode) + .long BOOT_CPU_MODE_EL2 + .long 0 + .popsection + + .align 3 +2: .quad . + .quad PAGE_OFFSET + +#ifdef CONFIG_SMP + .align 3 +1: .quad . + .quad secondary_holding_pen_release + + +/*FIXME: MTK only*/ +CCI400_SI3_SNOOP_CONTROL: +.long 0x10394000 +CCI400_STATUS: +.long 0x1039000C +MISCDBG: +.long 0x1020020C + /* + * This provides a "holding pen" for platforms to hold all secondary + * cores are held until we're ready for them to initialise. 
+ */ +ENTRY(secondary_holding_pen) + mrs x0, mpidr_el1 + ubfx x0, x0, #8, #4 + cmp x0, #0 + b.eq cluster0 + ldr w2, MISCDBG + ldr w1, [x2] + bic w1, w1, #0x10 + str w1, [x2] + ldr w2, CCI400_SI3_SNOOP_CONTROL + ldr w1, [x2] + orr w1, w1, #0x3 + str w1, [x2] + ldr w2, CCI400_STATUS + b 3f +0: + dsb sy +3: + ldr w1, [x2] + tst w1, #1 + bne 0b + +cluster0: + bl __calc_phys_offset // x24=phys offset + bl el2_setup // Drop to EL1 + mrs x0, mpidr_el1 + ldr x1, =MPIDR_HWID_BITMASK + and x0, x0, x1 + adr x1, 1b + ldp x2, x3, [x1] + sub x1, x1, x2 + add x3, x3, x1 +pen: ldr x4, [x3] + cmp x4, x0 + b.eq secondary_startup + wfe + b pen +ENDPROC(secondary_holding_pen) + + /* + * Secondary entry point that jumps straight into the kernel. Only to + * be used where CPUs are brought online dynamically by the kernel. + */ +ENTRY(secondary_entry) + bl __calc_phys_offset // x2=phys offset + bl el2_setup // Drop to EL1 + b secondary_startup +ENDPROC(secondary_entry) + +ENTRY(secondary_startup) + /* + * Common entry point for secondary CPUs. + */ + mrs x22, midr_el1 // x22=cpuid + mov x0, x22 + bl lookup_processor_type + mov x23, x0 // x23=current cpu_table + cbz x23, __error_p // invalid processor (x23=0)? + + pgtbl x25, x26, x24 // x25=TTBR0, x26=TTBR1 + ldr x12, [x23, #CPU_INFO_SETUP] + add x12, x12, x28 // __virt_to_phys + blr x12 // initialise processor + + ldr x21, =secondary_data + ldr x27, =__secondary_switched // address to jump to after enabling the MMU + b __enable_mmu +ENDPROC(secondary_startup) + +ENTRY(__secondary_switched) + ldr x0, [x21] // get secondary_data.stack + mov sp, x0 + mov x29, #0 + b secondary_start_kernel +ENDPROC(__secondary_switched) +#endif /* CONFIG_SMP */ + +/* + * Setup common bits before finally enabling the MMU. Essentially this is just + * loading the page table pointer and vector base registers. + * + * On entry to this code, x0 must contain the SCTLR_EL1 value for turning on + * the MMU. 
+ */ +__enable_mmu: + ldr x5, =vectors + msr vbar_el1, x5 + msr ttbr0_el1, x25 // load TTBR0 + msr ttbr1_el1, x26 // load TTBR1 + isb + b __turn_mmu_on +ENDPROC(__enable_mmu) + +/* + * Enable the MMU. This completely changes the structure of the visible memory + * space. You will not be able to trace execution through this. + * + * x0 = system control register + * x27 = *virtual* address to jump to upon completion + * + * other registers depend on the function called upon completion + */ + .align 6 +__turn_mmu_on: + msr sctlr_el1, x0 + isb + br x27 +ENDPROC(__turn_mmu_on) + +/* + * Calculate the start of physical memory. + */ +ENTRY(__calc_phys_offset) + adr x0, 1f + ldp x1, x2, [x0] + sub x28, x0, x1 // x28 = PHYS_OFFSET - PAGE_OFFSET + add x24, x2, x28 // x24 = PHYS_OFFSET + ret +ENDPROC(__calc_phys_offset) + + .align 3 +1: .quad . + .quad PAGE_OFFSET + +/* + * Macro to populate the PGD for the corresponding block entry in the next + * level (tbl) for the given virtual address. + * + * Preserves: pgd, tbl, virt + * Corrupts: tmp1, tmp2 + */ + .macro create_pgd_entry, pgd, tbl, virt, tmp1, tmp2 + lsr \tmp1, \virt, #PGDIR_SHIFT + and \tmp1, \tmp1, #PTRS_PER_PGD - 1 // PGD index + orr \tmp2, \tbl, #3 // PGD entry table type + str \tmp2, [\pgd, \tmp1, lsl #3] + .endm + +/* + * Macro to populate block entries in the page table for the start..end + * virtual range (inclusive). 
+ * + * Preserves: tbl, flags + * Corrupts: phys, start, end, pstate + */ + .macro create_block_map, tbl, flags, phys, start, end, idmap=0 + lsr \phys, \phys, #BLOCK_SHIFT + .if \idmap + and \start, \phys, #PTRS_PER_PTE - 1 // table index + .else + lsr \start, \start, #BLOCK_SHIFT + and \start, \start, #PTRS_PER_PTE - 1 // table index + .endif + orr \phys, \flags, \phys, lsl #BLOCK_SHIFT // table entry + .ifnc \start,\end + lsr \end, \end, #BLOCK_SHIFT + and \end, \end, #PTRS_PER_PTE - 1 // table end index + .endif +9999: str \phys, [\tbl, \start, lsl #3] // store the entry + .ifnc \start,\end + add \start, \start, #1 // next entry + add \phys, \phys, #BLOCK_SIZE // next block + cmp \start, \end + b.ls 9999b + .endif + .endm + +/* + * Setup the initial page tables. We only setup the barest amount which is + * required to get the kernel running. The following sections are required: + * - identity mapping to enable the MMU (low address, TTBR0) + * - first few MB of the kernel linear mapping to jump to once the MMU has + * been enabled, including the FDT blob (TTBR1) + * - UART mapping if CONFIG_EARLY_PRINTK is enabled (TTBR1) + */ +__create_page_tables: + pgtbl x25, x26, x24 // idmap_pg_dir and swapper_pg_dir addresses + + /* + * Clear the idmap and swapper page tables. + */ + mov x0, x25 + add x6, x26, #SWAPPER_DIR_SIZE +1: stp xzr, xzr, [x0], #16 + stp xzr, xzr, [x0], #16 + stp xzr, xzr, [x0], #16 + stp xzr, xzr, [x0], #16 + cmp x0, x6 + b.lo 1b + + ldr x7, =MM_MMUFLAGS + + /* + * Create the identity mapping. + */ + add x0, x25, #PAGE_SIZE // section table address + adr x3, __turn_mmu_on // virtual/physical address + create_pgd_entry x25, x0, x3, x5, x6 + create_block_map x0, x7, x3, x5, x5, idmap=1 + + /* + * Map the kernel image (starting with PHYS_OFFSET). 
+ */ + add x0, x26, #PAGE_SIZE // section table address + mov x5, #PAGE_OFFSET + create_pgd_entry x26, x0, x5, x3, x6 + ldr x6, =KERNEL_END - 1 + mov x3, x24 // phys offset + create_block_map x0, x7, x3, x5, x6 + + /* + * Map the FDT blob (maximum 2MB; must be within 512MB of + * PHYS_OFFSET). + */ + mov x3, x21 // FDT phys address + and x3, x3, #~((1 << 21) - 1) // 2MB aligned + mov x6, #PAGE_OFFSET + sub x5, x3, x24 // subtract PHYS_OFFSET + tst x5, #~((1 << 29) - 1) // within 512MB? + csel x21, xzr, x21, ne // zero the FDT pointer + b.ne 1f + add x5, x5, x6 // __va(FDT blob) + add x6, x5, #1 << 21 // 2MB for the FDT blob + sub x6, x6, #1 // inclusive range + create_block_map x0, x7, x3, x5, x6 +1: +#ifdef CONFIG_EARLY_PRINTK + /* + * Create the pgd entry for the UART mapping. The full mapping is done + * later based earlyprintk kernel parameter. + */ + ldr x5, =EARLYCON_IOBASE // UART virtual address + add x0, x26, #2 * PAGE_SIZE // section table address + create_pgd_entry x26, x0, x5, x6, x7 +#endif + ret +ENDPROC(__create_page_tables) + .ltorg + + .align 3 + .type __switch_data, %object +__switch_data: + .quad __mmap_switched + .quad __data_loc // x4 + .quad _data // x5 + .quad __bss_start // x6 + .quad _end // x7 + .quad processor_id // x4 + .quad __fdt_pointer // x5 + .quad memstart_addr // x6 + .quad init_thread_union + THREAD_START_SP // sp + +/* + * The following fragment of code is executed with the MMU on in MMU mode, and + * uses absolute addresses; this is not position independent. 
+ */ +__mmap_switched: + adr x3, __switch_data + 8 + + ldp x4, x5, [x3], #16 + ldp x6, x7, [x3], #16 + cmp x4, x5 // Copy data segment if needed +1: ccmp x5, x6, #4, ne + b.eq 2f + ldr x16, [x4], #8 + str x16, [x5], #8 + b 1b +2: +1: cmp x6, x7 + b.hs 2f + str xzr, [x6], #8 // Clear BSS + b 1b +2: + ldp x4, x5, [x3], #16 + ldr x6, [x3], #8 + ldr x16, [x3] + mov sp, x16 + str x22, [x4] // Save processor ID + str x21, [x5] // Save FDT pointer + str x24, [x6] // Save PHYS_OFFSET + mov x29, #0 + b start_kernel +ENDPROC(__mmap_switched) + +/* + * Exception handling. Something went wrong and we can't proceed. We ought to + * tell the user, but since we don't have any guarantee that we're even + * running on the right architecture, we do virtually nothing. + */ +__error_p: +ENDPROC(__error_p) + +__error: +1: nop + b 1b +ENDPROC(__error) + +/* + * This function gets the processor ID in w0 and searches the cpu_table[] for + * a match. It returns a pointer to the struct cpu_info it found. The + * cpu_table[] must end with an empty (all zeros) structure. + * + * This routine can be called via C code and it needs to work with the MMU + * both disabled and enabled (the offset is calculated automatically). + */ +ENTRY(lookup_processor_type) + adr x1, __lookup_processor_type_data + ldp x2, x3, [x1] + sub x1, x1, x2 // get offset between VA and PA + add x3, x3, x1 // convert VA to PA +1: + ldp w5, w6, [x3] // load cpu_id_val and cpu_id_mask + cbz w5, 2f // end of list? + and w6, w6, w0 + cmp w5, w6 + b.eq 3f + add x3, x3, #CPU_INFO_SZ + b 1b +2: + mov x3, #0 // unknown processor +3: + mov x0, x3 + ret +ENDPROC(lookup_processor_type) + + .align 3 + .type __lookup_processor_type_data, %object +__lookup_processor_type_data: + .quad . + .quad cpu_table + .size __lookup_processor_type_data, . - __lookup_processor_type_data + +/* + * Determine validity of the x21 FDT pointer. + * The dtb must be 8-byte aligned and live in the first 512M of memory. 
+ */ +__vet_fdt: + tst x21, #0x7 + b.ne 1f + cmp x21, x24 + b.lt 1f + mov x0, #(1 << 29) + add x0, x0, x24 + cmp x21, x0 + b.ge 1f + ret +1: + mov x21, #0 + ret +ENDPROC(__vet_fdt) diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c new file mode 100644 index 000000000..4a911591d --- /dev/null +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -0,0 +1,884 @@ +/* + * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility, + * using the CPU's debug registers. + * + * Copyright (C) 2012 ARM Limited + * Author: Will Deacon <will.deacon@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#define pr_fmt(fmt) "hw-breakpoint: " fmt + +#include <linux/compat.h> +#include <linux/errno.h> +#include <linux/hw_breakpoint.h> +#include <linux/perf_event.h> +#include <linux/ptrace.h> +#include <linux/smp.h> + +#include <asm/current.h> +#include <asm/debug-monitors.h> +#include <asm/hw_breakpoint.h> +#include <asm/kdebug.h> +#include <asm/traps.h> +#include <asm/cputype.h> +#include <asm/system_misc.h> + +/* Breakpoint currently in use for each BRP. */ +static DEFINE_PER_CPU(struct perf_event *, bp_on_reg[ARM_MAX_BRP]); + +/* Watchpoint currently in use for each WRP. */ +static DEFINE_PER_CPU(struct perf_event *, wp_on_reg[ARM_MAX_WRP]); + +/* Currently stepping a per-CPU kernel breakpoint. */ +static DEFINE_PER_CPU(int, stepping_kernel_bp); + +/* Number of BRP/WRP registers on this CPU. 
*/ +static int core_num_brps; +static int core_num_wrps; + +/* Determine number of BRP registers available. */ +static int get_num_brps(void) +{ + return ((read_cpuid(ID_AA64DFR0_EL1) >> 12) & 0xf) + 1; +} + +/* Determine number of WRP registers available. */ +static int get_num_wrps(void) +{ + return ((read_cpuid(ID_AA64DFR0_EL1) >> 20) & 0xf) + 1; +} + +int hw_breakpoint_slots(int type) +{ + /* + * We can be called early, so don't rely on + * our static variables being initialised. + */ + switch (type) { + case TYPE_INST: + return get_num_brps(); + case TYPE_DATA: + return get_num_wrps(); + default: + pr_warning("unknown slot type: %d\n", type); + return 0; + } +} + +#define READ_WB_REG_CASE(OFF, N, REG, VAL) \ + case (OFF + N): \ + AARCH64_DBG_READ(N, REG, VAL); \ + break + +#define WRITE_WB_REG_CASE(OFF, N, REG, VAL) \ + case (OFF + N): \ + AARCH64_DBG_WRITE(N, REG, VAL); \ + break + +#define GEN_READ_WB_REG_CASES(OFF, REG, VAL) \ + READ_WB_REG_CASE(OFF, 0, REG, VAL); \ + READ_WB_REG_CASE(OFF, 1, REG, VAL); \ + READ_WB_REG_CASE(OFF, 2, REG, VAL); \ + READ_WB_REG_CASE(OFF, 3, REG, VAL); \ + READ_WB_REG_CASE(OFF, 4, REG, VAL); \ + READ_WB_REG_CASE(OFF, 5, REG, VAL); \ + READ_WB_REG_CASE(OFF, 6, REG, VAL); \ + READ_WB_REG_CASE(OFF, 7, REG, VAL); \ + READ_WB_REG_CASE(OFF, 8, REG, VAL); \ + READ_WB_REG_CASE(OFF, 9, REG, VAL); \ + READ_WB_REG_CASE(OFF, 10, REG, VAL); \ + READ_WB_REG_CASE(OFF, 11, REG, VAL); \ + READ_WB_REG_CASE(OFF, 12, REG, VAL); \ + READ_WB_REG_CASE(OFF, 13, REG, VAL); \ + READ_WB_REG_CASE(OFF, 14, REG, VAL); \ + READ_WB_REG_CASE(OFF, 15, REG, VAL) + +#define GEN_WRITE_WB_REG_CASES(OFF, REG, VAL) \ + WRITE_WB_REG_CASE(OFF, 0, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 1, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 2, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 3, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 4, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 5, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 6, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 7, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 8, 
REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 9, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 10, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 11, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 12, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 13, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 14, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 15, REG, VAL) + +static u64 read_wb_reg(int reg, int n) +{ + u64 val = 0; + + switch (reg + n) { + GEN_READ_WB_REG_CASES(AARCH64_DBG_REG_BVR, AARCH64_DBG_REG_NAME_BVR, val); + GEN_READ_WB_REG_CASES(AARCH64_DBG_REG_BCR, AARCH64_DBG_REG_NAME_BCR, val); + GEN_READ_WB_REG_CASES(AARCH64_DBG_REG_WVR, AARCH64_DBG_REG_NAME_WVR, val); + GEN_READ_WB_REG_CASES(AARCH64_DBG_REG_WCR, AARCH64_DBG_REG_NAME_WCR, val); + default: + pr_warning("attempt to read from unknown breakpoint register %d\n", n); + } + + return val; +} + +static void write_wb_reg(int reg, int n, u64 val) +{ + switch (reg + n) { + GEN_WRITE_WB_REG_CASES(AARCH64_DBG_REG_BVR, AARCH64_DBG_REG_NAME_BVR, val); + GEN_WRITE_WB_REG_CASES(AARCH64_DBG_REG_BCR, AARCH64_DBG_REG_NAME_BCR, val); + GEN_WRITE_WB_REG_CASES(AARCH64_DBG_REG_WVR, AARCH64_DBG_REG_NAME_WVR, val); + GEN_WRITE_WB_REG_CASES(AARCH64_DBG_REG_WCR, AARCH64_DBG_REG_NAME_WCR, val); + default: + pr_warning("attempt to write to unknown breakpoint register %d\n", n); + } + isb(); +} + +/* + * Convert a breakpoint privilege level to the corresponding exception + * level. + */ +static enum debug_el debug_exception_level(int privilege) +{ + switch (privilege) { + case AARCH64_BREAKPOINT_EL0: + return DBG_ACTIVE_EL0; + case AARCH64_BREAKPOINT_EL1: + return DBG_ACTIVE_EL1; + default: + pr_warning("invalid breakpoint privilege level %d\n", privilege); + return -EINVAL; + } +} + +/* + * Install a perf counter breakpoint. 
+ */ +int arch_install_hw_breakpoint(struct perf_event *bp) +{ + struct arch_hw_breakpoint *info = counter_arch_bp(bp); + struct perf_event **slot, **slots; + struct debug_info *debug_info = &current->thread.debug; + int i, max_slots, ctrl_reg, val_reg, reg_enable; + u32 ctrl; + + if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) { + /* Breakpoint */ + ctrl_reg = AARCH64_DBG_REG_BCR; + val_reg = AARCH64_DBG_REG_BVR; + slots = __get_cpu_var(bp_on_reg); + max_slots = core_num_brps; + reg_enable = !debug_info->bps_disabled; + } else { + /* Watchpoint */ + ctrl_reg = AARCH64_DBG_REG_WCR; + val_reg = AARCH64_DBG_REG_WVR; + slots = __get_cpu_var(wp_on_reg); + max_slots = core_num_wrps; + reg_enable = !debug_info->wps_disabled; + } + + for (i = 0; i < max_slots; ++i) { + slot = &slots[i]; + + if (!*slot) { + *slot = bp; + break; + } + } + + if (WARN_ONCE(i == max_slots, "Can't find any breakpoint slot")) + return -ENOSPC; + + /* Ensure debug monitors are enabled at the correct exception level. */ + enable_debug_monitors(debug_exception_level(info->ctrl.privilege)); + + /* Setup the address register. */ + write_wb_reg(val_reg, i, info->address); + + /* Setup the control register. */ + ctrl = encode_ctrl_reg(info->ctrl); + write_wb_reg(ctrl_reg, i, reg_enable ? ctrl | 0x1 : ctrl & ~0x1); + + return 0; +} + +void arch_uninstall_hw_breakpoint(struct perf_event *bp) +{ + struct arch_hw_breakpoint *info = counter_arch_bp(bp); + struct perf_event **slot, **slots; + int i, max_slots, base; + + if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) { + /* Breakpoint */ + base = AARCH64_DBG_REG_BCR; + slots = __get_cpu_var(bp_on_reg); + max_slots = core_num_brps; + } else { + /* Watchpoint */ + base = AARCH64_DBG_REG_WCR; + slots = __get_cpu_var(wp_on_reg); + max_slots = core_num_wrps; + } + + /* Remove the breakpoint. 
*/ + for (i = 0; i < max_slots; ++i) { + slot = &slots[i]; + + if (*slot == bp) { + *slot = NULL; + break; + } + } + + if (WARN_ONCE(i == max_slots, "Can't find any breakpoint slot")) + return; + + /* Reset the control register. */ + write_wb_reg(base, i, 0); + + /* Release the debug monitors for the correct exception level. */ + disable_debug_monitors(debug_exception_level(info->ctrl.privilege)); +} + +static int get_hbp_len(u8 hbp_len) +{ + unsigned int len_in_bytes = 0; + + switch (hbp_len) { + case ARM_BREAKPOINT_LEN_1: + len_in_bytes = 1; + break; + case ARM_BREAKPOINT_LEN_2: + len_in_bytes = 2; + break; + case ARM_BREAKPOINT_LEN_4: + len_in_bytes = 4; + break; + case ARM_BREAKPOINT_LEN_8: + len_in_bytes = 8; + break; + } + + return len_in_bytes; +} + +/* + * Check whether bp virtual address is in kernel space. + */ +int arch_check_bp_in_kernelspace(struct perf_event *bp) +{ + unsigned int len; + unsigned long va; + struct arch_hw_breakpoint *info = counter_arch_bp(bp); + + va = info->address; + len = get_hbp_len(info->ctrl.len); + + return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE); +} + +/* + * Extract generic type and length encodings from an arch_hw_breakpoint_ctrl. + * Hopefully this will disappear when ptrace can bypass the conversion + * to generic breakpoint descriptions. 
+ */ +int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl, + int *gen_len, int *gen_type) +{ + /* Type */ + switch (ctrl.type) { + case ARM_BREAKPOINT_EXECUTE: + *gen_type = HW_BREAKPOINT_X; + break; + case ARM_BREAKPOINT_LOAD: + *gen_type = HW_BREAKPOINT_R; + break; + case ARM_BREAKPOINT_STORE: + *gen_type = HW_BREAKPOINT_W; + break; + case ARM_BREAKPOINT_LOAD | ARM_BREAKPOINT_STORE: + *gen_type = HW_BREAKPOINT_RW; + break; + default: + return -EINVAL; + } + + /* Len */ + switch (ctrl.len) { + case ARM_BREAKPOINT_LEN_1: + *gen_len = HW_BREAKPOINT_LEN_1; + break; + case ARM_BREAKPOINT_LEN_2: + *gen_len = HW_BREAKPOINT_LEN_2; + break; + case ARM_BREAKPOINT_LEN_4: + *gen_len = HW_BREAKPOINT_LEN_4; + break; + case ARM_BREAKPOINT_LEN_8: + *gen_len = HW_BREAKPOINT_LEN_8; + break; + default: + return -EINVAL; + } + + return 0; +} + +/* + * Construct an arch_hw_breakpoint from a perf_event. + */ +static int arch_build_bp_info(struct perf_event *bp) +{ + struct arch_hw_breakpoint *info = counter_arch_bp(bp); + + /* Type */ + switch (bp->attr.bp_type) { + case HW_BREAKPOINT_X: + info->ctrl.type = ARM_BREAKPOINT_EXECUTE; + break; + case HW_BREAKPOINT_R: + info->ctrl.type = ARM_BREAKPOINT_LOAD; + break; + case HW_BREAKPOINT_W: + info->ctrl.type = ARM_BREAKPOINT_STORE; + break; + case HW_BREAKPOINT_RW: + info->ctrl.type = ARM_BREAKPOINT_LOAD | ARM_BREAKPOINT_STORE; + break; + default: + return -EINVAL; + } + + /* Len */ + switch (bp->attr.bp_len) { + case HW_BREAKPOINT_LEN_1: + info->ctrl.len = ARM_BREAKPOINT_LEN_1; + break; + case HW_BREAKPOINT_LEN_2: + info->ctrl.len = ARM_BREAKPOINT_LEN_2; + break; + case HW_BREAKPOINT_LEN_4: + info->ctrl.len = ARM_BREAKPOINT_LEN_4; + break; + case HW_BREAKPOINT_LEN_8: + info->ctrl.len = ARM_BREAKPOINT_LEN_8; + break; + default: + return -EINVAL; + } + + /* + * On AArch64, we only permit breakpoints of length 4, whereas + * AArch32 also requires breakpoints of length 2 for Thumb. 
+ * Watchpoints can be of length 1, 2, 4 or 8 bytes. + */ + if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) { + if (is_compat_task()) { + if (info->ctrl.len != ARM_BREAKPOINT_LEN_2 && + info->ctrl.len != ARM_BREAKPOINT_LEN_4) + return -EINVAL; + } else if (info->ctrl.len != ARM_BREAKPOINT_LEN_4) { + /* + * FIXME: Some tools (I'm looking at you perf) assume + * that breakpoints should be sizeof(long). This + * is nonsense. For now, we fix up the parameter + * but we should probably return -EINVAL instead. + */ + info->ctrl.len = ARM_BREAKPOINT_LEN_4; + } + } + + /* Address */ + info->address = bp->attr.bp_addr; + + /* + * Privilege + * Note that we disallow combined EL0/EL1 breakpoints because + * that would complicate the stepping code. + */ + if (arch_check_bp_in_kernelspace(bp)) + info->ctrl.privilege = AARCH64_BREAKPOINT_EL1; + else + info->ctrl.privilege = AARCH64_BREAKPOINT_EL0; + + /* Enabled? */ + info->ctrl.enabled = !bp->attr.disabled; + + return 0; +} + +/* + * Validate the arch-specific HW Breakpoint register settings. + */ +int arch_validate_hwbkpt_settings(struct perf_event *bp) +{ + struct arch_hw_breakpoint *info = counter_arch_bp(bp); + int ret; + u64 alignment_mask, offset; + + /* Build the arch_hw_breakpoint. */ + ret = arch_build_bp_info(bp); + if (ret) + return ret; + + /* + * Check address alignment. + * We don't do any clever alignment correction for watchpoints + * because using 64-bit unaligned addresses is deprecated for + * AArch64. + * + * AArch32 tasks expect some simple alignment fixups, so emulate + * that here. + */ + if (is_compat_task()) { + if (info->ctrl.len == ARM_BREAKPOINT_LEN_8) + alignment_mask = 0x7; + else + alignment_mask = 0x3; + offset = info->address & alignment_mask; + switch (offset) { + case 0: + /* Aligned */ + break; + case 1: + /* Allow single byte watchpoint. */ + if (info->ctrl.len == ARM_BREAKPOINT_LEN_1) + break; + case 2: + /* Allow halfword watchpoints and breakpoints. 
*/ + if (info->ctrl.len == ARM_BREAKPOINT_LEN_2) + break; + default: + return -EINVAL; + } + + info->address &= ~alignment_mask; + info->ctrl.len <<= offset; + } else { + if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) + alignment_mask = 0x3; + else + alignment_mask = 0x7; + if (info->address & alignment_mask) + return -EINVAL; + } + + /* + * Disallow per-task kernel breakpoints since these would + * complicate the stepping code. + */ + if (info->ctrl.privilege == AARCH64_BREAKPOINT_EL1 && bp->hw.bp_target) + return -EINVAL; + + return 0; +} + +/* + * Enable/disable all of the breakpoints active at the specified + * exception level at the register level. + * This is used when single-stepping after a breakpoint exception. + */ +static void toggle_bp_registers(int reg, enum debug_el el, int enable) +{ + int i, max_slots, privilege; + u32 ctrl; + struct perf_event **slots; + + switch (reg) { + case AARCH64_DBG_REG_BCR: + slots = __get_cpu_var(bp_on_reg); + max_slots = core_num_brps; + break; + case AARCH64_DBG_REG_WCR: + slots = __get_cpu_var(wp_on_reg); + max_slots = core_num_wrps; + break; + default: + return; + } + + for (i = 0; i < max_slots; ++i) { + if (!slots[i]) + continue; + + privilege = counter_arch_bp(slots[i])->ctrl.privilege; + if (debug_exception_level(privilege) != el) + continue; + + ctrl = read_wb_reg(reg, i); + if (enable) + ctrl |= 0x1; + else + ctrl &= ~0x1; + write_wb_reg(reg, i, ctrl); + } +} + +/* + * Debug exception handlers. 
+ */ +static int breakpoint_handler(unsigned long unused, unsigned int esr, + struct pt_regs *regs) +{ + int i, step = 0, *kernel_step; + u32 ctrl_reg; + u64 addr, val; + struct perf_event *bp, **slots; + struct debug_info *debug_info; + struct arch_hw_breakpoint_ctrl ctrl; + + slots = (struct perf_event **)__get_cpu_var(bp_on_reg); + addr = instruction_pointer(regs); + debug_info = ¤t->thread.debug; + + for (i = 0; i < core_num_brps; ++i) { + rcu_read_lock(); + + bp = slots[i]; + + if (bp == NULL) + goto unlock; + + /* Check if the breakpoint value matches. */ + val = read_wb_reg(AARCH64_DBG_REG_BVR, i); + if (val != (addr & ~0x3)) + goto unlock; + + /* Possible match, check the byte address select to confirm. */ + ctrl_reg = read_wb_reg(AARCH64_DBG_REG_BCR, i); + decode_ctrl_reg(ctrl_reg, &ctrl); + if (!((1 << (addr & 0x3)) & ctrl.len)) + goto unlock; + + counter_arch_bp(bp)->trigger = addr; + perf_bp_event(bp, regs); + + /* Do we need to handle the stepping? */ + if (!bp->overflow_handler) + step = 1; +unlock: + rcu_read_unlock(); + } + + if (!step) + return 0; + + if (user_mode(regs)) { + debug_info->bps_disabled = 1; + toggle_bp_registers(AARCH64_DBG_REG_BCR, DBG_ACTIVE_EL0, 0); + + /* If we're already stepping a watchpoint, just return. 
*/ + if (debug_info->wps_disabled) + return 0; + + if (test_thread_flag(TIF_SINGLESTEP)) + debug_info->suspended_step = 1; + else + user_enable_single_step(current); + } else { + toggle_bp_registers(AARCH64_DBG_REG_BCR, DBG_ACTIVE_EL1, 0); + kernel_step = &__get_cpu_var(stepping_kernel_bp); + + if (*kernel_step != ARM_KERNEL_STEP_NONE) + return 0; + + if (kernel_active_single_step()) { + *kernel_step = ARM_KERNEL_STEP_SUSPEND; + } else { + *kernel_step = ARM_KERNEL_STEP_ACTIVE; + kernel_enable_single_step(regs); + } + } + + return 0; +} + +static int watchpoint_handler(unsigned long addr, unsigned int esr, + struct pt_regs *regs) +{ + int i, step = 0, *kernel_step, access; + u32 ctrl_reg; + u64 val, alignment_mask; + struct perf_event *wp, **slots; + struct debug_info *debug_info; + struct arch_hw_breakpoint *info; + struct arch_hw_breakpoint_ctrl ctrl; + + slots = (struct perf_event **)__get_cpu_var(wp_on_reg); + debug_info = ¤t->thread.debug; + + for (i = 0; i < core_num_wrps; ++i) { + rcu_read_lock(); + + wp = slots[i]; + + if (wp == NULL) + goto unlock; + + info = counter_arch_bp(wp); + /* AArch32 watchpoints are either 4 or 8 bytes aligned. */ + if (is_compat_task()) { + if (info->ctrl.len == ARM_BREAKPOINT_LEN_8) + alignment_mask = 0x7; + else + alignment_mask = 0x3; + } else { + alignment_mask = 0x7; + } + + /* Check if the watchpoint value matches. */ + val = read_wb_reg(AARCH64_DBG_REG_WVR, i); + if (val != (addr & ~alignment_mask)) + goto unlock; + + /* Possible match, check the byte address select to confirm. */ + ctrl_reg = read_wb_reg(AARCH64_DBG_REG_WCR, i); + decode_ctrl_reg(ctrl_reg, &ctrl); + if (!((1 << (addr & alignment_mask)) & ctrl.len)) + goto unlock; + + /* + * Check that the access type matches. + * 0 => load, otherwise => store + */ + access = (esr & AARCH64_ESR_ACCESS_MASK) ? 
HW_BREAKPOINT_W : + HW_BREAKPOINT_R; + if (!(access & hw_breakpoint_type(wp))) + goto unlock; + + info->trigger = addr; + perf_bp_event(wp, regs); + + /* Do we need to handle the stepping? */ + if (!wp->overflow_handler) + step = 1; + +unlock: + rcu_read_unlock(); + } + + if (!step) + return 0; + + /* + * We always disable EL0 watchpoints because the kernel can + * cause these to fire via an unprivileged access. + */ + toggle_bp_registers(AARCH64_DBG_REG_WCR, DBG_ACTIVE_EL0, 0); + + if (user_mode(regs)) { + debug_info->wps_disabled = 1; + + /* If we're already stepping a breakpoint, just return. */ + if (debug_info->bps_disabled) + return 0; + + if (test_thread_flag(TIF_SINGLESTEP)) + debug_info->suspended_step = 1; + else + user_enable_single_step(current); + } else { + toggle_bp_registers(AARCH64_DBG_REG_WCR, DBG_ACTIVE_EL1, 0); + kernel_step = &__get_cpu_var(stepping_kernel_bp); + + if (*kernel_step != ARM_KERNEL_STEP_NONE) + return 0; + + if (kernel_active_single_step()) { + *kernel_step = ARM_KERNEL_STEP_SUSPEND; + } else { + *kernel_step = ARM_KERNEL_STEP_ACTIVE; + kernel_enable_single_step(regs); + } + } + + return 0; +} + +/* + * Handle single-step exception. + */ +int reinstall_suspended_bps(struct pt_regs *regs) +{ + struct debug_info *debug_info = ¤t->thread.debug; + int handled_exception = 0, *kernel_step; + + kernel_step = &__get_cpu_var(stepping_kernel_bp); + + /* + * Called from single-step exception handler. + * Return 0 if execution can resume, 1 if a SIGTRAP should be + * reported. 
+ */ + if (user_mode(regs)) { + if (debug_info->bps_disabled) { + debug_info->bps_disabled = 0; + toggle_bp_registers(AARCH64_DBG_REG_BCR, DBG_ACTIVE_EL0, 1); + handled_exception = 1; + } + + if (debug_info->wps_disabled) { + debug_info->wps_disabled = 0; + toggle_bp_registers(AARCH64_DBG_REG_WCR, DBG_ACTIVE_EL0, 1); + handled_exception = 1; + } + + if (handled_exception) { + if (debug_info->suspended_step) { + debug_info->suspended_step = 0; + /* Allow exception handling to fall-through. */ + handled_exception = 0; + } else { + user_disable_single_step(current); + } + } + } else if (*kernel_step != ARM_KERNEL_STEP_NONE) { + toggle_bp_registers(AARCH64_DBG_REG_BCR, DBG_ACTIVE_EL1, 1); + toggle_bp_registers(AARCH64_DBG_REG_WCR, DBG_ACTIVE_EL1, 1); + + if (!debug_info->wps_disabled) + toggle_bp_registers(AARCH64_DBG_REG_WCR, DBG_ACTIVE_EL0, 1); + + if (*kernel_step != ARM_KERNEL_STEP_SUSPEND) { + kernel_disable_single_step(); + handled_exception = 1; + } else { + handled_exception = 0; + } + + *kernel_step = ARM_KERNEL_STEP_NONE; + } + + return !handled_exception; +} + +/* + * Context-switcher for restoring suspended breakpoints. + */ +void hw_breakpoint_thread_switch(struct task_struct *next) +{ + /* + * current next + * disabled: 0 0 => The usual case, NOTIFY_DONE + * 0 1 => Disable the registers + * 1 0 => Enable the registers + * 1 1 => NOTIFY_DONE. per-task bps will + * get taken care of by perf. + */ + + struct debug_info *current_debug_info, *next_debug_info; + + current_debug_info = ¤t->thread.debug; + next_debug_info = &next->thread.debug; + + /* Update breakpoints. */ + if (current_debug_info->bps_disabled != next_debug_info->bps_disabled) + toggle_bp_registers(AARCH64_DBG_REG_BCR, + DBG_ACTIVE_EL0, + !next_debug_info->bps_disabled); + + /* Update watchpoints. 
*/ + if (current_debug_info->wps_disabled != next_debug_info->wps_disabled) + toggle_bp_registers(AARCH64_DBG_REG_WCR, + DBG_ACTIVE_EL0, + !next_debug_info->wps_disabled); +} + +/* + * CPU initialisation. + */ +static void reset_ctrl_regs(void *unused) +{ +#ifdef CONFIG_MEDIATEK_SOLUTION + /* mediatek will use our own operations for hw breakpoint/watchpoint */ +#else + int i; + + for (i = 0; i < core_num_brps; ++i) { + write_wb_reg(AARCH64_DBG_REG_BCR, i, 0UL); + write_wb_reg(AARCH64_DBG_REG_BVR, i, 0UL); + } + + for (i = 0; i < core_num_wrps; ++i) { + write_wb_reg(AARCH64_DBG_REG_WCR, i, 0UL); + write_wb_reg(AARCH64_DBG_REG_WVR, i, 0UL); + } +#endif +} + +static int __cpuinit hw_breakpoint_reset_notify(struct notifier_block *self, + unsigned long action, + void *hcpu) +{ + int cpu = (long)hcpu; + if (action == CPU_ONLINE) + smp_call_function_single(cpu, reset_ctrl_regs, NULL, 1); + return NOTIFY_OK; +} + +static struct notifier_block __cpuinitdata hw_breakpoint_reset_nb = { + .notifier_call = hw_breakpoint_reset_notify, +}; + +/* + * One-time initialisation. + */ +static int __init arch_hw_breakpoint_init(void) +{ + core_num_brps = get_num_brps(); + core_num_wrps = get_num_wrps(); + + pr_info("found %d breakpoint and %d watchpoint registers.\n", + core_num_brps, core_num_wrps); + + /* + * Reset the breakpoint resources. We assume that a halting + * debugger will leave the world in a nice state for us. + */ + smp_call_function(reset_ctrl_regs, NULL, 1); + reset_ctrl_regs(NULL); + + /* Register debug fault handlers. */ + hook_debug_fault_code(DBG_ESR_EVT_HWBP, breakpoint_handler, SIGTRAP, + TRAP_HWBKPT, "hw-breakpoint handler"); + hook_debug_fault_code(DBG_ESR_EVT_HWWP, watchpoint_handler, SIGTRAP, + TRAP_HWBKPT, "hw-watchpoint handler"); + + /* Register hotplug notifier. 
*/ + register_cpu_notifier(&hw_breakpoint_reset_nb); + + return 0; +} +arch_initcall(arch_hw_breakpoint_init); + +void hw_breakpoint_pmu_read(struct perf_event *bp) +{ +} + +/* + * Dummy function to register with die_notifier. + */ +int hw_breakpoint_exceptions_notify(struct notifier_block *unused, + unsigned long val, void *data) +{ + return NOTIFY_DONE; +} diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S new file mode 100644 index 000000000..0959611d9 --- /dev/null +++ b/arch/arm64/kernel/hyp-stub.S @@ -0,0 +1,109 @@ +/* + * Hypervisor stub + * + * Copyright (C) 2012 ARM Ltd. + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */
+
+#include <linux/init.h>
+#include <linux/linkage.h>
+
+#include <asm/assembler.h>
+#include <asm/ptrace.h>
+#include <asm/virt.h>
+
+	.text
+	.align 11
+
+ENTRY(__hyp_stub_vectors)
+	ventry	el2_sync_invalid		// Synchronous EL2t
+	ventry	el2_irq_invalid			// IRQ EL2t
+	ventry	el2_fiq_invalid			// FIQ EL2t
+	ventry	el2_error_invalid		// Error EL2t
+
+	ventry	el2_sync_invalid		// Synchronous EL2h
+	ventry	el2_irq_invalid			// IRQ EL2h
+	ventry	el2_fiq_invalid			// FIQ EL2h
+	ventry	el2_error_invalid		// Error EL2h
+
+	ventry	el1_sync			// Synchronous 64-bit EL1
+	ventry	el1_irq_invalid			// IRQ 64-bit EL1
+	ventry	el1_fiq_invalid			// FIQ 64-bit EL1
+	ventry	el1_error_invalid		// Error 64-bit EL1
+
+	ventry	el1_sync_invalid		// Synchronous 32-bit EL1
+	ventry	el1_irq_invalid			// IRQ 32-bit EL1
+	ventry	el1_fiq_invalid			// FIQ 32-bit EL1
+	ventry	el1_error_invalid		// Error 32-bit EL1
+ENDPROC(__hyp_stub_vectors)
+
+	.align 11
+
+el1_sync:
+	mrs	x1, esr_el2			// Read the syndrome for this exception
+	lsr	x1, x1, #26			// Keep only the bits above bit 25
+	cmp	x1, #0x16			// Compare against the HVC value
+	b.ne	2f				// Not an HVC trap
+	cbz	x0, 1f				// x0 == 0: caller wants the current vectors
+	msr	vbar_el2, x0			// Set vbar_el2
+	b	2f				// Skip the "get" path
+1:	mrs	x0, vbar_el2			// Return vbar_el2
+2:	eret
+ENDPROC(el1_sync)
+
+.macro invalid_vector	label
+\label:
+	b \label				// Spin forever on an unexpected exception
+ENDPROC(\label)
+.endm
+
+	invalid_vector	el2_sync_invalid
+	invalid_vector	el2_irq_invalid
+	invalid_vector	el2_fiq_invalid
+	invalid_vector	el2_error_invalid
+	invalid_vector	el1_sync_invalid
+	invalid_vector	el1_irq_invalid
+	invalid_vector	el1_fiq_invalid
+	invalid_vector	el1_error_invalid
+
+/*
+ * __hyp_set_vectors: Call this after boot to set the initial hypervisor
+ * vectors as part of hypervisor installation.  On an SMP system, this should
+ * be called on each CPU.
+ *
+ * x0 must be the physical address of the new vector table, and must be
+ * 2KB aligned.
+ *
+ * Before calling this, you must check that the stub hypervisor is installed
+ * everywhere, by waiting for any secondary CPUs to be brought up and then
+ * checking that is_hyp_mode_available() is true.
+ *
+ * If not, there is a pre-existing hypervisor, some CPUs failed to boot, or
+ * something else went wrong... in such cases, trying to install a new
+ * hypervisor is unlikely to work as desired.
+ *
+ * When you call into your shiny new hypervisor, sp_el2 will contain junk,
+ * so you will need to set that to something sensible at the new hypervisor's
+ * initialisation entry point.
+ *
+ * __hyp_get_vectors: zeroes x0 and falls through into __hyp_set_vectors.
+ * With x0 == 0 the stub's el1_sync handler above does not write vbar_el2;
+ * it returns the current vbar_el2 value in x0 instead.
+ */
+
+ENTRY(__hyp_get_vectors)
+	mov	x0, xzr
+	// fall through
+ENTRY(__hyp_set_vectors)
+	hvc	#0
+	ret
+ENDPROC(__hyp_get_vectors)
+ENDPROC(__hyp_set_vectors)
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
new file mode 100644
index 000000000..92f368354
--- /dev/null
+++ b/arch/arm64/kernel/insn.c
@@ -0,0 +1,304 @@
+/*
+ * Copyright (C) 2013 Huawei Ltd.
+ * Author: Jiang Liu <liuj97@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */ +#include <linux/bitops.h> +#include <linux/compiler.h> +#include <linux/kernel.h> +#include <linux/smp.h> +#include <linux/stop_machine.h> +#include <linux/uaccess.h> +#include <asm/cacheflush.h> +#include <asm/insn.h> + +static int aarch64_insn_encoding_class[] = { + AARCH64_INSN_CLS_UNKNOWN, + AARCH64_INSN_CLS_UNKNOWN, + AARCH64_INSN_CLS_UNKNOWN, + AARCH64_INSN_CLS_UNKNOWN, + AARCH64_INSN_CLS_LDST, + AARCH64_INSN_CLS_DP_REG, + AARCH64_INSN_CLS_LDST, + AARCH64_INSN_CLS_DP_FPSIMD, + AARCH64_INSN_CLS_DP_IMM, + AARCH64_INSN_CLS_DP_IMM, + AARCH64_INSN_CLS_BR_SYS, + AARCH64_INSN_CLS_BR_SYS, + AARCH64_INSN_CLS_LDST, + AARCH64_INSN_CLS_DP_REG, + AARCH64_INSN_CLS_LDST, + AARCH64_INSN_CLS_DP_FPSIMD, +}; + +enum aarch64_insn_encoding_class __kprobes aarch64_get_insn_class(u32 insn) +{ + return aarch64_insn_encoding_class[(insn >> 25) & 0xf]; +} + +/* NOP is an alias of HINT */ +bool __kprobes aarch64_insn_is_nop(u32 insn) +{ + if (!aarch64_insn_is_hint(insn)) + return false; + + switch (insn & 0xFE0) { + case AARCH64_INSN_HINT_YIELD: + case AARCH64_INSN_HINT_WFE: + case AARCH64_INSN_HINT_WFI: + case AARCH64_INSN_HINT_SEV: + case AARCH64_INSN_HINT_SEVL: + return false; + default: + return true; + } +} + +/* + * In ARMv8-A, A64 instructions have a fixed length of 32 bits and are always + * little-endian. 
+ */ +int __kprobes aarch64_insn_read(void *addr, u32 *insnp) +{ + int ret; + u32 val; + + ret = probe_kernel_read(&val, addr, AARCH64_INSN_SIZE); + if (!ret) + *insnp = le32_to_cpu(val); + + return ret; +} + +int __kprobes aarch64_insn_write(void *addr, u32 insn) +{ + insn = cpu_to_le32(insn); + return probe_kernel_write(addr, &insn, AARCH64_INSN_SIZE); +} + +static bool __kprobes __aarch64_insn_hotpatch_safe(u32 insn) +{ + if (aarch64_get_insn_class(insn) != AARCH64_INSN_CLS_BR_SYS) + return false; + + return aarch64_insn_is_b(insn) || + aarch64_insn_is_bl(insn) || + aarch64_insn_is_svc(insn) || + aarch64_insn_is_hvc(insn) || + aarch64_insn_is_smc(insn) || + aarch64_insn_is_brk(insn) || + aarch64_insn_is_nop(insn); +} + +/* + * ARM Architecture Reference Manual for ARMv8 Profile-A, Issue A.a + * Section B2.6.5 "Concurrent modification and execution of instructions": + * Concurrent modification and execution of instructions can lead to the + * resulting instruction performing any behavior that can be achieved by + * executing any sequence of instructions that can be executed from the + * same Exception level, except where the instruction before modification + * and the instruction after modification is a B, BL, NOP, BKPT, SVC, HVC, + * or SMC instruction. 
+ */ +bool __kprobes aarch64_insn_hotpatch_safe(u32 old_insn, u32 new_insn) +{ + return __aarch64_insn_hotpatch_safe(old_insn) && + __aarch64_insn_hotpatch_safe(new_insn); +} + +int __kprobes aarch64_insn_patch_text_nosync(void *addr, u32 insn) +{ + u32 *tp = addr; + int ret; + + /* A64 instructions must be word aligned */ + if ((uintptr_t)tp & 0x3) + return -EINVAL; + + ret = aarch64_insn_write(tp, insn); + if (ret == 0) + flush_icache_range((uintptr_t)tp, + (uintptr_t)tp + AARCH64_INSN_SIZE); + + return ret; +} + +struct aarch64_insn_patch { + void **text_addrs; + u32 *new_insns; + int insn_cnt; + atomic_t cpu_count; +}; + +static int __kprobes aarch64_insn_patch_text_cb(void *arg) +{ + int i, ret = 0; + struct aarch64_insn_patch *pp = arg; + + /* The first CPU becomes master */ + if (atomic_inc_return(&pp->cpu_count) == 1) { + for (i = 0; ret == 0 && i < pp->insn_cnt; i++) + ret = aarch64_insn_patch_text_nosync(pp->text_addrs[i], + pp->new_insns[i]); + /* + * aarch64_insn_patch_text_nosync() calls flush_icache_range(), + * which ends with "dsb; isb" pair guaranteeing global + * visibility. 
+ */ + atomic_set(&pp->cpu_count, -1); + } else { + while (atomic_read(&pp->cpu_count) != -1) + cpu_relax(); + isb(); + } + + return ret; +} + +int __kprobes aarch64_insn_patch_text_sync(void *addrs[], u32 insns[], int cnt) +{ + struct aarch64_insn_patch patch = { + .text_addrs = addrs, + .new_insns = insns, + .insn_cnt = cnt, + .cpu_count = ATOMIC_INIT(0), + }; + + if (cnt <= 0) + return -EINVAL; + + return stop_machine(aarch64_insn_patch_text_cb, &patch, + cpu_online_mask); +} + +int __kprobes aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt) +{ + int ret; + u32 insn; + + /* Unsafe to patch multiple instructions without synchronizaiton */ + if (cnt == 1) { + ret = aarch64_insn_read(addrs[0], &insn); + if (ret) + return ret; + + if (aarch64_insn_hotpatch_safe(insn, insns[0])) { + /* + * ARMv8 architecture doesn't guarantee all CPUs see + * the new instruction after returning from function + * aarch64_insn_patch_text_nosync(). So send IPIs to + * all other CPUs to achieve instruction + * synchronization. 
+ */ + ret = aarch64_insn_patch_text_nosync(addrs[0], insns[0]); + kick_all_cpus_sync(); + return ret; + } + } + + return aarch64_insn_patch_text_sync(addrs, insns, cnt); +} + +u32 __kprobes aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type, + u32 insn, u64 imm) +{ + u32 immlo, immhi, lomask, himask, mask; + int shift; + + switch (type) { + case AARCH64_INSN_IMM_ADR: + lomask = 0x3; + himask = 0x7ffff; + immlo = imm & lomask; + imm >>= 2; + immhi = imm & himask; + imm = (immlo << 24) | (immhi); + mask = (lomask << 24) | (himask); + shift = 5; + break; + case AARCH64_INSN_IMM_26: + mask = BIT(26) - 1; + shift = 0; + break; + case AARCH64_INSN_IMM_19: + mask = BIT(19) - 1; + shift = 5; + break; + case AARCH64_INSN_IMM_16: + mask = BIT(16) - 1; + shift = 5; + break; + case AARCH64_INSN_IMM_14: + mask = BIT(14) - 1; + shift = 5; + break; + case AARCH64_INSN_IMM_12: + mask = BIT(12) - 1; + shift = 10; + break; + case AARCH64_INSN_IMM_9: + mask = BIT(9) - 1; + shift = 12; + break; + default: + pr_err("aarch64_insn_encode_immediate: unknown immediate encoding %d\n", + type); + return 0; + } + + /* Update the immediate field. */ + insn &= ~(mask << shift); + insn |= (imm & mask) << shift; + + return insn; +} + +u32 __kprobes aarch64_insn_gen_branch_imm(unsigned long pc, unsigned long addr, + enum aarch64_insn_branch_type type) +{ + u32 insn; + long offset; + + /* + * PC: A 64-bit Program Counter holding the address of the current + * instruction. A64 instructions must be word-aligned. + */ + BUG_ON((pc & 0x3) || (addr & 0x3)); + + /* + * B/BL support [-128M, 128M) offset + * ARM64 virtual address arrangement guarantees all kernel and module + * texts are within +/-128M. 
+ */ + offset = ((long)addr - (long)pc); + BUG_ON(offset < -SZ_128M || offset >= SZ_128M); + + if (type == AARCH64_INSN_BRANCH_LINK) + insn = aarch64_insn_get_bl_value(); + else + insn = aarch64_insn_get_b_value(); + + return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_26, insn, + offset >> 2); +} + +u32 __kprobes aarch64_insn_gen_hint(enum aarch64_insn_hint_op op) +{ + return aarch64_insn_get_hint_value() | op; +} + +u32 __kprobes aarch64_insn_gen_nop(void) +{ + return aarch64_insn_gen_hint(AARCH64_INSN_HINT_NOP); +} diff --git a/arch/arm64/kernel/io.c b/arch/arm64/kernel/io.c new file mode 100644 index 000000000..7d37ead4d --- /dev/null +++ b/arch/arm64/kernel/io.c @@ -0,0 +1,64 @@ +/* + * Based on arch/arm/kernel/io.c + * + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/export.h> +#include <linux/types.h> +#include <linux/io.h> + +/* + * Copy data from IO memory space to "real" memory space. + */ +void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t count) +{ + unsigned char *t = to; + while (count) { + count--; + *t = readb(from); + t++; + from++; + } +} +EXPORT_SYMBOL(__memcpy_fromio); + +/* + * Copy data from "real" memory space to IO memory space. 
+ */ +void __memcpy_toio(volatile void __iomem *to, const void *from, size_t count) +{ + const unsigned char *f = from; + while (count) { + count--; + writeb(*f, to); + f++; + to++; + } +} +EXPORT_SYMBOL(__memcpy_toio); + +/* + * "memset" on IO memory space. + */ +void __memset_io(volatile void __iomem *dst, int c, size_t count) +{ + while (count) { + count--; + writeb(c, dst); + dst++; + } +} +EXPORT_SYMBOL(__memset_io); diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c new file mode 100644 index 000000000..714655384 --- /dev/null +++ b/arch/arm64/kernel/irq.c @@ -0,0 +1,161 @@ +/* + * Based on arch/arm/kernel/irq.c + * + * Copyright (C) 1992 Linus Torvalds + * Modifications for ARM processor Copyright (C) 1995-2000 Russell King. + * Support for Dynamic Tick Timer Copyright (C) 2004-2005 Nokia Corporation. + * Dynamic Tick Timer written by Tony Lindgren <tony@atomide.com> and + * Tuukka Tikkanen <tuukka.tikkanen@elektrobit.com>. + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <linux/kernel_stat.h> +#include <linux/irq.h> +#include <linux/smp.h> +#include <linux/init.h> +#include <linux/irqchip.h> +#include <linux/seq_file.h> +#include <linux/ratelimit.h> + +#include <linux/mt_sched_mon.h> +unsigned long irq_err_count; + +int arch_show_interrupts(struct seq_file *p, int prec) +{ +#ifdef CONFIG_SMP + show_ipi_list(p, prec); +#endif + seq_printf(p, "%*s: %10lu\n", prec, "Err", irq_err_count); + return 0; +} + +#ifdef CONFIG_MTK_SCHED_TRACERS +#include <trace/events/mtk_events.h> +#endif + +/* + * handle_IRQ handles all hardware IRQ's. Decoded IRQs should + * not come via this function. Instead, they should provide their + * own 'handler'. Used by platform code implementing C-based 1st + * level decoding. + */ +void handle_IRQ(unsigned int irq, struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); +#ifdef CONFIG_MTK_SCHED_TRACERS + struct irq_desc *desc; +#endif + + irq_enter(); + mt_trace_ISR_start(irq); +#ifdef CONFIG_MTK_SCHED_TRACERS + desc = irq_to_desc(irq); + trace_irq_entry(irq, + (desc && desc->action && desc->action->name) ? desc->action->name : "-"); +#endif + + /* + * Some hardware gives randomly wrong interrupts. Rather + * than crashing, do something sensible. 
+ */ + if (unlikely(irq >= nr_irqs)) { + pr_warn_ratelimited("Bad IRQ%u\n", irq); + ack_bad_irq(irq); + } else { + generic_handle_irq(irq); + } +#ifdef CONFIG_MTK_SCHED_TRACERS + trace_irq_exit(irq); +#endif + mt_trace_ISR_end(irq); + irq_exit(); + set_irq_regs(old_regs); +} + +void __init set_handle_irq(void (*handle_irq)(struct pt_regs *)) +{ + if (handle_arch_irq) + return; + + handle_arch_irq = handle_irq; +} + +void __init init_IRQ(void) +{ + irqchip_init(); + if (!handle_arch_irq) + panic("No interrupt controller found."); +} + +#ifdef CONFIG_HOTPLUG_CPU +static bool migrate_one_irq(struct irq_desc *desc) +{ + struct irq_data *d = irq_desc_get_irq_data(desc); + const struct cpumask *affinity = d->affinity; + struct irq_chip *c; + bool ret = false; + + /* + * If this is a per-CPU interrupt, or the affinity does not + * include this CPU, then we have nothing to do. + */ + if (irqd_is_per_cpu(d) || !cpumask_test_cpu(smp_processor_id(), affinity)) + return false; + + if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { + affinity = cpu_online_mask; + ret = true; + } + + c = irq_data_get_irq_chip(d); + if (!c->irq_set_affinity) + pr_debug("IRQ%u: unable to set affinity\n", d->irq); + else if (c->irq_set_affinity(d, affinity, true) == IRQ_SET_MASK_OK && ret) + cpumask_copy(d->affinity, affinity); + + return ret; +} + +/* + * The current CPU has been marked offline. Migrate IRQs off this CPU. + * If the affinity settings do not allow other CPUs, force them onto any + * available CPU. + * + * Note: we must iterate over all IRQs, whether they have an attached + * action structure or not, as we need to get chained interrupts too. 
+ */ +void migrate_irqs(void) +{ + unsigned int i; + struct irq_desc *desc; + unsigned long flags; + + local_irq_save(flags); + + for_each_irq_desc(i, desc) { + bool affinity_broken; + + raw_spin_lock(&desc->lock); + affinity_broken = migrate_one_irq(desc); + raw_spin_unlock(&desc->lock); + + if (affinity_broken) + pr_warn_ratelimited("IRQ%u no longer affine to CPU%u\n", + i, smp_processor_id()); + } + + local_irq_restore(flags); +} +#endif /* CONFIG_HOTPLUG_CPU */ diff --git a/arch/arm64/kernel/jump_label.c b/arch/arm64/kernel/jump_label.c new file mode 100644 index 000000000..263a16629 --- /dev/null +++ b/arch/arm64/kernel/jump_label.c @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2013 Huawei Ltd. + * Author: Jiang Liu <liuj97@gmail.com> + * + * Based on arch/arm/kernel/jump_label.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ +#include <linux/kernel.h> +#include <linux/jump_label.h> +#include <asm/insn.h> + +#ifdef HAVE_JUMP_LABEL + +static void __arch_jump_label_transform(struct jump_entry *entry, + enum jump_label_type type, + bool is_static) +{ + void *addr = (void *)entry->code; + u32 insn; + + if (type == JUMP_LABEL_ENABLE) { + insn = aarch64_insn_gen_branch_imm(entry->code, + entry->target, + AARCH64_INSN_BRANCH_NOLINK); + } else { + insn = aarch64_insn_gen_nop(); + } + + if (is_static) + aarch64_insn_patch_text_nosync(addr, insn); + else + aarch64_insn_patch_text(&addr, &insn, 1); +} + +void arch_jump_label_transform(struct jump_entry *entry, + enum jump_label_type type) +{ + __arch_jump_label_transform(entry, type, false); +} + +void arch_jump_label_transform_static(struct jump_entry *entry, + enum jump_label_type type) +{ + __arch_jump_label_transform(entry, type, true); +} + +#endif /* HAVE_JUMP_LABEL */ diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c new file mode 100644 index 000000000..a0d10c55f --- /dev/null +++ b/arch/arm64/kernel/kgdb.c @@ -0,0 +1,336 @@ +/* + * AArch64 KGDB support + * + * Based on arch/arm/kernel/kgdb.c + * + * Copyright (C) 2013 Cavium Inc. + * Author: Vijaya Kumar K <vijaya.kumar@caviumnetworks.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */
+
+#include <linux/irq.h>
+#include <linux/kdebug.h>
+#include <linux/kgdb.h>
+#include <asm/traps.h>
+
+/*
+ * One entry per register: name, size in bytes, and offset into
+ * struct pt_regs.  An offset of -1 marks a register that is not backed
+ * by pt_regs (v0-v31, fpsr, fpcr): dbg_get_reg() reports it as zero and
+ * dbg_set_reg() ignores writes to it.
+ */
+struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = {
+	{ "x0", 8, offsetof(struct pt_regs, regs[0])},
+	{ "x1", 8, offsetof(struct pt_regs, regs[1])},
+	{ "x2", 8, offsetof(struct pt_regs, regs[2])},
+	{ "x3", 8, offsetof(struct pt_regs, regs[3])},
+	{ "x4", 8, offsetof(struct pt_regs, regs[4])},
+	{ "x5", 8, offsetof(struct pt_regs, regs[5])},
+	{ "x6", 8, offsetof(struct pt_regs, regs[6])},
+	{ "x7", 8, offsetof(struct pt_regs, regs[7])},
+	{ "x8", 8, offsetof(struct pt_regs, regs[8])},
+	{ "x9", 8, offsetof(struct pt_regs, regs[9])},
+	{ "x10", 8, offsetof(struct pt_regs, regs[10])},
+	{ "x11", 8, offsetof(struct pt_regs, regs[11])},
+	{ "x12", 8, offsetof(struct pt_regs, regs[12])},
+	{ "x13", 8, offsetof(struct pt_regs, regs[13])},
+	{ "x14", 8, offsetof(struct pt_regs, regs[14])},
+	{ "x15", 8, offsetof(struct pt_regs, regs[15])},
+	{ "x16", 8, offsetof(struct pt_regs, regs[16])},
+	{ "x17", 8, offsetof(struct pt_regs, regs[17])},
+	{ "x18", 8, offsetof(struct pt_regs, regs[18])},
+	{ "x19", 8, offsetof(struct pt_regs, regs[19])},
+	{ "x20", 8, offsetof(struct pt_regs, regs[20])},
+	{ "x21", 8, offsetof(struct pt_regs, regs[21])},
+	{ "x22", 8, offsetof(struct pt_regs, regs[22])},
+	{ "x23", 8, offsetof(struct pt_regs, regs[23])},
+	{ "x24", 8, offsetof(struct pt_regs, regs[24])},
+	{ "x25", 8, offsetof(struct pt_regs, regs[25])},
+	{ "x26", 8, offsetof(struct pt_regs, regs[26])},
+	{ "x27", 8, offsetof(struct pt_regs, regs[27])},
+	{ "x28", 8, offsetof(struct pt_regs, regs[28])},
+	{ "x29", 8, offsetof(struct pt_regs, regs[29])},
+	{ "x30", 8, offsetof(struct pt_regs, regs[30])},
+	{ "sp", 8, offsetof(struct pt_regs, sp)},
+	{ "pc", 8, offsetof(struct pt_regs, pc)},
+	{ "pstate", 8, offsetof(struct pt_regs, pstate)},
+	{ "v0", 16, -1 },
+	{ "v1", 16, -1 },
+	{ "v2", 16, -1 },
+	{ "v3", 16, -1 },
+	{ "v4", 16, -1 },
+	{ "v5", 16, -1 },
+	{ "v6", 16, -1 },
+	{ "v7", 16, -1 },
+	{ "v8", 16, -1 },
+	{ "v9", 16, -1 },
+	{ "v10", 16, -1 },
+	{ "v11", 16, -1 },
+	{ "v12", 16, -1 },
+	{ "v13", 16, -1 },
+	{ "v14", 16, -1 },
+	{ "v15", 16, -1 },
+	{ "v16", 16, -1 },
+	{ "v17", 16, -1 },
+	{ "v18", 16, -1 },
+	{ "v19", 16, -1 },
+	{ "v20", 16, -1 },
+	{ "v21", 16, -1 },
+	{ "v22", 16, -1 },
+	{ "v23", 16, -1 },
+	{ "v24", 16, -1 },
+	{ "v25", 16, -1 },
+	{ "v26", 16, -1 },
+	{ "v27", 16, -1 },
+	{ "v28", 16, -1 },
+	{ "v29", 16, -1 },
+	{ "v30", 16, -1 },
+	{ "v31", 16, -1 },
+	{ "fpsr", 4, -1 },
+	{ "fpcr", 4, -1 },
+};
+
+/*
+ * Copy register @regno out of @regs into @mem (zero-filled when the
+ * register has no pt_regs slot).  Returns the register name, or NULL
+ * when @regno is out of range.
+ */
+char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
+{
+	if (regno >= DBG_MAX_REG_NUM || regno < 0)
+		return NULL;
+
+	if (dbg_reg_def[regno].offset != -1)
+		memcpy(mem, (void *)regs + dbg_reg_def[regno].offset,
+		       dbg_reg_def[regno].size);
+	else
+		memset(mem, 0, dbg_reg_def[regno].size);
+	return dbg_reg_def[regno].name;
+}
+
+/*
+ * Copy @mem into register @regno of @regs.  Registers without a pt_regs
+ * slot are silently skipped.  Returns 0, or -EINVAL when @regno is out
+ * of range.
+ */
+int dbg_set_reg(int regno, void *mem, struct pt_regs *regs)
+{
+	if (regno >= DBG_MAX_REG_NUM || regno < 0)
+		return -EINVAL;
+
+	if (dbg_reg_def[regno].offset != -1)
+		memcpy((void *)regs + dbg_reg_def[regno].offset, mem,
+		       dbg_reg_def[regno].size);
+	return 0;
+}
+
+/*
+ * Fill @gdb_regs for a task that is not running: clear NUMREGBYTES, then
+ * copy GP_REG_BYTES starting at the task's saved pt_regs->regs.
+ */
+void
+sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *task)
+{
+	struct pt_regs *thread_regs;
+
+	/* Initialize to zero */
+	memset((char *)gdb_regs, 0, NUMREGBYTES);
+	thread_regs = task_pt_regs(task);
+	memcpy((void *)gdb_regs, (void *)thread_regs->regs, GP_REG_BYTES);
+}
+
+/* Set the saved program counter in @regs to @pc. */
+void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc)
+{
+	regs->pc = pc;
+}
+
+/*
+ * Set to 1 by kgdb_compiled_brk_fn() and consumed (reset to 0) by
+ * kgdb_arch_update_addr().
+ */
+static int compiled_break;
+
+/*
+ * Pick the resume address for a 'c'/'s' style packet: use the hex address
+ * following the command byte when one parses, otherwise step the pc past
+ * the 4-byte instruction if we stopped on a compiled-in breakpoint.
+ */
+static void kgdb_arch_update_addr(struct pt_regs *regs,
+				char *remcom_in_buffer)
+{
+	unsigned long addr;
+	char *ptr;
+
+	ptr = &remcom_in_buffer[1];
+	if (kgdb_hex2long(&ptr, &addr))
+		kgdb_arch_set_pc(regs, addr);
+	else if (compiled_break == 1)
+		kgdb_arch_set_pc(regs, regs->pc + 4);
+
+	compiled_break = 0;
+}
+
+/*
+ * Handle the architecture specific part of a gdb packet.  Only the first
+ * byte of @remcom_in_buffer is dispatched on: 'D', 'k' and 'c' resume
+ * execution, 's' arms a single step.  Returns 0 when the packet was
+ * handled and -1 for any other packet.
+ */
+int kgdb_arch_handle_exception(int exception_vector, int signo,
+			       int err_code, char *remcom_in_buffer,
+			       char *remcom_out_buffer,
+			       struct pt_regs *linux_regs)
+{
+	int err;
+
+	switch (remcom_in_buffer[0]) {
+	case 'D':
+	case 'k':
+		/*
+		 * Packet D (Detach), k (kill). No special handling
+		 * is required here. Handle same as c packet.
+		 */
+	case 'c':
+		/*
+		 * Packet c (Continue) to continue executing.
+		 * Set pc to required address.
+		 * Try to read optional parameter and set pc.
+		 * If this was a compiled breakpoint, we need to move
+		 * to the next instruction else we will just breakpoint
+		 * over and over again.
+		 */
+		kgdb_arch_update_addr(linux_regs, remcom_in_buffer);
+		atomic_set(&kgdb_cpu_doing_single_step, -1);
+		kgdb_single_step = 0;
+
+		/*
+		 * Received continue command, disable single step
+		 */
+		if (kernel_active_single_step())
+			kernel_disable_single_step();
+
+		err = 0;
+		break;
+	case 's':
+		/*
+		 * Update step address value with address passed
+		 * with step packet.
+		 * On debug exception return PC is copied to ELR
+		 * So just update PC.
+		 * If no step address is passed, resume from the address
+		 * pointed by PC. Do not update PC
+		 */
+		kgdb_arch_update_addr(linux_regs, remcom_in_buffer);
+		atomic_set(&kgdb_cpu_doing_single_step, raw_smp_processor_id());
+		kgdb_single_step = 1;
+
+		/*
+		 * Enable single step handling
+		 */
+		if (!kernel_active_single_step())
+			kernel_enable_single_step(linux_regs);
+		err = 0;
+		break;
+	default:
+		err = -1;
+	}
+	return err;
+}
+
+/* Break hook for KGDB_DYN_DBG_BRK_IMM: enter kgdb with SIGTRAP. */
+static int kgdb_brk_fn(struct pt_regs *regs, unsigned int esr)
+{
+	kgdb_handle_exception(1, SIGTRAP, 0, regs);
+	return 0;
+}
+
+/*
+ * Break hook for KGDB_COMPILED_DBG_BRK_IMM: flag the stop as a compiled-in
+ * breakpoint before entering kgdb, so the resume path skips over it.
+ */
+static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int esr)
+{
+	compiled_break = 1;
+	kgdb_handle_exception(1, SIGTRAP, 0, regs);
+
+	return 0;
+}
+
+/* Step hook: enter kgdb with SIGTRAP. */
+static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned int esr)
+{
+	kgdb_handle_exception(1, SIGTRAP, 0, regs);
+	return 0;
+}
+
+static struct break_hook kgdb_brkpt_hook = {
+	.esr_mask	= 0xffffffff,
+	.esr_val	= DBG_ESR_VAL_BRK(KGDB_DYN_DBG_BRK_IMM),
+	.fn		= kgdb_brk_fn
+};
+
+static struct break_hook kgdb_compiled_brkpt_hook = {
+	.esr_mask	= 0xffffffff,
+	.esr_val	= DBG_ESR_VAL_BRK(KGDB_COMPILED_DBG_BRK_IMM),
+	.fn		= kgdb_compiled_brk_fn
+};
+
+static struct step_hook kgdb_step_hook = {
+	.fn		= kgdb_step_brk_fn
+};
+
+/* smp_call_function() callback used by kgdb_roundup_cpus(). */
+static void kgdb_call_nmi_hook(void *ignored)
+{
+	kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs());
+}
+
+/*
+ * Hand kgdb_call_nmi_hook() to smp_call_function() (wait argument 0) with
+ * local interrupts enabled for the duration of the call.  @flags is not
+ * used.
+ */
+void kgdb_roundup_cpus(unsigned long flags)
+{
+	local_irq_enable();
+	smp_call_function(kgdb_call_nmi_hook, NULL, 0);
+	local_irq_disable();
+}
+
+/*
+ * Forward a die notification to kgdb.  Returns NOTIFY_DONE when
+ * kgdb_handle_exception() returns non-zero, NOTIFY_STOP otherwise.
+ */
+static int __kgdb_notify(struct die_args *args, unsigned long cmd)
+{
+	struct pt_regs *regs = args->regs;
+
+	if (kgdb_handle_exception(1, args->signr, cmd, regs))
+		return NOTIFY_DONE;
+	return NOTIFY_STOP;
+}
+
+/* Die-notifier entry point: run __kgdb_notify() with local IRQs saved/off. */
+static int
+kgdb_notify(struct notifier_block *self, unsigned long cmd, void *ptr)
+{
+	unsigned long flags;
+	int ret;
+
+	local_irq_save(flags);
+	ret = __kgdb_notify(ptr, cmd);
+	local_irq_restore(flags);
+
+	return ret;
+}
+
+static struct notifier_block kgdb_notifier = {
+	.notifier_call	= kgdb_notify,
+	/*
+	 * Want to be lowest priority
+	 */
+	.priority	= -INT_MAX,
+};
+
+/*
+ * kgdb_arch_init - Perform any architecture specific initialization.
+ * This function will handle the initialization of any architecture
+ * specific callbacks.
+ *
+ * Returns 0, or the error from register_die_notifier(), in which case
+ * no break/step hook has been registered.
+ */
+int kgdb_arch_init(void)
+{
+	int ret = register_die_notifier(&kgdb_notifier);
+
+	if (ret != 0)
+		return ret;
+
+	register_break_hook(&kgdb_brkpt_hook);
+	register_break_hook(&kgdb_compiled_brkpt_hook);
+	register_step_hook(&kgdb_step_hook);
+	return 0;
+}
+
+/*
+ * kgdb_arch_exit - Perform any architecture specific uninitialization.
+ * This function will handle the uninitialization of any architecture
+ * specific callbacks, for dynamic registration and unregistration.
+ */
+void kgdb_arch_exit(void)
+{
+	unregister_break_hook(&kgdb_brkpt_hook);
+	unregister_break_hook(&kgdb_compiled_brkpt_hook);
+	unregister_step_hook(&kgdb_step_hook);
+	unregister_die_notifier(&kgdb_notifier);
+}
+
+/*
+ * ARM instructions are always in LE.
+ * Break instruction is encoded in LE format
+ */
+struct kgdb_arch arch_kgdb_ops = {
+	.gdb_bpt_instr = {
+		KGDB_DYN_BRK_INS_BYTE0,
+		KGDB_DYN_BRK_INS_BYTE1,
+		KGDB_DYN_BRK_INS_BYTE2,
+		KGDB_DYN_BRK_INS_BYTE3,
+	}
+};
diff --git a/arch/arm64/kernel/kuser32.S b/arch/arm64/kernel/kuser32.S
new file mode 100644
index 000000000..2f5b3ff7e
--- /dev/null
+++ b/arch/arm64/kernel/kuser32.S
@@ -0,0 +1,80 @@
+/*
+ * Low-level user helpers placed in the vectors page for AArch32.
+ * Based on the kuser helpers in arch/arm/kernel/entry-armv.S.
+ *
+ * Copyright (C) 2005-2011 Nicolas Pitre <nico@fluxnic.net>
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * AArch32 user helpers.
+ *
+ * Each segment is 32-byte aligned and will be moved to the top of the high
+ * vector page. New segments (if ever needed) must be added in front of
+ * existing ones. This mechanism should be used only for things that are
+ * really small and justified, and not be abused freely.
+ *
+ * See Documentation/arm/kernel_user_helpers.txt for formal definitions.
+ */
+
+#include <asm/unistd.h>
+
+	.align	5
+	.globl	__kuser_helper_start
+__kuser_helper_start:
+
+/*
+ * Going by the mnemonics in the trailing comments: r0 and r1 point to the
+ * expected and the new 64-bit value, r2 to the target.  The ldrexd/strexd
+ * pair is retried while strexd reports 1; r0 is returned as 0 - r3, i.e.
+ * zero only when the old value matched and the store went through.
+ */
+__kuser_cmpxchg64:			// 0xffff0f60
+	.inst	0xe92d00f0		//	push		{r4, r5, r6, r7}
+	.inst	0xe1c040d0		//	ldrd		r4, r5, [r0]
+	.inst	0xe1c160d0		//	ldrd		r6, r7, [r1]
+	.inst	0xf57ff05f		//	dmb		sy
+	.inst	0xe1b20f9f		// 1:	ldrexd		r0, r1, [r2]
+	.inst	0xe0303004		//	eors		r3, r0, r4
+	.inst	0x00313005		//	eoreqs		r3, r1, r5
+	.inst	0x01a23f96		//	strexdeq	r3, r6, [r2]
+	.inst	0x03330001		//	teqeq		r3, #1
+	.inst	0x0afffff9		//	beq		1b
+	.inst	0xf57ff05f		//	dmb		sy
+	.inst	0xe2730000		//	rsbs		r0, r3, #0
+	.inst	0xe8bd00f0		//	pop		{r4, r5, r6, r7}
+	.inst	0xe12fff1e		//	bx		lr
+
+/* Full-system data memory barrier, then return. */
+	.align	5
+__kuser_memory_barrier:			// 0xffff0fa0
+	.inst	0xf57ff05f		//	dmb		sy
+	.inst	0xe12fff1e		//	bx		lr
+
+/*
+ * Going by the mnemonics in the trailing comments: r0 is the expected
+ * value, r1 the new value, r2 the target.  The ldrex/strex pair is retried
+ * while strex reports 1; r0 is returned as 0 - r3.  The helper leaves
+ * through __kuser_memory_barrier, which supplies the trailing dmb and the
+ * return.
+ */
+	.align	5
+__kuser_cmpxchg:			// 0xffff0fc0
+	.inst	0xf57ff05f		//	dmb		sy
+	.inst	0xe1923f9f		// 1:	ldrex		r3, [r2]
+	.inst	0xe0533000		//	subs		r3, r3, r0
+	.inst	0x01823f91		//	strexeq	r3, r1, [r2]
+	.inst	0x03330001		//	teqeq		r3, #1
+	.inst	0x0afffffa		//	beq		1b
+	.inst	0xe2730000		//	rsbs		r0, r3, #0
+	.inst	0xeaffffef		//	b		<__kuser_memory_barrier>
+
+/*
+ * Return the CP15 c13, c0, 3 register in r0.  The two instructions plus
+ * the five zero words below fill 28 bytes, which puts
+ * __kuser_helper_version at offset 0xffc.
+ */
+	.align	5
+__kuser_get_tls:			// 0xffff0fe0
+	.inst	0xee1d0f70		//	mrc		p15, 0, r0, c13, c0, 3
+	.inst	0xe12fff1e		//	bx		lr
+	.rep	5
+	.word	0
+	.endr
+
+/* Number of 32-byte helper segments between the start and end labels. */
+__kuser_helper_version:			// 0xffff0ffc
+	.word	((__kuser_helper_end - __kuser_helper_start) >> 5)
+	.globl	__kuser_helper_end
+__kuser_helper_end: diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c new file mode 100644 index 000000000..df08a6e02 --- /dev/null +++ b/arch/arm64/kernel/module.c @@ -0,0 +1,396 @@ +/* + * AArch64 loadable module support. + * + * Copyright (C) 2012 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * Author: Will Deacon <will.deacon@arm.com> + */ + +#include <linux/bitops.h> +#include <linux/elf.h> +#include <linux/gfp.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/moduleloader.h> +#include <linux/vmalloc.h> +#include <asm/insn.h> + +#define AARCH64_INSN_IMM_MOVNZ AARCH64_INSN_IMM_MAX +#define AARCH64_INSN_IMM_MOVK AARCH64_INSN_IMM_16 + +void *module_alloc(unsigned long size) +{ + return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, + GFP_KERNEL, PAGE_KERNEL_EXEC, -1, + __builtin_return_address(0)); +} + +enum aarch64_reloc_op { + RELOC_OP_NONE, + RELOC_OP_ABS, + RELOC_OP_PREL, + RELOC_OP_PAGE, +}; + +static u64 do_reloc(enum aarch64_reloc_op reloc_op, void *place, u64 val) +{ + switch (reloc_op) { + case RELOC_OP_ABS: + return val; + case RELOC_OP_PREL: + return val - (u64)place; + case RELOC_OP_PAGE: + return (val & ~0xfff) - ((u64)place & ~0xfff); + case RELOC_OP_NONE: + return 0; + } + + pr_err("do_reloc: unknown relocation operation %d\n", reloc_op); + return 0; +} + +static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len) +{ + u64 
imm_mask = (1 << len) - 1; + s64 sval = do_reloc(op, place, val); + + switch (len) { + case 16: + *(s16 *)place = sval; + break; + case 32: + *(s32 *)place = sval; + break; + case 64: + *(s64 *)place = sval; + break; + default: + pr_err("Invalid length (%d) for data relocation\n", len); + return 0; + } + + /* + * Extract the upper value bits (including the sign bit) and + * shift them to bit 0. + */ + sval = (s64)(sval & ~(imm_mask >> 1)) >> (len - 1); + + /* + * Overflow has occurred if the value is not representable in + * len bits (i.e the bottom len bits are not sign-extended and + * the top bits are not all zero). + */ + if ((u64)(sval + 1) > 2) + return -ERANGE; + + return 0; +} + +static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val, + int lsb, enum aarch64_insn_imm_type imm_type) +{ + u64 imm, limit = 0; + s64 sval; + u32 insn = le32_to_cpu(*(u32 *)place); + + sval = do_reloc(op, place, val); + sval >>= lsb; + imm = sval & 0xffff; + + if (imm_type == AARCH64_INSN_IMM_MOVNZ) { + /* + * For signed MOVW relocations, we have to manipulate the + * instruction encoding depending on whether or not the + * immediate is less than zero. + */ + insn &= ~(3 << 29); + if ((s64)imm >= 0) { + /* >=0: Set the instruction to MOVZ (opcode 10b). */ + insn |= 2 << 29; + } else { + /* + * <0: Set the instruction to MOVN (opcode 00b). + * Since we've masked the opcode already, we + * don't need to do anything other than + * inverting the new immediate field. + */ + imm = ~imm; + } + imm_type = AARCH64_INSN_IMM_MOVK; + } + + /* Update the instruction with the new encoding. */ + insn = aarch64_insn_encode_immediate(imm_type, insn, imm); + *(u32 *)place = cpu_to_le32(insn); + + /* Shift out the immediate field. */ + sval >>= 16; + + /* + * For unsigned immediates, the overflow check is straightforward. + * For signed immediates, the sign bit is actually the bit past the + * most significant bit of the field. 
+ * The AARCH64_INSN_IMM_16 immediate type is unsigned. + */ + if (imm_type != AARCH64_INSN_IMM_16) { + sval++; + limit++; + } + + /* Check the upper bits depending on the sign of the immediate. */ + if ((u64)sval > limit) + return -ERANGE; + + return 0; +} + +static int reloc_insn_imm(enum aarch64_reloc_op op, void *place, u64 val, + int lsb, int len, enum aarch64_insn_imm_type imm_type) +{ + u64 imm, imm_mask; + s64 sval; + u32 insn = le32_to_cpu(*(u32 *)place); + + /* Calculate the relocation value. */ + sval = do_reloc(op, place, val); + sval >>= lsb; + + /* Extract the value bits and shift them to bit 0. */ + imm_mask = (BIT(lsb + len) - 1) >> lsb; + imm = sval & imm_mask; + + /* Update the instruction's immediate field. */ + insn = aarch64_insn_encode_immediate(imm_type, insn, imm); + *(u32 *)place = cpu_to_le32(insn); + + /* + * Extract the upper value bits (including the sign bit) and + * shift them to bit 0. + */ + sval = (s64)(sval & ~(imm_mask >> 1)) >> (len - 1); + + /* + * Overflow has occurred if the upper bits are not all equal to + * the sign bit of the value. + */ + if ((u64)(sval + 1) >= 2) + return -ERANGE; + + return 0; +} + +int apply_relocate_add(Elf64_Shdr *sechdrs, + const char *strtab, + unsigned int symindex, + unsigned int relsec, + struct module *me) +{ + unsigned int i; + int ovf; + bool overflow_check; + Elf64_Sym *sym; + void *loc; + u64 val; + Elf64_Rela *rel = (void *)sechdrs[relsec].sh_addr; + + for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { + /* loc corresponds to P in the AArch64 ELF document. */ + loc = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr + + rel[i].r_offset; + + /* sym is the ELF symbol we're referring to. */ + sym = (Elf64_Sym *)sechdrs[symindex].sh_addr + + ELF64_R_SYM(rel[i].r_info); + + /* val corresponds to (S + A) in the AArch64 ELF document. */ + val = sym->st_value + rel[i].r_addend; + + /* Check for overflow by default. */ + overflow_check = true; + + /* Perform the static relocation. 
*/ + switch (ELF64_R_TYPE(rel[i].r_info)) { + /* Null relocations. */ + case R_ARM_NONE: + case R_AARCH64_NONE: + ovf = 0; + break; + + /* Data relocations. */ + case R_AARCH64_ABS64: + overflow_check = false; + ovf = reloc_data(RELOC_OP_ABS, loc, val, 64); + break; + case R_AARCH64_ABS32: + ovf = reloc_data(RELOC_OP_ABS, loc, val, 32); + break; + case R_AARCH64_ABS16: + ovf = reloc_data(RELOC_OP_ABS, loc, val, 16); + break; + case R_AARCH64_PREL64: + overflow_check = false; + ovf = reloc_data(RELOC_OP_PREL, loc, val, 64); + break; + case R_AARCH64_PREL32: + ovf = reloc_data(RELOC_OP_PREL, loc, val, 32); + break; + case R_AARCH64_PREL16: + ovf = reloc_data(RELOC_OP_PREL, loc, val, 16); + break; + + /* MOVW instruction relocations. */ + case R_AARCH64_MOVW_UABS_G0_NC: + overflow_check = false; + case R_AARCH64_MOVW_UABS_G0: + ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 0, + AARCH64_INSN_IMM_16); + break; + case R_AARCH64_MOVW_UABS_G1_NC: + overflow_check = false; + case R_AARCH64_MOVW_UABS_G1: + ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 16, + AARCH64_INSN_IMM_16); + break; + case R_AARCH64_MOVW_UABS_G2_NC: + overflow_check = false; + case R_AARCH64_MOVW_UABS_G2: + ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 32, + AARCH64_INSN_IMM_16); + break; + case R_AARCH64_MOVW_UABS_G3: + /* We're using the top bits so we can't overflow. 
*/ + overflow_check = false; + ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 48, + AARCH64_INSN_IMM_16); + break; + case R_AARCH64_MOVW_SABS_G0: + ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 0, + AARCH64_INSN_IMM_MOVNZ); + break; + case R_AARCH64_MOVW_SABS_G1: + ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 16, + AARCH64_INSN_IMM_MOVNZ); + break; + case R_AARCH64_MOVW_SABS_G2: + ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 32, + AARCH64_INSN_IMM_MOVNZ); + break; + case R_AARCH64_MOVW_PREL_G0_NC: + overflow_check = false; + ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 0, + AARCH64_INSN_IMM_MOVK); + break; + case R_AARCH64_MOVW_PREL_G0: + ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 0, + AARCH64_INSN_IMM_MOVNZ); + break; + case R_AARCH64_MOVW_PREL_G1_NC: + overflow_check = false; + ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 16, + AARCH64_INSN_IMM_MOVK); + break; + case R_AARCH64_MOVW_PREL_G1: + ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 16, + AARCH64_INSN_IMM_MOVNZ); + break; + case R_AARCH64_MOVW_PREL_G2_NC: + overflow_check = false; + ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 32, + AARCH64_INSN_IMM_MOVK); + break; + case R_AARCH64_MOVW_PREL_G2: + ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 32, + AARCH64_INSN_IMM_MOVNZ); + break; + case R_AARCH64_MOVW_PREL_G3: + /* We're using the top bits so we can't overflow. */ + overflow_check = false; + ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 48, + AARCH64_INSN_IMM_MOVNZ); + break; + + /* Immediate instruction relocations. 
*/ + case R_AARCH64_LD_PREL_LO19: + ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 19, + AARCH64_INSN_IMM_19); + break; + case R_AARCH64_ADR_PREL_LO21: + ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 0, 21, + AARCH64_INSN_IMM_ADR); + break; + case R_AARCH64_ADR_PREL_PG_HI21_NC: + overflow_check = false; + case R_AARCH64_ADR_PREL_PG_HI21: + ovf = reloc_insn_imm(RELOC_OP_PAGE, loc, val, 12, 21, + AARCH64_INSN_IMM_ADR); + break; + case R_AARCH64_ADD_ABS_LO12_NC: + case R_AARCH64_LDST8_ABS_LO12_NC: + overflow_check = false; + ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 0, 12, + AARCH64_INSN_IMM_12); + break; + case R_AARCH64_LDST16_ABS_LO12_NC: + overflow_check = false; + ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 1, 11, + AARCH64_INSN_IMM_12); + break; + case R_AARCH64_LDST32_ABS_LO12_NC: + overflow_check = false; + ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 2, 10, + AARCH64_INSN_IMM_12); + break; + case R_AARCH64_LDST64_ABS_LO12_NC: + overflow_check = false; + ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 3, 9, + AARCH64_INSN_IMM_12); + break; + case R_AARCH64_LDST128_ABS_LO12_NC: + overflow_check = false; + ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 4, 8, + AARCH64_INSN_IMM_12); + break; + case R_AARCH64_TSTBR14: + ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 14, + AARCH64_INSN_IMM_14); + break; + case R_AARCH64_CONDBR19: + ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 19, + AARCH64_INSN_IMM_19); + break; + case R_AARCH64_JUMP26: + case R_AARCH64_CALL26: + ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 26, + AARCH64_INSN_IMM_26); + break; + + default: + pr_err("module %s: unsupported RELA relocation: %llu\n", + me->name, ELF64_R_TYPE(rel[i].r_info)); + return -ENOEXEC; + } + + if (overflow_check && ovf == -ERANGE) + goto overflow; + + } + + return 0; + +overflow: + pr_err("module %s: overflow in relocation type %d val %Lx\n", + me->name, (int)ELF64_R_TYPE(rel[i].r_info), val); + return -ENOEXEC; +} diff --git a/arch/arm64/kernel/opcodes.c 
b/arch/arm64/kernel/opcodes.c new file mode 100644 index 000000000..ceb5a04a1 --- /dev/null +++ b/arch/arm64/kernel/opcodes.c @@ -0,0 +1,72 @@ +/* + * Copied from linux/arch/arm/kernel/opcodes.c + * + * A32 condition code lookup feature moved from nwfpe/fpopcode.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <asm/opcodes.h> + +#define ARM_OPCODE_CONDITION_UNCOND 0xf + +/* + * condition code lookup table + * index into the table is test code: EQ, NE, ... LT, GT, AL, NV + * + * bit position in short is condition code: NZCV + */ +static const unsigned short cc_map[16] = { + 0xF0F0, /* EQ == Z set */ + 0x0F0F, /* NE */ + 0xCCCC, /* CS == C set */ + 0x3333, /* CC */ + 0xFF00, /* MI == N set */ + 0x00FF, /* PL */ + 0xAAAA, /* VS == V set */ + 0x5555, /* VC */ + 0x0C0C, /* HI == C set && Z clear */ + 0xF3F3, /* LS == C clear || Z set */ + 0xAA55, /* GE == (N==V) */ + 0x55AA, /* LT == (N!=V) */ + 0x0A05, /* GT == (!Z && (N==V)) */ + 0xF5FA, /* LE == (Z || (N!=V)) */ + 0xFFFF, /* AL always */ + 0 /* NV */ +}; + +/* + * Returns: + * ARM_OPCODE_CONDTEST_FAIL - if condition fails + * ARM_OPCODE_CONDTEST_PASS - if condition passes (including AL) + * ARM_OPCODE_CONDTEST_UNCOND - if NV condition, or separate unconditional + * opcode space from v5 onwards + * + * Code that tests whether a conditional instruction would pass its condition + * check should check that return value == ARM_OPCODE_CONDTEST_PASS. + * + * Code that tests if a condition means that the instruction would be executed + * (regardless of conditional or unconditional) should instead check that the + * return value != ARM_OPCODE_CONDTEST_FAIL. 
+ */ +asmlinkage unsigned int arm_check_condition(u32 opcode, u64 psr) +{ + u32 cc_bits = opcode >> 28; + u32 psr_cond = (u32)(psr & 0xffffffff) >> 28; + unsigned int ret; + + if (cc_bits != ARM_OPCODE_CONDITION_UNCOND) { + if ((cc_map[cc_bits] >> (psr_cond)) & 1) + ret = ARM_OPCODE_CONDTEST_PASS; + else + ret = ARM_OPCODE_CONDTEST_FAIL; + } else { + ret = ARM_OPCODE_CONDTEST_UNCOND; + } + + return ret; +} +EXPORT_SYMBOL_GPL(arm_check_condition); diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c new file mode 100644 index 000000000..cea1594ff --- /dev/null +++ b/arch/arm64/kernel/perf_event.c @@ -0,0 +1,1411 @@ +/* + * PMU support + * + * Copyright (C) 2012 ARM Limited + * Author: Will Deacon <will.deacon@arm.com> + * + * This code is based heavily on the ARMv7 perf event code. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#define pr_fmt(fmt) "hw perfevents: " fmt + +#include <linux/bitmap.h> +#include <linux/interrupt.h> +#include <linux/kernel.h> +#include <linux/export.h> +#include <linux/perf_event.h> +#include <linux/platform_device.h> +#include <linux/spinlock.h> +#include <linux/uaccess.h> + +#include <asm/cputype.h> +#include <asm/irq.h> +#include <asm/irq_regs.h> +#include <asm/pmu.h> +#include <asm/stacktrace.h> + +/* + * ARMv8 supports a maximum of 32 events. + * The cycle counter is included in this total. 
+ */ +#define ARMPMU_MAX_HWEVENTS 32 + +static DEFINE_PER_CPU(struct perf_event * [ARMPMU_MAX_HWEVENTS], hw_events); +static DEFINE_PER_CPU(unsigned long [BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)], used_mask); +static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events); + +#define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu)) + +/* Set at runtime when we know what CPU type we are. */ +static struct arm_pmu *cpu_pmu; + +int +armpmu_get_max_events(void) +{ + int max_events = 0; + + if (cpu_pmu != NULL) + max_events = cpu_pmu->num_events; + + return max_events; +} +EXPORT_SYMBOL_GPL(armpmu_get_max_events); + +int perf_num_counters(void) +{ + return armpmu_get_max_events(); +} +EXPORT_SYMBOL_GPL(perf_num_counters); + +#define HW_OP_UNSUPPORTED 0xFFFF + +#define C(_x) \ + PERF_COUNT_HW_CACHE_##_x + +#define CACHE_OP_UNSUPPORTED 0xFFFF + +static int +armpmu_map_cache_event(const unsigned (*cache_map) + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX], + u64 config) +{ + unsigned int cache_type, cache_op, cache_result, ret; + + cache_type = (config >> 0) & 0xff; + if (cache_type >= PERF_COUNT_HW_CACHE_MAX) + return -EINVAL; + + cache_op = (config >> 8) & 0xff; + if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) + return -EINVAL; + + cache_result = (config >> 16) & 0xff; + if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) + return -EINVAL; + + ret = (int)(*cache_map)[cache_type][cache_op][cache_result]; + + if (ret == CACHE_OP_UNSUPPORTED) + return -ENOENT; + + return ret; +} + +static int +armpmu_map_event(const unsigned (*event_map)[PERF_COUNT_HW_MAX], u64 config) +{ + int mapping; + + if (config >= PERF_COUNT_HW_MAX) + return -EINVAL; + + mapping = (*event_map)[config]; + return mapping == HW_OP_UNSUPPORTED ? 
-ENOENT : mapping; +} + +static int +armpmu_map_raw_event(u32 raw_event_mask, u64 config) +{ + return (int)(config & raw_event_mask); +} + +static int map_cpu_event(struct perf_event *event, + const unsigned (*event_map)[PERF_COUNT_HW_MAX], + const unsigned (*cache_map) + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX], + u32 raw_event_mask) +{ + u64 config = event->attr.config; + + switch (event->attr.type) { + case PERF_TYPE_HARDWARE: + return armpmu_map_event(event_map, config); + case PERF_TYPE_HW_CACHE: + return armpmu_map_cache_event(cache_map, config); + case PERF_TYPE_RAW: + return armpmu_map_raw_event(raw_event_mask, config); + } + + return -ENOENT; +} + +int +armpmu_event_set_period(struct perf_event *event, + struct hw_perf_event *hwc, + int idx) +{ + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + s64 left = local64_read(&hwc->period_left); + s64 period = hwc->sample_period; + int ret = 0; + + if (unlikely(left <= -period)) { + left = period; + local64_set(&hwc->period_left, left); + hwc->last_period = period; + ret = 1; + } + + if (unlikely(left <= 0)) { + left += period; + local64_set(&hwc->period_left, left); + hwc->last_period = period; + ret = 1; + } + + if (left > (s64)armpmu->max_period) + left = armpmu->max_period; + + local64_set(&hwc->prev_count, (u64)-left); + + armpmu->write_counter(idx, (u64)(-left) & 0xffffffff); + + perf_event_update_userpage(event); + + return ret; +} + +u64 +armpmu_event_update(struct perf_event *event, + struct hw_perf_event *hwc, + int idx) +{ + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + u64 delta, prev_raw_count, new_raw_count; + +again: + prev_raw_count = local64_read(&hwc->prev_count); + new_raw_count = armpmu->read_counter(idx); + + if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, + new_raw_count) != prev_raw_count) + goto again; + + delta = (new_raw_count - prev_raw_count) & armpmu->max_period; + + local64_add(delta, &event->count); + local64_sub(delta, 
&hwc->period_left); + + return new_raw_count; +} + +static void +armpmu_read(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + /* Don't read disabled counters! */ + if (hwc->idx < 0) + return; + + armpmu_event_update(event, hwc, hwc->idx); +} + +static void +armpmu_stop(struct perf_event *event, int flags) +{ + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + + /* + * ARM pmu always has to update the counter, so ignore + * PERF_EF_UPDATE, see comments in armpmu_start(). + */ + if (!(hwc->state & PERF_HES_STOPPED)) { + armpmu->disable(hwc, hwc->idx); + barrier(); /* why? */ + armpmu_event_update(event, hwc, hwc->idx); + hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; + } +} + +static void +armpmu_start(struct perf_event *event, int flags) +{ + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + + /* + * ARM pmu always has to reprogram the period, so ignore + * PERF_EF_RELOAD, see the comment below. + */ + if (flags & PERF_EF_RELOAD) + WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); + + hwc->state = 0; + /* + * Set the period again. Some counters can't be stopped, so when we + * were stopped we simply disabled the IRQ source and the counter + * may have been left counting. If we don't do this step then we may + * get an interrupt too soon or *way* too late if the overflow has + * happened since disabling. 
+ */ + armpmu_event_set_period(event, hwc, hwc->idx); + armpmu->enable(hwc, hwc->idx); +} + +static void +armpmu_del(struct perf_event *event, int flags) +{ + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + struct pmu_hw_events *hw_events = armpmu->get_hw_events(); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + WARN_ON(idx < 0); + + armpmu_stop(event, PERF_EF_UPDATE); + hw_events->events[idx] = NULL; + clear_bit(idx, hw_events->used_mask); + + perf_event_update_userpage(event); +} + +static int +armpmu_add(struct perf_event *event, int flags) +{ + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + struct pmu_hw_events *hw_events = armpmu->get_hw_events(); + struct hw_perf_event *hwc = &event->hw; + int idx; + int err = 0; + + perf_pmu_disable(event->pmu); + + /* If we don't have a space for the counter then finish early. */ + idx = armpmu->get_event_idx(hw_events, hwc); + if (idx < 0) { + err = idx; + goto out; + } + + /* + * If there is an event in the counter we are going to use then make + * sure it is disabled. + */ + event->hw.idx = idx; + armpmu->disable(hwc, idx); + hw_events->events[idx] = event; + + hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; + if (flags & PERF_EF_START) + armpmu_start(event, PERF_EF_RELOAD); + + /* Propagate our changes to the userspace mapping. 
/*
 * Try to place @event on the fake PMU described by @hw_events.
 *
 * Returns non-zero when the event is acceptable. Events that are skipped
 * (software events, events belonging to a different PMU than the group
 * leader, events in a state below PERF_EVENT_STATE_OFF, and OFF events
 * without enable_on_exec) count as acceptable. Otherwise the result is
 * whether get_event_idx() found a slot, using a copy of event->hw so the
 * real event is not modified here.
 */
static int
validate_event(struct pmu_hw_events *hw_events,
	       struct perf_event *event)
{
	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
	struct hw_perf_event fake_event = event->hw;
	struct pmu *leader_pmu = event->group_leader->pmu;

	if (is_software_event(event))
		return 1;

	if (event->pmu != leader_pmu || event->state < PERF_EVENT_STATE_OFF)
		return 1;

	if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec)
		return 1;

	return armpmu->get_event_idx(hw_events, &fake_event) >= 0;
}

/*
 * Check that the group @event is joining can be scheduled as a whole.
 *
 * The leader, every sibling on the leader's list and finally @event itself
 * are placed in turn on a scratch used_mask held on the stack.
 *
 * Returns 0 when all of them fit, -EINVAL as soon as one does not.
 */
static int
validate_group(struct perf_event *event)
{
	struct perf_event *sibling, *leader = event->group_leader;
	struct pmu_hw_events fake_pmu;
	DECLARE_BITMAP(fake_used_mask, ARMPMU_MAX_HWEVENTS);

	/*
	 * Initialise the fake PMU. We only need to populate the
	 * used_mask for the purposes of validation.
	 */
	memset(fake_used_mask, 0, sizeof(fake_used_mask));
	fake_pmu.used_mask = fake_used_mask;

	if (!validate_event(&fake_pmu, leader))
		return -EINVAL;

	list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
		if (!validate_event(&fake_pmu, sibling))
			return -EINVAL;
	}

	if (!validate_event(&fake_pmu, event))
		return -EINVAL;

	return 0;
}

/*
 * Free every PMU interrupt that armpmu_reserve_hardware() recorded in
 * armpmu->active_irqs. Bits are cleared as the IRQs are released, so
 * calling this again only touches IRQs that are still marked active.
 */
static void
armpmu_release_hardware(struct arm_pmu *armpmu)
{
	int i, irq, irqs;
	struct platform_device *pmu_device = armpmu->plat_device;

	irqs = min(pmu_device->num_resources, num_possible_cpus());

	for (i = 0; i < irqs; ++i) {
		if (!cpumask_test_and_clear_cpu(i, &armpmu->active_irqs))
			continue;
		irq = platform_get_irq(pmu_device, i);
		if (irq >= 0)
			free_irq(irq, armpmu);
	}
}

/*
 * Request the PMU interrupts listed on the platform device.
 *
 * Platform IRQ resource i is given affinity to CPU i and requested with
 * armpmu->handle_irq as handler. Resources without a valid IRQ are skipped.
 * An IRQ whose affinity cannot be set is skipped too, unless it is the only
 * one. If request_irq() fails, everything acquired so far is released.
 *
 * Returns 0 on success, -ENODEV when no platform device or no IRQ resource
 * is available, or the error from request_irq().
 */
static int
armpmu_reserve_hardware(struct arm_pmu *armpmu)
{
	int i, err, irq, irqs;
	struct platform_device *pmu_device = armpmu->plat_device;

	if (!pmu_device) {
		pr_err("no PMU device registered\n");
		return -ENODEV;
	}

	irqs = min(pmu_device->num_resources, num_possible_cpus());
	if (irqs < 1) {
		pr_err("no irqs for PMUs defined\n");
		return -ENODEV;
	}

	for (i = 0; i < irqs; ++i) {
		err = 0;
		irq = platform_get_irq(pmu_device, i);
		if (irq < 0)
			continue;

		/*
		 * If we have a single PMU interrupt that we can't shift,
		 * assume that we're running on a uniprocessor machine and
		 * continue. Otherwise, continue without this interrupt.
		 */
		if (irq_set_affinity(irq, cpumask_of(i)) && irqs > 1) {
			pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n",
				   irq, i);
			continue;
		}

		err = request_irq(irq, armpmu->handle_irq,
				  IRQF_NOBALANCING,
				  "arm-pmu", armpmu);
		if (err) {
			pr_err("unable to request IRQ%d for ARM PMU counters\n",
			       irq);
			armpmu_release_hardware(armpmu);
			return err;
		}

		/* Remember that CPU i's IRQ is ours so release can free it. */
		cpumask_set_cpu(i, &armpmu->active_irqs);
	}

	return 0;
}
min(pmu_device->num_resources, num_possible_cpus()); + if (irqs < 1) { + pr_err("no irqs for PMUs defined\n"); + return -ENODEV; + } + + for (i = 0; i < irqs; ++i) { + err = 0; + irq = platform_get_irq(pmu_device, i); + if (irq < 0) + continue; + + /* + * If we have a single PMU interrupt that we can't shift, + * assume that we're running on a uniprocessor machine and + * continue. Otherwise, continue without this interrupt. + */ + if (irq_set_affinity(irq, cpumask_of(i)) && irqs > 1) { + pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n", + irq, i); + continue; + } + + err = request_irq(irq, armpmu->handle_irq, + IRQF_NOBALANCING, + "arm-pmu", armpmu); + if (err) { + pr_err("unable to request IRQ%d for ARM PMU counters\n", + irq); + armpmu_release_hardware(armpmu); + return err; + } + + cpumask_set_cpu(i, &armpmu->active_irqs); + } + + return 0; +} + +static void +hw_perf_event_destroy(struct perf_event *event) +{ + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + atomic_t *active_events = &armpmu->active_events; + struct mutex *pmu_reserve_mutex = &armpmu->reserve_mutex; + + if (atomic_dec_and_mutex_lock(active_events, pmu_reserve_mutex)) { + armpmu_release_hardware(armpmu); + mutex_unlock(pmu_reserve_mutex); + } +} + +static int +event_requires_mode_exclusion(struct perf_event_attr *attr) +{ + return attr->exclude_idle || attr->exclude_user || + attr->exclude_kernel || attr->exclude_hv; +} + +static int +__hw_perf_event_init(struct perf_event *event) +{ + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + int mapping, err; + + mapping = armpmu->map_event(event); + + if (mapping < 0) { + pr_debug("event %x:%llx not supported\n", event->attr.type, + event->attr.config); + return mapping; + } + + /* + * We don't assign an index until we actually place the event onto + * hardware. Use -1 to signify that we haven't decided where to put it + * yet. 
For SMP systems, each core has it's own PMU so we can't do any + * clever allocation or constraints checking at this point. + */ + hwc->idx = -1; + hwc->config_base = 0; + hwc->config = 0; + hwc->event_base = 0; + + /* + * Check whether we need to exclude the counter from certain modes. + */ + if ((!armpmu->set_event_filter || + armpmu->set_event_filter(hwc, &event->attr)) && + event_requires_mode_exclusion(&event->attr)) { + pr_debug("ARM performance counters do not support mode exclusion\n"); + return -EPERM; + } + + /* + * Store the event encoding into the config_base field. + */ + hwc->config_base |= (unsigned long)mapping; + + if (!hwc->sample_period) { + /* + * For non-sampling runs, limit the sample_period to half + * of the counter width. That way, the new counter value + * is far less likely to overtake the previous one unless + * you have some serious IRQ latency issues. + */ + hwc->sample_period = armpmu->max_period >> 1; + hwc->last_period = hwc->sample_period; + local64_set(&hwc->period_left, hwc->sample_period); + } + + err = 0; + if (event->group_leader != event) { + err = validate_group(event); + if (err) + return -EINVAL; + } + + return err; +} + +static int armpmu_event_init(struct perf_event *event) +{ + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + int err = 0; + atomic_t *active_events = &armpmu->active_events; + + if (armpmu->map_event(event) == -ENOENT) + return -ENOENT; + + event->destroy = hw_perf_event_destroy; + + if (!atomic_inc_not_zero(active_events)) { + mutex_lock(&armpmu->reserve_mutex); + if (atomic_read(active_events) == 0) + err = armpmu_reserve_hardware(armpmu); + + if (!err) + atomic_inc(active_events); + mutex_unlock(&armpmu->reserve_mutex); + } + + if (err) + return err; + + err = __hw_perf_event_init(event); + if (err) + hw_perf_event_destroy(event); + + return err; +} + +static void armpmu_enable(struct pmu *pmu) +{ + struct arm_pmu *armpmu = to_arm_pmu(pmu); + struct pmu_hw_events *hw_events = armpmu->get_hw_events(); 
/* perf ->pmu_disable() callback: unconditionally stop the PMU. */
static void armpmu_disable(struct pmu *pmu)
{
	struct arm_pmu *armpmu = to_arm_pmu(pmu);
	armpmu->stop();
}

/*
 * Reset the bookkeeping of @armpmu (no active events, fresh reserve_mutex)
 * and install the generic armpmu_* callbacks into its embedded struct pmu.
 */
static void __init armpmu_init(struct arm_pmu *armpmu)
{
	atomic_set(&armpmu->active_events, 0);
	mutex_init(&armpmu->reserve_mutex);

	armpmu->pmu = (struct pmu) {
		.pmu_enable	= armpmu_enable,
		.pmu_disable	= armpmu_disable,
		.event_init	= armpmu_event_init,
		.add		= armpmu_add,
		.del		= armpmu_del,
		.start		= armpmu_start,
		.stop		= armpmu_stop,
		.read		= armpmu_read,
	};
}

/*
 * Initialise @armpmu and register it with the perf core under @name and
 * @type. Returns whatever perf_pmu_register() returns.
 */
int __init armpmu_register(struct arm_pmu *armpmu, char *name, int type)
{
	armpmu_init(armpmu);
	return perf_pmu_register(&armpmu->pmu, name, type);
}

/*
 * ARMv8 PMUv3 Performance Events handling code.
 * Common event types.
 *
 * The values are the event numbers stored in the low bits of config_base
 * (see ARMV8_EVTYPE_EVENT).
 */
enum armv8_pmuv3_perf_types {
	/* Required events. */
	ARMV8_PMUV3_PERFCTR_PMNC_SW_INCR			= 0x00,
	ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL			= 0x03,
	ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS			= 0x04,
	ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED			= 0x10,
	ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES			= 0x11,
	ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED			= 0x12,

	/* At least one of the following is required. */
	ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED			= 0x08,
	ARMV8_PMUV3_PERFCTR_OP_SPEC				= 0x1B,

	/* Common architectural events. */
	ARMV8_PMUV3_PERFCTR_MEM_READ				= 0x06,
	ARMV8_PMUV3_PERFCTR_MEM_WRITE				= 0x07,
	ARMV8_PMUV3_PERFCTR_EXC_TAKEN				= 0x09,
	ARMV8_PMUV3_PERFCTR_EXC_EXECUTED			= 0x0A,
	ARMV8_PMUV3_PERFCTR_CID_WRITE				= 0x0B,
	ARMV8_PMUV3_PERFCTR_PC_WRITE				= 0x0C,
	ARMV8_PMUV3_PERFCTR_PC_IMM_BRANCH			= 0x0D,
	ARMV8_PMUV3_PERFCTR_PC_PROC_RETURN			= 0x0E,
	ARMV8_PMUV3_PERFCTR_MEM_UNALIGNED_ACCESS		= 0x0F,
	ARMV8_PMUV3_PERFCTR_TTBR_WRITE				= 0x1C,

	/* Common microarchitectural events. */
	ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL			= 0x01,
	ARMV8_PMUV3_PERFCTR_ITLB_REFILL				= 0x02,
	ARMV8_PMUV3_PERFCTR_DTLB_REFILL				= 0x05,
	ARMV8_PMUV3_PERFCTR_MEM_ACCESS				= 0x13,
	ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS			= 0x14,
	ARMV8_PMUV3_PERFCTR_L1_DCACHE_WB			= 0x15,
	ARMV8_PMUV3_PERFCTR_L2_CACHE_ACCESS			= 0x16,
	ARMV8_PMUV3_PERFCTR_L2_CACHE_REFILL			= 0x17,
	ARMV8_PMUV3_PERFCTR_L2_CACHE_WB				= 0x18,
	ARMV8_PMUV3_PERFCTR_BUS_ACCESS				= 0x19,
	ARMV8_PMUV3_PERFCTR_MEM_ERROR				= 0x1A,
	ARMV8_PMUV3_PERFCTR_BUS_CYCLES				= 0x1D,
};
/*
 * PMUv3 HW events mapping.
 *
 * Indexed by the generic PERF_COUNT_HW_* id; entries set to
 * HW_OP_UNSUPPORTED have no PMUv3 event assigned in this table.
 */
static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = {
	[PERF_COUNT_HW_CPU_CYCLES]		= ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES,
	[PERF_COUNT_HW_INSTRUCTIONS]		= ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED,
	[PERF_COUNT_HW_CACHE_REFERENCES]	= ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
	[PERF_COUNT_HW_CACHE_MISSES]		= ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= HW_OP_UNSUPPORTED,
	[PERF_COUNT_HW_BRANCH_MISSES]		= ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
	[PERF_COUNT_HW_BUS_CYCLES]		= HW_OP_UNSUPPORTED,
	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= HW_OP_UNSUPPORTED,
	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND]	= HW_OP_UNSUPPORTED,
};

/*
 * PMUv3 cache events mapping, indexed [cache][operation][result].
 *
 * Only the L1 data cache (read/write access and refill) and the branch
 * predictor (read/write predicted and mispredicted) have events assigned;
 * every other combination is CACHE_OP_UNSUPPORTED. Read and write share the
 * same event number wherever an event is assigned.
 */
static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
						[PERF_COUNT_HW_CACHE_OP_MAX]
						[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
	[C(L1D)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
			[C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
			[C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	[C(L1I)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	[C(LL)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	[C(DTLB)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	[C(ITLB)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	[C(BPU)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED,
			[C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED,
			[C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	[C(NODE)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
};
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, +}; + +/* + * Perf Events' indices + */ +#define ARMV8_IDX_CYCLE_COUNTER 0 +#define ARMV8_IDX_COUNTER0 1 +#define ARMV8_IDX_COUNTER_LAST (ARMV8_IDX_CYCLE_COUNTER + cpu_pmu->num_events - 1) + +#define ARMV8_MAX_COUNTERS 32 +#define ARMV8_COUNTER_MASK (ARMV8_MAX_COUNTERS - 1) + +/* + * ARMv8 low level PMU access + */ + +/* + * Perf Event to low level counters mapping + */ +#define ARMV8_IDX_TO_COUNTER(x) \ + (((x) - ARMV8_IDX_COUNTER0) & ARMV8_COUNTER_MASK) + +/* + * Per-CPU PMCR: config reg + */ +#define ARMV8_PMCR_E (1 << 0) /* Enable all counters */ +#define ARMV8_PMCR_P (1 << 1) /* Reset all counters */ +#define ARMV8_PMCR_C (1 << 2) /* Cycle counter reset */ +#define ARMV8_PMCR_D (1 << 3) /* CCNT counts every 64th cpu cycle */ +#define ARMV8_PMCR_X (1 << 4) /* Export to ETM */ +#define ARMV8_PMCR_DP (1 << 5) /* Disable CCNT if non-invasive debug*/ +#define ARMV8_PMCR_N_SHIFT 11 /* Number of counters supported */ +#define ARMV8_PMCR_N_MASK 0x1f +#define ARMV8_PMCR_MASK 0x3f /* Mask for writable bits */ + +/* + * PMOVSR: counters overflow flag status reg + */ +#define ARMV8_OVSR_MASK 0xffffffff /* Mask for writable bits */ +#define ARMV8_OVERFLOWED_MASK ARMV8_OVSR_MASK + +/* + * PMXEVTYPER: Event selection reg + */ +#define ARMV8_EVTYPE_MASK 0xc80000ff /* Mask for writable bits */ +#define ARMV8_EVTYPE_EVENT 0xff /* Mask for EVENT bits */ + +/* + * Event filters for PMUv3 + */ +#define ARMV8_EXCLUDE_EL1 (1 << 31) +#define ARMV8_EXCLUDE_EL0 (1 << 30) +#define ARMV8_INCLUDE_EL2 (1 << 27) + +static inline u32 armv8pmu_pmcr_read(void) +{ + u32 val; + asm volatile("mrs %0, pmcr_el0" : "=r" (val)); + return val; +} + +static inline void armv8pmu_pmcr_write(u32 val) +{ + val &= ARMV8_PMCR_MASK; + isb(); + asm volatile("msr pmcr_el0, %0" :: "r" (val)); +} + +static inline int armv8pmu_has_overflowed(u32 pmovsr) +{ + return pmovsr & ARMV8_OVERFLOWED_MASK; +} + +static inline int armv8pmu_counter_valid(int idx) +{ + return 
/*
 * Test the overflow flag of counter @idx in a previously sampled overflow
 * status value @pmnc. An invalid @idx is logged and reported as "not
 * overflowed" (0).
 */
static inline int armv8pmu_counter_has_overflowed(u32 pmnc, int idx)
{
	int ret = 0;
	u32 counter;

	if (!armv8pmu_counter_valid(idx)) {
		pr_err("CPU%u checking wrong counter %d overflow status\n",
			smp_processor_id(), idx);
	} else {
		counter = ARMV8_IDX_TO_COUNTER(idx);
		ret = pmnc & BIT(counter);
	}

	return ret;
}

/*
 * Select counter @idx via PMSELR_EL0 so that the following PMXEV* register
 * access targets it. Returns @idx on success, -EINVAL for an invalid index.
 */
static inline int armv8pmu_select_counter(int idx)
{
	u32 counter;

	if (!armv8pmu_counter_valid(idx)) {
		pr_err("CPU%u selecting wrong PMNC counter %d\n",
			smp_processor_id(), idx);
		return -EINVAL;
	}

	counter = ARMV8_IDX_TO_COUNTER(idx);
	asm volatile("msr pmselr_el0, %0" :: "r" (counter));
	/* Synchronise before the caller touches the selected counter. */
	isb();

	return idx;
}

/*
 * Read the current value of counter @idx: PMCCNTR_EL0 for the cycle
 * counter, otherwise PMXEVCNTR_EL0 after selecting the counter. Returns 0
 * (after logging) for an invalid index.
 */
static inline u32 armv8pmu_read_counter(int idx)
{
	u32 value = 0;

	if (!armv8pmu_counter_valid(idx))
		pr_err("CPU%u reading wrong counter %d\n",
			smp_processor_id(), idx);
	else if (idx == ARMV8_IDX_CYCLE_COUNTER)
		asm volatile("mrs %0, pmccntr_el0" : "=r" (value));
	else if (armv8pmu_select_counter(idx) == idx)
		asm volatile("mrs %0, pmxevcntr_el0" : "=r" (value));

	return value;
}

/*
 * Write @value to counter @idx; the register choice mirrors
 * armv8pmu_read_counter(). An invalid index is logged and nothing is
 * written.
 */
static inline void armv8pmu_write_counter(int idx, u32 value)
{
	if (!armv8pmu_counter_valid(idx))
		pr_err("CPU%u writing wrong counter %d\n",
			smp_processor_id(), idx);
	else if (idx == ARMV8_IDX_CYCLE_COUNTER)
		asm volatile("msr pmccntr_el0, %0" :: "r" (value));
	else if (armv8pmu_select_counter(idx) == idx)
		asm volatile("msr pmxevcntr_el0, %0" :: "r" (value));
}

/*
 * Program the event type of counter @idx. Only the bits in
 * ARMV8_EVTYPE_MASK of @val are written; nothing is written when the
 * counter cannot be selected.
 */
static inline void armv8pmu_write_evtype(int idx, u32 val)
{
	if (armv8pmu_select_counter(idx) == idx) {
		val &= ARMV8_EVTYPE_MASK;
		asm volatile("msr pmxevtyper_el0, %0" :: "r" (val));
	}
}

/*
 * Set the enable bit of counter @idx in PMCNTENSET_EL0.
 * Returns @idx on success, -EINVAL for an invalid index.
 */
static inline int armv8pmu_enable_counter(int idx)
{
	u32 counter;

	if (!armv8pmu_counter_valid(idx)) {
		pr_err("CPU%u enabling wrong PMNC counter %d\n",
			smp_processor_id(), idx);
		return -EINVAL;
	}

	counter = ARMV8_IDX_TO_COUNTER(idx);
	asm volatile("msr pmcntenset_el0, %0" :: "r" (BIT(counter)));
	return idx;
}
/*
 * Clear the enable bit of counter @idx via PMCNTENCLR_EL0.
 * Returns @idx on success, -EINVAL for an invalid index.
 */
static inline int armv8pmu_disable_counter(int idx)
{
	u32 counter;

	if (!armv8pmu_counter_valid(idx)) {
		pr_err("CPU%u disabling wrong PMNC counter %d\n",
			smp_processor_id(), idx);
		return -EINVAL;
	}

	counter = ARMV8_IDX_TO_COUNTER(idx);
	asm volatile("msr pmcntenclr_el0, %0" :: "r" (BIT(counter)));
	return idx;
}

/*
 * Enable the overflow interrupt of counter @idx via PMINTENSET_EL1.
 * Returns @idx on success, -EINVAL for an invalid index.
 */
static inline int armv8pmu_enable_intens(int idx)
{
	u32 counter;

	if (!armv8pmu_counter_valid(idx)) {
		pr_err("CPU%u enabling wrong PMNC counter IRQ enable %d\n",
			smp_processor_id(), idx);
		return -EINVAL;
	}

	counter = ARMV8_IDX_TO_COUNTER(idx);
	asm volatile("msr pmintenset_el1, %0" :: "r" (BIT(counter)));
	return idx;
}

/*
 * Disable the overflow interrupt of counter @idx via PMINTENCLR_EL1 and
 * clear its overflow flag, with an ISB after each register write.
 * Returns @idx on success, -EINVAL for an invalid index.
 */
static inline int armv8pmu_disable_intens(int idx)
{
	u32 counter;

	if (!armv8pmu_counter_valid(idx)) {
		pr_err("CPU%u disabling wrong PMNC counter IRQ enable %d\n",
			smp_processor_id(), idx);
		return -EINVAL;
	}

	counter = ARMV8_IDX_TO_COUNTER(idx);
	asm volatile("msr pmintenclr_el1, %0" :: "r" (BIT(counter)));
	isb();
	/* Clear the overflow flag in case an interrupt is pending. */
	asm volatile("msr pmovsclr_el0, %0" :: "r" (BIT(counter)));
	isb();
	return idx;
}

/*
 * Read the overflow flags from PMOVSCLR_EL0 and write the same value back
 * to clear them. Returns the flags that were read.
 */
static inline u32 armv8pmu_getreset_flags(void)
{
	u32 value;

	/* Read */
	asm volatile("mrs %0, pmovsclr_el0" : "=r" (value));

	/* Write to clear flags */
	value &= ARMV8_OVSR_MASK;
	asm volatile("msr pmovsclr_el0, %0" :: "r" (value));

	return value;
}

/*
 * Backend ->enable() hook: program and switch on counter @idx for @hwc.
 *
 * Under this CPU's pmu_lock with interrupts off, the counter is disabled,
 * its event type is written from hwc->config_base, its overflow interrupt
 * is enabled and finally the counter itself is enabled.
 */
static void armv8pmu_enable_event(struct hw_perf_event *hwc, int idx)
{
	unsigned long flags;
	struct pmu_hw_events *events = cpu_pmu->get_hw_events();

	/*
	 * Enable counter and interrupt, and set the counter to count
	 * the event that we're interested in.
	 */
	raw_spin_lock_irqsave(&events->pmu_lock, flags);

	/*
	 * Disable counter
	 */
	armv8pmu_disable_counter(idx);

	/*
	 * Set event (if destined for PMNx counters).
	 */
	armv8pmu_write_evtype(idx, hwc->config_base);

	/*
	 * Enable interrupt for this counter
	 */
	armv8pmu_enable_intens(idx);

	/*
	 * Enable counter
	 */
	armv8pmu_enable_counter(idx);

	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
/*
 * Backend ->disable() hook: switch off counter @idx and its overflow
 * interrupt under this CPU's pmu_lock. @hwc is not used here
 * (armv8pmu_reset() passes NULL).
 */
static void armv8pmu_disable_event(struct hw_perf_event *hwc, int idx)
{
	unsigned long flags;
	struct pmu_hw_events *events = cpu_pmu->get_hw_events();

	/*
	 * Disable counter and interrupt
	 */
	raw_spin_lock_irqsave(&events->pmu_lock, flags);

	/*
	 * Disable counter
	 */
	armv8pmu_disable_counter(idx);

	/*
	 * Disable interrupt for this counter
	 */
	armv8pmu_disable_intens(idx);

	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}

/*
 * PMU overflow interrupt handler.
 *
 * Samples and clears the overflow flags, then for every counter slot on
 * this CPU that has an event and whose flag was set: updates the event
 * count, re-arms the period and reports the overflow to the perf core. The
 * counter is disabled when perf_event_overflow() returns non-zero.
 *
 * Returns IRQ_NONE when no overflow flag was set, IRQ_HANDLED otherwise.
 */
static irqreturn_t armv8pmu_handle_irq(int irq_num, void *dev)
{
	u32 pmovsr;
	struct perf_sample_data data;
	struct pmu_hw_events *cpuc;
	struct pt_regs *regs;
	int idx;

	/*
	 * Get and reset the IRQ flags
	 */
	pmovsr = armv8pmu_getreset_flags();

	/*
	 * Did an overflow occur?
	 */
	if (!armv8pmu_has_overflowed(pmovsr))
		return IRQ_NONE;

	/*
	 * Handle the counter(s) overflow(s)
	 */
	regs = get_irq_regs();

	cpuc = &__get_cpu_var(cpu_hw_events);
	for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
		struct perf_event *event = cpuc->events[idx];
		struct hw_perf_event *hwc;

		/* Ignore if we don't have an event. */
		if (!event)
			continue;

		/*
		 * We have a single interrupt for all counters. Check that
		 * each counter has overflowed before we process it.
		 */
		if (!armv8pmu_counter_has_overflowed(pmovsr, idx))
			continue;

		hwc = &event->hw;
		armpmu_event_update(event, hwc, idx);
		perf_sample_data_init(&data, 0, hwc->last_period);
		/* Skip the overflow report when set_period returns zero. */
		if (!armpmu_event_set_period(event, hwc, idx))
			continue;

		if (perf_event_overflow(event, &data, regs))
			cpu_pmu->disable(hwc, idx);
	}

	/*
	 * Handle the pending perf events.
	 *
	 * Note: this call *must* be run with interrupts disabled. For
	 * platforms that can have the PMU interrupts raised as an NMI, this
	 * will not work.
	 */
	irq_work_run();

	return IRQ_HANDLED;
}
For + * platforms that can have the PMU interrupts raised as an NMI, this + * will not work. + */ + irq_work_run(); + + return IRQ_HANDLED; +} + +static void armv8pmu_start(void) +{ + unsigned long flags; + struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + + raw_spin_lock_irqsave(&events->pmu_lock, flags); + /* Enable all counters */ + armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMCR_E); + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); +} + +static void armv8pmu_stop(void) +{ + unsigned long flags; + struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + + raw_spin_lock_irqsave(&events->pmu_lock, flags); + /* Disable all counters */ + armv8pmu_pmcr_write(armv8pmu_pmcr_read() & ~ARMV8_PMCR_E); + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); +} + +static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc, + struct hw_perf_event *event) +{ + int idx; + unsigned long evtype = event->config_base & ARMV8_EVTYPE_EVENT; + + /* Always place a cycle counter into the cycle counter. */ + if (evtype == ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES) { + if (test_and_set_bit(ARMV8_IDX_CYCLE_COUNTER, cpuc->used_mask)) + return -EAGAIN; + + return ARMV8_IDX_CYCLE_COUNTER; + } + + /* + * For anything other than a cycle counter, try and use + * the events counters + */ + for (idx = ARMV8_IDX_COUNTER0; idx < cpu_pmu->num_events; ++idx) { + if (!test_and_set_bit(idx, cpuc->used_mask)) + return idx; + } + + /* The counters are all in use. */ + return -EAGAIN; +} + +/* + * Add an event filter to a given event. This will only work for PMUv2 PMUs. 
/*
 * Add an event filter to a given event. This will only work for PMUv2 PMUs.
 *
 * NOTE(review): the "PMUv2" wording looks inherited from the 32-bit ARM
 * driver; this backend registers itself as "arm/armv8-pmuv3" - confirm and
 * reword.
 *
 * Translates the exclude_* attributes into PMXEVTYPER filter bits and
 * stores them in event->config_base, replacing its previous contents.
 * EL2 counting is included unless exclude_hv is set.
 *
 * Returns 0 on success, -EPERM when exclude_idle is requested.
 */
static int armv8pmu_set_event_filter(struct hw_perf_event *event,
				     struct perf_event_attr *attr)
{
	unsigned long config_base = 0;

	if (attr->exclude_idle)
		return -EPERM;
	if (attr->exclude_user)
		config_base |= ARMV8_EXCLUDE_EL0;
	if (attr->exclude_kernel)
		config_base |= ARMV8_EXCLUDE_EL1;
	if (!attr->exclude_hv)
		config_base |= ARMV8_INCLUDE_EL2;

	/*
	 * Install the filter into config_base as this is used to
	 * construct the event type.
	 */
	event->config_base = config_base;

	return 0;
}

/*
 * Bring the local CPU's PMU into a known state: disable every counter and
 * its interrupt, reset the event and cycle counters through PMCR, and write
 * 0 to PMUSERENR_EL0. @info is unused; the signature matches what
 * on_each_cpu() expects.
 */
static void armv8pmu_reset(void *info)
{
	u32 idx, nb_cnt = cpu_pmu->num_events;

	/* The counter and interrupt enable registers are unknown at reset. */
	for (idx = ARMV8_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx)
		armv8pmu_disable_event(NULL, idx);

	/* Initialize & Reset PMNC: C and P bits. */
	armv8pmu_pmcr_write(ARMV8_PMCR_P | ARMV8_PMCR_C);

	/* Disable access from userspace. */
	asm volatile("msr pmuserenr_el0, %0" :: "r" (0));
}

/*
 * ->map_event() hook: resolve @event through the PMUv3 generic and cache
 * tables, passing 0xFF as the mask argument of map_cpu_event().
 */
static int armv8_pmuv3_map_event(struct perf_event *event)
{
	return map_cpu_event(event, &armv8_pmuv3_perf_map,
				&armv8_pmuv3_perf_cache_map, 0xFF);
}

/*
 * The ARMv8 backend instance. name, map_event, num_events and
 * set_event_filter are filled in by armv8_pmuv3_pmu_init(). max_period is
 * 2^32 - 1.
 */
static struct arm_pmu armv8pmu = {
	.handle_irq		= armv8pmu_handle_irq,
	.enable			= armv8pmu_enable_event,
	.disable		= armv8pmu_disable_event,
	.read_counter		= armv8pmu_read_counter,
	.write_counter		= armv8pmu_write_counter,
	.get_event_idx		= armv8pmu_get_event_idx,
	.start			= armv8pmu_start,
	.stop			= armv8pmu_stop,
	.reset			= armv8pmu_reset,
	.max_period		= (1LLU << 32) - 1,
};

/*
 * Number of counter slots on this CPU: the event-counter count from the
 * PMCR N field plus one for the cycle counter.
 */
static u32 __init armv8pmu_read_num_pmnc_events(void)
{
	u32 nb_cnt;

	/* Read the nb of CNTx counters supported from PMNC */
	nb_cnt = (armv8pmu_pmcr_read() >> ARMV8_PMCR_N_SHIFT) & ARMV8_PMCR_N_MASK;

	/* Add the CPU cycles counter and return */
	return nb_cnt + 1;
}

/* Complete the armv8pmu descriptor for a PMUv3 implementation and return it. */
static struct arm_pmu *__init armv8_pmuv3_pmu_init(void)
{
	armv8pmu.name			= "arm/armv8-pmuv3";
	armv8pmu.map_event		= armv8_pmuv3_map_event;
	armv8pmu.num_events		= armv8pmu_read_num_pmnc_events();
	armv8pmu.set_event_filter	= armv8pmu_set_event_filter;
	return &armv8pmu;
}
/*
 * Ensure the PMU has sane values out of reset.
 * This requires SMP to be available, so exists as a separate initcall.
 *
 * Runs cpu_pmu->reset on every CPU (waiting for completion) when a PMU was
 * detected and provides a reset hook; otherwise does nothing and returns 0.
 */
static int __init
cpu_pmu_reset(void)
{
	if (cpu_pmu && cpu_pmu->reset)
		return on_each_cpu(cpu_pmu->reset, NULL, 1);
	return 0;
}
arch_initcall(cpu_pmu_reset);

/*
 * PMU platform driver and devicetree bindings.
 */
static struct of_device_id armpmu_of_device_ids[] = {
	{.compatible = "arm,armv8-pmuv3"},
	{},
};

/*
 * Platform probe: remember the platform device so its IRQ resources can be
 * looked up later by armpmu_reserve_hardware(). Fails with -ENODEV when no
 * PMU was detected by init_hw_perf_events().
 */
static int armpmu_device_probe(struct platform_device *pdev)
{
	if (!cpu_pmu)
		return -ENODEV;

	cpu_pmu->plat_device = pdev;
	return 0;
}

static struct platform_driver armpmu_driver = {
	.driver		= {
		.name	= "arm-pmu",
		.of_match_table = armpmu_of_device_ids,
	},
	.probe		= armpmu_device_probe,
};

/* Register the "arm-pmu" platform driver at device_initcall time. */
static int __init register_pmu_driver(void)
{
	return platform_driver_register(&armpmu_driver);
}
device_initcall(register_pmu_driver);

/* ->get_hw_events() hook: the calling CPU's pmu_hw_events instance. */
static struct pmu_hw_events *armpmu_get_cpu_events(void)
{
	return &__get_cpu_var(cpu_hw_events);
}

/*
 * Wire each possible CPU's pmu_hw_events to its per-CPU events[] array and
 * used_mask bitmap, initialise its lock, and install the per-CPU accessor
 * on @armpmu.
 */
static void __init cpu_pmu_init(struct arm_pmu *armpmu)
{
	int cpu;
	for_each_possible_cpu(cpu) {
		struct pmu_hw_events *events = &per_cpu(cpu_hw_events, cpu);
		events->events = per_cpu(hw_events, cpu);
		events->used_mask = per_cpu(used_mask, cpu);
		raw_spin_lock_init(&events->pmu_lock);
	}
	armpmu->get_hw_events = armpmu_get_cpu_events;
}

/*
 * Detect the PMU from ID_AA64DFR0_EL1 bits [11:8] and register it with
 * perf as "cpu". Only the value 1 (PMUv3) is recognised; anything else
 * leaves cpu_pmu unset and just logs that no hardware support exists.
 * Always returns 0.
 *
 * NOTE(review): the return value of armpmu_register() is ignored, so a
 * failed registration is still logged as "enabled" - confirm whether that
 * is intended.
 */
static int __init init_hw_perf_events(void)
{
	u64 dfr = read_cpuid(ID_AA64DFR0_EL1);

	switch ((dfr >> 8) & 0xf) {
	case 0x1:	/* PMUv3 */
		cpu_pmu = armv8_pmuv3_pmu_init();
		break;
	}

	if (cpu_pmu) {
		pr_info("enabled with %s PMU driver, %d counters available\n",
			cpu_pmu->name, cpu_pmu->num_events);
		cpu_pmu_init(cpu_pmu);
		armpmu_register(cpu_pmu, "cpu", PERF_TYPE_RAW);
	} else {
		pr_info("no hardware support available\n");
	}

	return 0;
}
+early_initcall(init_hw_perf_events); + +/* + * Callchain handling code. + */ +struct frame_tail { + struct frame_tail __user *fp; + unsigned long lr; +} __attribute__((packed)); + +/* + * Get the return address for a single stackframe and return a pointer to the + * next frame tail. + */ +static struct frame_tail __user * +user_backtrace(struct frame_tail __user *tail, + struct perf_callchain_entry *entry) +{ + struct frame_tail buftail; + unsigned long err; + + /* Also check accessibility of one struct frame_tail beyond */ + if (!access_ok(VERIFY_READ, tail, sizeof(buftail))) + return NULL; + + pagefault_disable(); + err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail)); + pagefault_enable(); + + if (err) + return NULL; + + perf_callchain_store(entry, buftail.lr); + + /* + * Frame pointers should strictly progress back up the stack + * (towards higher addresses). + */ + if (tail >= buftail.fp) + return NULL; + + return buftail.fp; +} + +void perf_callchain_user(struct perf_callchain_entry *entry, + struct pt_regs *regs) +{ + struct frame_tail __user *tail; + + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + /* We don't support guest os callchain now */ + return; + } + + perf_callchain_store(entry, regs->pc); + tail = (struct frame_tail __user *)regs->regs[29]; + + while (entry->nr < PERF_MAX_STACK_DEPTH && + tail && !((unsigned long)tail & 0xf)) + tail = user_backtrace(tail, entry); +} + +/* + * Gets called by walk_stackframe() for every stackframe. This will be called + * whist unwinding the stackframe and is like a subroutine return so we use + * the PC. 
/*
 * Gets called by walk_stackframe() for every stackframe. This will be called
 * whilst unwinding the stackframe and is like a subroutine return so we use
 * the PC.
 *
 * @data is the perf_callchain_entry being filled. Always returns 0.
 */
static int callchain_trace(struct stackframe *frame, void *data)
{
	struct perf_callchain_entry *entry = data;
	perf_callchain_store(entry, frame->pc);
	return 0;
}

/*
 * Build the kernel part of a perf callchain by walking the stack from the
 * fp (x29), sp and pc found in @regs. Nothing is recorded while running a
 * guest.
 */
void perf_callchain_kernel(struct perf_callchain_entry *entry,
			   struct pt_regs *regs)
{
	struct stackframe frame;

	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
		/* We don't support guest os callchain now */
		return;
	}

	frame.fp = regs->regs[29];
	frame.sp = regs->sp;
	frame.pc = regs->pc;
	walk_stackframe(&frame, callchain_trace, entry);
}

/*
 * Instruction pointer to attribute a sample to: the guest IP reported by
 * the registered guest callbacks when in a guest, otherwise the PC from
 * @regs.
 */
unsigned long perf_instruction_pointer(struct pt_regs *regs)
{
	if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
		return perf_guest_cbs->get_guest_ip();

	return instruction_pointer(regs);
}

/*
 * PERF_RECORD_MISC_* flag describing where a sample was taken: guest user,
 * guest kernel, host user or host kernel. Exactly one flag is set.
 */
unsigned long perf_misc_flags(struct pt_regs *regs)
{
	int misc = 0;

	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
		if (perf_guest_cbs->is_user_mode())
			misc |= PERF_RECORD_MISC_GUEST_USER;
		else
			misc |= PERF_RECORD_MISC_GUEST_KERNEL;
	} else {
		if (user_mode(regs))
			misc |= PERF_RECORD_MISC_USER;
		else
			misc |= PERF_RECORD_MISC_KERNEL;
	}

	return misc;
}
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
new file mode 100644
index 000000000..ad14dbc07
--- /dev/null
+++ b/arch/arm64/kernel/process.c
@@ -0,0 +1,532 @@
/*
 * Based on arch/arm/kernel/process.c
 *
 * Original Copyright (C) 1995  Linus Torvalds
 * Copyright (C) 1996-2000 Russell King - Converted to ARM.
 * Copyright (C) 2012 ARM Ltd.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <stdarg.h>

#include <linux/compat.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/user.h>
#include <linux/delay.h>
#include <linux/reboot.h>
#include <linux/interrupt.h>
#include <linux/kallsyms.h>
#include <linux/init.h>
#include <linux/cpu.h>
#include <linux/elfcore.h>
#include <linux/pm.h>
#include <linux/tick.h>
#include <linux/utsname.h>
#include <linux/uaccess.h>
#include <linux/random.h>
#include <linux/hw_breakpoint.h>
#include <linux/cpuidle.h>
#include <linux/leds.h>
#include <linux/personality.h>
#include <linux/notifier.h>
#include <linux/mtk_ram_console.h>

#include <asm/compat.h>
#include <asm/cacheflush.h>
#include <asm/fpsimd.h>
#include <asm/mmu_context.h>
#include <asm/processor.h>
#include <asm/stacktrace.h>

/* Platform reset routine; used by machine_restart() as fallback. */
extern void arch_reset(char mode, const char *cmd);

/*
 * Prepare the CPU for a soft restart: set up the reboot mappings, then
 * flush the caches, turn the D-cache off and flush once more.
 */
static void setup_restart(void)
{
	/*
	 * Tell the mm system that we are going to reboot -
	 * we may need it to insert some 1:1 mappings so that
	 * soft boot works.
	 */
	setup_mm_for_reboot();

	/* Clean and invalidate caches */
	flush_cache_all();

	/* Turn D-cache off */
	cpu_cache_off();

	/* Push out any further dirty data, and ensure cache is empty */
	flush_cache_all();
}

/* Run setup_restart() and then hand @addr to cpu_reset(). */
void soft_restart(unsigned long addr)
{
	setup_restart();
	cpu_reset(addr);
}

/*
 * Function pointers to optional machine specific functions
 */
void (*pm_power_off)(void);
EXPORT_SYMBOL_GPL(pm_power_off);

void (*arm_pm_restart)(char str, const char *cmd);
EXPORT_SYMBOL_GPL(arm_pm_restart);
+ */ + +void (*arm_pm_idle)(void); + +static void default_idle(void) +{ + if (arm_pm_idle) + arm_pm_idle(); + else + cpu_do_idle(); + local_irq_enable(); +} + +void arch_cpu_idle_prepare(void) +{ + local_fiq_enable(); +} + +void arch_cpu_idle_enter(void) +{ + idle_notifier_call_chain(IDLE_START); + ledtrig_cpu(CPU_LED_IDLE_START); +#ifdef CONFIG_PL310_ERRATA_769419 + wmb(); +#endif +} + +void arch_cpu_idle_exit(void) +{ + ledtrig_cpu(CPU_LED_IDLE_END); + idle_notifier_call_chain(IDLE_END); +} + +#ifdef CONFIG_HOTPLUG_CPU +void arch_cpu_idle_dead(void) +{ + cpu_die(); +} +#endif + +/* + * Called from the core idle loop. + */ +void arch_cpu_idle(void) +{ + if (cpuidle_idle_call()) + default_idle(); +} + +void machine_shutdown(void) +{ +#ifdef CONFIG_SMP + smp_send_stop(); +#endif +} + +void machine_halt(void) +{ + machine_shutdown(); + while (1); +} + +extern int reboot_pid; +//static int reboot_pid = 0; + +void machine_power_off(void) +{ + struct task_struct *tsk; + /* Disable interrupts first */ + local_irq_disable(); + local_fiq_disable(); + + machine_shutdown(); + if(reboot_pid > 1) + { + tsk = find_task_by_vpid(reboot_pid); + if(tsk == NULL) + tsk = current; + dump_stack(); + } + else + { + tsk = current; + } + + if(tsk->real_parent) + { + if(tsk->real_parent->real_parent) + { + printk("machine_shutdown: start, Proess(%s:%d). father %s:%d. grandfather %s:%d.\n", + tsk->comm, tsk->pid,tsk->real_parent->comm,tsk->real_parent->pid, + tsk->real_parent->real_parent->comm,tsk->real_parent->real_parent->pid); + } + else + { + printk("machine_shutdown: start, Proess(%s:%d). 
father %s:%d.\n", + tsk->comm, tsk->pid,tsk->real_parent->comm,tsk->real_parent->pid); + } + } + else + { + printk("machine_shutdown: start, Proess(%s:%d)\n", tsk->comm, tsk->pid); + } + +#ifdef CONFIG_MTK_EMMC_SUPPORT + last_kmsg_store_to_emmc(); +#endif + if (pm_power_off) + pm_power_off(); +} + +void machine_restart(char *cmd) +{ + struct task_struct *tsk; + + /* Disable interrupts first */ + local_irq_disable(); + local_fiq_disable(); + + machine_shutdown(); + + if(reboot_pid > 1) + { + tsk = find_task_by_vpid(reboot_pid); + if(tsk == NULL) + tsk = current; + dump_stack(); + } + else + { + tsk = current; + } + + if(tsk->real_parent) + { + if(tsk->real_parent->real_parent) + { + printk("machine_shutdown: start, Proess(%s:%d). father %s:%d. grandfather %s:%d.\n", + tsk->comm, tsk->pid,tsk->real_parent->comm,tsk->real_parent->pid, + tsk->real_parent->real_parent->comm,tsk->real_parent->real_parent->pid); + } + else + { + printk("machine_shutdown: start, Proess(%s:%d). father %s:%d.\n", + tsk->comm, tsk->pid,tsk->real_parent->comm,tsk->real_parent->pid); + } + } + else + { + printk("machine_shutdown: start, Proess(%s:%d)\n", tsk->comm, tsk->pid); + } + + /* Now call the architecture specific reboot code. */ + if (arm_pm_restart) + arm_pm_restart('h', cmd); +#ifndef CONFIG_MTK_FPGA + else + arch_reset('h', cmd); +#endif + /* + * Whoops - the architecture was unable to reboot. 
+ */ + printk("Reboot failed -- System halted\n"); + while (1); +} + +/* + * dump a block of kernel memory from around the given address + */ +static void show_data(unsigned long addr, int nbytes, const char *name) +{ + int i, j; + int nlines; + u32 *p; + + /* + * don't attempt to dump non-kernel addresses or + * values that are probably just small negative numbers + */ + if (addr < PAGE_OFFSET || addr > -256UL) + return; + + printk("\n%s: %#lx:\n", name, addr); + + /* + * round address down to a 32 bit boundary + * and always dump a multiple of 32 bytes + */ + p = (u32 *)(addr & ~(sizeof(u32) - 1)); + nbytes += (addr & (sizeof(u32) - 1)); + nlines = (nbytes + 31) / 32; + + + for (i = 0; i < nlines; i++) { + /* + * just display low 16 bits of address to keep + * each line of the dump < 80 characters + */ + printk("%04lx ", (unsigned long)p & 0xffff); + for (j = 0; j < 8; j++) { + u32 data; + if (probe_kernel_address(p, data)) { + printk(" ********"); + } else { + printk(" %08x", data); + } + ++p; + } + printk("\n"); + } +} + +static void show_extra_register_data(struct pt_regs *regs, int nbytes) +{ + mm_segment_t fs; + unsigned int i; + + fs = get_fs(); + set_fs(KERNEL_DS); + show_data(regs->pc - nbytes, nbytes * 2, "PC"); + show_data(regs->regs[30] - nbytes, nbytes * 2, "LR"); + show_data(regs->sp - nbytes, nbytes * 2, "SP"); + for (i = 0; i < 30; i++) { + char name[4]; + snprintf(name, sizeof(name), "X%u", i); + show_data(regs->regs[i] - nbytes, nbytes * 2, name); + } + set_fs(fs); +} + +void __show_regs(struct pt_regs *regs) +{ + int i; + + show_regs_print_info(KERN_DEFAULT); + print_symbol("PC is at %s\n", instruction_pointer(regs)); + print_symbol("LR is at %s\n", regs->regs[30]); + printk("pc : [<%016llx>] lr : [<%016llx>] pstate: %08llx\n", + regs->pc, regs->regs[30], regs->pstate); + printk("sp : %016llx\n", regs->sp); + for (i = 29; i >= 0; i--) { + printk("x%-2d: %016llx ", i, regs->regs[i]); + if (i % 2 == 0) + printk("\n"); + } + if (!user_mode(regs)) + 
show_extra_register_data(regs, 128); + printk("\n"); +} + +void show_regs(struct pt_regs * regs) +{ + printk("\n"); + __show_regs(regs); +} + +/* + * Free current thread data structures etc.. + */ +void exit_thread(void) +{ +} + +static void tls_thread_flush(void) +{ + asm ("msr tpidr_el0, xzr"); + + if (is_compat_task()) { + current->thread.tp_value = 0; + + /* + * We need to ensure ordering between the shadow state and the + * hardware state, so that we don't corrupt the hardware state + * with a stale shadow state during context switch. + */ + barrier(); + asm ("msr tpidrro_el0, xzr"); + } +} + +void flush_thread(void) +{ + fpsimd_flush_thread(); + tls_thread_flush(); + flush_ptrace_hw_breakpoint(current); +} + +void release_thread(struct task_struct *dead_task) +{ +} + +int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) +{ + fpsimd_preserve_current_state(); + *dst = *src; + return 0; +} + +asmlinkage void ret_from_fork(void) asm("ret_from_fork"); + +int copy_thread(unsigned long clone_flags, unsigned long stack_start, + unsigned long stk_sz, struct task_struct *p) +{ + struct pt_regs *childregs = task_pt_regs(p); + unsigned long tls = p->thread.tp_value; + + memset(&p->thread.cpu_context, 0, sizeof(struct cpu_context)); + + if (likely(!(p->flags & PF_KTHREAD))) { + *childregs = *current_pt_regs(); + childregs->regs[0] = 0; + if (is_compat_thread(task_thread_info(p))) { + if (stack_start) + childregs->compat_sp = stack_start; + } else { + /* + * Read the current TLS pointer from tpidr_el0 as it may be + * out-of-sync with the saved value. + */ + asm("mrs %0, tpidr_el0" : "=r" (tls)); + if (stack_start) { + /* 16-byte aligned stack mandatory on AArch64 */ + if (stack_start & 15) + return -EINVAL; + childregs->sp = stack_start; + } + } + /* + * If a TLS pointer was passed to clone (4th argument), use it + * for the new thread. 
+ */ + if (clone_flags & CLONE_SETTLS) + tls = childregs->regs[3]; + } else { + memset(childregs, 0, sizeof(struct pt_regs)); + childregs->pstate = PSR_MODE_EL1h; + p->thread.cpu_context.x19 = stack_start; + p->thread.cpu_context.x20 = stk_sz; + } + p->thread.cpu_context.pc = (unsigned long)ret_from_fork; + p->thread.cpu_context.sp = (unsigned long)childregs; + p->thread.tp_value = tls; + + ptrace_hw_copy_thread(p); + + return 0; +} + +static void tls_thread_switch(struct task_struct *next) +{ + unsigned long tpidr, tpidrro; + + if (!is_compat_task()) { + asm("mrs %0, tpidr_el0" : "=r" (tpidr)); + current->thread.tp_value = tpidr; + } + + if (is_compat_thread(task_thread_info(next))) { + tpidr = 0; + tpidrro = next->thread.tp_value; + } else { + tpidr = next->thread.tp_value; + tpidrro = 0; + } + + asm( + " msr tpidr_el0, %0\n" + " msr tpidrro_el0, %1" + : : "r" (tpidr), "r" (tpidrro)); +} + +/* + * Thread switching. + */ +struct task_struct *__switch_to(struct task_struct *prev, + struct task_struct *next) +{ + struct task_struct *last; + + fpsimd_thread_switch(next); + tls_thread_switch(next); + hw_breakpoint_thread_switch(next); + contextidr_thread_switch(next); + + /* + * Complete any pending TLB or cache maintenance on this CPU in case + * the thread migrates to a different CPU. 
+ */ + dsb(); + + /* the actual thread switch */ + last = cpu_switch_to(prev, next); + + return last; +} + +unsigned long get_wchan(struct task_struct *p) +{ + struct stackframe frame; + int count = 0; + if (!p || p == current || p->state == TASK_RUNNING) + return 0; + + frame.fp = thread_saved_fp(p); + frame.sp = thread_saved_sp(p); + frame.pc = thread_saved_pc(p); + do { + int ret = unwind_frame(&frame); + if (ret < 0) + return 0; + if (!in_sched_functions(frame.pc)) + return frame.pc; + } while (count ++ < 16); + return 0; +} + +unsigned long arch_align_stack(unsigned long sp) +{ + if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) + sp -= get_random_int() & ~PAGE_MASK; + return sp & ~0xf; +} + +static unsigned long randomize_base(unsigned long base) +{ + unsigned long range_end = base + (STACK_RND_MASK << PAGE_SHIFT) + 1; + return randomize_range(base, range_end, 0) ? : base; +} + +unsigned long arch_randomize_brk(struct mm_struct *mm) +{ + return randomize_base(mm->brk); +} + +unsigned long randomize_et_dyn(unsigned long base) +{ + return randomize_base(base); +} diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c new file mode 100644 index 000000000..1500c9944 --- /dev/null +++ b/arch/arm64/kernel/psci.c @@ -0,0 +1,453 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * Copyright (C) 2013 ARM Limited + * + * Author: Will Deacon <will.deacon@arm.com> + */ + +#define pr_fmt(fmt) "psci: " fmt + +#include <linux/init.h> +#include <linux/of.h> +#include <linux/smp.h> +#include <linux/reboot.h> +#include <linux/pm.h> +#include <linux/delay.h> +#include <uapi/linux/psci.h> + +#include <asm/compiler.h> +#include <asm/cpu_ops.h> +#include <asm/errno.h> +#include <asm/psci.h> +#include <asm/smp_plat.h> +#include <asm/system_misc.h> + +#if 0 /* move to asm/psci.h for public export */ +#define PSCI_POWER_STATE_TYPE_STANDBY 0 +#define PSCI_POWER_STATE_TYPE_POWER_DOWN 1 + +struct psci_power_state { + u16 id; + u8 type; + u8 affinity_level; +}; + + +struct psci_operations { + int (*cpu_suspend)(struct psci_power_state state, + unsigned long entry_point); + int (*cpu_off)(struct psci_power_state state); + int (*cpu_on)(unsigned long cpuid, unsigned long entry_point); + int (*migrate)(unsigned long cpuid); + int (*affinity_info)(unsigned long target_affinity, + unsigned long lowest_affinity_level); + int (*migrate_info_type)(void); +}; +#endif //#if 0 /* move to asm/psci.h for public export */ + +struct psci_operations psci_ops; + +static int (*invoke_psci_fn)(u64, u64, u64, u64); +typedef int (*psci_initcall_t)(const struct device_node *); + +enum psci_function { + PSCI_FN_CPU_SUSPEND, + PSCI_FN_CPU_ON, + PSCI_FN_CPU_OFF, + PSCI_FN_MIGRATE, + PSCI_FN_AFFINITY_INFO, + PSCI_FN_MIGRATE_INFO_TYPE, + PSCI_FN_MAX, +}; + +static u32 psci_function_id[PSCI_FN_MAX]; + +static int psci_to_linux_errno(int errno) +{ + switch (errno) { + case PSCI_RET_SUCCESS: + return 0; + case PSCI_RET_NOT_SUPPORTED: + return -EOPNOTSUPP; + case PSCI_RET_INVALID_PARAMS: + return -EINVAL; + case PSCI_RET_DENIED: + return -EPERM; + }; + + return -EINVAL; +} + +static u32 psci_power_state_pack(struct psci_power_state state) +{ + return ((state.id << PSCI_0_2_POWER_STATE_ID_SHIFT) + & PSCI_0_2_POWER_STATE_ID_MASK) | + ((state.type << PSCI_0_2_POWER_STATE_TYPE_SHIFT) + & 
PSCI_0_2_POWER_STATE_TYPE_MASK) | + ((state.affinity_level << PSCI_0_2_POWER_STATE_AFFL_SHIFT) + & PSCI_0_2_POWER_STATE_AFFL_MASK); +} + +/* + * The following two functions are invoked via the invoke_psci_fn pointer + * and will not be inlined, allowing us to piggyback on the AAPCS. + */ +static noinline int __invoke_psci_fn_hvc(u64 function_id, u64 arg0, u64 arg1, + u64 arg2) +{ + asm volatile( + __asmeq("%0", "x0") + __asmeq("%1", "x1") + __asmeq("%2", "x2") + __asmeq("%3", "x3") + "hvc #0\n" + : "+r" (function_id) + : "r" (arg0), "r" (arg1), "r" (arg2)); + + return function_id; +} + +static noinline int __invoke_psci_fn_smc(u64 function_id, u64 arg0, u64 arg1, + u64 arg2) +{ + asm volatile( + __asmeq("%0", "x0") + __asmeq("%1", "x1") + __asmeq("%2", "x2") + __asmeq("%3", "x3") + "smc #0\n" + : "+r" (function_id) + : "r" (arg0), "r" (arg1), "r" (arg2)); + + return function_id; +} + +static int psci_get_version(void) +{ + int err; + + err = invoke_psci_fn(PSCI_0_2_FN_PSCI_VERSION, 0, 0, 0); + return err; +} + +static int psci_cpu_suspend(struct psci_power_state state, + unsigned long entry_point) +{ + int err; + u32 fn, power_state; + + fn = psci_function_id[PSCI_FN_CPU_SUSPEND]; + power_state = psci_power_state_pack(state); + err = invoke_psci_fn(fn, power_state, entry_point, 0); + return psci_to_linux_errno(err); +} + +static int psci_cpu_off(struct psci_power_state state) +{ + int err; + u32 fn, power_state; + + fn = psci_function_id[PSCI_FN_CPU_OFF]; + power_state = psci_power_state_pack(state); + err = invoke_psci_fn(fn, power_state, 0, 0); + return psci_to_linux_errno(err); +} + +static int psci_cpu_on(unsigned long cpuid, unsigned long entry_point) +{ + int err; + u32 fn; + + fn = psci_function_id[PSCI_FN_CPU_ON]; + err = invoke_psci_fn(fn, cpuid, entry_point, 0); + return psci_to_linux_errno(err); +} + +static int psci_migrate(unsigned long cpuid) +{ + int err; + u32 fn; + + fn = psci_function_id[PSCI_FN_MIGRATE]; + err = invoke_psci_fn(fn, cpuid, 0, 0); + 
return psci_to_linux_errno(err); +} + +static int psci_affinity_info(unsigned long target_affinity, + unsigned long lowest_affinity_level) +{ + int err; + u32 fn; + + fn = psci_function_id[PSCI_FN_AFFINITY_INFO]; + err = invoke_psci_fn(fn, target_affinity, lowest_affinity_level, 0); + return err; +} + +static int psci_migrate_info_type(void) +{ + int err; + u32 fn; + + fn = psci_function_id[PSCI_FN_MIGRATE_INFO_TYPE]; + err = invoke_psci_fn(fn, 0, 0, 0); + return err; +} + +static int get_set_conduit_method(struct device_node *np) +{ + const char *method; + + pr_info("probing for conduit method from DT.\n"); + + if (of_property_read_string(np, "method", &method)) { + pr_warn("missing \"method\" property\n"); + return -ENXIO; + } + + if (!strcmp("hvc", method)) { + invoke_psci_fn = __invoke_psci_fn_hvc; + } else if (!strcmp("smc", method)) { + invoke_psci_fn = __invoke_psci_fn_smc; + } else { + pr_warn("invalid \"method\" property: %s\n", method); + return -EINVAL; + } + return 0; +} + +static void psci_sys_reset(enum reboot_mode reboot_mode, const char *cmd) +{ + invoke_psci_fn(PSCI_0_2_FN_SYSTEM_RESET, 0, 0, 0); +} + +static void psci_sys_poweroff(void) +{ + invoke_psci_fn(PSCI_0_2_FN_SYSTEM_OFF, 0, 0, 0); +} + +/* + * PSCI Function IDs for v0.2+ are well defined so use + * standard values. + */ +static int psci_0_2_init(struct device_node *np) +{ + int err, ver; + + err = get_set_conduit_method(np); + + if (err) + goto out_put_node; + + ver = psci_get_version(); + + if (ver == PSCI_RET_NOT_SUPPORTED) { + /* PSCI v0.2 mandates implementation of PSCI_ID_VERSION. 
*/ + pr_err("PSCI firmware does not comply with the v0.2 spec.\n"); + err = -EOPNOTSUPP; + goto out_put_node; + } else { + pr_info("PSCIv%d.%d detected in firmware.\n", + PSCI_VERSION_MAJOR(ver), + PSCI_VERSION_MINOR(ver)); + + if (PSCI_VERSION_MAJOR(ver) == 0 && + PSCI_VERSION_MINOR(ver) < 2) { + err = -EINVAL; + pr_err("Conflicting PSCI version detected.\n"); + goto out_put_node; + } + } + + pr_info("Using standard PSCI v0.2 function IDs\n"); + psci_function_id[PSCI_FN_CPU_SUSPEND] = PSCI_0_2_FN64_CPU_SUSPEND; + psci_ops.cpu_suspend = psci_cpu_suspend; + + psci_function_id[PSCI_FN_CPU_OFF] = PSCI_0_2_FN_CPU_OFF; + psci_ops.cpu_off = psci_cpu_off; + + psci_function_id[PSCI_FN_CPU_ON] = PSCI_0_2_FN64_CPU_ON; + psci_ops.cpu_on = psci_cpu_on; + + psci_function_id[PSCI_FN_MIGRATE] = PSCI_0_2_FN64_MIGRATE; + psci_ops.migrate = psci_migrate; + + psci_function_id[PSCI_FN_AFFINITY_INFO] = PSCI_0_2_FN64_AFFINITY_INFO; + psci_ops.affinity_info = psci_affinity_info; + + psci_function_id[PSCI_FN_MIGRATE_INFO_TYPE] = + PSCI_0_2_FN_MIGRATE_INFO_TYPE; + psci_ops.migrate_info_type = psci_migrate_info_type; + + arm_pm_restart = (void (*)(char , const char *))psci_sys_reset; + + pm_power_off = psci_sys_poweroff; + +out_put_node: + of_node_put(np); + return err; +} + +/* + * PSCI < v0.2 get PSCI Function IDs via DT. 
+ */ +static int psci_0_1_init(struct device_node *np) +{ + u32 id; + int err; + + err = get_set_conduit_method(np); + + if (err) + goto out_put_node; + + pr_info("Using PSCI v0.1 Function IDs from DT\n"); + + if (!of_property_read_u32(np, "cpu_suspend", &id)) { + psci_function_id[PSCI_FN_CPU_SUSPEND] = id; + psci_ops.cpu_suspend = psci_cpu_suspend; + } + + if (!of_property_read_u32(np, "cpu_off", &id)) { + psci_function_id[PSCI_FN_CPU_OFF] = id; + psci_ops.cpu_off = psci_cpu_off; + } + + if (!of_property_read_u32(np, "cpu_on", &id)) { + psci_function_id[PSCI_FN_CPU_ON] = id; + psci_ops.cpu_on = psci_cpu_on; + } + + if (!of_property_read_u32(np, "migrate", &id)) { + psci_function_id[PSCI_FN_MIGRATE] = id; + psci_ops.migrate = psci_migrate; + } + +out_put_node: + of_node_put(np); + return err; +} + +static const struct of_device_id psci_of_match[] __initconst = { + { .compatible = "arm,psci", .data = psci_0_1_init}, + { .compatible = "arm,psci-0.2", .data = psci_0_2_init}, + {}, +}; + +int __init psci_init(void) +{ + struct device_node *np; + const struct of_device_id *matched_np; + psci_initcall_t init_fn; + + np = of_find_matching_node_and_match(NULL, psci_of_match, &matched_np); + + if (!np) + return -ENODEV; + + init_fn = (psci_initcall_t)matched_np->data; + return init_fn(np); +} + +#ifdef CONFIG_SMP + +static int __init cpu_psci_cpu_init(struct device_node *dn, unsigned int cpu) +{ + return 0; +} + +static int __init cpu_psci_cpu_prepare(unsigned int cpu) +{ + if (!psci_ops.cpu_on) { + pr_err("no cpu_on method, not booting CPU%d\n", cpu); + return -ENODEV; + } + + return 0; +} + +static int cpu_psci_cpu_boot(unsigned int cpu) +{ + int err = psci_ops.cpu_on(cpu_logical_map(cpu), __pa(secondary_entry)); + if (err) + pr_err("failed to boot CPU%d (%d)\n", cpu, err); + + return err; +} + +#ifdef CONFIG_HOTPLUG_CPU +static int cpu_psci_cpu_disable(unsigned int cpu) +{ + /* Fail early if we don't have CPU_OFF support */ + if (!psci_ops.cpu_off) + return -EOPNOTSUPP; 
+ return 0; +} + +static void cpu_psci_cpu_die(unsigned int cpu) +{ + int ret; + /* + * There are no known implementations of PSCI actually using the + * power state field, pass a sensible default for now. + */ + struct psci_power_state state = { + .type = PSCI_POWER_STATE_TYPE_POWER_DOWN, + }; + + ret = psci_ops.cpu_off(state); + + pr_crit("unable to power off CPU%u (%d)\n", cpu, ret); +} + +static int cpu_psci_cpu_kill(unsigned int cpu) +{ + int err, i; + + if (!psci_ops.affinity_info) + return 1; + /* + * cpu_kill could race with cpu_die and we can + * potentially end up declaring this cpu undead + * while it is dying. So, try again a few times. + */ + + for (i = 0; i < 10; i++) { + err = psci_ops.affinity_info(cpu_logical_map(cpu), 0); + if (err == PSCI_0_2_AFFINITY_LEVEL_OFF) { + pr_info("CPU%d killed.\n", cpu); + return 1; + } + + msleep(10); + pr_info("Retrying again to check for CPU kill\n"); + } + + pr_warn("CPU%d may not have shut down cleanly (AFFINITY_INFO reports %d)\n", + cpu, err); + /* Make op_cpu_kill() fail. */ + return 0; +} +#endif + +const struct cpu_operations cpu_psci_ops = { + .name = "psci", + .cpu_init = cpu_psci_cpu_init, + .cpu_prepare = cpu_psci_cpu_prepare, + .cpu_boot = cpu_psci_cpu_boot, +#ifdef CONFIG_HOTPLUG_CPU + .cpu_disable = cpu_psci_cpu_disable, + .cpu_die = cpu_psci_cpu_die, + .cpu_kill = cpu_psci_cpu_kill, +#endif +}; + +#endif diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c new file mode 100644 index 000000000..1e0bcd6a9 --- /dev/null +++ b/arch/arm64/kernel/ptrace.c @@ -0,0 +1,1171 @@ +/* + * Based on arch/arm/kernel/ptrace.c + * + * By Ross Biro 1/23/92 + * edited by Linus Torvalds + * ARM modifications Copyright (C) 2000 Russell King + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/audit.h> +#include <linux/compat.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/ptrace.h> +#include <linux/user.h> +#include <linux/seccomp.h> +#include <linux/security.h> +#include <linux/init.h> +#include <linux/signal.h> +#include <linux/uaccess.h> +#include <linux/perf_event.h> +#include <linux/hw_breakpoint.h> +#include <linux/regset.h> +#include <linux/tracehook.h> +#include <linux/elf.h> + +#include <asm/compat.h> +#include <asm/debug-monitors.h> +#include <asm/pgtable.h> +#include <asm/syscall.h> +#include <asm/traps.h> +#include <asm/system_misc.h> + +#define CREATE_TRACE_POINTS +#include <trace/events/syscalls.h> + +/* + * TODO: does not yet catch signals sent when the child dies. + * in exit.c or in signal.c. + */ + +/* + * Called by kernel/ptrace.c when detaching.. + */ +void ptrace_disable(struct task_struct *child) +{ +} + +#ifdef CONFIG_HAVE_HW_BREAKPOINT +/* + * Handle hitting a HW-breakpoint. 
+ */ +static void ptrace_hbptriggered(struct perf_event *bp, + struct perf_sample_data *data, + struct pt_regs *regs) +{ + struct arch_hw_breakpoint *bkpt = counter_arch_bp(bp); + siginfo_t info = { + .si_signo = SIGTRAP, + .si_errno = 0, + .si_code = TRAP_HWBKPT, + .si_addr = (void __user *)(bkpt->trigger), + }; + +#ifdef CONFIG_COMPAT + int i; + + if (!is_compat_task()) + goto send_sig; + + for (i = 0; i < ARM_MAX_BRP; ++i) { + if (current->thread.debug.hbp_break[i] == bp) { + info.si_errno = (i << 1) + 1; + break; + } + } + + for (i = 0; i < ARM_MAX_WRP; ++i) { + if (current->thread.debug.hbp_watch[i] == bp) { + info.si_errno = -((i << 1) + 1); + break; + } + } + +send_sig: +#endif + force_sig_info(SIGTRAP, &info, current); +} + +/* + * Unregister breakpoints from this task and reset the pointers in + * the thread_struct. + */ +void flush_ptrace_hw_breakpoint(struct task_struct *tsk) +{ + int i; + struct thread_struct *t = &tsk->thread; + + for (i = 0; i < ARM_MAX_BRP; i++) { + if (t->debug.hbp_break[i]) { + unregister_hw_breakpoint(t->debug.hbp_break[i]); + t->debug.hbp_break[i] = NULL; + } + } + + for (i = 0; i < ARM_MAX_WRP; i++) { + if (t->debug.hbp_watch[i]) { + unregister_hw_breakpoint(t->debug.hbp_watch[i]); + t->debug.hbp_watch[i] = NULL; + } + } +} + +void ptrace_hw_copy_thread(struct task_struct *tsk) +{ + memset(&tsk->thread.debug, 0, sizeof(struct debug_info)); +} + +static struct perf_event *ptrace_hbp_get_event(unsigned int note_type, + struct task_struct *tsk, + unsigned long idx) +{ + struct perf_event *bp = ERR_PTR(-EINVAL); + + switch (note_type) { + case NT_ARM_HW_BREAK: + if (idx < ARM_MAX_BRP) + bp = tsk->thread.debug.hbp_break[idx]; + break; + case NT_ARM_HW_WATCH: + if (idx < ARM_MAX_WRP) + bp = tsk->thread.debug.hbp_watch[idx]; + break; + } + + return bp; +} + +static int ptrace_hbp_set_event(unsigned int note_type, + struct task_struct *tsk, + unsigned long idx, + struct perf_event *bp) +{ + int err = -EINVAL; + + switch (note_type) { + 
case NT_ARM_HW_BREAK: + if (idx < ARM_MAX_BRP) { + tsk->thread.debug.hbp_break[idx] = bp; + err = 0; + } + break; + case NT_ARM_HW_WATCH: + if (idx < ARM_MAX_WRP) { + tsk->thread.debug.hbp_watch[idx] = bp; + err = 0; + } + break; + } + + return err; +} + +static struct perf_event *ptrace_hbp_create(unsigned int note_type, + struct task_struct *tsk, + unsigned long idx) +{ + struct perf_event *bp; + struct perf_event_attr attr; + int err, type; + + switch (note_type) { + case NT_ARM_HW_BREAK: + type = HW_BREAKPOINT_X; + break; + case NT_ARM_HW_WATCH: + type = HW_BREAKPOINT_RW; + break; + default: + return ERR_PTR(-EINVAL); + } + + ptrace_breakpoint_init(&attr); + + /* + * Initialise fields to sane defaults + * (i.e. values that will pass validation). + */ + attr.bp_addr = 0; + attr.bp_len = HW_BREAKPOINT_LEN_4; + attr.bp_type = type; + attr.disabled = 1; + + bp = register_user_hw_breakpoint(&attr, ptrace_hbptriggered, NULL, tsk); + if (IS_ERR(bp)) + return bp; + + err = ptrace_hbp_set_event(note_type, tsk, idx, bp); + if (err) + return ERR_PTR(err); + + return bp; +} + +static int ptrace_hbp_fill_attr_ctrl(unsigned int note_type, + struct arch_hw_breakpoint_ctrl ctrl, + struct perf_event_attr *attr) +{ + int err, len, type, disabled = !ctrl.enabled; + + attr->disabled = disabled; + if (disabled) + return 0; + + err = arch_bp_generic_fields(ctrl, &len, &type); + if (err) + return err; + + switch (note_type) { + case NT_ARM_HW_BREAK: + if ((type & HW_BREAKPOINT_X) != type) + return -EINVAL; + break; + case NT_ARM_HW_WATCH: + if ((type & HW_BREAKPOINT_RW) != type) + return -EINVAL; + break; + default: + return -EINVAL; + } + + attr->bp_len = len; + attr->bp_type = type; + + return 0; +} + +static int ptrace_hbp_get_resource_info(unsigned int note_type, u32 *info) +{ + u8 num; + u32 reg = 0; + + switch (note_type) { + case NT_ARM_HW_BREAK: + num = hw_breakpoint_slots(TYPE_INST); + break; + case NT_ARM_HW_WATCH: + num = hw_breakpoint_slots(TYPE_DATA); + break; + default: 
+ return -EINVAL; + } + + reg |= debug_monitors_arch(); + reg <<= 8; + reg |= num; + + *info = reg; + return 0; +} + +static int ptrace_hbp_get_ctrl(unsigned int note_type, + struct task_struct *tsk, + unsigned long idx, + u32 *ctrl) +{ + struct perf_event *bp = ptrace_hbp_get_event(note_type, tsk, idx); + + if (IS_ERR(bp)) + return PTR_ERR(bp); + + *ctrl = bp ? encode_ctrl_reg(counter_arch_bp(bp)->ctrl) : 0; + return 0; +} + +static int ptrace_hbp_get_addr(unsigned int note_type, + struct task_struct *tsk, + unsigned long idx, + u64 *addr) +{ + struct perf_event *bp = ptrace_hbp_get_event(note_type, tsk, idx); + + if (IS_ERR(bp)) + return PTR_ERR(bp); + + *addr = bp ? bp->attr.bp_addr : 0; + return 0; +} + +static struct perf_event *ptrace_hbp_get_initialised_bp(unsigned int note_type, + struct task_struct *tsk, + unsigned long idx) +{ + struct perf_event *bp = ptrace_hbp_get_event(note_type, tsk, idx); + + if (!bp) + bp = ptrace_hbp_create(note_type, tsk, idx); + + return bp; +} + +static int ptrace_hbp_set_ctrl(unsigned int note_type, + struct task_struct *tsk, + unsigned long idx, + u32 uctrl) +{ + int err; + struct perf_event *bp; + struct perf_event_attr attr; + struct arch_hw_breakpoint_ctrl ctrl; + + bp = ptrace_hbp_get_initialised_bp(note_type, tsk, idx); + if (IS_ERR(bp)) { + err = PTR_ERR(bp); + return err; + } + + attr = bp->attr; + decode_ctrl_reg(uctrl, &ctrl); + err = ptrace_hbp_fill_attr_ctrl(note_type, ctrl, &attr); + if (err) + return err; + + return modify_user_hw_breakpoint(bp, &attr); +} + +static int ptrace_hbp_set_addr(unsigned int note_type, + struct task_struct *tsk, + unsigned long idx, + u64 addr) +{ + int err; + struct perf_event *bp; + struct perf_event_attr attr; + + bp = ptrace_hbp_get_initialised_bp(note_type, tsk, idx); + if (IS_ERR(bp)) { + err = PTR_ERR(bp); + return err; + } + + attr = bp->attr; + attr.bp_addr = addr; + err = modify_user_hw_breakpoint(bp, &attr); + return err; +} + +#define PTRACE_HBP_ADDR_SZ sizeof(u64) +#define 
PTRACE_HBP_CTRL_SZ sizeof(u32) +#define PTRACE_HBP_PAD_SZ sizeof(u32) + +static int hw_break_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + unsigned int note_type = regset->core_note_type; + int ret, idx = 0, offset, limit; + u32 info, ctrl; + u64 addr; + + /* Resource info */ + ret = ptrace_hbp_get_resource_info(note_type, &info); + if (ret) + return ret; + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &info, 0, + sizeof(info)); + if (ret) + return ret; + + /* Pad */ + offset = offsetof(struct user_hwdebug_state, pad); + ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, offset, + offset + PTRACE_HBP_PAD_SZ); + if (ret) + return ret; + + /* (address, ctrl) registers */ + offset = offsetof(struct user_hwdebug_state, dbg_regs); + limit = regset->n * regset->size; + while (count && offset < limit) { + ret = ptrace_hbp_get_addr(note_type, target, idx, &addr); + if (ret) + return ret; + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &addr, + offset, offset + PTRACE_HBP_ADDR_SZ); + if (ret) + return ret; + offset += PTRACE_HBP_ADDR_SZ; + + ret = ptrace_hbp_get_ctrl(note_type, target, idx, &ctrl); + if (ret) + return ret; + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &ctrl, + offset, offset + PTRACE_HBP_CTRL_SZ); + if (ret) + return ret; + offset += PTRACE_HBP_CTRL_SZ; + + ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, + offset, + offset + PTRACE_HBP_PAD_SZ); + if (ret) + return ret; + offset += PTRACE_HBP_PAD_SZ; + idx++; + } + + return 0; +} + +static int hw_break_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + unsigned int note_type = regset->core_note_type; + int ret, idx = 0, offset, limit; + u32 ctrl; + u64 addr; + + /* Resource info and pad */ + offset = offsetof(struct user_hwdebug_state, dbg_regs); + ret = 
user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, 0, offset); + if (ret) + return ret; + + /* (address, ctrl) registers */ + limit = regset->n * regset->size; + while (count && offset < limit) { + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &addr, + offset, offset + PTRACE_HBP_ADDR_SZ); + if (ret) + return ret; + ret = ptrace_hbp_set_addr(note_type, target, idx, addr); + if (ret) + return ret; + offset += PTRACE_HBP_ADDR_SZ; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ctrl, + offset, offset + PTRACE_HBP_CTRL_SZ); + if (ret) + return ret; + ret = ptrace_hbp_set_ctrl(note_type, target, idx, ctrl); + if (ret) + return ret; + offset += PTRACE_HBP_CTRL_SZ; + + ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, + offset, + offset + PTRACE_HBP_PAD_SZ); + if (ret) + return ret; + offset += PTRACE_HBP_PAD_SZ; + idx++; + } + + return 0; +} +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ + +static int gpr_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + struct user_pt_regs *uregs = &task_pt_regs(target)->user_regs; + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs, 0, -1); +} + +static int gpr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + struct user_pt_regs newregs; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &newregs, 0, -1); + if (ret) + return ret; + + if (!valid_user_regs(&newregs)) + return -EINVAL; + + task_pt_regs(target)->user_regs = newregs; + return 0; +} + +/* + * TODO: update fp accessors for lazy context switching (sync/flush hwstate) + */ +static int fpr_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + struct user_fpsimd_state *uregs; + uregs = &target->thread.fpsimd_state.user_fpsimd; + return 
user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs, 0, -1); +} + +static int fpr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + struct user_fpsimd_state newstate; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &newstate, 0, -1); + if (ret) + return ret; + + target->thread.fpsimd_state.user_fpsimd = newstate; + fpsimd_flush_task_state(target); + return ret; +} + +static int tls_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + unsigned long *tls = &target->thread.tp_value; + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, tls, 0, -1); +} + +static int tls_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + unsigned long tls; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &tls, 0, -1); + if (ret) + return ret; + + target->thread.tp_value = tls; + return ret; +} + +enum aarch64_regset { + REGSET_GPR, + REGSET_FPR, + REGSET_TLS, +#ifdef CONFIG_HAVE_HW_BREAKPOINT + REGSET_HW_BREAK, + REGSET_HW_WATCH, +#endif +}; + +static const struct user_regset aarch64_regsets[] = { + [REGSET_GPR] = { + .core_note_type = NT_PRSTATUS, + .n = sizeof(struct user_pt_regs) / sizeof(u64), + .size = sizeof(u64), + .align = sizeof(u64), + .get = gpr_get, + .set = gpr_set + }, + [REGSET_FPR] = { + .core_note_type = NT_PRFPREG, + .n = sizeof(struct user_fpsimd_state) / sizeof(u32), + /* + * We pretend we have 32-bit registers because the fpsr and + * fpcr are 32-bits wide. 
+ */ + .size = sizeof(u32), + .align = sizeof(u32), + .get = fpr_get, + .set = fpr_set + }, + [REGSET_TLS] = { + .core_note_type = NT_ARM_TLS, + .n = 1, + .size = sizeof(void *), + .align = sizeof(void *), + .get = tls_get, + .set = tls_set, + }, +#ifdef CONFIG_HAVE_HW_BREAKPOINT + [REGSET_HW_BREAK] = { + .core_note_type = NT_ARM_HW_BREAK, + .n = sizeof(struct user_hwdebug_state) / sizeof(u32), + .size = sizeof(u32), + .align = sizeof(u32), + .get = hw_break_get, + .set = hw_break_set, + }, + [REGSET_HW_WATCH] = { + .core_note_type = NT_ARM_HW_WATCH, + .n = sizeof(struct user_hwdebug_state) / sizeof(u32), + .size = sizeof(u32), + .align = sizeof(u32), + .get = hw_break_get, + .set = hw_break_set, + }, +#endif +}; + +static const struct user_regset_view user_aarch64_view = { + .name = "aarch64", .e_machine = EM_AARCH64, + .regsets = aarch64_regsets, .n = ARRAY_SIZE(aarch64_regsets) +}; + +#ifdef CONFIG_COMPAT +#include <linux/compat.h> + +enum compat_regset { + REGSET_COMPAT_GPR, + REGSET_COMPAT_VFP, +}; + +static int compat_gpr_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + int ret = 0; + unsigned int i, start, num_regs; + + /* Calculate the number of AArch32 registers contained in count */ + num_regs = count / regset->size; + + /* Convert pos into an register number */ + start = pos / regset->size; + + if (start + num_regs > regset->n) + return -EIO; + + for (i = 0; i < num_regs; ++i) { + unsigned int idx = start + i; + void *reg; + + switch (idx) { + case 15: + reg = (void *)&task_pt_regs(target)->pc; + break; + case 16: + reg = (void *)&task_pt_regs(target)->pstate; + break; + case 17: + reg = (void *)&task_pt_regs(target)->orig_x0; + break; + default: + reg = (void *)&task_pt_regs(target)->regs[idx]; + } + + if (!ubuf && kbuf) { + if (i == 0 && NULL != target && target->pid == current->pid) + printk(KERN_WARNING "coredump(%d) copy registers to kbuf\n", 
current->pid); + memcpy(kbuf, reg, sizeof(compat_ulong_t)); + kbuf += sizeof(compat_ulong_t); + } + else { + ret = copy_to_user(ubuf, reg, sizeof(compat_ulong_t)); + + if (ret) + break; + else + ubuf += sizeof(compat_ulong_t); + } + } + + return ret; +} + +static int compat_gpr_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct pt_regs newregs; + int ret = 0; + unsigned int i, start, num_regs; + + /* Calculate the number of AArch32 registers contained in count */ + num_regs = count / regset->size; + + /* Convert pos into an register number */ + start = pos / regset->size; + + if (start + num_regs > regset->n) + return -EIO; + + newregs = *task_pt_regs(target); + + for (i = 0; i < num_regs; ++i) { + unsigned int idx = start + i; + void *reg; + + switch (idx) { + case 15: + reg = (void *)&newregs.pc; + break; + case 16: + reg = (void *)&newregs.pstate; + break; + case 17: + reg = (void *)&newregs.orig_x0; + break; + default: + reg = (void *)&newregs.regs[idx]; + } + + ret = copy_from_user(reg, ubuf, sizeof(compat_ulong_t)); + + if (ret) + goto out; + else + ubuf += sizeof(compat_ulong_t); + } + + if (valid_user_regs(&newregs.user_regs)) + *task_pt_regs(target) = newregs; + else + ret = -EINVAL; + +out: + return ret; +} + +static int compat_vfp_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + struct user_fpsimd_state *uregs; + compat_ulong_t fpscr; + int ret; + + uregs = &target->thread.fpsimd_state.user_fpsimd; + + /* + * The VFP registers are packed into the fpsimd_state, so they all sit + * nicely together for us. We just need to create the fpscr separately. 
+ */ + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs, 0, + VFP_STATE_SIZE - sizeof(compat_ulong_t)); + + if (count && !ret) { + fpscr = (uregs->fpsr & VFP_FPSCR_STAT_MASK) | + (uregs->fpcr & VFP_FPSCR_CTRL_MASK); + ret = put_user(fpscr, (compat_ulong_t *)ubuf); + } + + return ret; +} + +static int compat_vfp_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct user_fpsimd_state *uregs; + compat_ulong_t fpscr; + int ret; + + if (pos + count > VFP_STATE_SIZE) + return -EIO; + + uregs = &target->thread.fpsimd_state.user_fpsimd; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, uregs, 0, + VFP_STATE_SIZE - sizeof(compat_ulong_t)); + + if (count && !ret) { + ret = get_user(fpscr, (compat_ulong_t *)ubuf); + uregs->fpsr = fpscr & VFP_FPSCR_STAT_MASK; + uregs->fpcr = fpscr & VFP_FPSCR_CTRL_MASK; + } + + fpsimd_flush_task_state(target); + return ret; +} + +static const struct user_regset aarch32_regsets[] = { + [REGSET_COMPAT_GPR] = { + .core_note_type = NT_PRSTATUS, + .n = COMPAT_ELF_NGREG, + .size = sizeof(compat_elf_greg_t), + .align = sizeof(compat_elf_greg_t), + .get = compat_gpr_get, + .set = compat_gpr_set + }, + [REGSET_COMPAT_VFP] = { + .core_note_type = NT_ARM_VFP, + .n = VFP_STATE_SIZE / sizeof(compat_ulong_t), + .size = sizeof(compat_ulong_t), + .align = sizeof(compat_ulong_t), + .get = compat_vfp_get, + .set = compat_vfp_set + }, +}; + +static const struct user_regset_view user_aarch32_view = { + .name = "aarch32", .e_machine = EM_ARM, + .regsets = aarch32_regsets, .n = ARRAY_SIZE(aarch32_regsets) +}; + +static int compat_ptrace_read_user(struct task_struct *tsk, compat_ulong_t off, + compat_ulong_t __user *ret) +{ + compat_ulong_t tmp; + + if (off & 3) + return -EIO; + + if (off == COMPAT_PT_TEXT_ADDR) + tmp = tsk->mm->start_code; + else if (off == COMPAT_PT_DATA_ADDR) + tmp = tsk->mm->start_data; + else if (off == 
COMPAT_PT_TEXT_END_ADDR) + tmp = tsk->mm->end_code; + else if (off < sizeof(compat_elf_gregset_t)) + return copy_regset_to_user(tsk, &user_aarch32_view, + REGSET_COMPAT_GPR, off, + sizeof(compat_ulong_t), ret); + else if (off >= COMPAT_USER_SZ) + return -EIO; + else + tmp = 0; + + return put_user(tmp, ret); +} + +static int compat_ptrace_write_user(struct task_struct *tsk, compat_ulong_t off, + compat_ulong_t val) +{ + int ret; + mm_segment_t old_fs = get_fs(); + + if (off & 3 || off >= COMPAT_USER_SZ) + return -EIO; + + if (off >= sizeof(compat_elf_gregset_t)) + return 0; + + set_fs(KERNEL_DS); + ret = copy_regset_from_user(tsk, &user_aarch32_view, + REGSET_COMPAT_GPR, off, + sizeof(compat_ulong_t), + &val); + set_fs(old_fs); + + return ret; +} + +#ifdef CONFIG_HAVE_HW_BREAKPOINT + +/* + * Convert a virtual register number into an index for a thread_info + * breakpoint array. Breakpoints are identified using positive numbers + * whilst watchpoints are negative. The registers are laid out as pairs + * of (address, control), each pair mapping to a unique hw_breakpoint struct. + * Register 0 is reserved for describing resource information. 
+ */ +static int compat_ptrace_hbp_num_to_idx(compat_long_t num) +{ + return (abs(num) - 1) >> 1; +} + +static int compat_ptrace_hbp_get_resource_info(u32 *kdata) +{ + u8 num_brps, num_wrps, debug_arch, wp_len; + u32 reg = 0; + + num_brps = hw_breakpoint_slots(TYPE_INST); + num_wrps = hw_breakpoint_slots(TYPE_DATA); + + debug_arch = debug_monitors_arch(); + wp_len = 8; + reg |= debug_arch; + reg <<= 8; + reg |= wp_len; + reg <<= 8; + reg |= num_wrps; + reg <<= 8; + reg |= num_brps; + + *kdata = reg; + return 0; +} + +static int compat_ptrace_hbp_get(unsigned int note_type, + struct task_struct *tsk, + compat_long_t num, + u32 *kdata) +{ + u64 addr = 0; + u32 ctrl = 0; + + int err, idx = compat_ptrace_hbp_num_to_idx(num);; + + if (num & 1) { + err = ptrace_hbp_get_addr(note_type, tsk, idx, &addr); + *kdata = (u32)addr; + } else { + err = ptrace_hbp_get_ctrl(note_type, tsk, idx, &ctrl); + *kdata = ctrl; + } + + return err; +} + +static int compat_ptrace_hbp_set(unsigned int note_type, + struct task_struct *tsk, + compat_long_t num, + u32 *kdata) +{ + u64 addr; + u32 ctrl; + + int err, idx = compat_ptrace_hbp_num_to_idx(num); + + if (num & 1) { + addr = *kdata; + err = ptrace_hbp_set_addr(note_type, tsk, idx, addr); + } else { + ctrl = *kdata; + err = ptrace_hbp_set_ctrl(note_type, tsk, idx, ctrl); + } + + return err; +} + +static int compat_ptrace_gethbpregs(struct task_struct *tsk, compat_long_t num, + compat_ulong_t __user *data) +{ + int ret; + u32 kdata; + mm_segment_t old_fs = get_fs(); + + set_fs(KERNEL_DS); + /* Watchpoint */ + if (num < 0) { + ret = compat_ptrace_hbp_get(NT_ARM_HW_WATCH, tsk, num, &kdata); + /* Resource info */ + } else if (num == 0) { + ret = compat_ptrace_hbp_get_resource_info(&kdata); + /* Breakpoint */ + } else { + ret = compat_ptrace_hbp_get(NT_ARM_HW_BREAK, tsk, num, &kdata); + } + set_fs(old_fs); + + if (!ret) + ret = put_user(kdata, data); + + return ret; +} + +static int compat_ptrace_sethbpregs(struct task_struct *tsk, compat_long_t 
num, + compat_ulong_t __user *data) +{ + int ret; + u32 kdata = 0; + mm_segment_t old_fs = get_fs(); + + if (num == 0) + return 0; + + ret = get_user(kdata, data); + if (ret) + return ret; + + set_fs(KERNEL_DS); + if (num < 0) + ret = compat_ptrace_hbp_set(NT_ARM_HW_WATCH, tsk, num, &kdata); + else + ret = compat_ptrace_hbp_set(NT_ARM_HW_BREAK, tsk, num, &kdata); + set_fs(old_fs); + + return ret; +} +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ + +long compat_arch_ptrace(struct task_struct *child, compat_long_t request, + compat_ulong_t caddr, compat_ulong_t cdata) +{ + unsigned long addr = caddr; + unsigned long data = cdata; + void __user *datap = compat_ptr(data); + int ret; + + switch (request) { + case PTRACE_PEEKUSR: + ret = compat_ptrace_read_user(child, addr, datap); + break; + + case PTRACE_POKEUSR: + ret = compat_ptrace_write_user(child, addr, data); + break; + + case COMPAT_PTRACE_GETREGS: + ret = copy_regset_to_user(child, + &user_aarch32_view, + REGSET_COMPAT_GPR, + 0, sizeof(compat_elf_gregset_t), + datap); + break; + + case COMPAT_PTRACE_SETREGS: + ret = copy_regset_from_user(child, + &user_aarch32_view, + REGSET_COMPAT_GPR, + 0, sizeof(compat_elf_gregset_t), + datap); + break; + + case COMPAT_PTRACE_GET_THREAD_AREA: + ret = put_user((compat_ulong_t)child->thread.tp_value, + (compat_ulong_t __user *)datap); + break; + + case COMPAT_PTRACE_SET_SYSCALL: + task_pt_regs(child)->syscallno = data; + ret = 0; + break; + + case COMPAT_PTRACE_GETVFPREGS: + ret = copy_regset_to_user(child, + &user_aarch32_view, + REGSET_COMPAT_VFP, + 0, VFP_STATE_SIZE, + datap); + break; + + case COMPAT_PTRACE_SETVFPREGS: + ret = copy_regset_from_user(child, + &user_aarch32_view, + REGSET_COMPAT_VFP, + 0, VFP_STATE_SIZE, + datap); + break; + +#ifdef CONFIG_HAVE_HW_BREAKPOINT + case COMPAT_PTRACE_GETHBPREGS: + ret = compat_ptrace_gethbpregs(child, addr, datap); + break; + + case COMPAT_PTRACE_SETHBPREGS: + ret = compat_ptrace_sethbpregs(child, addr, datap); + break; +#endif + + 
default: + ret = compat_ptrace_request(child, request, addr, + data); + break; + } + + return ret; +} +#endif /* CONFIG_COMPAT */ + +const struct user_regset_view *task_user_regset_view(struct task_struct *task) +{ +#ifdef CONFIG_COMPAT + if (is_compat_thread(task_thread_info(task))) + return &user_aarch32_view; +#endif + return &user_aarch64_view; +} + +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) +{ + int ret; + + switch (request) { + case PTRACE_SET_SYSCALL: + task_pt_regs(child)->syscallno = data; + ret = 0; + break; + default: + ret = ptrace_request(child, request, addr, data); + break; + } + + return ret; +} + +enum ptrace_syscall_dir { + PTRACE_SYSCALL_ENTER = 0, + PTRACE_SYSCALL_EXIT, +}; + +static void tracehook_report_syscall(struct pt_regs *regs, + enum ptrace_syscall_dir dir) +{ + int regno; + unsigned long saved_reg; + + /* + * A scratch register (ip(r12) on AArch32, x7 on AArch64) is + * used to denote syscall entry/exit: + */ + regno = (is_compat_task() ? 12 : 7); + saved_reg = regs->regs[regno]; + regs->regs[regno] = dir; + + if (dir == PTRACE_SYSCALL_EXIT) + tracehook_report_syscall_exit(regs, 0); + else if (tracehook_report_syscall_entry(regs)) + regs->syscallno = ~0UL; + + regs->regs[regno] = saved_reg; +} + +asmlinkage int syscall_trace_enter(struct pt_regs *regs) +{ + unsigned int saved_syscallno = regs->syscallno; + + /* Do the secure computing check first; failures should be fast. */ + if (secure_computing(regs->syscallno) == -1) + return RET_SKIP_SYSCALL_TRACE; + + if (test_thread_flag(TIF_SYSCALL_TRACE)) + tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER); + + if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) + trace_sys_enter(regs, regs->syscallno); + + if (IS_SKIP_SYSCALL(regs->syscallno)) { + /* + * RESTRICTION: we can't modify a return value of user + * issued syscall(-1) here. 
In order to ease this flavor, + * we need to treat whatever value in x0 as a return value, + * but this might result in a bogus value being returned. + */ + /* + * NOTE: syscallno may also be set to -1 if fatal signal is + * detected in tracehook_report_syscall_entry(), but since + * a value set to x0 here is not used in this case, we may + * neglect the case. + */ + if (!test_thread_flag(TIF_SYSCALL_TRACE) || + (IS_SKIP_SYSCALL(saved_syscallno))) + regs->regs[0] = -ENOSYS; + } + + audit_syscall_entry(syscall_get_arch(), regs->syscallno, + regs->orig_x0, regs->regs[1], regs->regs[2], regs->regs[3]); + + return regs->syscallno; +} + +asmlinkage void syscall_trace_exit(struct pt_regs *regs) +{ + audit_syscall_exit(regs); + + if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) + trace_sys_exit(regs, regs_return_value(regs)); + + if (test_thread_flag(TIF_SYSCALL_TRACE)) + tracehook_report_syscall(regs, PTRACE_SYSCALL_EXIT); +} diff --git a/arch/arm64/kernel/return_address.c b/arch/arm64/kernel/return_address.c new file mode 100644 index 000000000..89102a6ff --- /dev/null +++ b/arch/arm64/kernel/return_address.c @@ -0,0 +1,55 @@ +/* + * arch/arm64/kernel/return_address.c + * + * Copyright (C) 2013 Linaro Limited + * Author: AKASHI Takahiro <takahiro.akashi@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <linux/export.h> +#include <linux/ftrace.h> + +#include <asm/stacktrace.h> + +struct return_address_data { + unsigned int level; + void *addr; +}; + +static int save_return_addr(struct stackframe *frame, void *d) +{ + struct return_address_data *data = d; + + if (!data->level) { + data->addr = (void *)frame->pc; + return 1; + } else { + --data->level; + return 0; + } +} + +void *return_address(unsigned int level) +{ + struct return_address_data data; + struct stackframe frame; + register unsigned long current_sp asm ("sp"); + + data.level = level + 2; + data.addr = NULL; + + frame.fp = (unsigned long)__builtin_frame_address(0); + frame.sp = current_sp; + frame.pc = (unsigned long)return_address; /* dummy */ + + walk_stackframe(&frame, save_return_addr, &data); + + if (!data.level) + return data.addr; + else + return NULL; +} +EXPORT_SYMBOL_GPL(return_address); diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c new file mode 100644 index 000000000..268f50466 --- /dev/null +++ b/arch/arm64/kernel/setup.c @@ -0,0 +1,519 @@ +/* + * Based on arch/arm/kernel/setup.c + * + * Copyright (C) 1995-2001 Russell King + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <linux/export.h> +#include <linux/kernel.h> +#include <linux/stddef.h> +#include <linux/ioport.h> +#include <linux/delay.h> +#include <linux/utsname.h> +#include <linux/initrd.h> +#include <linux/console.h> +#include <linux/bootmem.h> +#include <linux/seq_file.h> +#include <linux/screen_info.h> +#include <linux/init.h> +#include <linux/kexec.h> +#include <linux/crash_dump.h> +#include <linux/root_dev.h> +#include <linux/clk-provider.h> +#include <linux/cpu.h> +#include <linux/interrupt.h> +#include <linux/smp.h> +#include <linux/fs.h> +#include <linux/proc_fs.h> +#include <linux/memblock.h> +#include <linux/of_fdt.h> +#include <linux/of_platform.h> + +#include <asm/cputype.h> +#include <asm/elf.h> +#include <asm/cputable.h> +#include <asm/cpu_ops.h> +#include <asm/sections.h> +#include <asm/setup.h> +#include <asm/smp_plat.h> +#include <asm/cacheflush.h> +#include <asm/tlbflush.h> +#include <asm/traps.h> +#include <asm/memblock.h> +#include <asm/psci.h> + +unsigned int processor_id; +EXPORT_SYMBOL(processor_id); + +unsigned long elf_hwcap __read_mostly; +EXPORT_SYMBOL_GPL(elf_hwcap); + +#ifdef CONFIG_COMPAT +#define COMPAT_ELF_HWCAP_DEFAULT \ + (COMPAT_HWCAP_HALF|COMPAT_HWCAP_THUMB|\ + COMPAT_HWCAP_FAST_MULT|COMPAT_HWCAP_EDSP|\ + COMPAT_HWCAP_TLS|COMPAT_HWCAP_VFP|\ + COMPAT_HWCAP_VFPv3|COMPAT_HWCAP_VFPv4|\ + COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV) +unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT; +unsigned int compat_elf_hwcap2 __read_mostly; +#endif + +static const char *cpu_name; +static const char *machine_name; +phys_addr_t __fdt_pointer __initdata; + +/* + * Standard memory resources + */ +static struct resource mem_res[] = { + { + .name = "Kernel code", + .start = 0, + .end = 0, + .flags = IORESOURCE_MEM + }, + { + .name = "Kernel data", + .start = 0, + .end = 0, + .flags = IORESOURCE_MEM + } +}; + +#define kernel_code mem_res[0] +#define kernel_data mem_res[1] + +void __init early_print(const char *str, ...) 
+{ + char buf[256]; + va_list ap; + + va_start(ap, str); + vsnprintf(buf, sizeof(buf), str, ap); + va_end(ap); + + printk("%s", buf); +} + +void __init smp_setup_processor_id(void) +{ + /* + * clear __my_cpu_offset on boot CPU to avoid hang caused by + * using percpu variable early, for example, lockdep will + * access percpu variable inside lock_release + */ + set_my_cpu_offset(0); +} + +bool arch_match_cpu_phys_id(int cpu, u64 phys_id) +{ + return phys_id == cpu_logical_map(cpu); +} + +struct mpidr_hash mpidr_hash; +#ifdef CONFIG_SMP +/** + * smp_build_mpidr_hash - Pre-compute shifts required at each affinity + * level in order to build a linear index from an + * MPIDR value. Resulting algorithm is a collision + * free hash carried out through shifting and ORing + */ +static void __init smp_build_mpidr_hash(void) +{ + u32 i, affinity, fs[4], bits[4], ls; + u64 mask = 0; + /* + * Pre-scan the list of MPIDRS and filter out bits that do + * not contribute to affinity levels, ie they never toggle. + */ + for_each_possible_cpu(i) + mask |= (cpu_logical_map(i) ^ cpu_logical_map(0)); + pr_debug("mask of set bits %#llx\n", mask); + /* + * Find and stash the last and first bit set at all affinity levels to + * check how many bits are required to represent them. + */ + for (i = 0; i < 4; i++) { + affinity = MPIDR_AFFINITY_LEVEL(mask, i); + /* + * Find the MSB bit and LSB bits position + * to determine how many bits are required + * to express the affinity level. + */ + ls = fls(affinity); + fs[i] = affinity ? ffs(affinity) - 1 : 0; + bits[i] = ls - fs[i]; + } + /* + * An index can be created from the MPIDR_EL1 by isolating the + * significant bits at each affinity level and by shifting + * them in order to compress the 32 bits values space to a + * compressed set of values. This is equivalent to hashing + * the MPIDR_EL1 through shifting and ORing. 
It is a collision free + * hash though not minimal since some levels might contain a number + * of CPUs that is not an exact power of 2 and their bit + * representation might contain holes, eg MPIDR_EL1[7:0] = {0x2, 0x80}. + */ + mpidr_hash.shift_aff[0] = MPIDR_LEVEL_SHIFT(0) + fs[0]; + mpidr_hash.shift_aff[1] = MPIDR_LEVEL_SHIFT(1) + fs[1] - bits[0]; + mpidr_hash.shift_aff[2] = MPIDR_LEVEL_SHIFT(2) + fs[2] - + (bits[1] + bits[0]); + mpidr_hash.shift_aff[3] = MPIDR_LEVEL_SHIFT(3) + + fs[3] - (bits[2] + bits[1] + bits[0]); + mpidr_hash.mask = mask; + mpidr_hash.bits = bits[3] + bits[2] + bits[1] + bits[0]; + pr_debug("MPIDR hash: aff0[%u] aff1[%u] aff2[%u] aff3[%u] mask[%#llx] bits[%u]\n", + mpidr_hash.shift_aff[0], + mpidr_hash.shift_aff[1], + mpidr_hash.shift_aff[2], + mpidr_hash.shift_aff[3], + mpidr_hash.mask, + mpidr_hash.bits); + /* + * 4x is an arbitrary value used to warn on a hash table much bigger + * than expected on most systems. + */ + if (mpidr_hash_size() > 4 * num_possible_cpus()) + pr_warn("Large number of MPIDR hash buckets detected\n"); + __flush_dcache_area(&mpidr_hash, sizeof(struct mpidr_hash)); +} +#endif + +static void __init setup_processor(void) +{ + struct cpu_info *cpu_info; + u64 features, block; + + cpu_info = lookup_processor_type(read_cpuid_id()); + if (!cpu_info) { + printk("CPU configuration botched (ID %08x), unable to continue.\n", + read_cpuid_id()); + while (1); + } + + cpu_name = cpu_info->cpu_name; + + printk("CPU: %s [%08x] revision %d\n", + cpu_name, read_cpuid_id(), read_cpuid_id() & 15); + + sprintf(init_utsname()->machine, "aarch64"); + elf_hwcap = 0; + + /* + * ID_AA64ISAR0_EL1 contains 4-bit wide signed feature blocks. + * The blocks we test below represent incremental functionality + * for non-negative values. Negative values are reserved. 
+ */ + features = read_cpuid(ID_AA64ISAR0_EL1); + block = (features >> 4) & 0xf; + if (!(block & 0x8)) { + switch (block) { + default: + case 2: + elf_hwcap |= HWCAP_PMULL; + case 1: + elf_hwcap |= HWCAP_AES; + case 0: + break; + } + } + + block = (features >> 8) & 0xf; + if (block && !(block & 0x8)) + elf_hwcap |= HWCAP_SHA1; + + block = (features >> 12) & 0xf; + if (block && !(block & 0x8)) + elf_hwcap |= HWCAP_SHA2; + + block = (features >> 16) & 0xf; + if (block && !(block & 0x8)) + elf_hwcap |= HWCAP_CRC32; +} + +static void __init setup_machine_fdt(phys_addr_t dt_phys) +{ + struct boot_param_header *devtree; + unsigned long dt_root; + + /* Check we have a non-NULL DT pointer */ + if (!dt_phys) { + early_print("\n" + "Error: NULL or invalid device tree blob\n" + "The dtb must be 8-byte aligned and passed in the first 512MB of memory\n" + "\nPlease check your bootloader.\n"); + + while (true) + cpu_relax(); + + } + + devtree = phys_to_virt(dt_phys); + + /* Check device tree validity */ + if (be32_to_cpu(devtree->magic) != OF_DT_HEADER) { + early_print("\n" + "Error: invalid device tree blob at physical address 0x%p (virtual address 0x%p)\n" + "Expected 0x%x, found 0x%x\n" + "\nPlease check your bootloader.\n", + dt_phys, devtree, OF_DT_HEADER, + be32_to_cpu(devtree->magic)); + + while (true) + cpu_relax(); + } + + initial_boot_params = devtree; + dt_root = of_get_flat_dt_root(); + + machine_name = of_get_flat_dt_prop(dt_root, "model", NULL); + if (!machine_name) + machine_name = of_get_flat_dt_prop(dt_root, "compatible", NULL); + if (!machine_name) + machine_name = "<unknown>"; + pr_info("Machine: %s\n", machine_name); + + /* Retrieve various information from the /chosen node */ + of_scan_flat_dt(early_init_dt_scan_chosen, boot_command_line); + /* Initialize {size,address}-cells info */ + of_scan_flat_dt(early_init_dt_scan_root, NULL); + /* Setup memory, calling early_init_dt_add_memory_arch */ + of_scan_flat_dt(early_init_dt_scan_memory, NULL); +} + +void 
__init early_init_dt_add_memory_arch(u64 base, u64 size) +{ + base &= PAGE_MASK; + size &= PAGE_MASK; + if (base + size < PHYS_OFFSET) { + pr_warning("Ignoring memory block 0x%llx - 0x%llx\n", + base, base + size); + return; + } + if (base < PHYS_OFFSET) { + pr_warning("Ignoring memory range 0x%llx - 0x%llx\n", + base, PHYS_OFFSET); + size -= PHYS_OFFSET - base; + base = PHYS_OFFSET; + } + memblock_add(base, size); +} + +void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align) +{ + return __va(memblock_alloc(size, align)); +} + +/* + * Limit the memory size that was specified via FDT. + */ +static int __init early_mem(char *p) +{ + phys_addr_t limit; + + if (!p) + return 1; + + limit = memparse(p, &p) & PAGE_MASK; + pr_notice("Memory limited to %lldMB\n", limit >> 20); + + memblock_enforce_memory_limit(limit); + + return 0; +} +early_param("mem", early_mem); + +static void __init request_standard_resources(void) +{ + struct memblock_region *region; + struct resource *res; + + kernel_code.start = virt_to_phys(_text); + kernel_code.end = virt_to_phys(_etext - 1); + kernel_data.start = virt_to_phys(_sdata); + kernel_data.end = virt_to_phys(_end - 1); + + for_each_memblock(memory, region) { + res = alloc_bootmem_low(sizeof(*res)); + res->name = "System RAM"; + res->start = __pfn_to_phys(memblock_region_memory_base_pfn(region)); + res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1; + res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + + request_resource(&iomem_resource, res); + + if (kernel_code.start >= res->start && + kernel_code.end <= res->end) + request_resource(res, &kernel_code); + if (kernel_data.start >= res->start && + kernel_data.end <= res->end) + request_resource(res, &kernel_data); + } +} + +u64 __cpu_logical_map[NR_CPUS] = { [0 ... 
NR_CPUS-1] = INVALID_HWID }; + +void __init setup_arch(char **cmdline_p) +{ + setup_processor(); + + setup_machine_fdt(__fdt_pointer); + + init_mm.start_code = (unsigned long) _text; + init_mm.end_code = (unsigned long) _etext; + init_mm.end_data = (unsigned long) _edata; + init_mm.brk = (unsigned long) _end; + + *cmdline_p = boot_command_line; + + parse_early_param(); + + arm64_memblock_init(); + + paging_init(); + request_standard_resources(); + + unflatten_device_tree(); + + psci_init(); + + cpu_logical_map(0) = read_cpuid_mpidr() & MPIDR_HWID_BITMASK; + cpu_read_bootcpu_ops(); +#ifdef CONFIG_SMP + smp_init_cpus(); + smp_build_mpidr_hash(); +#endif + +#ifdef CONFIG_VT +#if defined(CONFIG_VGA_CONSOLE) + conswitchp = &vga_con; +#elif defined(CONFIG_DUMMY_CONSOLE) + conswitchp = &dummy_con; +#endif +#endif + +} + +static int __init arm64_device_init(void) +{ + of_clk_init(NULL); + of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL); + return 0; +} +arch_initcall_sync(arm64_device_init); + +static DEFINE_PER_CPU(struct cpu, cpu_data); + +static int __init topology_init(void) +{ + int i; + + for_each_possible_cpu(i) { + struct cpu *cpu = &per_cpu(cpu_data, i); + cpu->hotpluggable = 1; + register_cpu(cpu, i); + } + + return 0; +} +subsys_initcall(topology_init); + +static const char *hwcap_str[] = { + "fp", + "asimd", + "evtstrm", + "aes", + "pmull", + "sha1", + "sha2", + "crc32", + NULL +}; + +static int c_show(struct seq_file *m, void *v) +{ + int i; + + seq_printf(m, "Processor\t: %s rev %d (%s)\n", + cpu_name, read_cpuid_id() & 15, ELF_PLATFORM); + + for_each_online_cpu(i) { + /* + * glibc reads /proc/cpuinfo to determine the number of + * online processors, looking for lines beginning with + * "processor". Give glibc what it expects. 
+ */ +#ifdef CONFIG_SMP + seq_printf(m, "processor\t: %d\n", i); +#endif + seq_printf(m, "BogoMIPS\t: %lu.%02lu\n\n", + loops_per_jiffy / (500000UL/HZ), + loops_per_jiffy / (5000UL/HZ) % 100); + } + + /* dump out the processor features */ + seq_puts(m, "Features\t: "); + + for (i = 0; hwcap_str[i]; i++) + if (elf_hwcap & (1 << i)) + seq_printf(m, "%s ", hwcap_str[i]); +#ifdef CONFIG_ARMV7_COMPAT_CPUINFO + if (is_compat_task()) { + /* Print out the non-optional ARMv8 HW capabilities */ + seq_printf(m, "wp half thumb fastmult vfp edsp neon vfpv3 tlsi "); + seq_printf(m, "vfpv4 idiva idivt "); + } +#endif + + seq_printf(m, "\nCPU implementer\t: 0x%02x\n", read_cpuid_id() >> 24); + seq_printf(m, "CPU architecture: %s\n", +#if IS_ENABLED(CONFIG_ARMV7_COMPAT_CPUINFO) + is_compat_task() ? "8" : +#endif + "AArch64"); + seq_printf(m, "CPU variant\t: 0x%x\n", (read_cpuid_id() >> 20) & 15); + seq_printf(m, "CPU part\t: 0x%03x\n", (read_cpuid_id() >> 4) & 0xfff); + seq_printf(m, "CPU revision\t: %d\n", read_cpuid_id() & 15); + + seq_puts(m, "\n"); + + seq_printf(m, "Hardware\t: %s\n", machine_name); + + return 0; +} + +static void *c_start(struct seq_file *m, loff_t *pos) +{ + return *pos < 1 ? (void *)1 : NULL; +} + +static void *c_next(struct seq_file *m, void *v, loff_t *pos) +{ + ++*pos; + return NULL; +} + +static void c_stop(struct seq_file *m, void *v) +{ +} + +const struct seq_operations cpuinfo_op = { + .start = c_start, + .next = c_next, + .stop = c_stop, + .show = c_show +}; diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c new file mode 100644 index 000000000..182b6fc01 --- /dev/null +++ b/arch/arm64/kernel/signal.c @@ -0,0 +1,420 @@ +/* + * Based on arch/arm/kernel/signal.c + * + * Copyright (C) 1995-2009 Russell King + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/compat.h> +#include <linux/errno.h> +#include <linux/signal.h> +#include <linux/personality.h> +#include <linux/freezer.h> +#include <linux/uaccess.h> +#include <linux/tracehook.h> +#include <linux/ratelimit.h> + +#include <asm/debug-monitors.h> +#include <asm/elf.h> +#include <asm/cacheflush.h> +#include <asm/ucontext.h> +#include <asm/unistd.h> +#include <asm/fpsimd.h> +#include <asm/signal32.h> +#include <asm/vdso.h> + +/* + * Do a signal return; undo the signal stack. These are aligned to 128-bit. + */ +struct rt_sigframe { + struct siginfo info; + struct ucontext uc; + u64 fp; + u64 lr; +}; + +static int preserve_fpsimd_context(struct fpsimd_context __user *ctx) +{ + struct fpsimd_state *fpsimd = ¤t->thread.fpsimd_state; + int err; + + /* dump the hardware registers to the fpsimd_state structure */ + fpsimd_preserve_current_state(); + + /* copy the FP and status/control registers */ + err = __copy_to_user(ctx->vregs, fpsimd->vregs, sizeof(fpsimd->vregs)); + __put_user_error(fpsimd->fpsr, &ctx->fpsr, err); + __put_user_error(fpsimd->fpcr, &ctx->fpcr, err); + + /* copy the magic/size information */ + __put_user_error(FPSIMD_MAGIC, &ctx->head.magic, err); + __put_user_error(sizeof(struct fpsimd_context), &ctx->head.size, err); + + return err ? 
-EFAULT : 0; +} + +static int restore_fpsimd_context(struct fpsimd_context __user *ctx) +{ + struct fpsimd_state fpsimd; + __u32 magic, size; + int err = 0; + + /* check the magic/size information */ + __get_user_error(magic, &ctx->head.magic, err); + __get_user_error(size, &ctx->head.size, err); + if (err) + return -EFAULT; + if (magic != FPSIMD_MAGIC || size != sizeof(struct fpsimd_context)) + return -EINVAL; + + /* copy the FP and status/control registers */ + err = __copy_from_user(fpsimd.vregs, ctx->vregs, + sizeof(fpsimd.vregs)); + __get_user_error(fpsimd.fpsr, &ctx->fpsr, err); + __get_user_error(fpsimd.fpcr, &ctx->fpcr, err); + + /* load the hardware registers from the fpsimd_state structure */ + if (!err) + fpsimd_update_current_state(&fpsimd); + + return err ? -EFAULT : 0; +} + +static int restore_sigframe(struct pt_regs *regs, + struct rt_sigframe __user *sf) +{ + sigset_t set; + int i, err; + struct aux_context __user *aux = + (struct aux_context __user *)sf->uc.uc_mcontext.__reserved; + + err = __copy_from_user(&set, &sf->uc.uc_sigmask, sizeof(set)); + if (err == 0) + set_current_blocked(&set); + + for (i = 0; i < 31; i++) + __get_user_error(regs->regs[i], &sf->uc.uc_mcontext.regs[i], + err); + __get_user_error(regs->sp, &sf->uc.uc_mcontext.sp, err); + __get_user_error(regs->pc, &sf->uc.uc_mcontext.pc, err); + __get_user_error(regs->pstate, &sf->uc.uc_mcontext.pstate, err); + + /* + * Avoid sys_rt_sigreturn() restarting. + */ + regs->syscallno = ~0UL; + + err |= !valid_user_regs(®s->user_regs); + + if (err == 0) + err |= restore_fpsimd_context(&aux->fpsimd); + + return err; +} + +asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) +{ + struct rt_sigframe __user *frame; + + /* Always make any pending restarted system calls return -EINTR */ + current_thread_info()->restart_block.fn = do_no_restart_syscall; + + /* + * Since we stacked the signal on a 128-bit boundary, then 'sp' should + * be word aligned here. 
+ */ + if (regs->sp & 15) + goto badframe; + + frame = (struct rt_sigframe __user *)regs->sp; + + if (!access_ok(VERIFY_READ, frame, sizeof (*frame))) + goto badframe; + + if (restore_sigframe(regs, frame)) + goto badframe; + + if (restore_altstack(&frame->uc.uc_stack)) + goto badframe; + + return regs->regs[0]; + +badframe: + if (show_unhandled_signals) + pr_info_ratelimited("%s[%d]: bad frame in %s: pc=%08llx sp=%08llx\n", + current->comm, task_pid_nr(current), __func__, + regs->pc, regs->sp); + force_sig(SIGSEGV, current); + return 0; +} + +static int setup_sigframe(struct rt_sigframe __user *sf, + struct pt_regs *regs, sigset_t *set) +{ + int i, err = 0; + struct aux_context __user *aux = + (struct aux_context __user *)sf->uc.uc_mcontext.__reserved; + + /* set up the stack frame for unwinding */ + __put_user_error(regs->regs[29], &sf->fp, err); + __put_user_error(regs->regs[30], &sf->lr, err); + + for (i = 0; i < 31; i++) + __put_user_error(regs->regs[i], &sf->uc.uc_mcontext.regs[i], + err); + __put_user_error(regs->sp, &sf->uc.uc_mcontext.sp, err); + __put_user_error(regs->pc, &sf->uc.uc_mcontext.pc, err); + __put_user_error(regs->pstate, &sf->uc.uc_mcontext.pstate, err); + + __put_user_error(current->thread.fault_address, &sf->uc.uc_mcontext.fault_address, err); + + err |= __copy_to_user(&sf->uc.uc_sigmask, set, sizeof(*set)); + + if (err == 0) + err |= preserve_fpsimd_context(&aux->fpsimd); + + /* set the "end" magic */ + __put_user_error(0, &aux->end.magic, err); + __put_user_error(0, &aux->end.size, err); + + return err; +} + +static struct rt_sigframe __user *get_sigframe(struct k_sigaction *ka, + struct pt_regs *regs) +{ + unsigned long sp, sp_top; + struct rt_sigframe __user *frame; + + sp = sp_top = regs->sp; + + /* + * This is the X/Open sanctioned signal stack switching. 
+ */ + if ((ka->sa.sa_flags & SA_ONSTACK) && !sas_ss_flags(sp)) + sp = sp_top = current->sas_ss_sp + current->sas_ss_size; + + sp = (sp - sizeof(struct rt_sigframe)) & ~15; + frame = (struct rt_sigframe __user *)sp; + + /* + * Check that we can actually write to the signal frame. + */ + if (!access_ok(VERIFY_WRITE, frame, sp_top - sp)) + frame = NULL; + + return frame; +} + +static void setup_return(struct pt_regs *regs, struct k_sigaction *ka, + void __user *frame, int usig) +{ + __sigrestore_t sigtramp; + + regs->regs[0] = usig; + regs->sp = (unsigned long)frame; + regs->regs[29] = regs->sp + offsetof(struct rt_sigframe, fp); + regs->pc = (unsigned long)ka->sa.sa_handler; + + if (ka->sa.sa_flags & SA_RESTORER) + sigtramp = ka->sa.sa_restorer; + else + sigtramp = VDSO_SYMBOL(current->mm->context.vdso, sigtramp); + + regs->regs[30] = (unsigned long)sigtramp; +} + +static int setup_rt_frame(int usig, struct k_sigaction *ka, siginfo_t *info, + sigset_t *set, struct pt_regs *regs) +{ + struct rt_sigframe __user *frame; + int err = 0; + + frame = get_sigframe(ka, regs); + if (!frame) + return 1; + + __put_user_error(0, &frame->uc.uc_flags, err); + __put_user_error(NULL, &frame->uc.uc_link, err); + + err |= __save_altstack(&frame->uc.uc_stack, regs->sp); + err |= setup_sigframe(frame, regs, set); + if (err == 0) { + setup_return(regs, ka, frame, usig); + if (ka->sa.sa_flags & SA_SIGINFO) { + err |= copy_siginfo_to_user(&frame->info, info); + regs->regs[1] = (unsigned long)&frame->info; + regs->regs[2] = (unsigned long)&frame->uc; + } + } + + return err; +} + +static void setup_restart_syscall(struct pt_regs *regs) +{ + if (is_compat_task()) + compat_setup_restart_syscall(regs); + else + regs->regs[8] = __NR_restart_syscall; +} + +/* + * OK, we're invoking a handler + */ +static void handle_signal(unsigned long sig, struct k_sigaction *ka, + siginfo_t *info, struct pt_regs *regs) +{ + struct thread_info *thread = current_thread_info(); + struct task_struct *tsk = 
current; + sigset_t *oldset = sigmask_to_save(); + int usig = sig; + int ret; + + /* + * translate the signal + */ + if (usig < 32 && thread->exec_domain && thread->exec_domain->signal_invmap) + usig = thread->exec_domain->signal_invmap[usig]; + + /* + * Set up the stack frame + */ + if (is_compat_task()) { + if (ka->sa.sa_flags & SA_SIGINFO) + ret = compat_setup_rt_frame(usig, ka, info, oldset, + regs); + else + ret = compat_setup_frame(usig, ka, oldset, regs); + } else { + ret = setup_rt_frame(usig, ka, info, oldset, regs); + } + + /* + * Check that the resulting registers are actually sane. + */ + ret |= !valid_user_regs(&regs->user_regs); + + /* Frame setup failed or left invalid registers: force SIGSEGV. */ + if (ret != 0) { + force_sigsegv(sig, tsk); + return; + } + + /* + * Fast forward the stepping logic so we step into the signal + * handler. + */ + user_fastforward_single_step(tsk); + + signal_delivered(sig, info, ka, regs, 0); +} + +/* + * Note that 'init' is a special process: it doesn't get signals it doesn't + * want to handle. Thus you cannot kill init even with a SIGKILL even by + * mistake. + * + * Note that we go through the signals twice: once to check the signals that + * the kernel can handle, and then we build all the user-level signal handling + * stack-frames in one go after that. + */ +static void do_signal(struct pt_regs *regs) +{ + unsigned long continue_addr = 0, restart_addr = 0; + struct k_sigaction ka; + siginfo_t info; + int signr, retval = 0; + int syscall = (int)regs->syscallno; + + /* + * If we were from a system call, check for system call restarting... + */ + if (syscall >= 0) { + continue_addr = regs->pc; + restart_addr = continue_addr - (compat_thumb_mode(regs) ? 2 : 4); + retval = regs->regs[0]; + + /* + * Avoid additional syscall restarting via ret_to_user. + */ + regs->syscallno = ~0UL; + + /* + * Prepare for system call restart. We do this here so that a + * debugger will see the already changed PC.
+ */ + switch (retval) { + case -ERESTARTNOHAND: + case -ERESTARTSYS: + case -ERESTARTNOINTR: + case -ERESTART_RESTARTBLOCK: + regs->regs[0] = regs->orig_x0; + regs->pc = restart_addr; + break; + } + } + + /* + * Get the signal to deliver. When running under ptrace, at this point + * the debugger may change all of our registers. + */ + signr = get_signal_to_deliver(&info, &ka, regs, NULL); + if (signr > 0) { + /* + * Depending on the signal settings, we may need to revert the + * decision to restart the system call, but skip this if a + * debugger has chosen to restart at a different PC. + */ + if (regs->pc == restart_addr && + (retval == -ERESTARTNOHAND || + retval == -ERESTART_RESTARTBLOCK || + (retval == -ERESTARTSYS && + !(ka.sa.sa_flags & SA_RESTART)))) { + regs->regs[0] = -EINTR; + regs->pc = continue_addr; + } + + handle_signal(signr, &ka, &info, regs); + return; + } + + /* + * Handle restarting a different system call. As above, if a debugger + * has chosen to restart at a different PC, ignore the restart. + */ + if (syscall >= 0 && regs->pc == restart_addr) { + if (retval == -ERESTART_RESTARTBLOCK) + setup_restart_syscall(regs); + user_rewind_single_step(current); + } + + restore_saved_sigmask(); +} + +asmlinkage void do_notify_resume(struct pt_regs *regs, + unsigned int thread_flags) +{ + if (thread_flags & _TIF_SIGPENDING) + do_signal(regs); + + if (thread_flags & _TIF_NOTIFY_RESUME) { + clear_thread_flag(TIF_NOTIFY_RESUME); + tracehook_notify_resume(regs); + } + + if (thread_flags & _TIF_FOREIGN_FPSTATE) + fpsimd_restore_current_state(); + +} diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c new file mode 100644 index 000000000..e1b8b9fb2 --- /dev/null +++ b/arch/arm64/kernel/signal32.c @@ -0,0 +1,605 @@ +/* + * Based on arch/arm/kernel/signal.c + * + * Copyright (C) 1995-2009 Russell King + * Copyright (C) 2012 ARM Ltd. 
+ * Modified by Will Deacon <will.deacon@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/compat.h> +#include <linux/signal.h> +#include <linux/syscalls.h> +#include <linux/ratelimit.h> + +#include <asm/fpsimd.h> +#include <asm/signal32.h> +#include <asm/uaccess.h> +#include <asm/unistd.h> + +struct compat_sigcontext { + /* We always set these two fields to 0 */ + compat_ulong_t trap_no; + compat_ulong_t error_code; + + compat_ulong_t oldmask; + compat_ulong_t arm_r0; + compat_ulong_t arm_r1; + compat_ulong_t arm_r2; + compat_ulong_t arm_r3; + compat_ulong_t arm_r4; + compat_ulong_t arm_r5; + compat_ulong_t arm_r6; + compat_ulong_t arm_r7; + compat_ulong_t arm_r8; + compat_ulong_t arm_r9; + compat_ulong_t arm_r10; + compat_ulong_t arm_fp; + compat_ulong_t arm_ip; + compat_ulong_t arm_sp; + compat_ulong_t arm_lr; + compat_ulong_t arm_pc; + compat_ulong_t arm_cpsr; + compat_ulong_t fault_address; +}; + +struct compat_ucontext { + compat_ulong_t uc_flags; + compat_uptr_t uc_link; + compat_stack_t uc_stack; + struct compat_sigcontext uc_mcontext; + compat_sigset_t uc_sigmask; + int __unused[32 - (sizeof (compat_sigset_t) / sizeof (int))]; + compat_ulong_t uc_regspace[128] __attribute__((__aligned__(8))); +}; + +struct compat_vfp_sigframe { + compat_ulong_t magic; + compat_ulong_t size; + struct compat_user_vfp { + compat_u64 fpregs[32]; + compat_ulong_t fpscr; + } ufp; + struct compat_user_vfp_exc { + 
compat_ulong_t fpexc; + compat_ulong_t fpinst; + compat_ulong_t fpinst2; + } ufp_exc; +} __attribute__((__aligned__(8))); + +#define VFP_MAGIC 0x56465001 +#define VFP_STORAGE_SIZE sizeof(struct compat_vfp_sigframe) + +struct compat_aux_sigframe { + struct compat_vfp_sigframe vfp; + + /* Something that isn't a valid magic number for any coprocessor. */ + unsigned long end_magic; +} __attribute__((__aligned__(8))); + +struct compat_sigframe { + struct compat_ucontext uc; + compat_ulong_t retcode[2]; +}; + +struct compat_rt_sigframe { + struct compat_siginfo info; + struct compat_sigframe sig; +}; + +#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) + +/* + * For ARM syscalls, the syscall number has to be loaded into r7. + * We do not support an OABI userspace. + */ +#define MOV_R7_NR_SIGRETURN (0xe3a07000 | __NR_compat_sigreturn) +#define SVC_SYS_SIGRETURN (0xef000000 | __NR_compat_sigreturn) +#define MOV_R7_NR_RT_SIGRETURN (0xe3a07000 | __NR_compat_rt_sigreturn) +#define SVC_SYS_RT_SIGRETURN (0xef000000 | __NR_compat_rt_sigreturn) + +/* + * For Thumb syscalls, we also pass the syscall number via r7. We therefore + * need two 16-bit instructions. + */ +#define SVC_THUMB_SIGRETURN (((0xdf00 | __NR_compat_sigreturn) << 16) | \ + 0x2700 | __NR_compat_sigreturn) +#define SVC_THUMB_RT_SIGRETURN (((0xdf00 | __NR_compat_rt_sigreturn) << 16) | \ + 0x2700 | __NR_compat_rt_sigreturn) + +const compat_ulong_t aarch32_sigret_code[6] = { + /* + * AArch32 sigreturn code. + * We don't construct an OABI SWI - instead we just set the imm24 field + * to the EABI syscall number so that we create a sane disassembly. 
+ */ + MOV_R7_NR_SIGRETURN, SVC_SYS_SIGRETURN, SVC_THUMB_SIGRETURN, + MOV_R7_NR_RT_SIGRETURN, SVC_SYS_RT_SIGRETURN, SVC_THUMB_RT_SIGRETURN, +}; + +static inline int put_sigset_t(compat_sigset_t __user *uset, sigset_t *set) +{ + compat_sigset_t cset; + + cset.sig[0] = set->sig[0] & 0xffffffffull; + cset.sig[1] = set->sig[0] >> 32; + + return copy_to_user(uset, &cset, sizeof(*uset)); +} + +static inline int get_sigset_t(sigset_t *set, + const compat_sigset_t __user *uset) +{ + compat_sigset_t s32; + + if (copy_from_user(&s32, uset, sizeof(*uset))) + return -EFAULT; + + set->sig[0] = s32.sig[0] | (((long)s32.sig[1]) << 32); + return 0; +} + +int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from) +{ + int err; + + if (!access_ok(VERIFY_WRITE, to, sizeof(*to))) + return -EFAULT; + + /* If you change siginfo_t structure, please be sure + * this code is fixed accordingly. + * It should never copy any pad contained in the structure + * to avoid security leaks, but must copy the generic + * 3 ints plus the relevant union member. + * This routine must convert siginfo from 64bit to 32bit as well + * at the same time. 
+ */ + err = __put_user(from->si_signo, &to->si_signo); + err |= __put_user(from->si_errno, &to->si_errno); + err |= __put_user((short)from->si_code, &to->si_code); + if (from->si_code < 0) + err |= __copy_to_user(&to->_sifields._pad, &from->_sifields._pad, + SI_PAD_SIZE); + else switch (from->si_code & __SI_MASK) { + case __SI_KILL: + err |= __put_user(from->si_pid, &to->si_pid); + err |= __put_user(from->si_uid, &to->si_uid); + break; + case __SI_TIMER: + err |= __put_user(from->si_tid, &to->si_tid); + err |= __put_user(from->si_overrun, &to->si_overrun); + err |= __put_user((compat_uptr_t)(unsigned long)from->si_ptr, + &to->si_ptr); + break; + case __SI_POLL: + err |= __put_user(from->si_band, &to->si_band); + err |= __put_user(from->si_fd, &to->si_fd); + break; + case __SI_FAULT: + err |= __put_user((compat_uptr_t)(unsigned long)from->si_addr, + &to->si_addr); +#ifdef BUS_MCEERR_AO + /* + * Other callers might not initialize the si_addr_lsb field, + * so check explicitly for the right codes here. + */ + if (from->si_code == BUS_MCEERR_AR || from->si_code == BUS_MCEERR_AO) + err |= __put_user(from->si_addr_lsb, &to->si_addr_lsb); +#endif + break; + case __SI_CHLD: + err |= __put_user(from->si_pid, &to->si_pid); + err |= __put_user(from->si_uid, &to->si_uid); + err |= __put_user(from->si_status, &to->si_status); + err |= __put_user(from->si_utime, &to->si_utime); + err |= __put_user(from->si_stime, &to->si_stime); + break; + case __SI_RT: /* This is not generated by the kernel as of now.
*/ + case __SI_MESGQ: /* But this is */ + err |= __put_user(from->si_pid, &to->si_pid); + err |= __put_user(from->si_uid, &to->si_uid); + err |= __put_user((compat_uptr_t)(unsigned long)from->si_ptr, &to->si_ptr); + break; +#ifdef __ARCH_SIGSYS + case __SI_SYS: + err |= __put_user((compat_uptr_t)(unsigned long) + from->si_call_addr, &to->si_call_addr); + err |= __put_user(from->si_syscall, &to->si_syscall); + err |= __put_user(from->si_arch, &to->si_arch); + break; +#endif + default: /* this is just in case for now ... */ + err |= __put_user(from->si_pid, &to->si_pid); + err |= __put_user(from->si_uid, &to->si_uid); + break; + } + return err; +} + +int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) +{ + memset(to, 0, sizeof *to); + + if (copy_from_user(to, from, __ARCH_SI_PREAMBLE_SIZE) || + copy_from_user(to->_sifields._pad, + from->_sifields._pad, SI_PAD_SIZE)) + return -EFAULT; + + return 0; +} + +/* + * VFP save/restore code. + */ +/* + * Write the current task's floating-point state into the compat VFP record + * at frame: the VFP_MAGIC/size header, the leading part of the vector + * registers, an fpscr assembled from fpsr and fpcr, and a faked exception + * block. Returns 0 on success or -EFAULT if any user access failed. + */ +static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame) +{ + struct fpsimd_state *fpsimd = &current->thread.fpsimd_state; + compat_ulong_t magic = VFP_MAGIC; + compat_ulong_t size = VFP_STORAGE_SIZE; + compat_ulong_t fpscr, fpexc; + int err = 0; + + /* + * Save the hardware registers to the fpsimd_state structure. + * Note that this also saves V16-31, which aren't visible + * in AArch32. + */ + fpsimd_preserve_current_state(); + + /* Place structure header on the stack */ + __put_user_error(magic, &frame->magic, err); + __put_user_error(size, &frame->size, err); + + /* + * Now copy the FP registers. Since the registers are packed, + * we can copy the prefix we want (V0-V15) as it is. + * FIXME: Won't work if big endian. + */ + err |= __copy_to_user(&frame->ufp.fpregs, fpsimd->vregs, + sizeof(frame->ufp.fpregs)); + + /* Create an AArch32 fpscr from the fpsr and the fpcr.
*/ + fpscr = (fpsimd->fpsr & VFP_FPSCR_STAT_MASK) | + (fpsimd->fpcr & VFP_FPSCR_CTRL_MASK); + __put_user_error(fpscr, &frame->ufp.fpscr, err); + + /* + * The exception registers aren't available so we fake up a + * basic FPEXC and zero everything else. + */ + fpexc = (1 << 30); + __put_user_error(fpexc, &frame->ufp_exc.fpexc, err); + __put_user_error(0, &frame->ufp_exc.fpinst, err); + __put_user_error(0, &frame->ufp_exc.fpinst2, err); + + return err ? -EFAULT : 0; +} + +/* + * Read a VFP record back from the user signal frame: check its magic/size + * header, copy the saved registers into the start of a local fpsimd_state, + * split fpscr into fpsr/fpcr and, if nothing faulted, load the result as + * the current task's state. Returns -EFAULT on a faulting user access and + * -EINVAL on a bad header. + * NOTE(review): only sizeof(frame->ufp.fpregs) bytes of the on-stack + * fpsimd.vregs are written before fpsimd_update_current_state(); if vregs + * is larger than that the remainder is uninitialized - confirm this is + * harmless. + */ +static int compat_restore_vfp_context(struct compat_vfp_sigframe __user *frame) +{ + struct fpsimd_state fpsimd; + compat_ulong_t magic = VFP_MAGIC; + compat_ulong_t size = VFP_STORAGE_SIZE; + compat_ulong_t fpscr; + int err = 0; + + __get_user_error(magic, &frame->magic, err); + __get_user_error(size, &frame->size, err); + + if (err) + return -EFAULT; + if (magic != VFP_MAGIC || size != VFP_STORAGE_SIZE) + return -EINVAL; + + /* + * Copy the FP registers into the start of the fpsimd_state. + * FIXME: Won't work if big endian. + */ + err |= __copy_from_user(fpsimd.vregs, frame->ufp.fpregs, + sizeof(frame->ufp.fpregs)); + + /* Extract the fpsr and the fpcr from the fpscr */ + __get_user_error(fpscr, &frame->ufp.fpscr, err); + fpsimd.fpsr = fpscr & VFP_FPSCR_STAT_MASK; + fpsimd.fpcr = fpscr & VFP_FPSCR_CTRL_MASK; + + /* + * We don't need to touch the exception register, so + * reload the hardware state. + */ + if (!err) + fpsimd_update_current_state(&fpsimd); + + return err ?
-EFAULT : 0; +} + +static int compat_restore_sigframe(struct pt_regs *regs, + struct compat_sigframe __user *sf) +{ + int err; + sigset_t set; + struct compat_aux_sigframe __user *aux; + + err = get_sigset_t(&set, &sf->uc.uc_sigmask); + if (err == 0) { + sigdelsetmask(&set, ~_BLOCKABLE); + set_current_blocked(&set); + } + + __get_user_error(regs->regs[0], &sf->uc.uc_mcontext.arm_r0, err); + __get_user_error(regs->regs[1], &sf->uc.uc_mcontext.arm_r1, err); + __get_user_error(regs->regs[2], &sf->uc.uc_mcontext.arm_r2, err); + __get_user_error(regs->regs[3], &sf->uc.uc_mcontext.arm_r3, err); + __get_user_error(regs->regs[4], &sf->uc.uc_mcontext.arm_r4, err); + __get_user_error(regs->regs[5], &sf->uc.uc_mcontext.arm_r5, err); + __get_user_error(regs->regs[6], &sf->uc.uc_mcontext.arm_r6, err); + __get_user_error(regs->regs[7], &sf->uc.uc_mcontext.arm_r7, err); + __get_user_error(regs->regs[8], &sf->uc.uc_mcontext.arm_r8, err); + __get_user_error(regs->regs[9], &sf->uc.uc_mcontext.arm_r9, err); + __get_user_error(regs->regs[10], &sf->uc.uc_mcontext.arm_r10, err); + __get_user_error(regs->regs[11], &sf->uc.uc_mcontext.arm_fp, err); + __get_user_error(regs->regs[12], &sf->uc.uc_mcontext.arm_ip, err); + __get_user_error(regs->compat_sp, &sf->uc.uc_mcontext.arm_sp, err); + __get_user_error(regs->compat_lr, &sf->uc.uc_mcontext.arm_lr, err); + __get_user_error(regs->pc, &sf->uc.uc_mcontext.arm_pc, err); + __get_user_error(regs->pstate, &sf->uc.uc_mcontext.arm_cpsr, err); + + /* + * Avoid compat_sys_sigreturn() restarting. 
+ */ + regs->syscallno = ~0UL; + + err |= !valid_user_regs(&regs->user_regs); + + aux = (struct compat_aux_sigframe __user *) sf->uc.uc_regspace; + if (err == 0) + err |= compat_restore_vfp_context(&aux->vfp); + + return err; +} + +/* + * sigreturn for compat tasks using the non-RT frame layout: validate the + * frame found at compat_sp, restore the saved context from it and return + * the restored r0. A bad frame forces SIGSEGV and returns 0. + */ +asmlinkage int compat_sys_sigreturn(struct pt_regs *regs) +{ + struct compat_sigframe __user *frame; + + /* Always make any pending restarted system calls return -EINTR */ + current_thread_info()->restart_block.fn = do_no_restart_syscall; + + /* + * Since we stacked the signal on a 64-bit boundary, + * then 'sp' should be 8-byte aligned here. If it's + * not, then the user is trying to mess with us. + */ + if (regs->compat_sp & 7) + goto badframe; + + frame = (struct compat_sigframe __user *)regs->compat_sp; + + if (!access_ok(VERIFY_READ, frame, sizeof (*frame))) + goto badframe; + + if (compat_restore_sigframe(regs, frame)) + goto badframe; + + return regs->regs[0]; + +badframe: + /* Report the stack pointer the frame was taken from (compat_sp). */ + if (show_unhandled_signals) + pr_info_ratelimited("%s[%d]: bad frame in %s: pc=%08llx sp=%08llx\n", + current->comm, task_pid_nr(current), __func__, + regs->pc, regs->compat_sp); + force_sig(SIGSEGV, current); + return 0; +} + +asmlinkage int compat_sys_rt_sigreturn(struct pt_regs *regs) +{ + struct compat_rt_sigframe __user *frame; + + /* Always make any pending restarted system calls return -EINTR */ + current_thread_info()->restart_block.fn = do_no_restart_syscall; + + /* + * Since we stacked the signal on a 64-bit boundary, + * then 'sp' should be 8-byte aligned here. If it's + * not, then the user is trying to mess with us.
+ */ + if (regs->compat_sp & 7) + goto badframe; + + frame = (struct compat_rt_sigframe __user *)regs->compat_sp; + + if (!access_ok(VERIFY_READ, frame, sizeof (*frame))) + goto badframe; + + if (compat_restore_sigframe(regs, &frame->sig)) + goto badframe; + + if (compat_restore_altstack(&frame->sig.uc.uc_stack)) + goto badframe; + + return regs->regs[0]; + +badframe: + /* Report the stack pointer the frame was taken from (compat_sp). */ + if (show_unhandled_signals) + pr_info_ratelimited("%s[%d]: bad frame in %s: pc=%08llx sp=%08llx\n", + current->comm, task_pid_nr(current), __func__, + regs->pc, regs->compat_sp); + force_sig(SIGSEGV, current); + return 0; +} + +/* + * Choose the user address for a new compat signal frame of framesize bytes: + * start from the current compat_sp, or from the top of the alternate stack + * when SA_ONSTACK is set and sas_ss_flags(sp) is zero, then reserve + * framesize bytes and round down to an 8-byte boundary. Returns NULL when + * the resulting range fails access_ok(). + */ +static void __user *compat_get_sigframe(struct k_sigaction *ka, + struct pt_regs *regs, + int framesize) +{ + compat_ulong_t sp = regs->compat_sp; + void __user *frame; + + /* + * This is the X/Open sanctioned signal stack switching. + */ + if ((ka->sa.sa_flags & SA_ONSTACK) && !sas_ss_flags(sp)) + sp = current->sas_ss_sp + current->sas_ss_size; + + /* + * ATPCS B01 mandates 8-byte alignment + */ + frame = compat_ptr((compat_uptr_t)((sp - framesize) & ~7)); + + /* + * Check that we can actually write to the signal frame.
+ */ + if (!access_ok(VERIFY_WRITE, frame, framesize)) + frame = NULL; + + return frame; +} + +static void compat_setup_return(struct pt_regs *regs, struct k_sigaction *ka, + compat_ulong_t __user *rc, void __user *frame, + int usig) +{ + compat_ulong_t handler = ptr_to_compat(ka->sa.sa_handler); + compat_ulong_t retcode; + compat_ulong_t spsr = regs->pstate & ~PSR_f; + int thumb; + + /* Check if the handler is written for ARM or Thumb */ + thumb = handler & 1; + + if (thumb) { + spsr |= COMPAT_PSR_T_BIT; + spsr &= ~COMPAT_PSR_IT_MASK; + } else { + spsr &= ~COMPAT_PSR_T_BIT; + } + + if (ka->sa.sa_flags & SA_RESTORER) { + retcode = ptr_to_compat(ka->sa.sa_restorer); + } else { + /* Set up sigreturn pointer */ + unsigned int idx = thumb << 1; + + if (ka->sa.sa_flags & SA_SIGINFO) + idx += 3; + + retcode = AARCH32_VECTORS_BASE + + AARCH32_KERN_SIGRET_CODE_OFFSET + + (idx << 2) + thumb; + } + + regs->regs[0] = usig; + regs->compat_sp = ptr_to_compat(frame); + regs->compat_lr = retcode; + regs->pc = handler; + regs->pstate = spsr; +} + +static int compat_setup_sigframe(struct compat_sigframe __user *sf, + struct pt_regs *regs, sigset_t *set) +{ + struct compat_aux_sigframe __user *aux; + int err = 0; + + __put_user_error(regs->regs[0], &sf->uc.uc_mcontext.arm_r0, err); + __put_user_error(regs->regs[1], &sf->uc.uc_mcontext.arm_r1, err); + __put_user_error(regs->regs[2], &sf->uc.uc_mcontext.arm_r2, err); + __put_user_error(regs->regs[3], &sf->uc.uc_mcontext.arm_r3, err); + __put_user_error(regs->regs[4], &sf->uc.uc_mcontext.arm_r4, err); + __put_user_error(regs->regs[5], &sf->uc.uc_mcontext.arm_r5, err); + __put_user_error(regs->regs[6], &sf->uc.uc_mcontext.arm_r6, err); + __put_user_error(regs->regs[7], &sf->uc.uc_mcontext.arm_r7, err); + __put_user_error(regs->regs[8], &sf->uc.uc_mcontext.arm_r8, err); + __put_user_error(regs->regs[9], &sf->uc.uc_mcontext.arm_r9, err); + __put_user_error(regs->regs[10], &sf->uc.uc_mcontext.arm_r10, err); + 
__put_user_error(regs->regs[11], &sf->uc.uc_mcontext.arm_fp, err); + __put_user_error(regs->regs[12], &sf->uc.uc_mcontext.arm_ip, err); + __put_user_error(regs->compat_sp, &sf->uc.uc_mcontext.arm_sp, err); + __put_user_error(regs->compat_lr, &sf->uc.uc_mcontext.arm_lr, err); + __put_user_error(regs->pc, &sf->uc.uc_mcontext.arm_pc, err); + __put_user_error(regs->pstate, &sf->uc.uc_mcontext.arm_cpsr, err); + + __put_user_error((compat_ulong_t)0, &sf->uc.uc_mcontext.trap_no, err); + __put_user_error((compat_ulong_t)0, &sf->uc.uc_mcontext.error_code, err); + __put_user_error(current->thread.fault_address, &sf->uc.uc_mcontext.fault_address, err); + __put_user_error(set->sig[0], &sf->uc.uc_mcontext.oldmask, err); + + err |= put_sigset_t(&sf->uc.uc_sigmask, set); + + aux = (struct compat_aux_sigframe __user *) sf->uc.uc_regspace; + + if (err == 0) + err |= compat_preserve_vfp_context(&aux->vfp); + __put_user_error(0, &aux->end_magic, err); + + return err; +} + +/* + * 32-bit signal handling routines called from signal.c + */ +int compat_setup_rt_frame(int usig, struct k_sigaction *ka, siginfo_t *info, + sigset_t *set, struct pt_regs *regs) +{ + struct compat_rt_sigframe __user *frame; + int err = 0; + + frame = compat_get_sigframe(ka, regs, sizeof(*frame)); + + if (!frame) + return 1; + + err |= copy_siginfo_to_user32(&frame->info, info); + + __put_user_error(0, &frame->sig.uc.uc_flags, err); + __put_user_error(0, &frame->sig.uc.uc_link, err); + + err |= __compat_save_altstack(&frame->sig.uc.uc_stack, regs->compat_sp); + + err |= compat_setup_sigframe(&frame->sig, regs, set); + + if (err == 0) { + compat_setup_return(regs, ka, frame->sig.retcode, frame, usig); + regs->regs[1] = (compat_ulong_t)(unsigned long)&frame->info; + regs->regs[2] = (compat_ulong_t)(unsigned long)&frame->sig.uc; + } + + return err; +} + +int compat_setup_frame(int usig, struct k_sigaction *ka, sigset_t *set, + struct pt_regs *regs) +{ + struct compat_sigframe __user *frame; + int err = 0; + + frame 
= compat_get_sigframe(ka, regs, sizeof(*frame)); + + if (!frame) + return 1; + + __put_user_error(0x5ac3c35a, &frame->uc.uc_flags, err); + + err |= compat_setup_sigframe(frame, regs, set); + if (err == 0) + compat_setup_return(regs, ka, frame->retcode, frame, usig); + + return err; +} + +void compat_setup_restart_syscall(struct pt_regs *regs) +{ + regs->regs[7] = __NR_compat_restart_syscall; +} diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S new file mode 100755 index 000000000..70a251af9 --- /dev/null +++ b/arch/arm64/kernel/sleep.S @@ -0,0 +1,231 @@ +#include <linux/errno.h> +#include <linux/linkage.h> +#include <asm/asm-offsets.h> +#include <asm/assembler.h> + + .text +/* + * Implementation of MPIDR_EL1 hash algorithm through shifting + * and OR'ing. + * + * @dst: register containing hash result + * @rs0: register containing affinity level 0 bit shift + * @rs1: register containing affinity level 1 bit shift + * @rs2: register containing affinity level 2 bit shift + * @rs3: register containing affinity level 3 bit shift + * @mpidr: register containing MPIDR_EL1 value + * @mask: register containing MPIDR mask + * + * Pseudo C-code: + * + *u32 dst; + * + *compute_mpidr_hash(u32 rs0, u32 rs1, u32 rs2, u32 rs3, u64 mpidr, u64 mask) { + * u32 aff0, aff1, aff2, aff3; + * u64 mpidr_masked = mpidr & mask; + * aff0 = mpidr_masked & 0xff; + * aff1 = mpidr_masked & 0xff00; + * aff2 = mpidr_masked & 0xff0000; + * aff3 = mpidr_masked & 0xff00000000; + * dst = (aff0 >> rs0 | aff1 >> rs1 | aff2 >> rs2 | aff3 >> rs3); + *} + * Input registers: rs0, rs1, rs2, rs3, mpidr, mask + * Output register: dst + * Note: input and output registers must be disjoint register sets + (eg: a macro instance with mpidr = x1 and dst = x1 is invalid) + */ + .macro compute_mpidr_hash dst, rs0, rs1, rs2, rs3, mpidr, mask + and \mpidr, \mpidr, \mask // mask out MPIDR bits + and \dst, \mpidr, #0xff // mask=aff0 + lsr \dst ,\dst, \rs0 // dst=aff0>>rs0 + and \mask, \mpidr, #0xff00 //
mask = aff1 + lsr \mask ,\mask, \rs1 + orr \dst, \dst, \mask // dst|=(aff1>>rs1) + and \mask, \mpidr, #0xff0000 // mask = aff2 + lsr \mask ,\mask, \rs2 + orr \dst, \dst, \mask // dst|=(aff2>>rs2) + and \mask, \mpidr, #0xff00000000 // mask = aff3 + lsr \mask ,\mask, \rs3 + orr \dst, \dst, \mask // dst|=(aff3>>rs3) + .endm + + + .macro dormant_va_log t1, t2, t3, t4, tag + mrs \t1, mpidr_el1 + ubfx \t2, \t1, #0, #8 + ubfx \t3, \t1, #8, #8 + add \t2, \t2, \t3, lsl #2 + ldr \t3, =sleep_aee_rec_cpu_dormant_va + ldr \t3, [\t3] + cbz \t3, 1f + ldr \t4, =\tag + str \t4, [ \t3, \t2, lsl #3 ] +1: + .endm + + .macro dormant_pa_log t1, t2, t3, t4, tag + mrs \t1, mpidr_el1 + ubfx \t2, \t1, #0, #8 + ubfx \t3, \t1, #8, #8 + add \t2, \t2, \t3, lsl #2 + adr \t3, sleep_aee_rec_cpu_dormant + ldr \t3, [\t3] + cbz \t3, 1f + ldr \t4, =\tag + str \t4, [ \t3, \t2, lsl #3 ] +1: + .endm + + +/* + * Save CPU state for a suspend. This saves callee registers, and allocates + * space on the kernel stack to save the CPU specific registers + some + * other data for resume. + * + * x0 = suspend finisher argument + */ +ENTRY(__cpu_suspend) + stp x29, lr, [sp, #-96]! + stp x19, x20, [sp,#16] + stp x21, x22, [sp,#32] + stp x23, x24, [sp,#48] + stp x25, x26, [sp,#64] + stp x27, x28, [sp,#80] + mov x2, sp + sub sp, sp, #CPU_SUSPEND_SZ // allocate cpu_suspend_ctx + mov x1, sp + + dormant_va_log x7, x8, x9, x10, 0x202 + /* + * x1 now points to struct cpu_suspend_ctx allocated on the stack + */ + str x2, [x1, #CPU_CTX_SP] + ldr x2, =sleep_save_sp + ldr x2, [x2, #SLEEP_SAVE_SP_VIRT] +#ifdef CONFIG_SMP + mrs x7, mpidr_el1 + ldr x9, =mpidr_hash + ldr x10, [x9, #MPIDR_HASH_MASK] + /* + * Following code relies on the struct mpidr_hash + * members size. + */ + ldp w3, w4, [x9, #MPIDR_HASH_SHIFTS] + ldp w5, w6, [x9, #(MPIDR_HASH_SHIFTS + 8)] + compute_mpidr_hash x8, x3, x4, x5, x6, x7, x10 + add x2, x2, x8, lsl #3 +#endif + bl __cpu_suspend_finisher + /* + * Never gets here, unless suspend fails. 
+ * Successful cpu_suspend should return from cpu_resume, returning + * through this code path is considered an error + * If the return value is set to 0 force x0 = -EOPNOTSUPP + * to make sure a proper error condition is propagated + */ + cmp x0, #0 + mov x3, #-EOPNOTSUPP + csel x0, x3, x0, eq + add sp, sp, #CPU_SUSPEND_SZ // rewind stack pointer + ldp x19, x20, [sp, #16] + ldp x21, x22, [sp, #32] + ldp x23, x24, [sp, #48] + ldp x25, x26, [sp, #64] + ldp x27, x28, [sp, #80] + ldp x29, lr, [sp], #96 + ret +ENDPROC(__cpu_suspend) + .ltorg + +/* + * x0 must contain the sctlr value retrieved from restored context + */ +ENTRY(cpu_resume_mmu) + ldr x3, =cpu_resume_after_mmu + msr sctlr_el1, x0 // restore sctlr_el1 + isb + br x3 // global jump to virtual address +ENDPROC(cpu_resume_mmu) +cpu_resume_after_mmu: + dormant_va_log x7, x8, x9, x10, 0x503 + mov x0, #0 // return zero on success + ldp x19, x20, [sp, #16] + ldp x21, x22, [sp, #32] + ldp x23, x24, [sp, #48] + ldp x25, x26, [sp, #64] + ldp x27, x28, [sp, #80] + ldp x29, lr, [sp], #96 + ret +ENDPROC(cpu_resume_after_mmu) + + .data +ENTRY(cpu_resume) + dormant_pa_log x7, x8, x9, x10, 0x501 + bl __calc_phys_offset //el2_setup needs x28 for phys-page_offset + bl el2_setup // if in EL2 drop to EL1 cleanly + +#ifdef CONFIG_SMP + mrs x1, mpidr_el1 + adr x4, mpidr_hash_ptr + ldr x5, [x4] + add x8, x4, x5 // x8 = struct mpidr_hash phys address + /* retrieve mpidr_hash members to compute the hash */ + ldr x2, [x8, #MPIDR_HASH_MASK] + ldp w3, w4, [x8, #MPIDR_HASH_SHIFTS] + ldp w5, w6, [x8, #(MPIDR_HASH_SHIFTS + 8)] + compute_mpidr_hash x7, x3, x4, x5, x6, x1, x2 + /* x7 contains hash index, let's use it to grab context pointer */ +#else + mov x7, xzr +#endif + adr x0, sleep_save_sp + ldr x0, [x0, #SLEEP_SAVE_SP_PHYS] + ldr x0, [x0, x7, lsl #3] + /* load sp from context */ + ldr x2, [x0, #CPU_CTX_SP] + adr x1, sleep_idmap_phys + /* load physical address of identity map page table in x1 */ + ldr x1, [x1] +/* bfi x1, xzr, #48, 
#16 // set the ASID as 0 */ + mov sp, x2 + /* + * cpu_do_resume expects x0 to contain context physical address + * pointer and x1 to contain physical address of 1:1 page tables + */ + bl cpu_do_resume // PC relative jump, MMU off + b cpu_resume_mmu // Resume MMU, never returns +ENDPROC(cpu_resume) + + .align 3 +mpidr_hash_ptr: + /* + * offset of mpidr_hash symbol from current location + * used to obtain run-time mpidr_hash address with MMU off + */ + .quad mpidr_hash - . +/* + * physical address of identity mapped page tables + */ + .type sleep_idmap_phys, #object +ENTRY(sleep_idmap_phys) + .quad 0 +/* + * struct sleep_save_sp { + * phys_addr_t *save_ptr_stash; + * phys_addr_t save_ptr_stash_phys; + * }; + */ + .type sleep_save_sp, #object +ENTRY(sleep_save_sp) + .space SLEEP_SAVE_SP_SZ // struct sleep_save_sp + + + + .type sleep_aee_rec_cpu_dormant, #object +ENTRY(sleep_aee_rec_cpu_dormant) + .quad 0 // pc log + + .type sleep_aee_rec_cpu_dormant_va, #object +ENTRY(sleep_aee_rec_cpu_dormant_va) + .quad 0 // pc log + diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c new file mode 100644 index 000000000..f6d41cc8b --- /dev/null +++ b/arch/arm64/kernel/smp.c @@ -0,0 +1,674 @@ +/* + * SMP initialisation and IPI support + * Based on arch/arm/kernel/smp.c + * + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */
+
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/cache.h>
+#include <linux/profile.h>
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/err.h>
+#include <linux/cpu.h>
+#include <linux/smp.h>
+#include <linux/seq_file.h>
+#include <linux/irq.h>
+#include <linux/percpu.h>
+#include <linux/clockchips.h>
+#include <linux/completion.h>
+#include <linux/of.h>
+
+#include <asm/atomic.h>
+#include <asm/cacheflush.h>
+#include <asm/cputype.h>
+#include <asm/cpu_ops.h>
+#include <asm/mmu_context.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/processor.h>
+#include <asm/smp_plat.h>
+#include <asm/sections.h>
+#include <asm/tlbflush.h>
+#include <asm/ptrace.h>
+#include <mach/wd_api.h>
+#include <linux/mt_sched_mon.h>
+#include <linux/mtk_ram_console.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/ipi.h>
+
+/*
+ * as from 2.5, kernels no longer have an init_tasks structure
+ * so we need some other way of telling a new secondary core
+ * where to place its SVC stack
+ */
+struct secondary_data secondary_data;
+
+enum ipi_msg_type {
+	IPI_RESCHEDULE,
+	IPI_CALL_FUNC,
+	IPI_CALL_FUNC_SINGLE,
+	IPI_CPU_STOP,
+};
+
+/*
+ * Boot a secondary CPU, and assign it the specified idle task.
+ * This also gives us the initial stack to use for this CPU.
+ *
+ * Returns the result of the cpu_boot method, or -EOPNOTSUPP when the
+ * CPU's operations do not provide one.
+ */
+static int __cpuinit boot_secondary(unsigned int cpu, struct task_struct *idle)
+{
+	if (cpu_ops[cpu]->cpu_boot)
+		return cpu_ops[cpu]->cpu_boot(cpu);
+
+	return -EOPNOTSUPP;
+}
+
+static DECLARE_COMPLETION(cpu_running);
+
+extern int check_pmic_wrap_init(void);
+extern void mt_pwrap_hal_init(void);
+extern void pmic_full_reset(void);
+
+/*
+ * Bring @cpu up with @idle as its idle task.
+ *
+ * Publishes the idle task's stack through secondary_data, asks the
+ * boot method to start the CPU and waits up to one second for it to
+ * signal cpu_running.  If the CPU did not come online a watchdog
+ * software reset is requested and -EIO is returned; if the boot method
+ * failed its error code is returned; otherwise 0.
+ */
+int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *idle)
+{
+	int ret, res;
+	struct wd_api *wd_api = NULL;
+
+	/*
+	 * We need to tell the secondary core where to find its stack and the
+	 * page tables.
+	 */
+	secondary_data.stack = task_stack_page(idle) + THREAD_START_SP;
+	__flush_dcache_area(&secondary_data, sizeof(secondary_data));
+
+	/*
+	 * Now bring the CPU into our world.
+	 */
+	ret = boot_secondary(cpu, idle);
+	if (ret == 0) {
+		/*
+		 * CPU was successfully started, wait for it to come online or
+		 * time out.
+		 */
+		wait_for_completion_timeout(&cpu_running,
+					    msecs_to_jiffies(1000));
+
+		if (!cpu_online(cpu)) {
+			pr_crit("CPU%u: failed to come online\n", cpu);
+			pr_crit("Trigger WDT RESET\n");
+			res = get_wd_api(&wd_api);
+			/*
+			 * Only call through wd_api when the lookup both
+			 * reported success and actually filled the pointer.
+			 */
+			if (res || !wd_api)
+				pr_crit("get wd api error !!\n");
+			else
+				wd_api->wd_sw_reset(3); /* asks the system to reboot */
+
+			ret = -EIO;
+		}
+	} else {
+		pr_err("CPU%u: failed to boot: %d\n", cpu, ret);
+	}
+
+	secondary_data.stack = NULL;
+
+	return ret;
+}
+
+/* Record the topology information of @cpuid. */
+static void __cpuinit smp_store_cpu_info(unsigned int cpuid)
+{
+	store_cpu_topology(cpuid);
+}
+
+/*
+ * This is the secondary CPU boot entry.  We're using this CPUs
+ * idle thread stack, but a set of temporary page tables.
+ *
+ * Each aee_rr_rec_hoplug() call records the step number just completed.
+ */
+asmlinkage void __cpuinit secondary_start_kernel(void)
+{
+	struct mm_struct *mm = &init_mm;
+	unsigned int cpu = smp_processor_id();
+	aee_rr_rec_hoplug(cpu, 1, 0);
+
+	printk("CPU%u: Booted secondary processor\n", cpu);
+
+	/*
+	 * All kernel threads share the same mm context; grab a
+	 * reference and switch to it.
+	 */
+	atomic_inc(&mm->mm_count);
+	current->active_mm = mm;
+	cpumask_set_cpu(cpu, mm_cpumask(mm));
+	aee_rr_rec_hoplug(cpu, 2, 0);
+
+	set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
+	aee_rr_rec_hoplug(cpu, 3, 0);
+
+	/*
+	 * TTBR0 is only used for the identity mapping at this stage. Make it
+	 * point to zero page to avoid speculatively fetching new entries.
+	 */
+	cpu_set_reserved_ttbr0();
+	aee_rr_rec_hoplug(cpu, 4, 0);
+	flush_tlb_all();
+	aee_rr_rec_hoplug(cpu, 5, 0);
+
+	preempt_disable();
+	aee_rr_rec_hoplug(cpu, 6, 0);
+	trace_hardirqs_off();
+	aee_rr_rec_hoplug(cpu, 7, 0);
+
+	if (cpu_ops[cpu]->cpu_postboot)
+		cpu_ops[cpu]->cpu_postboot();
+	aee_rr_rec_hoplug(cpu, 8, 0);
+
+	/*
+	 * OK, now it's safe to let the boot CPU continue.  Wait for
+	 * the CPU migration code to notice that the CPU is online
+	 * before we continue.
+	 */
+	set_cpu_online(cpu, true);
+	aee_rr_rec_hoplug(cpu, 9, 0);
+	complete(&cpu_running);
+	aee_rr_rec_hoplug(cpu, 10, 0);
+
+	smp_store_cpu_info(cpu);
+	aee_rr_rec_hoplug(cpu, 11, 0);
+
+	/*
+	 * Enable GIC and timers.
+	 */
+	notify_cpu_starting(cpu);
+	aee_rr_rec_hoplug(cpu, 12, 0);
+
+	local_dbg_enable();
+	aee_rr_rec_hoplug(cpu, 13, 0);
+	local_irq_enable();
+	aee_rr_rec_hoplug(cpu, 14, 0);
+	local_fiq_enable();
+	aee_rr_rec_hoplug(cpu, 15, 0);
+
+	/*
+	 * OK, it's off to the idle thread for us
+	 */
+	cpu_startup_entry(CPUHP_ONLINE);
+	aee_rr_rec_hoplug(cpu, 16, 0);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static int op_cpu_disable(unsigned int cpu)
+{
+	/*
+	 * If we don't have a cpu_die method, abort before we reach the point
+	 * of no return. CPU0 may not have an cpu_ops, so test for it.
+	 */
+	if (!cpu_ops[cpu] || !cpu_ops[cpu]->cpu_die)
+		return -EOPNOTSUPP;
+
+	/*
+	 * We may need to abort a hot unplug for some other mechanism-specific
+	 * reason.
+	 */
+	if (cpu_ops[cpu]->cpu_disable)
+		return cpu_ops[cpu]->cpu_disable(cpu);
+
+	return 0;
+}
+
+/*
+ * __cpu_disable runs on the processor to be shutdown.
+ */
+int __cpu_disable(void)
+{
+	unsigned int cpu = smp_processor_id();
+	int ret;
+
+	ret = op_cpu_disable(cpu);
+	if (ret)
+		return ret;
+
+	/*
+	 * Take this CPU offline.  Once we clear this, we can't return,
+	 * and we must not schedule until we're ready to give up the cpu.
+	 */
+	set_cpu_online(cpu, false);
+
+	/*
+	 * OK - migrate IRQs away from this CPU
+	 */
+	migrate_irqs();
+
+	/*
+	 * Remove this CPU from the vm mask set of all processes.
+	 */
+	clear_tasks_mm_cpumask(cpu);
+
+	return 0;
+}
+
+static int op_cpu_kill(unsigned int cpu)
+{
+	/*
+	 * If we have no means of synchronising with the dying CPU, then assume
+	 * that it is really dead. We can only wait for an arbitrary length of
+	 * time and hope that it's dead, so let's skip the wait and just hope.
+	 */
+	if (!cpu_ops[cpu]->cpu_kill)
+		return 1;
+
+	return cpu_ops[cpu]->cpu_kill(cpu);
+}
+
+static DECLARE_COMPLETION(cpu_died);
+
+/*
+ * called on the thread which is asking for a CPU to be shutdown -
+ * waits until shutdown has completed, or it is timed out.
+ */
+void __cpu_die(unsigned int cpu)
+{
+	if (!wait_for_completion_timeout(&cpu_died, msecs_to_jiffies(5000))) {
+		pr_crit("CPU%u: cpu didn't die\n", cpu);
+		return;
+	}
+	pr_notice("CPU%u: shutdown\n", cpu);
+
+	/*
+	 * Now that the dying CPU is beyond the point of no return w.r.t.
+	 * in-kernel synchronisation, try to get the firmware to help us to
+	 * verify that it has really left the kernel before we consider
+	 * clobbering anything it might still be using.
+	 */
+	if (!op_cpu_kill(cpu))
+		pr_warn("CPU%d may not have shut down cleanly\n", cpu);
+}
+
+/*
+ * Called from the idle thread for the CPU which has been shutdown.
+ *
+ * Note that we disable IRQs here, but do not re-enable them
+ * before returning to the caller. This is also the behaviour
+ * of the other hotplug-cpu capable cores, so presumably coming
+ * out of idle fixes this.
+ */
+void cpu_die(void)
+{
+	unsigned int cpu = smp_processor_id();
+	aee_rr_rec_hoplug(cpu, 51, 0);
+
+	idle_task_exit();
+	aee_rr_rec_hoplug(cpu, 52, 0);
+
+	local_irq_disable();
+	aee_rr_rec_hoplug(cpu, 53, 0);
+
+	/* Tell __cpu_die() that this CPU is now safe to dispose of */
+	complete(&cpu_died);
+	aee_rr_rec_hoplug(cpu, 54, 0);
+
+	/*
+	 * Actually shutdown the CPU. This must never fail. The specific hotplug
+	 * mechanism must perform all required cache maintenance to ensure that
+	 * no dirty lines are lost in the process of shutting down the CPU.
+	 */
+	cpu_ops[cpu]->cpu_die(cpu);
+	aee_rr_rec_hoplug(cpu, 55, 0);
+
+	BUG();
+}
+#endif
+
+/* Report how many processors are online and their summed BogoMIPS. */
+void __init smp_cpus_done(unsigned int max_cpus)
+{
+	unsigned long bogosum = loops_per_jiffy * num_online_cpus();
+
+	pr_info("SMP: Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
+		num_online_cpus(), bogosum / (500000/HZ),
+		(bogosum / (5000/HZ)) % 100);
+}
+
+/* Install the per-cpu offset of the CPU this runs on. */
+void __init smp_prepare_boot_cpu(void)
+{
+	set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
+}
+
+/*
+ * Enumerate the possible CPU set from the device tree and build the
+ * cpu logical map array containing MPIDR values related to logical
+ * cpus. Assumes that cpu_logical_map(0) has already been initialized.
+ */
+void __init smp_init_cpus(void)
+{
+	struct device_node *dn = NULL;
+	unsigned int i, cpu = 1;
+	bool bootcpu_valid = false;
+
+	while ((dn = of_find_node_by_type(dn, "cpu"))) {
+		const u32 *cell;
+		u64 hwid;
+
+		/*
+		 * A cpu node with missing "reg" property is
+		 * considered invalid to build a cpu_logical_map
+		 * entry.
+		 */
+		cell = of_get_property(dn, "reg", NULL);
+		if (!cell) {
+			pr_err("%s: missing reg property\n", dn->full_name);
+			goto next;
+		}
+		hwid = of_read_number(cell, of_n_addr_cells(dn));
+
+		/*
+		 * Non affinity bits must be set to 0 in the DT
+		 */
+		if (hwid & ~MPIDR_HWID_BITMASK) {
+			pr_err("%s: invalid reg property\n", dn->full_name);
+			goto next;
+		}
+
+		/*
+		 * Duplicate MPIDRs are a recipe for disaster. Scan
+		 * all initialized entries and check for
+		 * duplicates. If any is found just ignore the cpu.
+		 * cpu_logical_map was initialized to INVALID_HWID to
+		 * avoid matching valid MPIDR values.
+		 */
+		for (i = 1; (i < cpu) && (i < NR_CPUS); i++) {
+			if (cpu_logical_map(i) == hwid) {
+				pr_err("%s: duplicate cpu reg properties in the DT\n",
+					dn->full_name);
+				goto next;
+			}
+		}
+
+		/*
+		 * The numbering scheme requires that the boot CPU
+		 * must be assigned logical id 0. Record it so that
+		 * the logical map built from DT is validated and can
+		 * be used.
+		 */
+		if (hwid == cpu_logical_map(0)) {
+			if (bootcpu_valid) {
+				pr_err("%s: duplicate boot cpu reg property in DT\n",
+					dn->full_name);
+				goto next;
+			}
+
+			bootcpu_valid = true;
+
+			/*
+			 * cpu_logical_map has already been
+			 * initialized and the boot cpu doesn't need
+			 * the enable-method so continue without
+			 * incrementing cpu.
+			 */
+			continue;
+		}
+
+		if (cpu >= NR_CPUS)
+			goto next;
+
+		if (cpu_read_ops(dn, cpu) != 0)
+			goto next;
+
+		if (cpu_ops[cpu]->cpu_init(dn, cpu))
+			goto next;
+
+		pr_debug("cpu logical map 0x%llx\n", hwid);
+		cpu_logical_map(cpu) = hwid;
+next:
+		cpu++;
+	}
+
+	/* sanity check */
+	if (cpu > NR_CPUS)
+		pr_warning("no. of cores (%d) greater than configured maximum of %d - clipping\n",
+			   cpu, NR_CPUS);
+
+	if (!bootcpu_valid) {
+		pr_err("DT missing boot CPU MPIDR, not enabling secondaries\n");
+		return;
+	}
+
+	/*
+	 * All the cpus that made it to the cpu_logical_map have been
+	 * validated so set them as possible cpus.
+	 */
+	for (i = 0; i < NR_CPUS; i++)
+		if (cpu_logical_map(i) != INVALID_HWID)
+			set_cpu_possible(i, true);
+}
+
+/*
+ * Run the cpu_prepare method of up to (max_cpus - 1) possible CPUs
+ * other than the calling one and mark those that succeed as present.
+ */
+void __init smp_prepare_cpus(unsigned int max_cpus)
+{
+	int err;
+	unsigned int cpu, ncores = num_possible_cpus();
+
+	init_cpu_topology();
+
+	smp_store_cpu_info(smp_processor_id());
+
+
+	/*
+	 * are we trying to boot more cores than exist?
+	 */
+	if (max_cpus > ncores)
+		max_cpus = ncores;
+
+	/* Don't bother if we're effectively UP */
+	if (max_cpus <= 1)
+		return;
+
+	/*
+	 * Initialise the present map (which describes the set of CPUs
+	 * actually populated at the present time) and release the
+	 * secondaries from the bootloader.
+	 *
+	 * Make sure we online at most (max_cpus - 1) additional CPUs.
+	 */
+	max_cpus--;
+	for_each_possible_cpu(cpu) {
+		if (max_cpus == 0)
+			break;
+
+		if (cpu == smp_processor_id())
+			continue;
+
+		if (!cpu_ops[cpu])
+			continue;
+
+		err = cpu_ops[cpu]->cpu_prepare(cpu);
+		if (err)
+			continue;
+
+		set_cpu_present(cpu, true);
+		max_cpus--;
+	}
+}
+
+static void (*__smp_cross_call)(const struct cpumask *, unsigned int);
+
+/* Register @fn as the routine smp_cross_call() uses to raise an IPI. */
+void __init set_smp_cross_call(void (*fn)(const struct cpumask *, unsigned int))
+{
+	__smp_cross_call = fn;
+}
+
+static const char *ipi_types[NR_IPI] __tracepoint_string = {
+#define S(x,s)	[x] = s
+	S(IPI_RESCHEDULE, "Rescheduling interrupts"),
+	S(IPI_CALL_FUNC, "Function call interrupts"),
+	S(IPI_CALL_FUNC_SINGLE, "Single function call interrupts"),
+	S(IPI_CPU_STOP, "CPU stop interrupts"),
+};
+
+/* Trace and raise IPI number @ipinr on the CPUs in @target. */
+static void smp_cross_call(const struct cpumask *target, unsigned int ipinr)
+{
+	trace_ipi_raise(target, ipi_types[ipinr]);
+	__smp_cross_call(target, ipinr);
+}
+
+/* Print one row per IPI type with the count for every present CPU. */
+void show_ipi_list(struct seq_file *p, int prec)
+{
+	unsigned int cpu, i;
+
+	for (i = 0; i < NR_IPI; i++) {
+		seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i,
+			   prec >= 4 ? " " : "");
+		for_each_present_cpu(cpu)
+			seq_printf(p, "%10u ",
+				   __get_irq_stat(cpu, ipi_irqs[i]));
+		seq_printf(p, " %s\n", ipi_types[i]);
+	}
+}
+
+/* Return the sum of all IPI counters of @cpu. */
+u64 smp_irq_stat_cpu(unsigned int cpu)
+{
+	u64 sum = 0;
+	int i;
+
+	for (i = 0; i < NR_IPI; i++)
+		sum += __get_irq_stat(cpu, ipi_irqs[i]);
+
+	return sum;
+}
+
+void arch_send_call_function_ipi_mask(const struct cpumask *mask)
+{
+	smp_cross_call(mask, IPI_CALL_FUNC);
+}
+
+void arch_send_call_function_single_ipi(int cpu)
+{
+	smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE);
+}
+
+static DEFINE_RAW_SPINLOCK(stop_lock);
+
+/*
+ * ipi_cpu_stop - handle IPI from smp_send_stop()
+ *
+ * Marks the CPU offline, masks FIQ/IRQ and spins forever; it does not
+ * return.
+ */
+static void ipi_cpu_stop(unsigned int cpu)
+{
+	if (system_state == SYSTEM_BOOTING ||
+	    system_state == SYSTEM_RUNNING) {
+		raw_spin_lock(&stop_lock);
+		pr_crit("CPU%u: stopping\n", cpu);
+		dump_stack();
+		raw_spin_unlock(&stop_lock);
+	}
+
+	set_cpu_online(cpu, false);
+
+	local_fiq_disable();
+	local_irq_disable();
+
+	while (1)
+		cpu_relax();
+}
+
+/*
+ * Main handler for inter-processor interrupts
+ */
+void handle_IPI(int ipinr, struct pt_regs *regs)
+{
+	unsigned int cpu = smp_processor_id();
+	struct pt_regs *old_regs = set_irq_regs(regs);
+
+	if ((unsigned)ipinr < NR_IPI) {
+		trace_ipi_entry(ipi_types[ipinr]);
+		__inc_irq_stat(cpu, ipi_irqs[ipinr]);
+	}
+
+	switch (ipinr) {
+	case IPI_RESCHEDULE:
+		scheduler_ipi();
+		break;
+
+	case IPI_CALL_FUNC:
+		irq_enter();
+		mt_trace_ISR_start(ipinr);
+		generic_smp_call_function_interrupt();
+		mt_trace_ISR_end(ipinr);
+		irq_exit();
+		break;
+
+	case IPI_CALL_FUNC_SINGLE:
+		irq_enter();
+		mt_trace_ISR_start(ipinr);
+		generic_smp_call_function_single_interrupt();
+		mt_trace_ISR_end(ipinr);
+		irq_exit();
+		break;
+
+	case IPI_CPU_STOP:
+		irq_enter();
+		mt_trace_ISR_start(ipinr);
+		ipi_cpu_stop(cpu);
+		mt_trace_ISR_end(ipinr);
+		irq_exit();
+		break;
+
+	default:
+		pr_crit("CPU%u: Unknown IPI message 0x%x\n", cpu, ipinr);
+		break;
+	}
+
+	if ((unsigned)ipinr < NR_IPI)
+		trace_ipi_exit(ipi_types[ipinr]);
+	set_irq_regs(old_regs);
+}
+
+void smp_send_reschedule(int cpu)
+{
+	smp_cross_call(cpumask_of(cpu), IPI_RESCHEDULE);
+}
+
+/*
+ * Send IPI_CPU_STOP to every other online CPU and poll for up to one
+ * second until only the caller remains online.
+ */
+void smp_send_stop(void)
+{
+	unsigned long timeout;
+
+	if (num_online_cpus() > 1) {
+		cpumask_t mask;
+
+		cpumask_copy(&mask, cpu_online_mask);
+		cpu_clear(smp_processor_id(), mask);
+
+		smp_cross_call(&mask, IPI_CPU_STOP);
+	}
+
+	/* Wait up to one second for other CPUs to stop */
+	timeout = USEC_PER_SEC;
+	while (num_online_cpus() > 1 && timeout--)
+		udelay(1);
+
+	if (num_online_cpus() > 1)
+		pr_warning("SMP: failed to stop secondary CPUs\n");
+}
+
+/*
+ * not supported here
+ */
+int setup_profiling_timer(unsigned int multiplier)
+{
+	return -EINVAL;
+}
diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c
new file mode 100644
index 000000000..87f32f3d3
--- /dev/null
+++ b/arch/arm64/kernel/smp_spin_table.c
@@ -0,0 +1,170 @@
+/*
+ * Spin Table SMP initialisation
+ *
+ * Copyright (C) 2013 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/smp.h>
+
+#include <asm/cacheflush.h>
+#include <asm/cpu_ops.h>
+#include <asm/cputype.h>
+#include <asm/smp_plat.h>
+
+extern void secondary_holding_pen(void);
+volatile unsigned long secondary_holding_pen_release = INVALID_HWID;
+
+#include <linux/io.h>
+#include <linux/of_address.h>
+
+
+static phys_addr_t cpu_release_addr[NR_CPUS];
+static DEFINE_RAW_SPINLOCK(boot_lock);
+
+/*
+ * Write secondary_holding_pen_release in a way that is guaranteed to be
+ * visible to all observers, irrespective of whether they're taking part
+ * in coherency or not.  This is necessary for the hotplug code to work
+ * reliably.
+ */
+static void write_pen_release(u64 val)
+{
+	void *start = (void *)&secondary_holding_pen_release;
+	unsigned long size = sizeof(secondary_holding_pen_release);
+
+	secondary_holding_pen_release = val;
+	__flush_dcache_area(start, size);
+}
+
+
+/*
+ * Read the "cpu-release-addr" property of @dn into cpu_release_addr[cpu].
+ * Returns 0 on success, -1 if the property is missing or invalid.
+ */
+static int smp_spin_table_cpu_init(struct device_node *dn, unsigned int cpu)
+{
+	/*
+	 * Determine the address from which the CPU is polling.
+	 */
+	if (of_property_read_u64(dn, "cpu-release-addr",
+				 &cpu_release_addr[cpu])) {
+		pr_err("CPU %d: missing or invalid cpu-release-addr property\n",
+		       cpu);
+
+		return -1;
+	}
+
+	return 0;
+}
+
+/*MTK only*/
+#define CCI400_SI4_BASE                 0x5000
+#define CCI400_SI4_SNOOP_CONTROL        CCI400_SI4_BASE
+#define DVM_MSG_REQ                     (1U << 1)
+#define SNOOP_REQ                       (1U << 0)
+#define CCI400_STATUS                   0x000C
+#define CHANGE_PENDING                  (1U << 0)
+
+/*
+ * Prepare @cpu for release from its spin table.
+ *
+ * If a "mediatek,CCI400" node exists, snoop and DVM message requests are
+ * enabled on slave interface 4 and the change is waited for.  Then the
+ * physical address of secondary_holding_pen is written to the CPU's
+ * release address, flushed, and an event is sent.
+ *
+ * Returns 0 on success, -ENODEV when no release address is known and
+ * -ENOMEM when the CCI400 registers cannot be mapped.
+ */
+static int smp_spin_table_cpu_prepare(unsigned int cpu)
+{
+	void **release_addr;
+	struct device_node *node;
+	void __iomem *cci400_base;
+
+	if (!cpu_release_addr[cpu])
+		return -ENODEV;
+
+	/*MTK only. Setup coherence interface*/
+	node = of_find_compatible_node(NULL, NULL, "mediatek,CCI400");
+	if (node) {
+		cci400_base = of_iomap(node, 0);
+		/* Drop the reference taken by of_find_compatible_node(). */
+		of_node_put(node);
+		if (!cci400_base) {
+			pr_err("CPU %d: failed to map CCI400 registers\n", cpu);
+			return -ENOMEM;
+		}
+
+		printk(KERN_EMERG "1.CCI400_SI4_SNOOP_CONTROL:0x%p, 0x%08x\n", cci400_base + CCI400_SI4_SNOOP_CONTROL, readl(cci400_base + CCI400_SI4_SNOOP_CONTROL));
+		/* Enable snoop requests and DVM message requests*/
+		writel(readl(cci400_base + CCI400_SI4_SNOOP_CONTROL) | (SNOOP_REQ | DVM_MSG_REQ), cci400_base + CCI400_SI4_SNOOP_CONTROL);
+		while (readl(cci400_base + CCI400_STATUS) & CHANGE_PENDING)
+			;
+		printk(KERN_EMERG "2.CCI400_SI4_SNOOP_CONTROL:0x%p, 0x%08x\n", cci400_base + CCI400_SI4_SNOOP_CONTROL,readl(cci400_base + CCI400_SI4_SNOOP_CONTROL));
+
+		/* The mapping is only needed for the setup above. */
+		iounmap(cci400_base);
+	}
+
+	release_addr = __va(cpu_release_addr[cpu]);
+	release_addr[0] = (void *)__pa(secondary_holding_pen);
+	__flush_dcache_area(release_addr, sizeof(release_addr[0]));
+
+	/*
+	 * Send an event to wake up the secondary CPU.
+	 */
+	sev();
+
+	return 0;
+}
+
+/*
+ * Release @cpu from the holding pen and wait up to one second for it to
+ * reset secondary_holding_pen_release to INVALID_HWID.
+ * Returns 0 if it did, -ENOSYS otherwise.
+ */
+static int smp_spin_table_cpu_boot(unsigned int cpu)
+{
+	unsigned long timeout;
+
+	/*
+	 * Set synchronisation state between this boot processor
+	 * and the secondary one
+	 */
+	raw_spin_lock(&boot_lock);
+
+	/*
+	 * Update the pen release flag.
+	 */
+	write_pen_release(cpu_logical_map(cpu));
+
+	/*
+	 * Send an event, causing the secondaries to read pen_release.
+	 */
+	sev();
+
+	timeout = jiffies + (1 * HZ);
+	while (time_before(jiffies, timeout)) {
+		if (secondary_holding_pen_release == INVALID_HWID)
+			break;
+		udelay(10);
+	}
+
+	/*
+	 * Now the secondary core is starting up let it run its
+	 * calibrations, then wait for it to finish
+	 */
+	raw_spin_unlock(&boot_lock);
+
+	return secondary_holding_pen_release != INVALID_HWID ? -ENOSYS : 0;
+}
+
+void smp_spin_table_cpu_postboot(void)
+{
+	/*
+	 * Let the primary processor know we're out of the pen.
+	 */
+	write_pen_release(INVALID_HWID);
+
+	/*
+	 * Synchronise with the boot thread.
+	 */
+	raw_spin_lock(&boot_lock);
+	raw_spin_unlock(&boot_lock);
+}
+
+const struct cpu_operations smp_spin_table_ops = {
+	.name		= "spin-table",
+	.cpu_init	= smp_spin_table_cpu_init,
+	.cpu_prepare	= smp_spin_table_cpu_prepare,
+	.cpu_boot	= smp_spin_table_cpu_boot,
+	.cpu_postboot	= smp_spin_table_cpu_postboot,
+};
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
new file mode 100644
index 000000000..a797fa4bd
--- /dev/null
+++ b/arch/arm64/kernel/stacktrace.c
@@ -0,0 +1,131 @@
+/*
+ * Stack tracing support
+ *
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/stacktrace.h>
+
+#include <asm/stacktrace.h>
+
+/*
+ * AArch64 PCS assigns the frame pointer to x29.
+ *
+ * A simple function prologue looks like this:
+ *	sub	sp, sp, #0x10
+ *	stp	x29, x30, [sp]
+ *	mov	x29, sp
+ *
+ * A simple function epilogue looks like this:
+ *	mov	sp, x29
+ *	ldp	x29, x30, [sp]
+ *	add	sp, sp, #0x10
+ *
+ * unwind_frame() steps @frame to the caller's frame.  Returns 0 on
+ * success or -EINVAL when the frame pointer is misaligned or its
+ * 16-byte frame record does not lie between the current sp and the
+ * next THREAD_SIZE boundary.
+ */
+int notrace unwind_frame(struct stackframe *frame)
+{
+	unsigned long high, low;
+	unsigned long fp = frame->fp;
+
+	low  = frame->sp;
+	high = ALIGN(low, THREAD_SIZE);
+
+	/*
+	 * The record read below occupies [fp, fp + 0x10), so fp must leave
+	 * room for it below 'high'.
+	 */
+	if (fp < low || fp > high - 0x10 || fp & 0xf)
+		return -EINVAL;
+
+	frame->sp = fp + 0x10;
+	frame->fp = *(unsigned long *)(fp);
+	/*
+	 * -4 here because we care about the PC at time of bl,
+	 * not where the return will go.
+	 */
+	frame->pc = *(unsigned long *)(fp + 8) - 4;
+
+	return 0;
+}
+
+/*
+ * Call @fn on @frame and each frame unwound from it until @fn returns
+ * non-zero or unwinding fails.
+ */
+void notrace walk_stackframe(struct stackframe *frame,
+		     int (*fn)(struct stackframe *, void *), void *data)
+{
+	while (1) {
+		int ret;
+
+		if (fn(frame, data))
+			break;
+		ret = unwind_frame(frame);
+		if (ret < 0)
+			break;
+	}
+}
+EXPORT_SYMBOL(walk_stackframe);
+
+#ifdef CONFIG_STACKTRACE
+struct stack_trace_data {
+	struct stack_trace *trace;
+	unsigned int no_sched_functions;
+	unsigned int skip;
+};
+
+/*
+ * walk_stackframe() callback: append the frame's pc to the trace unless
+ * it is filtered or skipped.  Returns non-zero once the trace is full.
+ */
+static int save_trace(struct stackframe *frame, void *d)
+{
+	struct stack_trace_data *data = d;
+	struct stack_trace *trace = data->trace;
+	unsigned long addr = frame->pc;
+
+	if (data->no_sched_functions && in_sched_functions(addr))
+		return 0;
+	if (data->skip) {
+		data->skip--;
+		return 0;
+	}
+
+	trace->entries[trace->nr_entries++] = addr;
+
+	return trace->nr_entries >= trace->max_entries;
+}
+
+/*
+ * Save the stack trace of @tsk into @trace, terminated with ULONG_MAX
+ * when there is room left.
+ */
+void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
+{
+	struct stack_trace_data data;
+	struct stackframe frame;
+
+	data.trace = trace;
+	data.skip = trace->skip;
+
+	if (tsk != current) {
+		data.no_sched_functions = 1;
+		frame.fp = thread_saved_fp(tsk);
+		frame.sp = thread_saved_sp(tsk);
+		frame.pc = thread_saved_pc(tsk);
+	} else {
+		register unsigned long current_sp asm("sp");
+		data.no_sched_functions = 0;
+		frame.fp = (unsigned long)__builtin_frame_address(0);
+		frame.sp = current_sp;
+		frame.pc = (unsigned long)save_stack_trace_tsk;
+	}
+
+	walk_stackframe(&frame, save_trace, &data);
+	if (trace->nr_entries < trace->max_entries)
+		trace->entries[trace->nr_entries++] = ULONG_MAX;
+}
+
+void save_stack_trace(struct stack_trace *trace)
+{
+	save_stack_trace_tsk(current, trace);
+}
+EXPORT_SYMBOL_GPL(save_stack_trace);
+#endif
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
new file mode 100644
index 000000000..9924a2cb1
--- /dev/null
+++ b/arch/arm64/kernel/suspend.c
@@ -0,0 +1,151 @@
+#include <linux/percpu.h>
+#include <linux/slab.h>
+#include <asm/cacheflush.h>
+#include <asm/cpu_ops.h>
+#include <asm/debug-monitors.h>
+#include <asm/pgtable.h>
+#include <asm/memory.h>
+#include <asm/smp_plat.h>
+#include <asm/suspend.h>
+#include <asm/tlbflush.h>
+
+extern unsigned long * sleep_aee_rec_cpu_dormant_va;
+
+/*
+ * Store @pattern in slot @cpu of the dormant log, if a log buffer has
+ * been installed in sleep_aee_rec_cpu_dormant_va.
+ */
+#define DORMANT_LOG(cpu,pattern) do {					\
+	if (sleep_aee_rec_cpu_dormant_va != NULL) {			\
+		sleep_aee_rec_cpu_dormant_va[(cpu)] = (pattern);	\
+	}								\
+} while(0)
+
+extern int __cpu_suspend(unsigned long);
+/*
+ * This is called by __cpu_suspend() to save the state, and do whatever
+ * flushing is required to ensure that when the CPU goes to sleep we have
+ * the necessary data available when the caches are not searched.
+ *
+ * @arg: Argument to pass to suspend operations
+ * @ptr: CPU context virtual address
+ * @save_ptr: address of the location where the context physical address
+ *	      must be saved
+ *
+ * Returns whatever the CPU's cpu_suspend operation returns.
+ */
+int __cpu_suspend_finisher(unsigned long arg, struct cpu_suspend_ctx *ptr,
+			   phys_addr_t *save_ptr)
+{
+	int cpu = smp_processor_id();
+
+	/* Publish the context's physical address for the resume path. */
+	*save_ptr = virt_to_phys(ptr);
+
+	cpu_do_suspend(ptr);
+	/*
+	 * Only flush the context that must be retrieved with the MMU
+	 * off. VA primitives ensure the flush is applied to all
+	 * cache levels so context is pushed to DRAM.
+	 */
+	__flush_dcache_area(ptr, sizeof(*ptr));
+	__flush_dcache_area(save_ptr, sizeof(*save_ptr));
+
+	return cpu_ops[cpu]->cpu_suspend(arg);
+}
+
+/*
+ * This hook is provided so that cpu_suspend code can restore HW
+ * breakpoints as early as possible in the resume path, before reenabling
+ * debug exceptions. Code cannot be run from a CPU PM notifier since by the
+ * time the notifier runs debug exceptions might have been enabled already,
+ * with HW breakpoints registers content still in an unknown state.
+ */
+void (*hw_breakpoint_restore)(void *);
+
+/* Install @hw_bp_restore as the restore hook; only the first call wins. */
+void __init cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *))
+{
+	/* A hook that is already set is left alone (and warned about). */
+	if (!WARN_ON(hw_breakpoint_restore))
+		hw_breakpoint_restore = hw_bp_restore;
+}
+
+/**
+ * cpu_suspend
+ *
+ * @arg: argument to pass to the finisher function
+ *
+ * Returns -EOPNOTSUPP when the CPU has no suspend operation, otherwise
+ * the value returned by __cpu_suspend().
+ */
+int cpu_suspend(unsigned long arg)
+{
+	struct mm_struct *mm = current->active_mm;
+	int cpu = smp_processor_id();
+	unsigned long flags;
+	int ret;
+
+	DORMANT_LOG(cpu, 0x201);
+
+	/*
+	 * Fail early when cpu_ops were never registered for this CPU or
+	 * they carry no suspend method.
+	 */
+	if (!cpu_ops[cpu] || !cpu_ops[cpu]->cpu_suspend)
+		return -EOPNOTSUPP;
+
+	/*
+	 * Debug exceptions stay disabled from here on so that kernel
+	 * debuggers cannot update the mdscr register, which is saved and
+	 * restored together with the general purpose registers.
+	 */
+	local_dbg_save(flags);
+
+	/*
+	 * The mm context is saved on the stack and restored when the cpu
+	 * comes out of reset through the identity mapped page tables, so
+	 * the thread address space is set up properly on return.
+	 */
+	ret = __cpu_suspend(arg);
+
+	if (!ret) {
+		cpu_switch_mm(mm->pgd, mm);
+		flush_tlb_all();
+
+		/*
+		 * The per-cpu offset must be back in place before any
+		 * kernel subsystem relying on it gets to run.
+		 */
+		set_my_cpu_offset(per_cpu_offset(cpu));
+
+		/*
+		 * Bring the HW breakpoint registers back to sane values
+		 * before local_dbg_restore possibly re-enables debug
+		 * exceptions.
+		 */
+		if (hw_breakpoint_restore)
+			hw_breakpoint_restore(NULL);
+	}
+
+	/*
+	 * Restore pstate flags. OS lock and mdscr have already been
+	 * restored, so from here on debugging is fully re-enabled if it
+	 * was enabled when the core started shutdown.
+	 */
+	local_dbg_restore(flags);
+
+	return ret;
+}
+
+extern struct sleep_save_sp sleep_save_sp;
+extern phys_addr_t sleep_idmap_phys;
+
+/*
+ * Allocate the per-CPU context pointer stash and publish it, together
+ * with the identity map address, to the resume code.
+ */
+static int cpu_suspend_init(void)
+{
+	/* array of physical addresses, one slot per mpidr hash bucket */
+	void *stash = kcalloc(mpidr_hash_size(), sizeof(phys_addr_t),
+			      GFP_KERNEL);
+
+	if (WARN_ON(!stash))
+		return -ENOMEM;
+
+	sleep_save_sp.save_ptr_stash = stash;
+	sleep_save_sp.save_ptr_stash_phys = virt_to_phys(stash);
+	sleep_idmap_phys = virt_to_phys(idmap_pg_dir);
+	__flush_dcache_area(&sleep_save_sp, sizeof(struct sleep_save_sp));
+	__flush_dcache_area(&sleep_idmap_phys, sizeof(sleep_idmap_phys));
+
+	return 0;
+}
+early_initcall(cpu_suspend_init);
diff --git a/arch/arm64/kernel/swp_emulate.c b/arch/arm64/kernel/swp_emulate.c
new file mode 100644
index 000000000..508fd2edb
--- /dev/null
+++ b/arch/arm64/kernel/swp_emulate.c
@@ -0,0 +1,223 @@
+/*
+ * Derived from linux/arch/arm/kernel/swp_emulate.c
+ *
+ * Copyright (C) 2009 ARM Limited
+ * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Implements emulation of the SWP/SWPB instructions using load-exclusive and
+ * store-exclusive for processors that have them disabled (or future ones that
+ * might not implement them).
+ *
+ * Syntax of SWP{B} instruction: SWP{B}<c> <Rt>, <Rt2>, [<Rn>]
+ * Where: Rt  = destination
+ *	  Rt2 = source
+ *	  Rn  = address
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/sched.h>
+#include <linux/syscalls.h>
+#include <linux/perf_event.h>
+
+#include <asm/opcodes.h>
+#include <asm/traps.h>
+#include <asm/uaccess.h>
+#include <asm/system_misc.h>
+#include <linux/debugfs.h>
+
+/*
+ * Error-checking SWP macros implemented using ldrex{b}/strex{b}
+ */
+
+/*
+ * Atomically exchange the byte at @addr with @in.
+ *
+ * The exclusive load/store pair is retried until the store succeeds.
+ * On success 0 is returned and the previous byte is stored in *@out;
+ * if either access faults, -EFAULT is returned through the exception
+ * table fixup and *@out is left untouched.
+ */
+static int swpb(u8 in, u8 *out, u8 *addr)
+{
+	u8 _out;
+	int res;
+	int err = 0;
+
+	do {
+		/*
+		 * %1 (_out) is written by the load before %3 (in) and the
+		 * address in %4 are consumed by the store, so it must be
+		 * early-clobber to keep it out of the input registers.
+		 */
+		__asm__ __volatile__(
+		"0:	ldxrb	%w1, %4\n"
+		"1:	stxrb	%w0, %w3, %4\n"
+		"	mov	%w2, #0\n"
+		"2:\n"
+		"	.section	 .fixup,\"ax\"\n"
+		"	.align		2\n"
+		"3:	mov	%w2, %5\n"
+		"	b	2b\n"
+		"	.previous\n"
+		"	.section	 __ex_table,\"a\"\n"
+		"	.align		3\n"
+		"	.quad		0b, 3b\n"
+		"	.quad		1b, 3b\n"
+		"	.previous"
+		: "=&r" (res), "=&r" (_out), "=r" (err)
+		: "r" (in), "Q" (*addr), "i" (-EFAULT)
+		: "cc", "memory");
+	} while (err == 0 && res != 0);
+
+	if (err == 0)
+		*out = _out;
+	return err;
+}
+
+/*
+ * Atomically exchange the 32-bit word at @addr with @in.
+ *
+ * Same contract as swpb(): 0 and the previous word in *@out on success,
+ * -EFAULT with *@out untouched when an access faults.
+ */
+static int swp(u32 in, u32 *out, u32 *addr)
+{
+	u32 _out;
+	int res;
+	int err = 0;
+
+	do {
+		/* %1 (_out) is early-clobber for the same reason as in swpb(). */
+		__asm__ __volatile__(
+		"0:	ldxr	%w1, %4\n"
+		"1:	stxr	%w0, %w3, %4\n"
+		"	mov	%w2, #0\n"
+		"2:\n"
+		"	.section	 .fixup,\"ax\"\n"
+		"	.align		2\n"
+		"3:	mov	%w2, %5\n"
+		"	b	2b\n"
+		"	.previous\n"
+		"	.section	 __ex_table,\"a\"\n"
+		"	.align		3\n"
+		"	.quad		0b, 3b\n"
+		"	.quad		1b, 3b\n"
+		"	.previous"
+		: "=&r" (res), "=&r" (_out), "=r" (err)
+		: "r" (in), "Q" (*addr), "i" (-EFAULT)
+		: "cc", "memory");
+	} while (err == 0 && res != 0);
+
+	if (err == 0)
+		*out = _out;
+	return err;
+}
+/*
+ * Macros/defines for extracting register numbers from instruction.
+ */ +#define EXTRACT_REG_NUM(instruction, offset) \ + (((instruction) & (0xf << (offset))) >> (offset)) +#define RN_OFFSET 16 +#define RT_OFFSET 12 +#define RT2_OFFSET 0 +/* + * Bit 22 of the instruction encoding distinguishes between + * the SWP and SWPB variants (bit set means SWPB). + */ +#define TYPE_SWPB (1 << 22) + +static pid_t previous_pid; + +u64 swpb_count = 0; +u64 swp_count = 0; + +/* + * swp_handler logs the id of calling process, dissects the instruction, sanity + * checks the memory location, calls emulate_swpX for the actual operation and + * deals with fixup/error handling before returning + */ +static int swp_handler(struct pt_regs *regs, unsigned int instr) +{ + u32 destreg, data, type; + uintptr_t address; + unsigned int res = 0; + int err; + u32 temp32; + u8 temp8; + + perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, regs->pc); + + res = arm_check_condition(instr, regs->pstate); + switch (res) { + case ARM_OPCODE_CONDTEST_PASS: + break; + case ARM_OPCODE_CONDTEST_FAIL: + /* Condition failed - return to next instruction */ + regs->pc += 4; + return 0; + case ARM_OPCODE_CONDTEST_UNCOND: + /* If unconditional encoding - not a SWP, undef */ + return -EFAULT; + default: + return -EINVAL; + } + + if (current->pid != previous_pid) { + pr_warn("\"%s\" (%ld) uses obsolete SWP{B} instruction\n", + current->comm, (unsigned long)current->pid); + previous_pid = current->pid; + } + + address = regs->regs[EXTRACT_REG_NUM(instr, RN_OFFSET)] & 0xffffffff; + data = regs->regs[EXTRACT_REG_NUM(instr, RT2_OFFSET)]; + destreg = EXTRACT_REG_NUM(instr, RT_OFFSET); + + type = instr & TYPE_SWPB; + + /* Check access in reasonable access range for both SWP and SWPB */ + if (!access_ok(VERIFY_WRITE, (address & ~3), 4)) { + pr_debug("SWP{B} emulation: access to %p not allowed!\n", + (void *)address); + res = -EFAULT; + } + if (type == TYPE_SWPB) { + err = swpb((u8) data, &temp8, (u8 *) address); + if (err) + return err; + regs->regs[destreg] = temp8; + regs->pc += 4; 
+ swpb_count++; + } else if (address & 0x3) { + /* SWP to unaligned address not permitted */ + pr_debug("SWP instruction on unaligned pointer!\n"); + return -EFAULT; + } else { + err = swp((u32) data, &temp32, (u32 *) address); + if (err) + return err; + regs->regs[destreg] = temp32; + regs->pc += 4; + swp_count++; + } + + return 0; +} + +/* + * Only emulate SWP/SWPB executed in ARM state/User mode. + * The kernel must be SWP free and SWP{B} does not exist in Thumb/ThumbEE. + */ +static struct undef_hook swp_hook = { + .instr_mask = 0x0fb00ff0, + .instr_val = 0x01000090, + .pstate_mask = COMPAT_PSR_MODE_MASK | COMPAT_PSR_T_BIT, + .pstate_val = COMPAT_PSR_MODE_USR, + .fn = swp_handler +}; + +/* + * Register handler and create status file in /proc/cpu + * Invoked as late_initcall, since not needed before init spawned. + */ +static int __init swp_emulation_init(void) +{ + struct dentry *dir; + dir = debugfs_create_dir("swp_emulate", NULL); + debugfs_create_u64("swp_count", S_IRUGO | S_IWUSR, dir, &swp_count); + debugfs_create_u64("swpb_count", S_IRUGO | S_IWUSR, dir, &swpb_count); + + pr_notice("Registering SWP/SWPB emulation handler\n"); + register_undef_hook(&swp_hook); + + + return 0; +} + +late_initcall(swp_emulation_init); diff --git a/arch/arm64/kernel/sys.c b/arch/arm64/kernel/sys.c new file mode 100644 index 000000000..3fa98ff14 --- /dev/null +++ b/arch/arm64/kernel/sys.c @@ -0,0 +1,56 @@ +/* + * AArch64-specific system calls implementation + * + * Copyright (C) 2012 ARM Ltd. + * Author: Catalin Marinas <catalin.marinas@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/compiler.h> +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/export.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/syscalls.h> + +asmlinkage long sys_mmap(unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flags, + unsigned long fd, off_t off) +{ + if (offset_in_page(off) != 0) + return -EINVAL; + + return sys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT); +} + +/* + * Wrappers to pass the pt_regs argument. + */ +#define sys_rt_sigreturn sys_rt_sigreturn_wrapper + +#include <asm/syscalls.h> + +#undef __SYSCALL +#define __SYSCALL(nr, sym) [nr] = sym, + +/* + * The sys_call_table array must be 4K aligned to be accessible from + * kernel/entry.S. + */ +void *sys_call_table[__NR_syscalls] __aligned(4096) = { + [0 ... __NR_syscalls - 1] = sys_ni_syscall, +#include <asm/unistd.h> +}; diff --git a/arch/arm64/kernel/sys32.S b/arch/arm64/kernel/sys32.S new file mode 100644 index 000000000..a1b19ed74 --- /dev/null +++ b/arch/arm64/kernel/sys32.S @@ -0,0 +1,115 @@ +/* + * Compat system call wrappers + * + * Copyright (C) 2012 ARM Ltd. + * Authors: Will Deacon <will.deacon@arm.com> + * Catalin Marinas <catalin.marinas@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/linkage.h> + +#include <asm/assembler.h> +#include <asm/asm-offsets.h> + +/* + * System call wrappers for the AArch32 compatibility layer. + */ + +compat_sys_sigreturn_wrapper: + mov x0, sp + mov x27, #0 // prevent syscall restart handling (why) + b compat_sys_sigreturn +ENDPROC(compat_sys_sigreturn_wrapper) + +compat_sys_rt_sigreturn_wrapper: + mov x0, sp + mov x27, #0 // prevent syscall restart handling (why) + b compat_sys_rt_sigreturn +ENDPROC(compat_sys_rt_sigreturn_wrapper) + +compat_sys_statfs64_wrapper: + mov w3, #84 + cmp w1, #88 + csel w1, w3, w1, eq + b compat_sys_statfs64 +ENDPROC(compat_sys_statfs64_wrapper) + +compat_sys_fstatfs64_wrapper: + mov w3, #84 + cmp w1, #88 + csel w1, w3, w1, eq + b compat_sys_fstatfs64 +ENDPROC(compat_sys_fstatfs64_wrapper) + +/* + * Wrappers for AArch32 syscalls that either take 64-bit parameters + * in registers or that take 32-bit parameters which require sign + * extension. 
+ */ +compat_sys_pread64_wrapper: + orr x3, x4, x5, lsl #32 + b sys_pread64 +ENDPROC(compat_sys_pread64_wrapper) + +compat_sys_pwrite64_wrapper: + orr x3, x4, x5, lsl #32 + b sys_pwrite64 +ENDPROC(compat_sys_pwrite64_wrapper) + +compat_sys_truncate64_wrapper: + orr x1, x2, x3, lsl #32 + b sys_truncate +ENDPROC(compat_sys_truncate64_wrapper) + +compat_sys_ftruncate64_wrapper: + orr x1, x2, x3, lsl #32 + b sys_ftruncate +ENDPROC(compat_sys_ftruncate64_wrapper) + +compat_sys_readahead_wrapper: + orr x1, x2, x3, lsl #32 + mov w2, w4 + b sys_readahead +ENDPROC(compat_sys_readahead_wrapper) + +compat_sys_fadvise64_64_wrapper: + mov w6, w1 + orr x1, x2, x3, lsl #32 + orr x2, x4, x5, lsl #32 + mov w3, w6 + b sys_fadvise64_64 +ENDPROC(compat_sys_fadvise64_64_wrapper) + +compat_sys_sync_file_range2_wrapper: + orr x2, x2, x3, lsl #32 + orr x3, x4, x5, lsl #32 + b sys_sync_file_range2 +ENDPROC(compat_sys_sync_file_range2_wrapper) + +compat_sys_fallocate_wrapper: + orr x2, x2, x3, lsl #32 + orr x3, x4, x5, lsl #32 + b sys_fallocate +ENDPROC(compat_sys_fallocate_wrapper) + +#undef __SYSCALL +#define __SYSCALL(x, y) .quad y // x + +/* + * The system calls table must be 4KB aligned. + */ + .align 12 +ENTRY(compat_sys_call_table) +#include <asm/unistd32.h> diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c new file mode 100644 index 000000000..dc47e53e9 --- /dev/null +++ b/arch/arm64/kernel/sys_compat.c @@ -0,0 +1,94 @@ +/* + * Based on arch/arm/kernel/sys_arm.c + * + * Copyright (C) People who wrote linux/arch/i386/kernel/sys_i386.c + * Copyright (C) 1995, 1996 Russell King. + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/compat.h> +#include <linux/personality.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/syscalls.h> +#include <linux/uaccess.h> + +#include <asm/cacheflush.h> +#include <asm/unistd.h> + +static inline void +do_compat_cache_op(unsigned long start, unsigned long end, int flags) +{ + struct mm_struct *mm = current->active_mm; + struct vm_area_struct *vma; + + if (end < start || flags) + return; + + down_read(&mm->mmap_sem); + vma = find_vma(mm, start); + if (vma && vma->vm_start < end) { + if (start < vma->vm_start) + start = vma->vm_start; + if (end > vma->vm_end) + end = vma->vm_end; + up_read(&mm->mmap_sem); + __flush_cache_user_range(start & PAGE_MASK, PAGE_ALIGN(end)); + return; + } + up_read(&mm->mmap_sem); +} + +/* + * Handle all unrecognised system calls. + */ +long compat_arm_syscall(struct pt_regs *regs) +{ + unsigned int no = regs->regs[7]; + + switch (no) { + /* + * Flush a region from virtual address 'r0' to virtual address 'r1' + * _exclusive_. There is no alignment requirement on either address; + * user space does not need to know the hardware cache layout. + * + * r2 contains flags. It should ALWAYS be passed as ZERO until it + * is defined to be something else. For now we ignore it, but may + * the fires of hell burn in your belly if you break this rule. ;) + * + * (at a later date, we may want to allow this call to not flush + * various aspects of the cache. Passing '0' will guarantee that + * everything necessary gets flushed to maintain consistency in + * the specified region). 
+ */ + case __ARM_NR_compat_cacheflush: + do_compat_cache_op(regs->regs[0], regs->regs[1], regs->regs[2]); + return 0; + + case __ARM_NR_compat_set_tls: + current->thread.tp_value = regs->regs[0]; + + /* + * Protect against register corruption from context switch. + * See comment in tls_thread_flush. + */ + barrier(); + asm ("msr tpidrro_el0, %0" : : "r" (regs->regs[0])); + return 0; + + default: + return -ENOSYS; + } +} diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c new file mode 100644 index 000000000..3a369aa36 --- /dev/null +++ b/arch/arm64/kernel/time.c @@ -0,0 +1,82 @@ +/* + * Based on arch/arm/kernel/time.c + * + * Copyright (C) 1991, 1992, 1995 Linus Torvalds + * Modifications for ARM (C) 1994-2001 Russell King + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <linux/export.h> +#include <linux/kernel.h> +#include <linux/interrupt.h> +#include <linux/time.h> +#include <linux/init.h> +#include <linux/sched.h> +#include <linux/smp.h> +#include <linux/timex.h> +#include <linux/errno.h> +#include <linux/profile.h> +#include <linux/syscore_ops.h> +#include <linux/timer.h> +#include <linux/irq.h> +#include <linux/delay.h> +#include <linux/clocksource.h> + +#include <clocksource/arm_arch_timer.h> + +#include <asm/thread_info.h> +#include <asm/stacktrace.h> + +#ifdef CONFIG_SMP +unsigned long profile_pc(struct pt_regs *regs) +{ + struct stackframe frame; + + if (!in_lock_functions(regs->pc)) + return regs->pc; + + frame.fp = regs->regs[29]; + frame.sp = regs->sp; + frame.pc = regs->pc; + do { + int ret = unwind_frame(&frame); + if (ret < 0) + return 0; + } while (in_lock_functions(frame.pc)); + + return frame.pc; +} +EXPORT_SYMBOL(profile_pc); +#endif + +int read_current_timer(unsigned long *timer_value) +{ + *timer_value = arch_timer_read_counter(); + return 0; +} + +void __init time_init(void) +{ + u32 arch_timer_rate; + + clocksource_of_init(); + + arch_timer_rate = arch_timer_get_rate(); + if (!arch_timer_rate) + panic("Unable to initialise architected timer.\n"); + + /* Calibrate the delay loop directly */ + lpj_fine = arch_timer_rate / HZ; +} diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c new file mode 100644 index 000000000..24033bf7c --- /dev/null +++ b/arch/arm64/kernel/topology.c @@ -0,0 +1,1233 @@ +/* + * arch/arm/kernel/topology.c + * + * Copyright (C) 2011 Linaro Limited. + * Written by: Vincent Guittot + * + * based on arch/sh/kernel/topology.c + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. 
+ */ + +#include <linux/cpu.h> +#include <linux/cpumask.h> +#include <linux/export.h> +#include <linux/init.h> +#include <linux/percpu.h> +#include <linux/node.h> +#include <linux/nodemask.h> +#include <linux/of.h> +#include <linux/sched.h> +#include <linux/slab.h> + +#include <asm/cputype.h> +#include <asm/smp_plat.h> +#include <asm/topology.h> + +/* + * cpu power scale management + */ + +/* + * cpu power table + * This per cpu data structure describes the relative capacity of each core. + * On a heterogeneous system, cores don't have the same computation capacity + * and we reflect that difference in the cpu_power field so the scheduler can + * take this difference into account during load balance. A per cpu structure + * is preferred because each CPU updates its own cpu_power field during the + * load balance except for idle cores. One idle core is selected to run the + * rebalance_domains for all idle cores and the cpu_power can be updated + * during this sequence. + */ + +/* when CONFIG_ARCH_SCALE_INVARIANT_CPU_CAPACITY is in use, a new measure of + * compute capacity is available. This is limited to a maximum of 1024 and + * scaled between 0 and 1023 according to frequency. + * Cores with different base CPU powers are scaled in line with this. + * CPU capacity for each core represents a comparable ratio to maximum + * achievable core compute capacity for a core in this system. + * + * e.g.1 If all cores in the system have a base CPU power of 1024 according to + * efficiency calculations and are DVFS scalable between 500MHz and 1GHz, the + * cores currently at 1GHz will have CPU power of 1024 whilst the cores + * currently at 500MHz will have CPU power of 512.
+ * + * e.g.2 + * If core 0 has a base CPU power of 2048 and runs at 500MHz & 1GHz whilst + * core 1 has a base CPU power of 1024 and runs at 100MHz and 200MHz, then + * the following possibilities are available: + * + * cpu power\| 1GHz:100Mhz | 1GHz : 200MHz | 500MHz:100MHz | 500MHz:200MHz | + * ----------|-------------|---------------|---------------|---------------| + * core 0 | 1024 | 1024 | 512 | 512 | + * core 1 | 256 | 512 | 256 | 512 | + * + * This information may be useful to the scheduler when load balancing, + * so that the compute capacity of the core a task ran on can be baked into + * task load histories. + */ +static DEFINE_PER_CPU(unsigned long, cpu_scale); +#ifdef CONFIG_ARCH_SCALE_INVARIANT_CPU_CAPACITY +static DEFINE_PER_CPU(unsigned long, base_cpu_capacity); +static DEFINE_PER_CPU(unsigned long, invariant_cpu_capacity); +static DEFINE_PER_CPU(unsigned long, prescaled_cpu_capacity); +#endif /* CONFIG_ARCH_SCALE_INVARIANT_CPU_CAPACITY */ + +static int frequency_invariant_power_enabled = 1; + +/* >0=1, <=0=0 */ +void arch_set_invariant_power_enabled(int val) +{ + if(val>0) + frequency_invariant_power_enabled = 1; + else + frequency_invariant_power_enabled = 0; +} + +int arch_get_invariant_power_enabled(void) +{ + return frequency_invariant_power_enabled; +} + +unsigned long arch_scale_freq_power(struct sched_domain *sd, int cpu) +{ + return per_cpu(cpu_scale, cpu); +} + +#ifdef CONFIG_ARCH_SCALE_INVARIANT_CPU_CAPACITY +unsigned long arch_get_cpu_capacity(int cpu) +{ + return per_cpu(invariant_cpu_capacity, cpu); +} +unsigned long arch_get_max_cpu_capacity(int cpu) +{ + return per_cpu(base_cpu_capacity, cpu); +} +#endif /* CONFIG_ARCH_SCALE_INVARIANT_CPU_CAPACITY */ + +static void set_power_scale(unsigned int cpu, unsigned long power) +{ + per_cpu(cpu_scale, cpu) = power; +} + +#ifdef CONFIG_OF +struct cpu_efficiency { + const char *compatible; + unsigned long efficiency; +}; + +/* + * Table of relative efficiency of each processors + * The 
efficiency value must fit in 20bit and the final + * cpu_scale value must be in the range + * 0 < cpu_scale < 3*SCHED_POWER_SCALE/2 + * in order to return at most 1 when DIV_ROUND_CLOSEST + * is used to compute the capacity of a CPU. + * Processors that are not defined in the table, + * use the default SCHED_POWER_SCALE value for cpu_scale. + */ +struct cpu_efficiency table_efficiency[] = { + {"arm,cortex-a57", 3891}, + {"arm,cortex-a53", 2048}, + {NULL, }, +}; + +struct cpu_capacity { + unsigned long hwid; + unsigned long capacity; +}; + +struct cpu_capacity *cpu_capacity; + +unsigned long middle_capacity = 1; +/* + * Iterate all CPUs' descriptor in DT and compute the efficiency + * (as per table_efficiency). Also calculate a middle efficiency + * as close as possible to (max{eff_i} - min{eff_i}) / 2 + * This is later used to scale the cpu_power field such that an + * 'average' CPU is of middle power. Also see the comments near + * table_efficiency[] and update_cpu_power(). + */ +static void __init parse_dt_topology(void) +{ + struct cpu_efficiency *cpu_eff; + struct device_node *cn = NULL; + unsigned long min_capacity = (unsigned long)(-1); + unsigned long max_capacity = 0; + unsigned long capacity = 0; + int alloc_size, cpu = 0; + + alloc_size = nr_cpu_ids * sizeof(struct cpu_capacity); + cpu_capacity = kzalloc(alloc_size, GFP_NOWAIT); + + while ((cn = of_find_node_by_type(cn, "cpu"))) { + const u32 *rate, *reg; + int len; + + if (cpu >= num_possible_cpus()) + break; + + for (cpu_eff = table_efficiency; cpu_eff->compatible; cpu_eff++) + if (of_device_is_compatible(cn, cpu_eff->compatible)) + break; + + if (cpu_eff->compatible == NULL) + continue; + + rate = of_get_property(cn, "clock-frequency", &len); + if (!rate || len != 4) { + pr_err("%s missing clock-frequency property\n", + cn->full_name); + continue; + } + + reg = of_get_property(cn, "reg", &len); + if (!reg || len != 4) { + pr_err("%s missing reg property\n", cn->full_name); + continue; + } + + capacity 
= ((be32_to_cpup(rate)) >> 20) * cpu_eff->efficiency; + + /* Save min capacity of the system */ + if (capacity < min_capacity) + min_capacity = capacity; + + /* Save max capacity of the system */ + if (capacity > max_capacity) + max_capacity = capacity; + + cpu_capacity[cpu].capacity = capacity; + cpu_capacity[cpu++].hwid = be32_to_cpup(reg); + } + + if (cpu < num_possible_cpus()) + cpu_capacity[cpu].hwid = (unsigned long)(-1); + + /* If min and max capacities are equals, we bypass the update of the + * cpu_scale because all CPUs have the same capacity. Otherwise, we + * compute a middle_capacity factor that will ensure that the capacity + * of an 'average' CPU of the system will be as close as possible to + * SCHED_POWER_SCALE, which is the default value, but with the + * constraint explained near table_efficiency[]. + */ + if (min_capacity == max_capacity) + cpu_capacity[0].hwid = (unsigned long)(-1); + else if (4*max_capacity < (3*(max_capacity + min_capacity))) + middle_capacity = (min_capacity + max_capacity) + >> (SCHED_POWER_SHIFT+1); + else + middle_capacity = ((max_capacity / 3) + >> (SCHED_POWER_SHIFT-1)) + 1; + +} + +/* + * Look for a customed capacity of a CPU in the cpu_capacity table during the + * boot. The update of all CPUs is in O(n^2) for heteregeneous system but the + * function returns directly for SMP system. 
+ */ +void update_cpu_power(unsigned int cpu, unsigned long hwid) +{ + unsigned int idx = 0; + + /* look for the cpu's hwid in the cpu capacity table */ + for (idx = 0; idx < num_possible_cpus(); idx++) { + if (cpu_capacity[idx].hwid == hwid) + break; + + if (cpu_capacity[idx].hwid == -1) + return; + } + + if (idx == num_possible_cpus()) + return; + + set_power_scale(cpu, cpu_capacity[idx].capacity / middle_capacity); + + printk(KERN_INFO "CPU%u: update cpu_power %lu\n", + cpu, arch_scale_freq_power(NULL, cpu)); +} + +#else +static inline void parse_dt_topology(void) {} +static inline void update_cpu_power(unsigned int cpuid, unsigned int mpidr) {} +#endif + +/* + * cpu topology table + */ +struct cputopo_arm cpu_topology[NR_CPUS]; +EXPORT_SYMBOL_GPL(cpu_topology); + +#if defined (CONFIG_MTK_SCHED_CMP_PACK_SMALL_TASK) || defined (CONFIG_HMP_PACK_SMALL_TASK) +int arch_sd_share_power_line(void) +{ + return 0*SD_SHARE_POWERLINE; +} +#endif /* CONFIG_MTK_SCHED_CMP_PACK_SMALL_TASK || CONFIG_HMP_PACK_SMALL_TASK */ + +const struct cpumask *cpu_coregroup_mask(int cpu) +{ + return &cpu_topology[cpu].core_sibling; +} + +void update_siblings_masks(unsigned int cpuid) +{ + struct cputopo_arm *cpu_topo, *cpuid_topo = &cpu_topology[cpuid]; + int cpu; + + /* update core and thread sibling masks */ + for_each_possible_cpu(cpu) { + cpu_topo = &cpu_topology[cpu]; + + if (cpuid_topo->socket_id != cpu_topo->socket_id) + continue; + + cpumask_set_cpu(cpuid, &cpu_topo->core_sibling); + if (cpu != cpuid) + cpumask_set_cpu(cpu, &cpuid_topo->core_sibling); + + if (cpuid_topo->core_id != cpu_topo->core_id) + continue; + + cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling); + if (cpu != cpuid) + cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling); + } + smp_wmb(); +} + +#ifdef CONFIG_MTK_CPU_TOPOLOGY + +enum { + ARCH_UNKNOWN = 0, + ARCH_SINGLE_CLUSTER, + ARCH_MULTI_CLUSTER, + ARCH_BIG_LITTLE, +}; + +struct cpu_cluster { + int cluster_id; + cpumask_t siblings; + void *next; +}; + +struct 
cpu_compatible { + const char *name; + const unsigned int cpuidr; + struct cpu_cluster *cluster; + int clscnt; +}; + +struct cpu_arch_info { + struct cpu_compatible *compat_big; + struct cpu_compatible *compat_ltt; + bool arch_ready; + int arch_type; + int nr_clusters; +}; + +/* NOTE: absolute decending ordered by cpu capacity */ +struct cpu_compatible cpu_compat_table[] = { + { "arm,cortex-a57", ARM_CPU_PART_CORTEX_A57, NULL, 0 }, + { "arm,cortex-a53", ARM_CPU_PART_CORTEX_A53, NULL, 0 }, + { NULL, 0, NULL, 0 } +}; + +static struct cpu_compatible* compat_cputopo[NR_CPUS]; + +static struct cpu_arch_info default_cpu_arch = { + NULL, + NULL, + 0, + ARCH_UNKNOWN, + 0, +}; +static struct cpu_arch_info *glb_cpu_arch = &default_cpu_arch; + +static int __arch_type(void) +{ + int i, num_compat = 0; + + if (!glb_cpu_arch->arch_ready) + return ARCH_UNKNOWN; + + // return the cached setting if query more than once. + if (glb_cpu_arch->arch_type != ARCH_UNKNOWN) + return glb_cpu_arch->arch_type; + + for (i = 0; i < ARRAY_SIZE(cpu_compat_table); i++) { + struct cpu_compatible *mc = &cpu_compat_table[i]; + if (mc->clscnt != 0) + num_compat++; + } + + if (num_compat > 1) + glb_cpu_arch->arch_type = ARCH_BIG_LITTLE; + else if (glb_cpu_arch->nr_clusters > 1) + glb_cpu_arch->arch_type = ARCH_MULTI_CLUSTER; + else if (num_compat == 1 && glb_cpu_arch->nr_clusters == 1) + glb_cpu_arch->arch_type = ARCH_SINGLE_CLUSTER; + + return glb_cpu_arch->arch_type; +} + +static DEFINE_SPINLOCK(__cpu_cluster_lock); +static void __setup_cpu_cluster(const unsigned int cpu, + struct cpu_compatible * const cpt, + const u32 mpidr) +{ + struct cpu_cluster *prev_cls, *cls; + u32 cls_id = -1; + + if (mpidr & MPIDR_MT_BITMASK) + cls_id = MPIDR_AFFINITY_LEVEL(mpidr, 2); + else + cls_id = MPIDR_AFFINITY_LEVEL(mpidr, 1); + + spin_lock(&__cpu_cluster_lock); + + cls = cpt->cluster; + prev_cls = cls; + while (cls) { + if (cls->cluster_id == cls_id) + break; + prev_cls = cls; + cls = (struct cpu_cluster 
*)cls->next; + } + + if (!cls) { + cls = kzalloc(sizeof(struct cpu_cluster), GFP_ATOMIC); + BUG_ON(!cls); + cls->cluster_id = cls_id; + cpt->clscnt++; + glb_cpu_arch->nr_clusters++; + /* link it */ + if (!cpt->cluster) + cpt->cluster = cls; + else + prev_cls->next = cls; + } + BUG_ON(cls->cluster_id != cls_id); + + cpumask_set_cpu(cpu, &cls->siblings); + smp_wmb(); + + spin_unlock(&__cpu_cluster_lock); +} + +static void setup_cputopo(const unsigned int cpu, + struct cpu_compatible * const cpt, + const u32 mpidr) + +{ + if (compat_cputopo[cpu]) + return; + + compat_cputopo[cpu] = cpt; + + if (!glb_cpu_arch->compat_big || glb_cpu_arch->compat_big > cpt) + glb_cpu_arch->compat_big = cpt; + + if (!glb_cpu_arch->compat_ltt || glb_cpu_arch->compat_ltt < cpt) + glb_cpu_arch->compat_ltt = cpt; + + __setup_cpu_cluster(cpu, cpt, mpidr); +} + +static void setup_cputopo_def(const unsigned int cpu) +{ + struct cpu_compatible *idx = NULL; + unsigned int cpuidr = 0, mpidr; + + BUG_ON(cpu != smp_processor_id()); + cpuidr = read_cpuid_part_number(); + mpidr = read_cpuid_mpidr(); + for (idx = cpu_compat_table; idx->name; idx++) { + if (idx->cpuidr == cpuidr) + break; + } + BUG_ON(!idx || !idx->name); + setup_cputopo(cpu, idx, mpidr); +} + +static void reset_cputopo(void) +{ + struct cpu_compatible *idx; + + memset(glb_cpu_arch, 0, sizeof(struct cpu_arch_info)); + glb_cpu_arch->arch_type = ARCH_UNKNOWN; + + memset(&compat_cputopo, 0, sizeof(compat_cputopo)); + + spin_lock(&__cpu_cluster_lock); + for (idx = cpu_compat_table; idx->name; idx++) { + struct cpu_cluster *curr, *next; + + if (idx->clscnt == 0) + continue; + BUG_ON(!idx->cluster); + + curr = idx->cluster; + next = (struct cpu_cluster *)curr->next; + kfree(curr); + + while (next) { + curr = next; + next = (struct cpu_cluster *)curr->next; + kfree(curr); + } + idx->cluster = NULL; + idx->clscnt = 0; + } + spin_unlock(&__cpu_cluster_lock); +} + +/* verify cpu topology correctness by device tree. 
+ * This function is called when current CPU is cpuid! + */ +static void verify_cputopo(const unsigned int cpuid, const u32 mpidr) +{ + struct cputopo_arm *cpuid_topo = &cpu_topology[cpuid]; + struct cpu_compatible *cpt; + struct cpu_cluster *cls; + + if (!glb_cpu_arch->arch_ready) { + int i; + + setup_cputopo_def(cpuid); + for (i = 0; i < nr_cpu_ids; i++) + if (!compat_cputopo[i]) + break; + if (i == nr_cpu_ids) + glb_cpu_arch->arch_ready = true; + + return; + } + + cpt = compat_cputopo[cpuid]; + BUG_ON(!cpt); + cls = cpt->cluster; + while (cls) { + if (cpu_isset(cpuid, cls->siblings)) + break; + cls = cls->next; + } + BUG_ON(!cls); + WARN(cls->cluster_id != cpuid_topo->socket_id, + "[%s] cpu id: %d, cluster id (%d) != socket id (%d)\n", + __func__, cpuid, cls->cluster_id, cpuid_topo->socket_id); +} + +/* + * return 1 while every cpu is recognizible + */ +void arch_build_cpu_topology_domain(void) +{ + struct device_node *cn = NULL; + unsigned int cpu = 0; + u32 mpidr; + + memset(&compat_cputopo, 0, sizeof(compat_cputopo)); + // default by device tree parsing + while ((cn = of_find_node_by_type(cn, "cpu"))) { + struct cpu_compatible *idx; + const u32 *reg; + int len; + + if (unlikely(cpu >= nr_cpu_ids)) { + pr_err("[CPUTOPO][%s] device tree cpu%d is over possible's\n", + __func__, cpu); + break; + } + + for (idx = cpu_compat_table; idx->name; idx++) + if (of_device_is_compatible(cn, idx->name)) + break; + + if (!idx || !idx->name) { + int cplen; + const char *cp; + cp = (char *) of_get_property(cn, "compatible", &cplen); + pr_err("[CPUTOPO][%s] device tree cpu%d (%s) is not compatible!!\n", + __func__, cpu, cp); + break; + } + + reg = of_get_property(cn, "reg", &len); + if (!reg || len != 4) { + pr_err("[CPUTOPO][%s] missing reg property\n", cn->full_name); + break; + } + mpidr = be32_to_cpup(reg); + setup_cputopo(cpu, idx, mpidr); + cpu++; + } + glb_cpu_arch->arch_ready = (cpu == nr_cpu_ids); + + if (!glb_cpu_arch->arch_ready) { + pr_warn("[CPUTOPO][%s] build cpu 
topology failed, to be handled by mpidr/cpuidr regs!\n", __func__); + reset_cputopo(); + setup_cputopo_def(smp_processor_id()); + } +} + +int arch_cpu_is_big(unsigned int cpu) +{ + int type; + + if (unlikely(cpu >= nr_cpu_ids)) + BUG(); + + type = __arch_type(); + switch(type) { + case ARCH_BIG_LITTLE: + return (compat_cputopo[cpu] == glb_cpu_arch->compat_big); + default: + /* treat as little */ + return 0; + } +} + +int arch_cpu_is_little(unsigned int cpu) +{ + int type; + + if (unlikely(cpu >= nr_cpu_ids)) + BUG(); + + type = __arch_type(); + switch(type) { + case ARCH_BIG_LITTLE: + return (compat_cputopo[cpu] == glb_cpu_arch->compat_ltt); + default: + /* treat as little */ + return 1; + } +} + +int arch_is_multi_cluster(void) +{ + return (__arch_type() == ARCH_MULTI_CLUSTER || __arch_type() == ARCH_BIG_LITTLE); +} + +int arch_is_big_little(void) +{ + return (__arch_type() == ARCH_BIG_LITTLE); +} + +int arch_get_nr_clusters(void) +{ + return glb_cpu_arch->nr_clusters; +} + +int arch_get_cluster_id(unsigned int cpu) +{ + struct cputopo_arm *arm_cputopo = &cpu_topology[cpu]; + struct cpu_compatible *cpt; + struct cpu_cluster *cls; + + BUG_ON(cpu >= nr_cpu_ids); + if (!glb_cpu_arch->arch_ready) { + WARN_ONCE(!glb_cpu_arch->arch_ready, "[CPUTOPO][%s] cpu(%d), socket_id(%d) topology is not ready!\n", + __func__, cpu, arm_cputopo->socket_id); + if (unlikely(arm_cputopo->socket_id < 0)) + return 0; + return arm_cputopo->socket_id; + } + + cpt = compat_cputopo[cpu]; + BUG_ON(!cpt); + cls = cpt->cluster; + while (cls) { + if (cpu_isset(cpu, cls->siblings)) + break; + cls = cls->next; + } + BUG_ON(!cls); + WARN_ONCE(cls->cluster_id != arm_cputopo->socket_id, "[CPUTOPO][%s] cpu(%d): cluster_id(%d) != socket_id(%d) !\n", + __func__, cpu, cls->cluster_id, arm_cputopo->socket_id); + + return cls->cluster_id; +} + +static struct cpu_cluster *__get_cluster_slowpath(int cluster_id) +{ + int i = 0; + struct cpu_compatible *cpt; + struct cpu_cluster *cls; + + for (i = 0; i < 
nr_cpu_ids; i++) { + cpt = compat_cputopo[i]; + BUG_ON(!cpt); + cls = cpt->cluster; + while (cls) { + if (cls->cluster_id == cluster_id) + return cls; + cls = cls->next; + } + } + return NULL; +} + +void arch_get_cluster_cpus(struct cpumask *cpus, int cluster_id) +{ + struct cpu_cluster *cls = NULL; + + cpumask_clear(cpus); + + if (likely(glb_cpu_arch->compat_ltt)) { + cls = glb_cpu_arch->compat_ltt->cluster; + while (cls) { + if (cls->cluster_id == cluster_id) + goto found; + cls = cls->next; + } + } + if (likely(glb_cpu_arch->compat_big)) { + cls = glb_cpu_arch->compat_big->cluster; + while (cls) { + if (cls->cluster_id == cluster_id) + goto found; + cls = cls->next; + } + } + + cls = __get_cluster_slowpath(cluster_id); + BUG_ON(!cls); // debug only.. remove later... + if (!cls) + return; + +found: + cpumask_copy(cpus, &cls->siblings); +} + +/* + * arch_get_big_little_cpus - get big/LITTLE cores in cpumask + * @big: the cpumask pointer of big cores + * @little: the cpumask pointer of little cores + * + * Treat it as little cores, if it's not big.LITTLE architecture + */ +void arch_get_big_little_cpus(struct cpumask *big, struct cpumask *little) +{ + int type; + struct cpu_cluster *cls = NULL; + struct cpumask tmpmask; + unsigned int cpu; + + if (unlikely(!glb_cpu_arch->arch_ready)) + BUG(); + + type = __arch_type(); + spin_lock(&__cpu_cluster_lock); + switch(type) { + case ARCH_BIG_LITTLE: + if (likely(1 == glb_cpu_arch->compat_big->clscnt)) { + cls = glb_cpu_arch->compat_big->cluster; + cpumask_copy(big, &cls->siblings); + } else { + cls = glb_cpu_arch->compat_big->cluster; + while (cls) { + cpumask_or(&tmpmask, big, &cls->siblings); + cpumask_copy(big, &tmpmask); + cls = cls->next; + } + } + if (likely(1 == glb_cpu_arch->compat_ltt->clscnt)) { + cls = glb_cpu_arch->compat_ltt->cluster; + cpumask_copy(little, &cls->siblings); + } else { + cls = glb_cpu_arch->compat_ltt->cluster; + while (cls) { + cpumask_or(&tmpmask, little, &cls->siblings); + 
cpumask_copy(little, &tmpmask); + cls = cls->next; + } + } + break; + default: + /* treat as little */ + cpumask_clear(big); + cpumask_clear(little); + for_each_possible_cpu(cpu) + cpumask_set_cpu(cpu, little); + } + spin_unlock(&__cpu_cluster_lock); +} +#else /* !CONFIG_MTK_CPU_TOPOLOGY */ +int arch_cpu_is_big(unsigned int cpu) { return 0; } +int arch_cpu_is_little(unsigned int cpu) { return 1; } +int arch_is_big_little(void) { return 0; } + +int arch_get_nr_clusters(void) +{ + int max_id = 0; + unsigned int cpu; + + // assume socket id is monotonic increasing without gap. + for_each_possible_cpu(cpu) { + struct cputopo_arm *arm_cputopo = &cpu_topology[cpu]; + if (arm_cputopo->socket_id > max_id) + max_id = arm_cputopo->socket_id; + } + return max_id+1; +} + +int arch_is_multi_cluster(void) +{ + return (arch_get_nr_clusters() > 1 ? 1 : 0); +} + +int arch_get_cluster_id(unsigned int cpu) +{ + struct cputopo_arm *arm_cputopo = &cpu_topology[cpu]; + return arm_cputopo->socket_id < 0 ? 0 : arm_cputopo->socket_id; +} + +void arch_get_cluster_cpus(struct cpumask *cpus, int cluster_id) +{ + unsigned int cpu, found_id = -1; + + for_each_possible_cpu(cpu) { + struct cputopo_arm *arm_cputopo = &cpu_topology[cpu]; + if (arm_cputopo->socket_id == cluster_id) { + found_id = cluster_id; + break; + } + } + if (-1 == found_id || cluster_to_logical_mask(found_id, cpus)) { + cpumask_clear(cpus); + for_each_possible_cpu(cpu) + cpumask_set_cpu(cpu, cpus); + } +} +void arch_get_big_little_cpus(struct cpumask *big, struct cpumask *little) +{ + unsigned int cpu; + cpumask_clear(big); + cpumask_clear(little); + for_each_possible_cpu(cpu) + cpumask_set_cpu(cpu, little); +} +#endif /* CONFIG_MTK_CPU_TOPOLOGY */ + +/* + * store_cpu_topology is called at boot when only one cpu is running + * and with the mutex cpu_hotplug.lock locked, when several cpus have booted, + * which prevents simultaneous write access to cpu_topology array + */ +void store_cpu_topology(unsigned int cpuid) +{ + struct 
cputopo_arm *cpuid_topo = &cpu_topology[cpuid]; + unsigned int mpidr; + + /* If the cpu topology has been already set, just return */ + if (cpuid_topo->core_id != -1) + return; + + mpidr = read_cpuid_mpidr(); + + /* create cpu topology mapping */ + if ((mpidr & MPIDR_SMP_BITMASK) == MPIDR_SMP_VALUE) { + /* + * This is a multiprocessor system + * multiprocessor format & multiprocessor mode field are set + */ + + if (mpidr & MPIDR_MT_BITMASK) { + /* core performance interdependency */ + cpuid_topo->thread_id = MPIDR_AFFINITY_LEVEL(mpidr, 0); + cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL(mpidr, 1); + cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL(mpidr, 2); + } else { + /* largely independent cores */ + cpuid_topo->thread_id = -1; + cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL(mpidr, 0); + cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL(mpidr, 1); + } + } else { + /* + * This is an uniprocessor system + * we are in multiprocessor format but uniprocessor system + * or in the old uniprocessor format + */ + cpuid_topo->thread_id = -1; + cpuid_topo->core_id = 0; + cpuid_topo->socket_id = -1; + } + +#ifdef CONFIG_MTK_CPU_TOPOLOGY + verify_cputopo(cpuid, (u32)mpidr); +#endif + + update_siblings_masks(cpuid); + + update_cpu_power(cpuid, mpidr & MPIDR_HWID_BITMASK); + + printk(KERN_INFO "CPU%u: thread %d, cpu %d, socket %d, mpidr %x\n", + cpuid, cpu_topology[cpuid].thread_id, + cpu_topology[cpuid].core_id, + cpu_topology[cpuid].socket_id, mpidr); +} + +/* + * cluster_to_logical_mask - return cpu logical mask of CPUs in a cluster + * @socket_id: cluster HW identifier + * @cluster_mask: the cpumask location to be initialized, modified by the + * function only if return value == 0 + * + * Return: + * + * 0 on success + * -EINVAL if cluster_mask is NULL or there is no record matching socket_id + */ +int cluster_to_logical_mask(unsigned int socket_id, cpumask_t *cluster_mask) +{ + int cpu; + + if (!cluster_mask) + return -EINVAL; + + for_each_online_cpu(cpu) + if (socket_id == 
topology_physical_package_id(cpu)) { + cpumask_copy(cluster_mask, topology_core_cpumask(cpu)); + return 0; + } + + return -EINVAL; +} + +#ifdef CONFIG_SCHED_HMP +static const char * const little_cores[] = { + "arm,cortex-a53", + NULL, +}; + +static bool is_little_cpu(struct device_node *cn) +{ + const char * const *lc; + for (lc = little_cores; *lc; lc++) + if (of_device_is_compatible(cn, *lc)) { + return true; + } + return false; +} + +void __init arch_get_fast_and_slow_cpus(struct cpumask *fast, + struct cpumask *slow) +{ + struct device_node *cn = NULL; + int cpu; + + cpumask_clear(fast); + cpumask_clear(slow); + + /* + * Use the config options if they are given. This helps testing + * HMP scheduling on systems without a big.LITTLE architecture. + */ + if (strlen(CONFIG_HMP_FAST_CPU_MASK) && strlen(CONFIG_HMP_SLOW_CPU_MASK)) { + if (cpulist_parse(CONFIG_HMP_FAST_CPU_MASK, fast)) + WARN(1, "Failed to parse HMP fast cpu mask!\n"); + if (cpulist_parse(CONFIG_HMP_SLOW_CPU_MASK, slow)) + WARN(1, "Failed to parse HMP slow cpu mask!\n"); + return; + } + + /* + * Else, parse device tree for little cores. + */ + while ((cn = of_find_node_by_type(cn, "cpu"))) { + + const u32 *mpidr; + int len; + + mpidr = of_get_property(cn, "reg", &len); + if (!mpidr || len != 4) { + pr_err("* %s missing reg property\n", cn->full_name); + continue; + } + + cpu = get_logical_index(be32_to_cpup(mpidr)); + if (cpu == -EINVAL) { + pr_err("couldn't get logical index for mpidr %x\n", + be32_to_cpup(mpidr)); + break; + } + + if (is_little_cpu(cn)) + cpumask_set_cpu(cpu, slow); + else + cpumask_set_cpu(cpu, fast); + } + + if (!cpumask_empty(fast) && !cpumask_empty(slow)) + return; + + /* + * We didn't find both big and little cores so let's call all cores + * fast as this will keep the system running, with all cores being + * treated equal. 
+ */ + cpumask_setall(fast); + cpumask_clear(slow); +} + +struct cpumask hmp_fast_cpu_mask; +struct cpumask hmp_slow_cpu_mask; + +void __init arch_get_hmp_domains(struct list_head *hmp_domains_list) +{ + struct hmp_domain *domain; + + arch_get_fast_and_slow_cpus(&hmp_fast_cpu_mask, &hmp_slow_cpu_mask); + + /* + * Initialize hmp_domains + * Must be ordered with respect to compute capacity. + * Fastest domain at head of list. + */ + if(!cpumask_empty(&hmp_slow_cpu_mask)) { + domain = (struct hmp_domain *) + kmalloc(sizeof(struct hmp_domain), GFP_KERNEL); + cpumask_copy(&domain->possible_cpus, &hmp_slow_cpu_mask); + cpumask_and(&domain->cpus, cpu_online_mask, &domain->possible_cpus); + list_add(&domain->hmp_domains, hmp_domains_list); + } + domain = (struct hmp_domain *) + kmalloc(sizeof(struct hmp_domain), GFP_KERNEL); + cpumask_copy(&domain->possible_cpus, &hmp_fast_cpu_mask); + cpumask_and(&domain->cpus, cpu_online_mask, &domain->possible_cpus); + list_add(&domain->hmp_domains, hmp_domains_list); +} +#endif /* CONFIG_SCHED_HMP */ + +/* + * init_cpu_topology is called at boot when only one cpu is running + * which prevent simultaneous write access to cpu_topology array + */ +void __init init_cpu_topology(void) +{ + unsigned int cpu; + + /* init core mask and power*/ + for_each_possible_cpu(cpu) { + struct cputopo_arm *cpu_topo = &(cpu_topology[cpu]); + + cpu_topo->thread_id = -1; + cpu_topo->core_id = -1; + cpu_topo->socket_id = -1; + cpumask_clear(&cpu_topo->core_sibling); + cpumask_clear(&cpu_topo->thread_sibling); + + set_power_scale(cpu, SCHED_POWER_SCALE); + } + smp_wmb(); + + parse_dt_topology(); +} + + +#ifdef CONFIG_ARCH_SCALE_INVARIANT_CPU_CAPACITY +#include <linux/cpufreq.h> +#define ARCH_SCALE_INVA_CPU_CAP_PERCLS 1 + +struct cpufreq_extents { + u32 max; + u32 flags; + u32 const_max; + u32 throttling; +}; +/* Flag set when the governor in use only allows one frequency. + * Disables scaling. 
+ */ +#define CPUPOWER_FREQINVAR_SINGLEFREQ 0x01 +static struct cpufreq_extents freq_scale[CONFIG_NR_CPUS]; + +static unsigned long get_max_cpu_power(void) +{ + unsigned long max_cpu_power = 0; + int cpu; + for_each_online_cpu(cpu){ + if( per_cpu(cpu_scale, cpu) > max_cpu_power) + max_cpu_power = per_cpu(cpu_scale, cpu); + } + return max_cpu_power; +} + +int arch_get_cpu_throttling(int cpu) +{ + return freq_scale[cpu].throttling; +} + +/* Called when the CPU Frequency is changed. + * Once for each CPU. + */ +static int cpufreq_callback(struct notifier_block *nb, + unsigned long val, void *data) +{ + struct cpufreq_freqs *freq = data; + int cpu = freq->cpu; + struct cpufreq_extents *extents; + unsigned int curr_freq; +#ifdef ARCH_SCALE_INVA_CPU_CAP_PERCLS + int i = 0; +#endif + + if (freq->flags & CPUFREQ_CONST_LOOPS) + return NOTIFY_OK; + + if (val != CPUFREQ_POSTCHANGE) + return NOTIFY_OK; + + /* if dynamic load scale is disabled, set the load scale to 1.0 */ + if (!frequency_invariant_power_enabled) { + per_cpu(invariant_cpu_capacity, cpu) = per_cpu(base_cpu_capacity, cpu); + return NOTIFY_OK; + } + + extents = &freq_scale[cpu]; + if (extents->max < extents->const_max) { + extents->throttling = 1; + } else { + extents->throttling = 0; + } + /* If our governor was recognised as a single-freq governor, + * use curr = max to be sure multiplier is 1.0 + */ + if (extents->flags & CPUPOWER_FREQINVAR_SINGLEFREQ) + curr_freq = extents->max >> CPUPOWER_FREQSCALE_SHIFT; + else + curr_freq = freq->new >> CPUPOWER_FREQSCALE_SHIFT; + +#ifdef ARCH_SCALE_INVA_CPU_CAP_PERCLS + for_each_cpu(i, topology_core_cpumask(cpu)) { + per_cpu(invariant_cpu_capacity, i) = DIV_ROUND_UP( + (curr_freq * per_cpu(prescaled_cpu_capacity, i)), CPUPOWER_FREQSCALE_DEFAULT); + } +#else + per_cpu(invariant_cpu_capacity, cpu) = DIV_ROUND_UP( + (curr_freq * per_cpu(prescaled_cpu_capacity, cpu)), CPUPOWER_FREQSCALE_DEFAULT); +#endif + return NOTIFY_OK; +} + +/* Called when the CPUFreq governor is 
changed. + * Only called for the CPUs which are actually changed by the + * userspace. + */ +static int cpufreq_policy_callback(struct notifier_block *nb, + unsigned long event, void *data) +{ + struct cpufreq_policy *policy = data; + struct cpufreq_extents *extents; + int cpu, singleFreq = 0, cpu_capacity; + static const char performance_governor[] = "performance"; + static const char powersave_governor[] = "powersave"; + unsigned long max_cpu_power; +#ifdef ARCH_SCALE_INVA_CPU_CAP_PERCLS + int i = 0; +#endif + + if (event == CPUFREQ_START) + return 0; + + if (event != CPUFREQ_INCOMPATIBLE) + return 0; + + /* CPUFreq governors do not accurately report the range of + * CPU Frequencies they will choose from. + * We recognise performance and powersave governors as + * single-frequency only. + */ + if (!strncmp(policy->governor->name, performance_governor, + strlen(performance_governor)) || + !strncmp(policy->governor->name, powersave_governor, + strlen(powersave_governor))) + singleFreq = 1; + + max_cpu_power = get_max_cpu_power(); + /* Make sure that all CPUs impacted by this policy are + * updated since we will only get a notification when the + * user explicitly changes the policy on a CPU. 
+ */ + for_each_cpu(cpu, policy->cpus) { + /* scale cpu_power to max(1024) */ + cpu_capacity = (per_cpu(cpu_scale, cpu) << CPUPOWER_FREQSCALE_SHIFT) + / max_cpu_power; + extents = &freq_scale[cpu]; + extents->max = policy->max >> CPUPOWER_FREQSCALE_SHIFT; + extents->const_max = policy->cpuinfo.max_freq >> CPUPOWER_FREQSCALE_SHIFT; + if (!frequency_invariant_power_enabled) { + /* when disabled, invariant_cpu_scale = cpu_scale */ + per_cpu(base_cpu_capacity, cpu) = CPUPOWER_FREQSCALE_DEFAULT; + per_cpu(invariant_cpu_capacity, cpu) = CPUPOWER_FREQSCALE_DEFAULT; + /* unused when disabled */ + per_cpu(prescaled_cpu_capacity, cpu) = CPUPOWER_FREQSCALE_DEFAULT; + } else { + if (singleFreq) + extents->flags |= CPUPOWER_FREQINVAR_SINGLEFREQ; + else + extents->flags &= ~CPUPOWER_FREQINVAR_SINGLEFREQ; + per_cpu(base_cpu_capacity, cpu) = cpu_capacity; +#ifdef CONFIG_SCHED_HMP_ENHANCEMENT + per_cpu(prescaled_cpu_capacity, cpu) = + ((cpu_capacity << CPUPOWER_FREQSCALE_SHIFT) / extents->const_max); +#else + per_cpu(prescaled_cpu_capacity, cpu) = + ((cpu_capacity << CPUPOWER_FREQSCALE_SHIFT) / extents->max); +#endif + +#ifdef ARCH_SCALE_INVA_CPU_CAP_PERCLS + for_each_cpu(i, topology_core_cpumask(cpu)) { + per_cpu(invariant_cpu_capacity, i) = DIV_ROUND_UP( + ((policy->cur>>CPUPOWER_FREQSCALE_SHIFT) * + per_cpu(prescaled_cpu_capacity, i)), CPUPOWER_FREQSCALE_DEFAULT); + } +#else + per_cpu(invariant_cpu_capacity, cpu) = DIV_ROUND_UP( + ((policy->cur>>CPUPOWER_FREQSCALE_SHIFT) * + per_cpu(prescaled_cpu_capacity, cpu)), CPUPOWER_FREQSCALE_DEFAULT); +#endif + } + } + return 0; +} + +static struct notifier_block cpufreq_notifier = { + .notifier_call = cpufreq_callback, +}; +static struct notifier_block cpufreq_policy_notifier = { + .notifier_call = cpufreq_policy_callback, +}; + +static int __init register_topology_cpufreq_notifier(void) +{ + int ret; + + /* init safe defaults since there are no policies at registration */ + for (ret = 0; ret < CONFIG_NR_CPUS; ret++) { + /* safe defaults 
*/ + freq_scale[ret].max = CPUPOWER_FREQSCALE_DEFAULT; + per_cpu(base_cpu_capacity, ret) = CPUPOWER_FREQSCALE_DEFAULT; + per_cpu(invariant_cpu_capacity, ret) = CPUPOWER_FREQSCALE_DEFAULT; + per_cpu(prescaled_cpu_capacity, ret) = CPUPOWER_FREQSCALE_DEFAULT; + } + + pr_info("topology: registering cpufreq notifiers for scale-invariant CPU Power\n"); + ret = cpufreq_register_notifier(&cpufreq_policy_notifier, + CPUFREQ_POLICY_NOTIFIER); + + if (ret != -EINVAL) + ret = cpufreq_register_notifier(&cpufreq_notifier, + CPUFREQ_TRANSITION_NOTIFIER); + + return ret; +} + +core_initcall(register_topology_cpufreq_notifier); +#endif /* CONFIG_ARCH_SCALE_INVARIANT_CPU_CAPACITY */ diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c new file mode 100644 index 000000000..807f7f045 --- /dev/null +++ b/arch/arm64/kernel/traps.c @@ -0,0 +1,481 @@ +/* + * Based on arch/arm/kernel/traps.c + * + * Copyright (C) 1995-2009 Russell King + * Copyright (C) 2012 ARM Ltd. + * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <linux/signal.h> +#include <linux/personality.h> +#include <linux/kallsyms.h> +#include <linux/spinlock.h> +#include <linux/uaccess.h> +#include <linux/hardirq.h> +#include <linux/kdebug.h> +#include <linux/module.h> +#include <linux/kexec.h> +#include <linux/delay.h> +#include <linux/init.h> +#include <linux/sched.h> +#include <linux/syscalls.h> + +#include <asm/atomic.h> +#include <asm/debug-monitors.h> +#include <asm/traps.h> +#include <asm/stacktrace.h> +#include <asm/exception.h> +#include <asm/system_misc.h> +#include <asm/cacheflush.h> + +static const char *handler[]= { + "Synchronous Abort", + "IRQ", + "FIQ", + "Error" +}; + +int show_unhandled_signals = 1; + +/* + * Dump out the contents of some memory nicely... + */ +static void dump_mem(const char *lvl, const char *str, unsigned long bottom, + unsigned long top) +{ + unsigned long first; + mm_segment_t fs; + int i; + + /* + * We need to switch to kernel mode so that we can use __get_user + * to safely read from kernel space. Note that we now dump the + * code first, just in case the backtrace kills us. 
+ */ + fs = get_fs(); + set_fs(KERNEL_DS); + + printk("%s%s(0x%016lx to 0x%016lx)\n", lvl, str, bottom, top); + + for (first = bottom & ~31; first < top; first += 32) { + unsigned long p; + char str[sizeof(" 12345678") * 8 + 1]; + + memset(str, ' ', sizeof(str)); + str[sizeof(str) - 1] = '\0'; + + for (p = first, i = 0; i < 8 && p < top; i++, p += 4) { + if (p >= bottom && p < top) { + unsigned int val; + if (__get_user(val, (unsigned int *)p) == 0) + sprintf(str + i * 9, " %08x", val); + else + sprintf(str + i * 9, " ????????"); + } + } + printk("%s%04lx:%s\n", lvl, first & 0xffff, str); + } + + set_fs(fs); +} + +static void dump_backtrace_entry(unsigned long where, unsigned long stack) +{ + print_ip_sym(where); + if (in_exception_text(where)) + dump_mem("", "Exception stack", stack, + stack + sizeof(struct pt_regs) + 180); /* Additional 180 to workaround sp offset */ +} + +static void dump_instr(const char *lvl, struct pt_regs *regs) +{ + unsigned long addr = instruction_pointer(regs); + mm_segment_t fs; + char str[sizeof("00000000 ") * 5 + 2 + 1], *p = str; + int i; + + /* + * We need to switch to kernel mode so that we can use __get_user + * to safely read from kernel space. Note that we now dump the + * code first, just in case the backtrace kills us. + */ + fs = get_fs(); + set_fs(KERNEL_DS); + + for (i = -4; i < 1; i++) { + unsigned int val, bad; + + bad = __get_user(val, &((u32 *)addr)[i]); + + if (!bad) + p += sprintf(p, i == 0 ? 
"(%08x) " : "%08x ", val); + else { + p += sprintf(p, "bad PC value"); + break; + } + } + printk("%sCode: %s\n", lvl, str); + + set_fs(fs); +} + +static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) +{ + struct stackframe frame; + const register unsigned long current_sp asm ("sp"); + + pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk); + + if (!tsk) + tsk = current; + + if (regs) { + frame.fp = regs->regs[29]; + frame.sp = regs->sp; + frame.pc = regs->pc; + } else if (tsk == current) { + frame.fp = (unsigned long)__builtin_frame_address(0); + frame.sp = current_sp; + frame.pc = (unsigned long)dump_backtrace; + } else { + /* + * task blocked in __switch_to + */ + frame.fp = thread_saved_fp(tsk); + frame.sp = thread_saved_sp(tsk); + frame.pc = thread_saved_pc(tsk); + } + + printk("Call trace:\n"); + while (1) { + unsigned long where = frame.pc; + int ret; + + ret = unwind_frame(&frame); + if (ret < 0) + break; + dump_backtrace_entry(where, frame.sp); + } +} + +void show_stack(struct task_struct *tsk, unsigned long *sp) +{ + dump_backtrace(NULL, tsk); + barrier(); +} + +#ifdef CONFIG_PREEMPT +#define S_PREEMPT " PREEMPT" +#else +#define S_PREEMPT "" +#endif +#ifdef CONFIG_SMP +#define S_SMP " SMP" +#else +#define S_SMP "" +#endif + +static int __die(const char *str, int err, struct thread_info *thread, + struct pt_regs *regs) +{ + unsigned long sp, stack; + struct task_struct *tsk = thread->task; + static int die_counter; + int ret; + + pr_emerg("Internal error: %s: %x [#%d]" S_PREEMPT S_SMP "\n", + str, err, ++die_counter); + + /* trap and error numbers are mostly meaningless on ARM */ + ret = notify_die(DIE_OOPS, str, regs, err, 0, SIGSEGV); + if (ret == NOTIFY_STOP) + return ret; + + print_modules(); + __show_regs(regs); + pr_emerg("Process %.*s (pid: %d, stack limit = 0x%p)\n", + TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk), thread + 1); + + if (!user_mode(regs) || in_interrupt()) { + sp = regs->sp; + stack = (unsigned 
long)task_stack_page(tsk); + dump_mem(KERN_EMERG, "Stack: ", sp, ALIGN(sp, THREAD_SIZE)); + if (sp < stack || (sp - stack) > THREAD_SIZE) { + printk(KERN_EMERG "Invalid sp[%lx] or stack address[%lx]\n", sp, stack); + dump_mem(KERN_EMERG, "Stack(backup) ", stack, THREAD_SIZE + stack); + } + dump_backtrace(regs, tsk); + dump_instr(KERN_EMERG, regs); + } + + return ret; +} + +static DEFINE_RAW_SPINLOCK(die_lock); + +/* + * This function is protected against re-entrancy. + */ +void die(const char *str, struct pt_regs *regs, int err) +{ + struct thread_info *thread = current_thread_info(); + int ret; + + oops_enter(); + + raw_spin_lock_irq(&die_lock); + console_verbose(); + bust_spinlocks(1); + ret = __die(str, err, thread, regs); + + if (regs && kexec_should_crash(thread->task)) + crash_kexec(regs); + + bust_spinlocks(0); + add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); + /* keep preemption/irq disabled in KE flow to prevent context switch*/ + //raw_spin_unlock_irq(&die_lock); + oops_exit(); + + if (in_interrupt()) + panic("Fatal exception in interrupt"); + if (panic_on_oops) + panic("Fatal exception"); + if (ret != NOTIFY_STOP) + do_exit(SIGSEGV); +} + +void arm64_notify_die(const char *str, struct pt_regs *regs, + struct siginfo *info, int err) +{ + if (user_mode(regs)) + force_sig_info(info->si_signo, info, current); + else + die(str, regs, err); +} + +static LIST_HEAD(undef_hook); + +void register_undef_hook(struct undef_hook *hook) +{ + list_add(&hook->node, &undef_hook); +} + +static int call_undef_hook(struct pt_regs *regs, unsigned int instr) +{ + struct undef_hook *hook; + int (*fn)(struct pt_regs *regs, unsigned int instr) = NULL; + + list_for_each_entry(hook, &undef_hook, node) + if ((instr & hook->instr_mask) == hook->instr_val && + (regs->pstate & hook->pstate_mask) == hook->pstate_val) + fn = hook->fn; + + return fn ? 
fn(regs, instr) : 1; +} + +static DEFINE_PER_CPU(void *, __prev_undefinstr_pc) = 0; +static DEFINE_PER_CPU(int, __prev_undefinstr_counter) = 0; + +asmlinkage void __exception do_undefinstr(struct pt_regs *regs) +{ + u32 instr; + siginfo_t info; + void __user *pc = (void __user *)instruction_pointer(regs); + struct thread_info *thread = current_thread_info(); + + /* check for AArch32 breakpoint instructions */ + if (!aarch32_break_handler(regs)) + return; + if (user_mode(regs)) { + if (compat_thumb_mode(regs)) { + if (get_user(instr, (u16 __user *)pc)) + goto die_sig; + if (is_wide_instruction(instr)) { + u32 instr2; + if (get_user(instr2, (u16 __user *)pc+1)) + goto die_sig; + instr <<= 16; + instr |= instr2; + } + } else if (get_user(instr, (u32 __user *)pc)) { + goto die_sig; + } + } else { + /* kernel mode */ + instr = *((u32 *)pc); + } + + if (call_undef_hook(regs, instr) == 0) + return; + +die_sig: + if (show_unhandled_signals && unhandled_signal(current, SIGILL) && + printk_ratelimit()) { + pr_info("%s[%d]: undefined instruction: pc=%p\n", + current->comm, task_pid_nr(current), pc); + dump_instr(KERN_INFO, regs); + } + + /* Place the SIGILL ICache Invalidate after the Debugger Undefined-Instruction Solution. */ + if ((user_mode(regs)) || processor_mode(regs) == PSR_MODE_EL1h) { + void **prev_undefinstr_pc = &get_cpu_var(__prev_undefinstr_pc); + int *prev_undefinstr_counter = &get_cpu_var(__prev_undefinstr_counter); + + /* Only do it for User-Space Application. */ + pr_alert("USR_MODE / SVC_MODE Undefined Instruction Address curr:%p pc=%p:%p, instr: 0x%x compat: %s\n", + (void *)current, (void *)pc, (void *)*prev_undefinstr_pc, instr, + is_compat_task() ? "yes" : "no"); + if ((*prev_undefinstr_pc != pc)) { + /* If the current process or program counter is changed......renew the counter. 
*/ + pr_alert("First Time Recovery curr:%p pc=%p:%p\n", + (void *)current, (void *)pc, (void *)*prev_undefinstr_pc); + *prev_undefinstr_pc = pc; + *prev_undefinstr_counter = 0; + put_cpu_var(__prev_undefinstr_pc); + put_cpu_var(__prev_undefinstr_counter); + __flush_icache_all(); + flush_cache_all(); + /* + * undo cpu_excp to cancel nest_panic code, see entry.S + */ + if (!user_mode(regs)) { + thread->cpu_excp--; + } + return; + } + else if(*prev_undefinstr_counter < 1) { + pr_alert("2nd Time Recovery curr:%p pc=%p:%p\n", + (void *)current, (void *)pc, + (void *)*prev_undefinstr_pc); + *prev_undefinstr_counter += 1; + put_cpu_var(__prev_undefinstr_pc); + put_cpu_var(__prev_undefinstr_counter); + __flush_icache_all(); + flush_cache_all(); + /* + * undo cpu_excp to cancel nest_panic code, see entry.S + */ + if (!user_mode(regs)) { + thread->cpu_excp--; + } + return; + } + *prev_undefinstr_counter += 1; + if(*prev_undefinstr_counter >= 4) { + /* 2=first time SigILL,3=2nd time NE-SigILL,4=3rd time CoreDump-SigILL */ + *prev_undefinstr_pc = 0; + *prev_undefinstr_counter = 0; + } + put_cpu_var(__prev_undefinstr_pc); + put_cpu_var(__prev_undefinstr_counter); + pr_alert("Go to ARM Notify Die curr:%p pc=%p:%p\n", + (void *)current, (void *)pc, (void *)*prev_undefinstr_pc); + } + + info.si_signo = SIGILL; + info.si_errno = 0; + info.si_code = ILL_ILLOPC; + info.si_addr = pc; + + arm64_notify_die("Oops - undefined instruction", regs, &info, 0); +} + +long compat_arm_syscall(struct pt_regs *regs); + +asmlinkage long do_ni_syscall(struct pt_regs *regs) +{ +#ifdef CONFIG_COMPAT + long ret; + if (is_compat_task()) { + ret = compat_arm_syscall(regs); + if (ret != -ENOSYS) + return ret; + } +#endif + + if (show_unhandled_signals && printk_ratelimit()) { + pr_info("%s[%d]: syscall %d\n", current->comm, + task_pid_nr(current), (int)regs->syscallno); + dump_instr("", regs); + if (user_mode(regs)) + __show_regs(regs); + } + + return sys_ni_syscall(); +} + +#ifdef 
CONFIG_MEDIATEK_SOLUTION +static void (*async_abort_handler)(struct pt_regs *regs, void *); +static void *async_abort_priv; + +int register_async_abort_handler(void (*fn)(struct pt_regs *regs, void *), void *priv) +{ + async_abort_handler = fn; + async_abort_priv = priv; + + return 0; +} +#endif + +/* + * bad_mode handles the impossible case in the exception vector. + */ +asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr) +{ + siginfo_t info; + void __user *pc = (void __user *)instruction_pointer(regs); + console_verbose(); + +#ifdef CONFIG_MEDIATEK_SOLUTION + /* + * reason is defined in entry.S, 3 means BAD_ERROR, + * which would be triggered by async abort + */ + if ((reason == 3) && async_abort_handler) { + async_abort_handler(regs, async_abort_priv); + } +#endif + pr_crit("Bad mode in %s handler detected, code 0x%08x\n", + handler[reason], esr); + __show_regs(regs); + + info.si_signo = SIGILL; + info.si_errno = 0; + info.si_code = ILL_ILLOPC; + info.si_addr = pc; + + arm64_notify_die("Oops - bad mode", regs, &info, 0); +} + +void __pte_error(const char *file, int line, unsigned long val) +{ + printk("%s:%d: bad pte %016lx.\n", file, line, val); +} + +void __pmd_error(const char *file, int line, unsigned long val) +{ + printk("%s:%d: bad pmd %016lx.\n", file, line, val); +} + +void __pgd_error(const char *file, int line, unsigned long val) +{ + printk("%s:%d: bad pgd %016lx.\n", file, line, val); +} + +void __init trap_init(void) +{ + return; +} diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c new file mode 100644 index 000000000..0ea7a22bc --- /dev/null +++ b/arch/arm64/kernel/vdso.c @@ -0,0 +1,257 @@ +/* + * VDSO implementation for AArch64 and vector page setup for AArch32. + * + * Copyright (C) 2012 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * Author: Will Deacon <will.deacon@arm.com> + */ + +#include <linux/kernel.h> +#include <linux/clocksource.h> +#include <linux/elf.h> +#include <linux/err.h> +#include <linux/errno.h> +#include <linux/gfp.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/signal.h> +#include <linux/slab.h> +#include <linux/timekeeper_internal.h> +#include <linux/vmalloc.h> + +#include <asm/cacheflush.h> +#include <asm/signal32.h> +#include <asm/vdso.h> +#include <asm/vdso_datapage.h> + +extern char vdso_start, vdso_end; +static unsigned long vdso_pages; +static struct page **vdso_pagelist; + +/* + * The vDSO data page. + */ +static union { + struct vdso_data data; + u8 page[PAGE_SIZE]; +} vdso_data_store __page_aligned_data; +struct vdso_data *vdso_data = &vdso_data_store.data; + +#ifdef CONFIG_COMPAT +/* + * Create and map the vectors page for AArch32 tasks. 
+ */ +static struct page *vectors_page[1]; + +static int alloc_vectors_page(void) +{ + extern char __kuser_helper_start[], __kuser_helper_end[]; + int kuser_sz = __kuser_helper_end - __kuser_helper_start; + unsigned long vpage; + + vpage = get_zeroed_page(GFP_ATOMIC); + + if (!vpage) + return -ENOMEM; + + /* kuser helpers */ + memcpy((void *)vpage + 0x1000 - kuser_sz, __kuser_helper_start, + kuser_sz); + + /* sigreturn code */ + memcpy((void *)vpage + AARCH32_KERN_SIGRET_CODE_OFFSET, + aarch32_sigret_code, sizeof(aarch32_sigret_code)); + + flush_icache_range(vpage, vpage + PAGE_SIZE); + vectors_page[0] = virt_to_page(vpage); + + return 0; +} +arch_initcall(alloc_vectors_page); + +int aarch32_setup_vectors_page(struct linux_binprm *bprm, int uses_interp) +{ + struct mm_struct *mm = current->mm; + unsigned long addr = AARCH32_VECTORS_BASE; + int ret; + + down_write(&mm->mmap_sem); + current->mm->context.vdso = (void *)addr; + + /* Map vectors page at the high address. */ + ret = install_special_mapping(mm, addr, PAGE_SIZE, + VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC, + vectors_page); + + up_write(&mm->mmap_sem); + + return ret; +} +#endif /* CONFIG_COMPAT */ + +static int __init vdso_init(void) +{ + struct page *pg; + char *vbase; + int i, ret = 0; + + vdso_pages = (&vdso_end - &vdso_start) >> PAGE_SHIFT; + pr_info("vdso: %ld pages (%ld code, %ld data) at base %p\n", + vdso_pages + 1, vdso_pages, 1L, &vdso_start); + + /* Allocate the vDSO pagelist, plus a page for the data. */ + vdso_pagelist = kzalloc(sizeof(struct page *) * (vdso_pages + 1), + GFP_KERNEL); + if (vdso_pagelist == NULL) { + pr_err("Failed to allocate vDSO pagelist!\n"); + return -ENOMEM; + } + + /* Grab the vDSO code pages. */ + for (i = 0; i < vdso_pages; i++) { + pg = virt_to_page(&vdso_start + i*PAGE_SIZE); + ClearPageReserved(pg); + get_page(pg); + vdso_pagelist[i] = pg; + } + + /* Sanity check the shared object header. 
*/ + vbase = vmap(vdso_pagelist, 1, 0, PAGE_KERNEL); + if (vbase == NULL) { + pr_err("Failed to map vDSO pagelist!\n"); + return -ENOMEM; + } else if (memcmp(vbase, "\177ELF", 4)) { + pr_err("vDSO is not a valid ELF object!\n"); + ret = -EINVAL; + goto unmap; + } + + /* Grab the vDSO data page. */ + pg = virt_to_page(vdso_data); + get_page(pg); + vdso_pagelist[i] = pg; + +unmap: + vunmap(vbase); + return ret; +} +arch_initcall(vdso_init); + +int arch_setup_additional_pages(struct linux_binprm *bprm, + int uses_interp) +{ + struct mm_struct *mm = current->mm; + unsigned long vdso_base, vdso_mapping_len; + int ret; + + /* Be sure to map the data page */ + vdso_mapping_len = (vdso_pages + 1) << PAGE_SHIFT; + + down_write(&mm->mmap_sem); + vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0); + if (IS_ERR_VALUE(vdso_base)) { + ret = vdso_base; + goto up_fail; + } + mm->context.vdso = (void *)vdso_base; + + ret = install_special_mapping(mm, vdso_base, vdso_mapping_len, + VM_READ|VM_EXEC| + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, + vdso_pagelist); + if (ret) { + mm->context.vdso = NULL; + goto up_fail; + } + +up_fail: + up_write(&mm->mmap_sem); + + return ret; +} + +const char *arch_vma_name(struct vm_area_struct *vma) +{ + /* + * We can re-use the vdso pointer in mm_context_t for identifying + * the vectors page for compat applications. The vDSO will always + * sit above TASK_UNMAPPED_BASE and so we don't need to worry about + * it conflicting with the vectors base. + */ + if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso) { +#ifdef CONFIG_COMPAT + if (vma->vm_start == AARCH32_VECTORS_BASE) + return "[vectors]"; +#endif + return "[vdso]"; + } + + return NULL; +} + +/* + * We define AT_SYSINFO_EHDR, so we need these function stubs to keep + * Linux happy. 
+ */ +int in_gate_area_no_mm(unsigned long addr) +{ + return 0; +} + +int in_gate_area(struct mm_struct *mm, unsigned long addr) +{ + return 0; +} + +struct vm_area_struct *get_gate_vma(struct mm_struct *mm) +{ + return NULL; +} + +/* + * Update the vDSO data page to keep in sync with kernel timekeeping. + */ +void update_vsyscall(struct timekeeper *tk) +{ + struct timespec xtime_coarse; + u32 use_syscall = strcmp(tk->clock->name, "arch_sys_counter"); + + ++vdso_data->tb_seq_count; + smp_wmb(); + + xtime_coarse = __current_kernel_time(); + vdso_data->use_syscall = use_syscall; + vdso_data->xtime_coarse_sec = xtime_coarse.tv_sec; + vdso_data->xtime_coarse_nsec = xtime_coarse.tv_nsec; + vdso_data->wtm_clock_sec = tk->wall_to_monotonic.tv_sec; + vdso_data->wtm_clock_nsec = tk->wall_to_monotonic.tv_nsec; + + if (!use_syscall) { + vdso_data->cs_cycle_last = tk->clock->cycle_last; + vdso_data->xtime_clock_sec = tk->xtime_sec; + vdso_data->xtime_clock_nsec = tk->xtime_nsec; + vdso_data->cs_mult = tk->mult; + vdso_data->cs_shift = tk->shift; + } + + smp_wmb(); + ++vdso_data->tb_seq_count; +} + +void update_vsyscall_tz(void) +{ + vdso_data->tz_minuteswest = sys_tz.tz_minuteswest; + vdso_data->tz_dsttime = sys_tz.tz_dsttime; +} diff --git a/arch/arm64/kernel/vdso/.gitignore b/arch/arm64/kernel/vdso/.gitignore new file mode 100644 index 000000000..b8cc94e96 --- /dev/null +++ b/arch/arm64/kernel/vdso/.gitignore @@ -0,0 +1,2 @@ +vdso.lds +vdso-offsets.h diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile new file mode 100644 index 000000000..6d20b7d16 --- /dev/null +++ b/arch/arm64/kernel/vdso/Makefile @@ -0,0 +1,63 @@ +# +# Building a vDSO image for AArch64. +# +# Author: Will Deacon <will.deacon@arm.com> +# Heavily based on the vDSO Makefiles for other archs. 
+# + +obj-vdso := gettimeofday.o note.o sigreturn.o + +# Build rules +targets := $(obj-vdso) vdso.so vdso.so.dbg +obj-vdso := $(addprefix $(obj)/, $(obj-vdso)) + +ccflags-y := -shared -fno-common -fno-builtin +ccflags-y += -nostdlib -Wl,-soname=linux-vdso.so.1 \ + $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) + +obj-y += vdso.o +extra-y += vdso.lds vdso-offsets.h +CPPFLAGS_vdso.lds += -P -C -U$(ARCH) + +# Force dependency (incbin is bad) +$(obj)/vdso.o : $(obj)/vdso.so + +# Link rule for the .so file, .lds has to be first +$(obj)/vdso.so.dbg: $(src)/vdso.lds $(obj-vdso) + $(call if_changed,vdsold) + +# Strip rule for the .so file +$(obj)/%.so: OBJCOPYFLAGS := -S +$(obj)/%.so: $(obj)/%.so.dbg FORCE + $(call if_changed,objcopy) + +# Generate VDSO offsets using helper script +gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh +quiet_cmd_vdsosym = VDSOSYM $@ +define cmd_vdsosym + $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ && \ + cp $@ include/generated/ +endef + +$(obj)/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE + $(call if_changed,vdsosym) + +# Assembly rules for the .S files +$(obj-vdso): %.o: %.S + $(call if_changed_dep,vdsoas) + +# Actual build commands +quiet_cmd_vdsold = VDSOL $@ + cmd_vdsold = $(CC) $(c_flags) -Wl,-n -Wl,-T $^ -o $@ +quiet_cmd_vdsoas = VDSOA $@ + cmd_vdsoas = $(CC) $(a_flags) -c -o $@ $< + +# Install commands for the unstripped file +quiet_cmd_vdso_install = INSTALL $@ + cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@ + +vdso.so: $(obj)/vdso.so.dbg + @mkdir -p $(MODLIB)/vdso + $(call cmd,vdso_install) + +vdso_install: vdso.so diff --git a/arch/arm64/kernel/vdso/gen_vdso_offsets.sh b/arch/arm64/kernel/vdso/gen_vdso_offsets.sh new file mode 100755 index 000000000..01924ff07 --- /dev/null +++ b/arch/arm64/kernel/vdso/gen_vdso_offsets.sh @@ -0,0 +1,15 @@ +#!/bin/sh + +# +# Match symbols in the DSO that look like VDSO_*; produce a header file +# of constant offsets into the shared object. 
+# +# Doing this inside the Makefile will break the $(filter-out) function, +# causing Kbuild to rebuild the vdso-offsets header file every time. +# +# Author: Will Deacon <will.deacon@arm.com> +# + +LC_ALL=C +sed -n -e 's/^00*/0/' -e \ +'s/^\([0-9a-fA-F]*\) . VDSO_\([a-zA-Z0-9_]*\)$/\#define vdso_offset_\2\t0x\1/p' diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S new file mode 100644 index 000000000..fe652ffd3 --- /dev/null +++ b/arch/arm64/kernel/vdso/gettimeofday.S @@ -0,0 +1,249 @@ +/* + * Userspace implementations of gettimeofday() and friends. + * + * Copyright (C) 2012 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ * + * Author: Will Deacon <will.deacon@arm.com> + */ + +#include <linux/linkage.h> +#include <asm/asm-offsets.h> +#include <asm/unistd.h> + +#define NSEC_PER_SEC_LO16 0xca00 +#define NSEC_PER_SEC_HI16 0x3b9a + +vdso_data .req x6 +use_syscall .req w7 +seqcnt .req w8 + + .macro seqcnt_acquire +9999: ldr seqcnt, [vdso_data, #VDSO_TB_SEQ_COUNT] + tbnz seqcnt, #0, 9999b + dmb ishld + ldr use_syscall, [vdso_data, #VDSO_USE_SYSCALL] + .endm + + .macro seqcnt_read, cnt + dmb ishld + ldr \cnt, [vdso_data, #VDSO_TB_SEQ_COUNT] + .endm + + .macro seqcnt_check, cnt, fail + cmp \cnt, seqcnt + b.ne \fail + .endm + + .text + +/* int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz); */ +ENTRY(__kernel_gettimeofday) + .cfi_startproc + mov x2, x30 + .cfi_register x30, x2 + + /* Acquire the sequence counter and get the timespec. */ + adr vdso_data, _vdso_data +1: seqcnt_acquire + cbnz use_syscall, 4f + + /* If tv is NULL, skip to the timezone code. */ + cbz x0, 2f + bl __do_get_tspec + seqcnt_check w9, 1b + + /* Convert ns to us. */ + mov x13, #1000 + lsl x13, x13, x12 + udiv x11, x11, x13 + stp x10, x11, [x0, #TVAL_TV_SEC] +2: + /* If tz is NULL, return 0. */ + cbz x1, 3f + ldp w4, w5, [vdso_data, #VDSO_TZ_MINWEST] + stp w4, w5, [x1, #TZ_MINWEST] +3: + mov x0, xzr + ret x2 +4: + /* Syscall fallback. */ + mov x8, #__NR_gettimeofday + svc #0 + ret x2 + .cfi_endproc +ENDPROC(__kernel_gettimeofday) + +/* int __kernel_clock_gettime(clockid_t clock_id, struct timespec *tp); */ +ENTRY(__kernel_clock_gettime) + .cfi_startproc + cmp w0, #CLOCK_REALTIME + ccmp w0, #CLOCK_MONOTONIC, #0x4, ne + b.ne 2f + + mov x2, x30 + .cfi_register x30, x2 + + /* Get kernel timespec. */ + adr vdso_data, _vdso_data +1: seqcnt_acquire + cbnz use_syscall, 7f + + bl __do_get_tspec + seqcnt_check w9, 1b + + mov x30, x2 + + cmp w0, #CLOCK_MONOTONIC + b.ne 6f + + /* Get wtm timespec. */ + ldp x13, x14, [vdso_data, #VDSO_WTM_CLK_SEC] + + /* Check the sequence counter. 
*/ + seqcnt_read w9 + seqcnt_check w9, 1b + b 4f +2: + cmp w0, #CLOCK_REALTIME_COARSE + ccmp w0, #CLOCK_MONOTONIC_COARSE, #0x4, ne + b.ne 8f + + /* xtime_coarse_nsec is already right-shifted */ + mov x12, #0 + + /* Get coarse timespec. */ + adr vdso_data, _vdso_data +3: seqcnt_acquire + ldp x10, x11, [vdso_data, #VDSO_XTIME_CRS_SEC] + + /* Get wtm timespec. */ + ldp x13, x14, [vdso_data, #VDSO_WTM_CLK_SEC] + + /* Check the sequence counter. */ + seqcnt_read w9 + seqcnt_check w9, 3b + + cmp w0, #CLOCK_MONOTONIC_COARSE + b.ne 6f +4: + /* Add on wtm timespec. */ + add x10, x10, x13 + lsl x14, x14, x12 + add x11, x11, x14 + + /* Normalise the new timespec. */ + mov x15, #NSEC_PER_SEC_LO16 + movk x15, #NSEC_PER_SEC_HI16, lsl #16 + lsl x15, x15, x12 + cmp x11, x15 + b.lt 5f + sub x11, x11, x15 + add x10, x10, #1 +5: + cmp x11, #0 + b.ge 6f + add x11, x11, x15 + sub x10, x10, #1 + +6: /* Store to the user timespec. */ + lsr x11, x11, x12 + stp x10, x11, [x1, #TSPEC_TV_SEC] + mov x0, xzr + ret +7: + mov x30, x2 +8: /* Syscall fallback. */ + mov x8, #__NR_clock_gettime + svc #0 + ret + .cfi_endproc +ENDPROC(__kernel_clock_gettime) + +/* int __kernel_clock_getres(clockid_t clock_id, struct timespec *res); */ +ENTRY(__kernel_clock_getres) + .cfi_startproc + cbz x1, 3f /* NULL-check the full 64-bit res pointer, not just its low word. */ + + cmp w0, #CLOCK_REALTIME + ccmp w0, #CLOCK_MONOTONIC, #0x4, ne + b.ne 1f + + ldr x2, 5f + b 2f +1: + cmp w0, #CLOCK_REALTIME_COARSE + ccmp w0, #CLOCK_MONOTONIC_COARSE, #0x4, ne + b.ne 4f + ldr x2, 6f +2: + stp xzr, x2, [x1] + +3: /* res == NULL. */ + mov w0, wzr + ret + +4: /* Syscall fallback. */ + mov x8, #__NR_clock_getres + svc #0 + ret +5: + .quad CLOCK_REALTIME_RES +6: + .quad CLOCK_COARSE_RES + .cfi_endproc +ENDPROC(__kernel_clock_getres) + +/* + * Read the current time from the architected counter. + * Expects vdso_data to be initialised. + * Clobbers the temporary registers (x9 - x15). 
+ * Returns: + * - w9 = vDSO sequence counter + * - (x10, x11) = (ts->tv_sec, shifted ts->tv_nsec) + * - w12 = cs_shift + */ +ENTRY(__do_get_tspec) + .cfi_startproc + + /* Read from the vDSO data page. */ + ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST] + ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC] + ldp w11, w12, [vdso_data, #VDSO_CS_MULT] + seqcnt_read w9 + + /* Read the virtual counter. */ + isb + mrs x15, cntvct_el0 + + /* Calculate cycle delta and convert to ns. */ + sub x10, x15, x10 + /* We can only guarantee 56 bits of precision. */ + movn x15, #0xff00, lsl #48 + and x10, x15, x10 + mul x10, x10, x11 + + /* Use the kernel time to calculate the new timespec. */ + mov x11, #NSEC_PER_SEC_LO16 + movk x11, #NSEC_PER_SEC_HI16, lsl #16 + lsl x11, x11, x12 + add x15, x10, x14 + udiv x14, x15, x11 + add x10, x13, x14 + mul x13, x14, x11 + sub x11, x15, x13 + + ret + .cfi_endproc +ENDPROC(__do_get_tspec) diff --git a/arch/arm64/kernel/vdso/note.S b/arch/arm64/kernel/vdso/note.S new file mode 100644 index 000000000..b82c85e5d --- /dev/null +++ b/arch/arm64/kernel/vdso/note.S @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2012 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * Author: Will Deacon <will.deacon@arm.com> + * + * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text. + * Here we can supply some information useful to userland. 
+ */ + +#include <linux/uts.h> +#include <linux/version.h> +#include <linux/elfnote.h> + +ELFNOTE_START(Linux, 0, "a") + .long LINUX_VERSION_CODE +ELFNOTE_END diff --git a/arch/arm64/kernel/vdso/sigreturn.S b/arch/arm64/kernel/vdso/sigreturn.S new file mode 100644 index 000000000..20d98effa --- /dev/null +++ b/arch/arm64/kernel/vdso/sigreturn.S @@ -0,0 +1,37 @@ +/* + * Sigreturn trampoline for returning from a signal when the SA_RESTORER + * flag is not set. + * + * Copyright (C) 2012 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * Author: Will Deacon <will.deacon@arm.com> + */ + +#include <linux/linkage.h> +#include <asm/unistd.h> + + .text + + nop +ENTRY(__kernel_rt_sigreturn) + .cfi_startproc + .cfi_signal_frame + .cfi_def_cfa x29, 0 + .cfi_offset x29, 0 * 8 + .cfi_offset x30, 1 * 8 + mov x8, #__NR_rt_sigreturn + svc #0 + .cfi_endproc +ENDPROC(__kernel_rt_sigreturn) diff --git a/arch/arm64/kernel/vdso/vdso.S b/arch/arm64/kernel/vdso/vdso.S new file mode 100644 index 000000000..60c1db54b --- /dev/null +++ b/arch/arm64/kernel/vdso/vdso.S @@ -0,0 +1,33 @@ +/* + * Copyright (C) 2012 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * Author: Will Deacon <will.deacon@arm.com> + */ + +#include <linux/init.h> +#include <linux/linkage.h> +#include <linux/const.h> +#include <asm/page.h> + + __PAGE_ALIGNED_DATA + + .globl vdso_start, vdso_end + .balign PAGE_SIZE +vdso_start: + .incbin "arch/arm64/kernel/vdso/vdso.so" + .balign PAGE_SIZE +vdso_end: + + .previous diff --git a/arch/arm64/kernel/vdso/vdso.lds.S b/arch/arm64/kernel/vdso/vdso.lds.S new file mode 100644 index 000000000..8154b8d1c --- /dev/null +++ b/arch/arm64/kernel/vdso/vdso.lds.S @@ -0,0 +1,100 @@ +/* + * GNU linker script for the VDSO library. +* + * Copyright (C) 2012 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * Author: Will Deacon <will.deacon@arm.com> + * Heavily based on the vDSO linker scripts for other archs. + */ + +#include <linux/const.h> +#include <asm/page.h> +#include <asm/vdso.h> + +OUTPUT_FORMAT("elf64-littleaarch64", "elf64-bigaarch64", "elf64-littleaarch64") +OUTPUT_ARCH(aarch64) + +SECTIONS +{ + . 
= VDSO_LBASE + SIZEOF_HEADERS; + + .hash : { *(.hash) } :text + .gnu.hash : { *(.gnu.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + + .note : { *(.note.*) } :text :note + + . = ALIGN(16); + + .text : { *(.text*) } :text =0xd503201f + PROVIDE (__etext = .); + PROVIDE (_etext = .); + PROVIDE (etext = .); + + .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr + .eh_frame : { KEEP (*(.eh_frame)) } :text + + .dynamic : { *(.dynamic) } :text :dynamic + + .rodata : { *(.rodata*) } :text + + _end = .; + PROVIDE(end = .); + + . = ALIGN(PAGE_SIZE); + PROVIDE(_vdso_data = .); + + /DISCARD/ : { + *(.note.GNU-stack) + *(.data .data.* .gnu.linkonce.d.* .sdata*) + *(.bss .sbss .dynbss .dynsbss) + } +} + +/* + * We must supply the ELF program headers explicitly to get just one + * PT_LOAD segment, and set the flags explicitly to make segments read-only. + */ +PHDRS +{ + text PT_LOAD FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */ + dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ + note PT_NOTE FLAGS(4); /* PF_R */ + eh_frame_hdr PT_GNU_EH_FRAME; +} + +/* + * This controls what symbols we export from the DSO. + */ +VERSION +{ + LINUX_2.6.39 { + global: + __kernel_rt_sigreturn; + __kernel_gettimeofday; + __kernel_clock_gettime; + __kernel_clock_getres; + local: *; + }; +} + +/* + * Make the sigreturn code visible to the kernel. 
+ */ +VDSO_sigtramp = __kernel_rt_sigreturn; diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S new file mode 100644 index 000000000..2c8a95b53 --- /dev/null +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -0,0 +1,125 @@ +/* + * ld script to make ARM Linux kernel + * taken from the i386 version by Russell King + * Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz> + */ + +#include <asm-generic/vmlinux.lds.h> +#include <asm/thread_info.h> +#include <asm/memory.h> +#include <asm/page.h> + +#define ARM_EXIT_KEEP(x) +#define ARM_EXIT_DISCARD(x) x + +OUTPUT_ARCH(aarch64) +ENTRY(stext) + +jiffies = jiffies_64; + +SECTIONS +{ + /* + * XXX: The linker does not define how output sections are + * assigned to input sections when there are multiple statements + * matching the same input section name. There is no documented + * order of matching. + */ + /DISCARD/ : { + ARM_EXIT_DISCARD(EXIT_TEXT) + ARM_EXIT_DISCARD(EXIT_DATA) + EXIT_CALL + *(.discard) + *(.discard.*) + } + + . = PAGE_OFFSET + TEXT_OFFSET; + + .head.text : { + _text = .; + HEAD_TEXT + } + .text : { /* Real text segment */ + _stext = .; /* Text and read-only data */ + __exception_text_start = .; + *(.exception.text) + __exception_text_end = .; + IRQENTRY_TEXT + TEXT_TEXT + SCHED_TEXT + LOCK_TEXT + *(.fixup) + *(.gnu.warning) + . = ALIGN(16); + *(.got) /* Global offset table */ + } + + RO_DATA(PAGE_SIZE) + + _etext = .; /* End of text and rodata section */ + + . = ALIGN(PAGE_SIZE); + __init_begin = .; + + INIT_TEXT_SECTION(8) + .exit.text : { + ARM_EXIT_KEEP(EXIT_TEXT) + } + . = ALIGN(16); + .init.data : { + INIT_DATA + INIT_SETUP(16) + INIT_CALLS + CON_INITCALL + SECURITY_INITCALL + INIT_RAM_FS + } + .exit.data : { + ARM_EXIT_KEEP(EXIT_DATA) + } + + PERCPU_SECTION(64) + + __init_end = .; + . = ALIGN(THREAD_SIZE); + __data_loc = .; + + .data : AT(__data_loc) { + _data = .; /* address in memory */ + _sdata = .; + + /* + * first, the init task union, aligned + * to an 8192 byte boundary. 
+ */ + INIT_TASK_DATA(THREAD_SIZE) + NOSAVE_DATA + CACHELINE_ALIGNED_DATA(64) + READ_MOSTLY_DATA(64) + + /* + * The exception fixup table (might need resorting at runtime) + */ + . = ALIGN(32); + __start___ex_table = .; + *(__ex_table) + __stop___ex_table = .; + + /* + * and the usual data section + */ + DATA_DATA + CONSTRUCTORS + + _edata = .; + } + _edata_loc = __data_loc + SIZEOF(.data); + + NOTES + + BSS_SECTION(0, 0, 0) + _end = .; + + STABS_DEBUG + .comment 0 : { *(.comment) } +} |
