From 57dace2391ba10135e38457904121e7ef34d0c83 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 23 Apr 2007 21:08:15 +0200 Subject: [PATCH] [POWERPC] spufs: make spu page faults not block scheduling Until now, we have always entered the spu page fault handler with a mutex for the spu context held. This has multiple bad side-effects: - it becomes impossible to suspend the context during page faults - if an spu program attempts to access its own mmio areas through DMA, we get an immediate livelock when the nopage function tries to acquire the same mutex This patch makes the page fault logic operate on a struct spu_context instead of a struct spu, and moves it from spu_base.c to a new file fault.c inside of spufs. We now also need to copy the dar and dsisr contents of the last fault into the saved context to have it accessible in case we schedule out the context before activating the page fault handler. Signed-off-by: Arnd Bergmann --- arch/powerpc/platforms/cell/spu_base.c | 103 ---------- arch/powerpc/platforms/cell/spufs/Makefile | 2 +- .../platforms/cell/spufs/backing_ops.c | 6 + arch/powerpc/platforms/cell/spufs/fault.c | 193 ++++++++++++++++++ arch/powerpc/platforms/cell/spufs/hw_ops.c | 9 + arch/powerpc/platforms/cell/spufs/run.c | 28 +-- arch/powerpc/platforms/cell/spufs/spufs.h | 4 + arch/powerpc/platforms/cell/spufs/switch.c | 8 +- include/asm-powerpc/mmu.h | 1 + include/asm-powerpc/spu_csa.h | 1 + 10 files changed, 225 insertions(+), 130 deletions(-) create mode 100644 arch/powerpc/platforms/cell/spufs/fault.c diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c index 6242f3c19f68..31fa55f33415 100644 --- a/arch/powerpc/platforms/cell/spu_base.c +++ b/arch/powerpc/platforms/cell/spu_base.c @@ -290,7 +290,6 @@ spu_irq_class_1(int irq, void *data) return stat ? IRQ_HANDLED : IRQ_NONE; } -EXPORT_SYMBOL_GPL(spu_irq_class_1_bottom); static irqreturn_t spu_irq_class_2(int irq, void *data) @@ -462,108 +461,6 @@ void spu_free(struct spu *spu) } EXPORT_SYMBOL_GPL(spu_free); -static int spu_handle_mm_fault(struct spu *spu) -{ - struct mm_struct *mm = spu->mm; - struct vm_area_struct *vma; - u64 ea, dsisr, is_write; - int ret; - - ea = spu->dar; - dsisr = spu->dsisr; -#if 0 - if (!IS_VALID_EA(ea)) { - return -EFAULT; - } -#endif /* XXX */ - if (mm == NULL) { - return -EFAULT; - } - if (mm->pgd == NULL) { - return -EFAULT; - } - - down_read(&mm->mmap_sem); - vma = find_vma(mm, ea); - if (!vma) - goto bad_area; - if (vma->vm_start <= ea) - goto good_area; - if (!(vma->vm_flags & VM_GROWSDOWN)) - goto bad_area; -#if 0 - if (expand_stack(vma, ea)) - goto bad_area; -#endif /* XXX */ -good_area: - is_write = dsisr & MFC_DSISR_ACCESS_PUT; - if (is_write) { - if (!(vma->vm_flags & VM_WRITE)) - goto bad_area; - } else { - if (dsisr & MFC_DSISR_ACCESS_DENIED) - goto bad_area; - if (!(vma->vm_flags & (VM_READ | VM_EXEC))) - goto bad_area; - } - ret = 0; - switch (handle_mm_fault(mm, vma, ea, is_write)) { - case VM_FAULT_MINOR: - current->min_flt++; - break; - case VM_FAULT_MAJOR: - current->maj_flt++; - break; - case VM_FAULT_SIGBUS: - ret = -EFAULT; - goto bad_area; - case VM_FAULT_OOM: - ret = -ENOMEM; - goto bad_area; - default: - BUG(); - } - up_read(&mm->mmap_sem); - return ret; - -bad_area: - up_read(&mm->mmap_sem); - return -EFAULT; -} - -int spu_irq_class_1_bottom(struct spu *spu) -{ - u64 ea, dsisr, access, error = 0UL; - int ret = 0; - - ea = spu->dar; - dsisr = spu->dsisr; - if (dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED)) { - u64 flags; - - access = (_PAGE_PRESENT | _PAGE_USER); - access |= (dsisr & MFC_DSISR_ACCESS_PUT) ? _PAGE_RW : 0UL; - local_irq_save(flags); - if (hash_page(ea, access, 0x300) != 0) - error |= CLASS1_ENABLE_STORAGE_FAULT_INTR; - local_irq_restore(flags); - } - if (error & CLASS1_ENABLE_STORAGE_FAULT_INTR) { - if ((ret = spu_handle_mm_fault(spu)) != 0) - error |= CLASS1_ENABLE_STORAGE_FAULT_INTR; - else - error &= ~CLASS1_ENABLE_STORAGE_FAULT_INTR; - } - spu->dar = 0UL; - spu->dsisr = 0UL; - if (!error) { - spu_restart_dma(spu); - } else { - spu->dma_callback(spu, SPE_EVENT_SPE_DATA_STORAGE); - } - return ret; -} - struct sysdev_class spu_sysdev_class = { set_kset_name("spu") }; diff --git a/arch/powerpc/platforms/cell/spufs/Makefile b/arch/powerpc/platforms/cell/spufs/Makefile index 472217d19faf..2cd89c11af5a 100644 --- a/arch/powerpc/platforms/cell/spufs/Makefile +++ b/arch/powerpc/platforms/cell/spufs/Makefile @@ -1,4 +1,4 @@ -obj-y += switch.o +obj-y += switch.o fault.o obj-$(CONFIG_SPU_FS) += spufs.o spufs-y += inode.o file.o context.o syscalls.o coredump.o diff --git a/arch/powerpc/platforms/cell/spufs/backing_ops.c b/arch/powerpc/platforms/cell/spufs/backing_ops.c index 1898f0d3a8b8..3322528fa6eb 100644 --- a/arch/powerpc/platforms/cell/spufs/backing_ops.c +++ b/arch/powerpc/platforms/cell/spufs/backing_ops.c @@ -350,6 +350,11 @@ static int spu_backing_send_mfc_command(struct spu_context *ctx, return ret; } +static void spu_backing_restart_dma(struct spu_context *ctx) +{ + /* nothing to do here */ +} + struct spu_context_ops spu_backing_ops = { .mbox_read = spu_backing_mbox_read, .mbox_stat_read = spu_backing_mbox_stat_read, @@ -376,4 +381,5 @@ struct spu_context_ops spu_backing_ops = { .read_mfc_tagstatus = spu_backing_read_mfc_tagstatus, .get_mfc_free_elements = spu_backing_get_mfc_free_elements, .send_mfc_command = spu_backing_send_mfc_command, + .restart_dma = spu_backing_restart_dma, }; diff --git a/arch/powerpc/platforms/cell/spufs/fault.c b/arch/powerpc/platforms/cell/spufs/fault.c new file mode 100644 index 000000000000..182dc914cbc9 --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/fault.c @@ -0,0 +1,193 @@ +/* + * Low-level SPU handling + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 + * + * Author: Arnd Bergmann + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#include +#include +#include + +#include +#include + +#include "spufs.h" + +/* + * This ought to be kept in sync with the powerpc specific do_page_fault + * function. Currently, there are a few corner cases that we haven't had + * to handle fortunately. + */ +static int spu_handle_mm_fault(struct mm_struct *mm, unsigned long ea, unsigned long dsisr) +{ + struct vm_area_struct *vma; + unsigned long is_write; + int ret; + +#if 0 + if (!IS_VALID_EA(ea)) { + return -EFAULT; + } +#endif /* XXX */ + if (mm == NULL) { + return -EFAULT; + } + if (mm->pgd == NULL) { + return -EFAULT; + } + + down_read(&mm->mmap_sem); + vma = find_vma(mm, ea); + if (!vma) + goto bad_area; + if (vma->vm_start <= ea) + goto good_area; + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto bad_area; + if (expand_stack(vma, ea)) + goto bad_area; +good_area: + is_write = dsisr & MFC_DSISR_ACCESS_PUT; + if (is_write) { + if (!(vma->vm_flags & VM_WRITE)) + goto bad_area; + } else { + if (dsisr & MFC_DSISR_ACCESS_DENIED) + goto bad_area; + if (!(vma->vm_flags & (VM_READ | VM_EXEC))) + goto bad_area; + } + ret = 0; + switch (handle_mm_fault(mm, vma, ea, is_write)) { + case VM_FAULT_MINOR: + current->min_flt++; + break; + case VM_FAULT_MAJOR: + current->maj_flt++; + break; + case VM_FAULT_SIGBUS: + ret = -EFAULT; + goto bad_area; + case VM_FAULT_OOM: + ret = -ENOMEM; + goto bad_area; + default: + BUG(); + } + up_read(&mm->mmap_sem); + return ret; + +bad_area: + up_read(&mm->mmap_sem); + return -EFAULT; +} + +static void spufs_handle_dma_error(struct spu_context *ctx, int type) +{ + if (ctx->flags & SPU_CREATE_EVENTS_ENABLED) { + ctx->event_return |= type; + wake_up_all(&ctx->stop_wq); + } else { + switch (type) { + case SPE_EVENT_DMA_ALIGNMENT: + case SPE_EVENT_SPE_DATA_STORAGE: + case SPE_EVENT_INVALID_DMA: + force_sig(SIGBUS, /* info, */ current); + break; + case SPE_EVENT_SPE_ERROR: + force_sig(SIGILL, /* info */ current); + break; + } + } +} + +void spufs_dma_callback(struct spu *spu, int type) +{ + spufs_handle_dma_error(spu->ctx, type); +} +EXPORT_SYMBOL_GPL(spufs_dma_callback); + +/* + * bottom half handler for page faults, we can't do this from + * interrupt context, since we might need to sleep. + * we also need to give up the mutex so we can get scheduled + * out while waiting for the backing store. + * + * TODO: try calling hash_page from the interrupt handler first + * in order to speed up the easy case. + */ +int spufs_handle_class1(struct spu_context *ctx) +{ + u64 ea, dsisr, access; + unsigned long flags; + int ret; + + /* + * dar and dsisr get passed from the registers + * to the spu_context, to this function, but not + * back to the spu if it gets scheduled again. + * + * if we don't handle the fault for a saved context + * in time, we can still expect to get the same fault + * the immediately after the context restore. + */ + if (ctx->state == SPU_STATE_RUNNABLE) { + ea = ctx->spu->dar; + dsisr = ctx->spu->dsisr; + ctx->spu->dar= ctx->spu->dsisr = 0; + } else { + ea = ctx->csa.priv1.mfc_dar_RW; + dsisr = ctx->csa.priv1.mfc_dsisr_RW; + ctx->csa.priv1.mfc_dar_RW = 0; + ctx->csa.priv1.mfc_dsisr_RW = 0; + } + + if (!(dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED))) + return 0; + + pr_debug("ctx %p: ea %016lx, dsisr %016lx state %d\n", ctx, ea, + dsisr, ctx->state); + + /* we must not hold the lock when entering spu_handle_mm_fault */ + spu_release(ctx); + + access = (_PAGE_PRESENT | _PAGE_USER); + access |= (dsisr & MFC_DSISR_ACCESS_PUT) ? _PAGE_RW : 0UL; + local_irq_save(flags); + ret = hash_page(ea, access, 0x300); + local_irq_restore(flags); + + /* hashing failed, so try the actual fault handler */ + if (ret) + ret = spu_handle_mm_fault(current->mm, ea, dsisr); + + spu_acquire(ctx); + /* + * If we handled the fault successfully and are in runnable + * state, restart the DMA. + * In case of unhandled error report the problem to user space. + */ + if (!ret) { + if (ctx->spu) + ctx->ops->restart_dma(ctx); + } else + spufs_handle_dma_error(ctx, SPE_EVENT_SPE_DATA_STORAGE); + + return ret; +} +EXPORT_SYMBOL_GPL(spufs_handle_class1); diff --git a/arch/powerpc/platforms/cell/spufs/hw_ops.c b/arch/powerpc/platforms/cell/spufs/hw_ops.c index ae42e03b8c86..428875c5e4ec 100644 --- a/arch/powerpc/platforms/cell/spufs/hw_ops.c +++ b/arch/powerpc/platforms/cell/spufs/hw_ops.c @@ -296,6 +296,14 @@ static int spu_hw_send_mfc_command(struct spu_context *ctx, } } +static void spu_hw_restart_dma(struct spu_context *ctx) +{ + struct spu_priv2 __iomem *priv2 = ctx->spu->priv2; + + if (!test_bit(SPU_CONTEXT_SWITCH_PENDING, &ctx->spu->flags)) + out_be64(&priv2->mfc_control_RW, MFC_CNTL_RESTART_DMA_COMMAND); +} + struct spu_context_ops spu_hw_ops = { .mbox_read = spu_hw_mbox_read, .mbox_stat_read = spu_hw_mbox_stat_read, @@ -320,4 +328,5 @@ struct spu_context_ops spu_hw_ops = { .read_mfc_tagstatus = spu_hw_read_mfc_tagstatus, .get_mfc_free_elements = spu_hw_get_mfc_free_elements, .send_mfc_command = spu_hw_send_mfc_command, + .restart_dma = spu_hw_restart_dma, }; diff --git a/arch/powerpc/platforms/cell/spufs/run.c b/arch/powerpc/platforms/cell/spufs/run.c index 7df5202c9a90..1a8195bf75d5 100644 --- a/arch/powerpc/platforms/cell/spufs/run.c +++ b/arch/powerpc/platforms/cell/spufs/run.c @@ -18,27 +18,6 @@ void spufs_stop_callback(struct spu *spu) wake_up_all(&ctx->stop_wq); } -void spufs_dma_callback(struct spu *spu, int type) -{ - struct spu_context *ctx = spu->ctx; - - if (ctx->flags & SPU_CREATE_EVENTS_ENABLED) { - ctx->event_return |= type; - wake_up_all(&ctx->stop_wq); - } else { - switch (type) { - case SPE_EVENT_DMA_ALIGNMENT: - case SPE_EVENT_SPE_DATA_STORAGE: - case SPE_EVENT_INVALID_DMA: - force_sig(SIGBUS, /* info, */ current); - break; - case SPE_EVENT_SPE_ERROR: - force_sig(SIGILL, /* info */ current); - break; - } - } -} - static inline int spu_stopped(struct spu_context *ctx, u32 * stat) { struct spu *spu; @@ -294,11 +273,8 @@ int spu_process_callback(struct spu_context *ctx) static inline int spu_process_events(struct spu_context *ctx) { struct spu *spu = ctx->spu; - u64 pte_fault = MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED; int ret = 0; - if (spu->dsisr & pte_fault) - ret = spu_irq_class_1_bottom(spu); if (spu->class_0_pending) ret = spu_irq_class_0_bottom(spu); if (!ret && signal_pending(current)) @@ -332,6 +308,10 @@ long spufs_run_spu(struct file *file, struct spu_context *ctx, break; status &= ~SPU_STATUS_STOPPED_BY_STOP; } + ret = spufs_handle_class1(ctx); + if (ret) + break; + if (unlikely(ctx->state != SPU_STATE_RUNNABLE)) { ret = spu_reacquire_runnable(ctx, npc, &status); if (ret) { diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h index cae2ad435b0a..9993c9b3cffc 100644 --- a/arch/powerpc/platforms/cell/spufs/spufs.h +++ b/arch/powerpc/platforms/cell/spufs/spufs.h @@ -141,6 +141,7 @@ struct spu_context_ops { struct spu_dma_info * info); void (*proxydma_info_read) (struct spu_context * ctx, struct spu_proxydma_info * info); + void (*restart_dma)(struct spu_context *ctx); }; extern struct spu_context_ops spu_hw_ops; @@ -172,6 +173,9 @@ int put_spu_gang(struct spu_gang *gang); void spu_gang_remove_ctx(struct spu_gang *gang, struct spu_context *ctx); void spu_gang_add_ctx(struct spu_gang *gang, struct spu_context *ctx); +/* fault handling */ +int spufs_handle_class1(struct spu_context *ctx); + /* context management */ static inline void spu_acquire(struct spu_context *ctx) { diff --git a/arch/powerpc/platforms/cell/spufs/switch.c b/arch/powerpc/platforms/cell/spufs/switch.c index fd91c73de34e..8347c4a3f894 100644 --- a/arch/powerpc/platforms/cell/spufs/switch.c +++ b/arch/powerpc/platforms/cell/spufs/switch.c @@ -2084,6 +2084,10 @@ int spu_save(struct spu_state *prev, struct spu *spu) int rc; acquire_spu_lock(spu); /* Step 1. */ + prev->dar = spu->dar; + prev->dsisr = spu->dsisr; + spu->dar = 0; + spu->dsisr = 0; rc = __do_spu_save(prev, spu); /* Steps 2-53. */ release_spu_lock(spu); if (rc != 0 && rc != 2 && rc != 6) { @@ -2109,9 +2113,9 @@ int spu_restore(struct spu_state *new, struct spu *spu) acquire_spu_lock(spu); harvest(NULL, spu); - spu->dar = 0; - spu->dsisr = 0; spu->slb_replace = 0; + new->dar = 0; + new->dsisr = 0; spu->class_0_pending = 0; rc = __do_spu_restore(new, spu); release_spu_lock(spu); diff --git a/include/asm-powerpc/mmu.h b/include/asm-powerpc/mmu.h index 200055a4b82b..e22fd8811505 100644 --- a/include/asm-powerpc/mmu.h +++ b/include/asm-powerpc/mmu.h @@ -234,6 +234,7 @@ extern int __hash_page_64K(unsigned long ea, unsigned long access, unsigned long vsid, pte_t *ptep, unsigned long trap, unsigned int local); struct mm_struct; +extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap); extern int hash_huge_page(struct mm_struct *mm, unsigned long access, unsigned long ea, unsigned long vsid, int local, unsigned long trap); diff --git a/include/asm-powerpc/spu_csa.h b/include/asm-powerpc/spu_csa.h index 8aad0619eb8e..02e56a6685a2 100644 --- a/include/asm-powerpc/spu_csa.h +++ b/include/asm-powerpc/spu_csa.h @@ -242,6 +242,7 @@ struct spu_state { u64 spu_chnldata_RW[32]; u32 spu_mailbox_data[4]; u32 pu_mailbox_data[1]; + u64 dar, dsisr; unsigned long suspend_time; spinlock_t register_lock; }; -- 2.34.1