From: Luke Browning Date: Thu, 20 Dec 2007 07:39:59 +0000 (+0900) Subject: [POWERPC] spufs: decouple spu scheduler from spufs_spu_run (asynchronous scheduling) X-Git-Tag: firefly_0821_release~23624^2~211 X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=e65c2f6fcebb9af0c3f53c796aff730dd657f5e7;p=firefly-linux-kernel-4.4.55.git [POWERPC] spufs: decouple spu scheduler from spufs_spu_run (asynchronous scheduling) Change spufs_spu_run so that the context is queued directly to the scheduler and the controlling thread advances directly to spufs_wait() for spe errors and exceptions. nosched contexts are treated the same as before. Fixes from Christoph Hellwig Signed-off-by: Luke Browning Signed-off-by: Jeremy Kerr Signed-off-by: Paul Mackerras --- diff --git a/arch/powerpc/platforms/cell/spufs/context.c b/arch/powerpc/platforms/cell/spufs/context.c index 6fa24d38706e..290b10e45105 100644 --- a/arch/powerpc/platforms/cell/spufs/context.c +++ b/arch/powerpc/platforms/cell/spufs/context.c @@ -133,37 +133,6 @@ void spu_unmap_mappings(struct spu_context *ctx) mutex_unlock(&ctx->mapping_lock); } -/** - * spu_acquire_runnable - lock spu contex and make sure it is in runnable state - * @ctx: spu contex to lock - * - * Note: - * Returns 0 and with the context locked on success - * Returns negative error and with the context _unlocked_ on failure. - */ -int spu_acquire_runnable(struct spu_context *ctx, unsigned long flags) -{ - int ret = -EINVAL; - - spu_acquire(ctx); - if (ctx->state == SPU_STATE_SAVED) { - /* - * Context is about to be freed, so we can't acquire it anymore. - */ - if (!ctx->owner) - goto out_unlock; - ret = spu_activate(ctx, flags); - if (ret) - goto out_unlock; - } - - return 0; - - out_unlock: - spu_release(ctx); - return ret; -} - /** * spu_acquire_saved - lock spu contex and make sure it is in saved state * @ctx: spu contex to lock diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c index 714972621220..78df905743b3 100644 --- a/arch/powerpc/platforms/cell/spufs/file.c +++ b/arch/powerpc/platforms/cell/spufs/file.c @@ -572,6 +572,9 @@ void spufs_ibox_callback(struct spu *spu) { struct spu_context *ctx = spu->ctx; + if (!ctx) + return; + wake_up_all(&ctx->ibox_wq); kill_fasync(&ctx->ibox_fasync, SIGIO, POLLIN); } @@ -708,6 +711,9 @@ void spufs_wbox_callback(struct spu *spu) { struct spu_context *ctx = spu->ctx; + if (!ctx) + return; + wake_up_all(&ctx->wbox_wq); kill_fasync(&ctx->wbox_fasync, SIGIO, POLLOUT); } @@ -1339,6 +1345,9 @@ void spufs_mfc_callback(struct spu *spu) { struct spu_context *ctx = spu->ctx; + if (!ctx) + return; + wake_up_all(&ctx->mfc_wq); pr_debug("%s %s\n", __FUNCTION__, spu->name); diff --git a/arch/powerpc/platforms/cell/spufs/run.c b/arch/powerpc/platforms/cell/spufs/run.c index 652ae1366dc8..b380050cdbc7 100644 --- a/arch/powerpc/platforms/cell/spufs/run.c +++ b/arch/powerpc/platforms/cell/spufs/run.c @@ -41,21 +41,29 @@ void spufs_stop_callback(struct spu *spu) spu->dar = 0; } -static inline int spu_stopped(struct spu_context *ctx, u32 *stat) +int spu_stopped(struct spu_context *ctx, u32 *stat) { - struct spu *spu; - u64 pte_fault; + u64 dsisr; + u32 stopped; *stat = ctx->ops->status_read(ctx); - spu = ctx->spu; - if (ctx->state != SPU_STATE_RUNNABLE || - test_bit(SPU_SCHED_NOTIFY_ACTIVE, &ctx->sched_flags)) + if (test_bit(SPU_SCHED_NOTIFY_ACTIVE, &ctx->sched_flags)) + return 1; + + stopped = SPU_STATUS_INVALID_INSTR | SPU_STATUS_SINGLE_STEP | + SPU_STATUS_STOPPED_BY_HALT | SPU_STATUS_STOPPED_BY_STOP; + if (*stat & stopped) + return 1; + + dsisr = ctx->csa.dsisr; + if (dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED)) return 1; - pte_fault = ctx->csa.dsisr & - (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED); - return (!(*stat & SPU_STATUS_RUNNING) || pte_fault || ctx->csa.class_0_pending) ? - 1 : 0; + + if (ctx->csa.class_0_pending) + return 1; + + return 0; } static int spu_setup_isolated(struct spu_context *ctx) @@ -151,24 +159,27 @@ out: static int spu_run_init(struct spu_context *ctx, u32 *npc) { - unsigned long runcntl; + unsigned long runcntl = SPU_RUNCNTL_RUNNABLE; int ret; spuctx_switch_state(ctx, SPU_UTIL_SYSTEM); - if (ctx->flags & SPU_CREATE_ISOLATE) { - /* - * Force activation of spu. Isolated state assumes that - * special loader context is loaded and running on spu. - */ + /* + * NOSCHED is synchronous scheduling with respect to the caller. + * The caller waits for the context to be loaded. + */ + if (ctx->flags & SPU_CREATE_NOSCHED) { if (ctx->state == SPU_STATE_SAVED) { - spu_set_timeslice(ctx); - ret = spu_activate(ctx, 0); if (ret) return ret; } + } + /* + * Apply special setup as required. + */ + if (ctx->flags & SPU_CREATE_ISOLATE) { if (!(ctx->ops->status_read(ctx) & SPU_STATUS_ISOLATED_STATE)) { ret = spu_setup_isolated(ctx); if (ret) @@ -183,10 +194,11 @@ static int spu_run_init(struct spu_context *ctx, u32 *npc) (SPU_RUNCNTL_RUNNABLE | SPU_RUNCNTL_ISOLATE); if (runcntl == 0) runcntl = SPU_RUNCNTL_RUNNABLE; + } + if (ctx->flags & SPU_CREATE_NOSCHED) { spuctx_switch_state(ctx, SPU_UTIL_USER); ctx->ops->runcntl_write(ctx, runcntl); - } else { unsigned long privcntl; @@ -194,20 +206,18 @@ static int spu_run_init(struct spu_context *ctx, u32 *npc) privcntl = SPU_PRIVCNTL_MODE_SINGLE_STEP; else privcntl = SPU_PRIVCNTL_MODE_NORMAL; - runcntl = SPU_RUNCNTL_RUNNABLE; ctx->ops->npc_write(ctx, *npc); ctx->ops->privcntl_write(ctx, privcntl); + ctx->ops->runcntl_write(ctx, runcntl); if (ctx->state == SPU_STATE_SAVED) { - spu_set_timeslice(ctx); ret = spu_activate(ctx, 0); if (ret) return ret; + } else { + spuctx_switch_state(ctx, SPU_UTIL_USER); } - - spuctx_switch_state(ctx, SPU_UTIL_USER); - ctx->ops->runcntl_write(ctx, runcntl); } return 0; @@ -218,6 +228,8 @@ static int spu_run_fini(struct spu_context *ctx, u32 *npc, { int ret = 0; + spu_del_from_rq(ctx); + *status = ctx->ops->status_read(ctx); *npc = ctx->ops->npc_read(ctx); @@ -230,26 +242,6 @@ static int spu_run_fini(struct spu_context *ctx, u32 *npc, return ret; } -static int spu_reacquire_runnable(struct spu_context *ctx, u32 *npc, - u32 *status) -{ - int ret; - - ret = spu_run_fini(ctx, npc, status); - if (ret) - return ret; - - if (*status & (SPU_STATUS_STOPPED_BY_STOP | SPU_STATUS_STOPPED_BY_HALT)) - return *status; - - ret = spu_acquire_runnable(ctx, 0); - if (ret) - return ret; - - spuctx_switch_state(ctx, SPU_UTIL_USER); - return 0; -} - /* * SPU syscall restarting is tricky because we violate the basic * assumption that the signal handler is running on the interrupted @@ -386,17 +378,8 @@ long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *event) if (ret) break; - if (unlikely(ctx->state != SPU_STATE_RUNNABLE)) { - ret = spu_reacquire_runnable(ctx, npc, &status); - if (ret) - goto out2; - continue; - } - if (signal_pending(current)) ret = -ERESTARTSYS; - - } while (!ret && !(status & (SPU_STATUS_STOPPED_BY_STOP | SPU_STATUS_STOPPED_BY_HALT | SPU_STATUS_SINGLE_STEP))); @@ -411,7 +394,6 @@ long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *event) ret = spu_run_fini(ctx, npc, &status); spu_yield(ctx); -out2: if ((ret == 0) || ((ret == -ERESTARTSYS) && ((status & SPU_STATUS_STOPPED_BY_HALT) || diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index 54b338f8363f..2775c1652ba4 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -146,6 +146,10 @@ void spu_update_sched_info(struct spu_context *ctx) if (ctx->state == SPU_STATE_RUNNABLE) { node = ctx->spu->node; + + /* + * Take list_mutex to sync with find_victim(). + */ mutex_lock(&cbe_spu_info[node].list_mutex); __spu_update_sched_info(ctx); mutex_unlock(&cbe_spu_info[node].list_mutex); @@ -487,6 +491,13 @@ static void __spu_add_to_rq(struct spu_context *ctx) } } +static void spu_add_to_rq(struct spu_context *ctx) +{ + spin_lock(&spu_prio->runq_lock); + __spu_add_to_rq(ctx); + spin_unlock(&spu_prio->runq_lock); +} + static void __spu_del_from_rq(struct spu_context *ctx) { int prio = ctx->prio; @@ -501,10 +512,24 @@ static void __spu_del_from_rq(struct spu_context *ctx) } } +void spu_del_from_rq(struct spu_context *ctx) +{ + spin_lock(&spu_prio->runq_lock); + __spu_del_from_rq(ctx); + spin_unlock(&spu_prio->runq_lock); +} + static void spu_prio_wait(struct spu_context *ctx) { DEFINE_WAIT(wait); + /* + * The caller must explicitly wait for a context to be loaded + * if the nosched flag is set. If NOSCHED is not set, the caller + * queues the context and waits for an spu event or error. + */ + BUG_ON(!(ctx->flags & SPU_CREATE_NOSCHED)); + spin_lock(&spu_prio->runq_lock); prepare_to_wait_exclusive(&ctx->stop_wq, &wait, TASK_INTERRUPTIBLE); if (!signal_pending(current)) { @@ -604,6 +629,7 @@ static struct spu *find_victim(struct spu_context *ctx) struct spu_context *tmp = spu->ctx; if (tmp && tmp->prio > ctx->prio && + !(tmp->flags & SPU_CREATE_NOSCHED) && (!victim || tmp->prio > victim->prio)) victim = spu->ctx; } @@ -644,13 +670,10 @@ static struct spu *find_victim(struct spu_context *ctx) victim->stats.invol_ctx_switch++; spu->stats.invol_ctx_switch++; + spu_add_to_rq(victim); + mutex_unlock(&victim->state_mutex); - /* - * We need to break out of the wait loop in spu_run - * manually to ensure this context gets put on the - * runqueue again ASAP. - */ - wake_up(&victim->stop_wq); + return spu; } } @@ -658,6 +681,48 @@ static struct spu *find_victim(struct spu_context *ctx) return NULL; } +static void __spu_schedule(struct spu *spu, struct spu_context *ctx) +{ + int node = spu->node; + int success = 0; + + spu_set_timeslice(ctx); + + mutex_lock(&cbe_spu_info[node].list_mutex); + if (spu->ctx == NULL) { + spu_bind_context(spu, ctx); + cbe_spu_info[node].nr_active++; + spu->alloc_state = SPU_USED; + success = 1; + } + mutex_unlock(&cbe_spu_info[node].list_mutex); + + if (success) + wake_up_all(&ctx->run_wq); + else + spu_add_to_rq(ctx); +} + +static void spu_schedule(struct spu *spu, struct spu_context *ctx) +{ + spu_acquire(ctx); + __spu_schedule(spu, ctx); + spu_release(ctx); +} + +static void spu_unschedule(struct spu *spu, struct spu_context *ctx) +{ + int node = spu->node; + + mutex_lock(&cbe_spu_info[node].list_mutex); + cbe_spu_info[node].nr_active--; + spu->alloc_state = SPU_FREE; + spu_unbind_context(spu, ctx); + ctx->stats.invol_ctx_switch++; + spu->stats.invol_ctx_switch++; + mutex_unlock(&cbe_spu_info[node].list_mutex); +} + /** * spu_activate - find a free spu for a context and execute it * @ctx: spu context to schedule @@ -669,40 +734,47 @@ static struct spu *find_victim(struct spu_context *ctx) */ int spu_activate(struct spu_context *ctx, unsigned long flags) { - do { - struct spu *spu; + struct spu *spu; - /* - * If there are multiple threads waiting for a single context - * only one actually binds the context while the others will - * only be able to acquire the state_mutex once the context - * already is in runnable state. - */ - if (ctx->spu) - return 0; + /* + * If there are multiple threads waiting for a single context + * only one actually binds the context while the others will + * only be able to acquire the state_mutex once the context + * already is in runnable state. + */ + if (ctx->spu) + return 0; - spu = spu_get_idle(ctx); - /* - * If this is a realtime thread we try to get it running by - * preempting a lower priority thread. - */ - if (!spu && rt_prio(ctx->prio)) - spu = find_victim(ctx); - if (spu) { - int node = spu->node; +spu_activate_top: + if (signal_pending(current)) + return -ERESTARTSYS; - mutex_lock(&cbe_spu_info[node].list_mutex); - spu_bind_context(spu, ctx); - cbe_spu_info[node].nr_active++; - mutex_unlock(&cbe_spu_info[node].list_mutex); - wake_up_all(&ctx->run_wq); - return 0; - } + spu = spu_get_idle(ctx); + /* + * If this is a realtime thread we try to get it running by + * preempting a lower priority thread. + */ + if (!spu && rt_prio(ctx->prio)) + spu = find_victim(ctx); + if (spu) { + unsigned long runcntl; + runcntl = ctx->ops->runcntl_read(ctx); + __spu_schedule(spu, ctx); + if (runcntl & SPU_RUNCNTL_RUNNABLE) + spuctx_switch_state(ctx, SPU_UTIL_USER); + + return 0; + } + + if (ctx->flags & SPU_CREATE_NOSCHED) { spu_prio_wait(ctx); - } while (!signal_pending(current)); + goto spu_activate_top; + } - return -ERESTARTSYS; + spu_add_to_rq(ctx); + + return 0; } /** @@ -744,21 +816,17 @@ static int __spu_deactivate(struct spu_context *ctx, int force, int max_prio) if (spu) { new = grab_runnable_context(max_prio, spu->node); if (new || force) { - int node = spu->node; - - mutex_lock(&cbe_spu_info[node].list_mutex); - spu_unbind_context(spu, ctx); - spu->alloc_state = SPU_FREE; - cbe_spu_info[node].nr_active--; - mutex_unlock(&cbe_spu_info[node].list_mutex); - - ctx->stats.vol_ctx_switch++; - spu->stats.vol_ctx_switch++; - - if (new) - wake_up(&new->stop_wq); + spu_unschedule(spu, ctx); + if (new) { + if (new->flags & SPU_CREATE_NOSCHED) + wake_up(&new->stop_wq); + else { + spu_release(ctx); + spu_schedule(spu, new); + spu_acquire(ctx); + } + } } - } return new != NULL; @@ -795,43 +863,37 @@ void spu_yield(struct spu_context *ctx) static noinline void spusched_tick(struct spu_context *ctx) { + struct spu_context *new = NULL; + struct spu *spu = NULL; + u32 status; + + spu_acquire(ctx); + + if (ctx->state != SPU_STATE_RUNNABLE) + goto out; + if (spu_stopped(ctx, &status)) + goto out; if (ctx->flags & SPU_CREATE_NOSCHED) - return; + goto out; if (ctx->policy == SCHED_FIFO) - return; + goto out; if (--ctx->time_slice) - return; + goto out; - /* - * Unfortunately list_mutex ranks outside of state_mutex, so - * we have to trylock here. If we fail give the context another - * tick and try again. - */ - if (mutex_trylock(&ctx->state_mutex)) { - struct spu *spu = ctx->spu; - struct spu_context *new; - - new = grab_runnable_context(ctx->prio + 1, spu->node); - if (new) { - spu_unbind_context(spu, ctx); - ctx->stats.invol_ctx_switch++; - spu->stats.invol_ctx_switch++; - spu->alloc_state = SPU_FREE; - cbe_spu_info[spu->node].nr_active--; - wake_up(&new->stop_wq); - /* - * We need to break out of the wait loop in - * spu_run manually to ensure this context - * gets put on the runqueue again ASAP. - */ - wake_up(&ctx->stop_wq); - } - spu_set_timeslice(ctx); - mutex_unlock(&ctx->state_mutex); + spu = ctx->spu; + new = grab_runnable_context(ctx->prio + 1, spu->node); + if (new) { + spu_unschedule(spu, ctx); + spu_add_to_rq(ctx); } else { ctx->time_slice++; } +out: + spu_release(ctx); + + if (new) + spu_schedule(spu, new); } /** @@ -895,11 +957,20 @@ static int spusched_thread(void *unused) set_current_state(TASK_INTERRUPTIBLE); schedule(); for (node = 0; node < MAX_NUMNODES; node++) { - mutex_lock(&cbe_spu_info[node].list_mutex); - list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) - if (spu->ctx) - spusched_tick(spu->ctx); - mutex_unlock(&cbe_spu_info[node].list_mutex); + struct mutex *mtx = &cbe_spu_info[node].list_mutex; + + mutex_lock(mtx); + list_for_each_entry(spu, &cbe_spu_info[node].spus, + cbe_list) { + struct spu_context *ctx = spu->ctx; + + if (ctx) { + mutex_unlock(mtx); + spusched_tick(ctx); + mutex_lock(mtx); + } + } + mutex_unlock(mtx); } } diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h index eaab1b239d02..412de58f5b0f 100644 --- a/arch/powerpc/platforms/cell/spufs/spufs.h +++ b/arch/powerpc/platforms/cell/spufs/spufs.h @@ -246,10 +246,11 @@ int put_spu_context(struct spu_context *ctx); void spu_unmap_mappings(struct spu_context *ctx); void spu_forget(struct spu_context *ctx); -int spu_acquire_runnable(struct spu_context *ctx, unsigned long flags); void spu_acquire_saved(struct spu_context *ctx); void spu_release_saved(struct spu_context *ctx); +int spu_stopped(struct spu_context *ctx, u32 * stat); +void spu_del_from_rq(struct spu_context *ctx); int spu_activate(struct spu_context *ctx, unsigned long flags); void spu_deactivate(struct spu_context *ctx); void spu_yield(struct spu_context *ctx);