From f556cb0caeec1ba9b8e5e2aa85b47e76277f5d4b Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Sun, 2 Aug 2015 11:18:04 +0800 Subject: [PATCH] drm/amd: add scheduler fence implementation (v2) scheduler fence is based on kernel fence framework. v2: squash in Christian's build fix Signed-off-by: Chunming Zhou Reviewed-by: Christian K?nig --- drivers/gpu/drm/amd/amdgpu/Makefile | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 21 +++- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 10 -- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | 34 +++--- drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 26 +++- drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 33 +++++- drivers/gpu/drm/amd/scheduler/sched_fence.c | 112 ++++++++++++++++++ 9 files changed, 202 insertions(+), 38 deletions(-) create mode 100644 drivers/gpu/drm/amd/scheduler/sched_fence.c diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index f1cb7d2fa411..04c270757030 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -86,6 +86,7 @@ amdgpu-y += amdgpu_cgs.o # GPU scheduler amdgpu-y += \ ../scheduler/gpu_scheduler.o \ + ../scheduler/sched_fence.o \ amdgpu_sched.o amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 987e3075a03f..2ba448ee948b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1261,6 +1261,7 @@ struct amdgpu_cs_parser { int (*prepare_job)(struct amdgpu_cs_parser *sched_job); int (*run_job)(struct amdgpu_cs_parser *sched_job); int (*free_job)(struct amdgpu_cs_parser *sched_job); + struct amd_sched_fence *s_fence; }; static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p, uint32_t ib_idx, int idx) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index b1dc7e1ed271..f428288d8363 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -899,8 +899,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) if (amdgpu_enable_scheduler && parser->num_ibs) { struct amdgpu_ring * ring = amdgpu_cs_parser_get_ring(adev, parser); - parser->ibs[parser->num_ibs - 1].sequence = atomic64_inc_return( - &parser->ctx->rings[ring->idx].entity.last_queued_v_seq); if (ring->is_pte_ring || (parser->bo_list && parser->bo_list->has_userptr)) { r = amdgpu_cs_parser_prepare_job(parser); if (r) @@ -910,10 +908,21 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) parser->ring = ring; parser->run_job = amdgpu_cs_parser_run_job; parser->free_job = amdgpu_cs_parser_free_job; - amd_sched_push_job(ring->scheduler, - &parser->ctx->rings[ring->idx].entity, - parser); - cs->out.handle = parser->ibs[parser->num_ibs - 1].sequence; + mutex_lock(&parser->job_lock); + r = amd_sched_push_job(ring->scheduler, + &parser->ctx->rings[ring->idx].entity, + parser, + &parser->s_fence); + if (r) { + mutex_unlock(&parser->job_lock); + goto out; + } + parser->ibs[parser->num_ibs - 1].sequence = + amdgpu_ctx_add_fence(parser->ctx, ring, + &parser->s_fence->base, + parser->s_fence->v_seq); + cs->out.handle = parser->s_fence->v_seq; + mutex_unlock(&parser->job_lock); up_read(&adev->exclusive_lock); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 232e800eea56..1833f05c7e0b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -268,16 +268,6 @@ struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx]; struct fence *fence; uint64_t queued_seq; - int r; - - if (amdgpu_enable_scheduler) { - r = amd_sched_wait_emit(&cring->entity, - seq, - false, - -1); - if (r) - return NULL; - } spin_lock(&ctx->ring_lock); if (amdgpu_enable_scheduler) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index eed409c59492..5104e64e9ad8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -218,7 +218,7 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs, sequence = amdgpu_enable_scheduler ? ib->sequence : 0; - if (ib->ctx) + if (!amdgpu_enable_scheduler && ib->ctx) ib->sequence = amdgpu_ctx_add_fence(ib->ctx, ring, &ib->fence->base, sequence); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c index d82f2481bd0e..6a7e83edcaa7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c @@ -118,7 +118,6 @@ int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev, { int r = 0; if (amdgpu_enable_scheduler) { - uint64_t v_seq; struct amdgpu_cs_parser *sched_job = amdgpu_cs_parser_create(adev, owner, &adev->kernel_ctx, ibs, num_ibs); @@ -126,22 +125,23 @@ int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev, return -ENOMEM; } sched_job->free_job = free_job; - v_seq = atomic64_inc_return(&adev->kernel_ctx.rings[ring->idx].entity.last_queued_v_seq); - ibs[num_ibs - 1].sequence = v_seq; - amd_sched_push_job(ring->scheduler, - &adev->kernel_ctx.rings[ring->idx].entity, - sched_job); - r = amd_sched_wait_emit( - &adev->kernel_ctx.rings[ring->idx].entity, - v_seq, - false, - -1); - if (r) - WARN(true, "emit timeout\n"); - } else + mutex_lock(&sched_job->job_lock); + r = amd_sched_push_job(ring->scheduler, + &adev->kernel_ctx.rings[ring->idx].entity, + sched_job, &sched_job->s_fence); + if (r) { + mutex_unlock(&sched_job->job_lock); + kfree(sched_job); + return r; + } + ibs[num_ibs - 1].sequence = sched_job->s_fence->v_seq; + *f = &sched_job->s_fence->base; + mutex_unlock(&sched_job->job_lock); + } else { r = amdgpu_ib_schedule(adev, num_ibs, ibs, owner); - if (r) - return r; - *f = &ibs[num_ibs - 1].fence->base; + if (r) + return r; + *f = &ibs[num_ibs - 1].fence->base; + } return 0; } diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index 33b4f55e48b1..402086d96889 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c @@ -180,6 +180,7 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched, uint32_t jobs) { uint64_t seq_ring = 0; + char name[20]; if (!(sched && entity && rq)) return -EINVAL; @@ -191,6 +192,10 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched, entity->scheduler = sched; init_waitqueue_head(&entity->wait_queue); init_waitqueue_head(&entity->wait_emit); + entity->fence_context = fence_context_alloc(1); + snprintf(name, sizeof(name), "c_entity[%llu]", entity->fence_context); + memcpy(entity->name, name, 20); + INIT_LIST_HEAD(&entity->fence_list); if(kfifo_alloc(&entity->job_queue, jobs * sizeof(void *), GFP_KERNEL)) @@ -199,6 +204,7 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched, spin_lock_init(&entity->queue_lock); atomic64_set(&entity->last_emitted_v_seq, seq_ring); atomic64_set(&entity->last_queued_v_seq, seq_ring); + atomic64_set(&entity->last_signaled_v_seq, seq_ring); /* Add the entity to the run queue */ mutex_lock(&rq->lock); @@ -291,15 +297,25 @@ int amd_sched_entity_fini(struct amd_gpu_scheduler *sched, */ int amd_sched_push_job(struct amd_gpu_scheduler *sched, struct amd_sched_entity *c_entity, - void *data) + void *data, + struct amd_sched_fence **fence) { - struct amd_sched_job *job = kzalloc(sizeof(struct amd_sched_job), - GFP_KERNEL); + struct amd_sched_job *job; + + if (!fence) + return -EINVAL; + job = kzalloc(sizeof(struct amd_sched_job), GFP_KERNEL); if (!job) return -ENOMEM; job->sched = sched; job->s_entity = c_entity; job->data = data; + *fence = amd_sched_fence_create(c_entity); + if ((*fence) == NULL) { + kfree(job); + return -EINVAL; + } + job->s_fence = *fence; while (kfifo_in_spinlocked(&c_entity->job_queue, &job, sizeof(void *), &c_entity->queue_lock) != sizeof(void *)) { /** @@ -368,12 +384,16 @@ static void amd_sched_process_job(struct fence *f, struct fence_cb *cb) unsigned long flags; sched = sched_job->sched; + atomic64_set(&sched_job->s_entity->last_signaled_v_seq, + sched_job->s_fence->v_seq); + amd_sched_fence_signal(sched_job->s_fence); spin_lock_irqsave(&sched->queue_lock, flags); list_del(&sched_job->list); atomic64_dec(&sched->hw_rq_count); spin_unlock_irqrestore(&sched->queue_lock, flags); sched->ops->process_job(sched, sched_job); + fence_put(&sched_job->s_fence->base); kfree(sched_job); wake_up_interruptible(&sched->wait_queue); } diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h index f54615d6a500..300132f14d74 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h @@ -45,6 +45,7 @@ struct amd_sched_entity { /* the virtual_seq is unique per context per ring */ atomic64_t last_queued_v_seq; atomic64_t last_emitted_v_seq; + atomic64_t last_signaled_v_seq; /* the job_queue maintains the jobs submitted by clients */ struct kfifo job_queue; spinlock_t queue_lock; @@ -52,6 +53,9 @@ struct amd_sched_entity { wait_queue_head_t wait_queue; wait_queue_head_t wait_emit; bool is_pending; + uint64_t fence_context; + struct list_head fence_list; + char name[20]; }; /** @@ -72,14 +76,35 @@ struct amd_run_queue { int (*check_entity_status)(struct amd_sched_entity *entity); }; +struct amd_sched_fence { + struct fence base; + struct fence_cb cb; + struct list_head list; + struct amd_sched_entity *entity; + uint64_t v_seq; + spinlock_t lock; +}; + struct amd_sched_job { struct list_head list; struct fence_cb cb; struct amd_gpu_scheduler *sched; struct amd_sched_entity *s_entity; void *data; + struct amd_sched_fence *s_fence; }; +extern const struct fence_ops amd_sched_fence_ops; +static inline struct amd_sched_fence *to_amd_sched_fence(struct fence *f) +{ + struct amd_sched_fence *__f = container_of(f, struct amd_sched_fence, base); + + if (__f->base.ops == &amd_sched_fence_ops) + return __f; + + return NULL; +} + /** * Define the backend operations called by the scheduler, * these functions should be implemented in driver side @@ -126,7 +151,8 @@ int amd_sched_destroy(struct amd_gpu_scheduler *sched); int amd_sched_push_job(struct amd_gpu_scheduler *sched, struct amd_sched_entity *c_entity, - void *data); + void *data, + struct amd_sched_fence **fence); int amd_sched_wait_emit(struct amd_sched_entity *c_entity, uint64_t seq, @@ -146,4 +172,9 @@ void amd_sched_emit(struct amd_sched_entity *c_entity, uint64_t seq); uint64_t amd_sched_next_queued_seq(struct amd_sched_entity *c_entity); +struct amd_sched_fence *amd_sched_fence_create( + struct amd_sched_entity *s_entity); +void amd_sched_fence_signal(struct amd_sched_fence *fence); + + #endif diff --git a/drivers/gpu/drm/amd/scheduler/sched_fence.c b/drivers/gpu/drm/amd/scheduler/sched_fence.c new file mode 100644 index 000000000000..d580a357c547 --- /dev/null +++ b/drivers/gpu/drm/amd/scheduler/sched_fence.c @@ -0,0 +1,112 @@ +/* + * Copyright 2015 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * + */ +#include +#include +#include +#include +#include "gpu_scheduler.h" + +static void amd_sched_fence_wait_cb(struct fence *f, struct fence_cb *cb) +{ + struct amd_sched_fence *fence = + container_of(cb, struct amd_sched_fence, cb); + list_del_init(&fence->list); + fence_put(&fence->base); +} + +struct amd_sched_fence *amd_sched_fence_create( + struct amd_sched_entity *s_entity) +{ + struct amd_sched_fence *fence = NULL; + fence = kzalloc(sizeof(struct amd_sched_fence), GFP_KERNEL); + if (fence == NULL) + return NULL; + fence->v_seq = atomic64_inc_return(&s_entity->last_queued_v_seq); + fence->entity = s_entity; + spin_lock_init(&fence->lock); + fence_init(&fence->base, &amd_sched_fence_ops, + &fence->lock, + s_entity->fence_context, + fence->v_seq); + fence_get(&fence->base); + list_add_tail(&fence->list, &s_entity->fence_list); + if (fence_add_callback(&fence->base,&fence->cb, + amd_sched_fence_wait_cb)) { + fence_put(&fence->base); + kfree(fence); + return NULL; + } + return fence; +} + +bool amd_sched_check_ts(struct amd_sched_entity *s_entity, uint64_t v_seq) +{ + return atomic64_read(&s_entity->last_signaled_v_seq) >= v_seq ? true : false; +} + +void amd_sched_fence_signal(struct amd_sched_fence *fence) +{ + if (amd_sched_check_ts(fence->entity, fence->v_seq)) { + int ret = fence_signal_locked(&fence->base); + if (!ret) + FENCE_TRACE(&fence->base, "signaled from irq context\n"); + else + FENCE_TRACE(&fence->base, "was already signaled\n"); + } else + WARN(true, "fence process dismattch with job!\n"); +} + +static const char *amd_sched_fence_get_driver_name(struct fence *fence) +{ + return "amd_sched"; +} + +static const char *amd_sched_fence_get_timeline_name(struct fence *f) +{ + struct amd_sched_fence *fence = to_amd_sched_fence(f); + return (const char *)fence->entity->name; +} + +static bool amd_sched_fence_enable_signaling(struct fence *f) +{ + struct amd_sched_fence *fence = to_amd_sched_fence(f); + + return !amd_sched_check_ts(fence->entity, fence->v_seq); +} + +static bool amd_sched_fence_is_signaled(struct fence *f) +{ + struct amd_sched_fence *fence = to_amd_sched_fence(f); + + return amd_sched_check_ts(fence->entity, fence->v_seq); +} + +const struct fence_ops amd_sched_fence_ops = { + .get_driver_name = amd_sched_fence_get_driver_name, + .get_timeline_name = amd_sched_fence_get_timeline_name, + .enable_signaling = amd_sched_fence_enable_signaling, + .signaled = amd_sched_fence_is_signaled, + .wait = fence_default_wait, + .release = NULL, +}; -- 2.34.1