From 8d9d5744c6bc861dbd379fee6dd5633a62f85be4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 7 Apr 2015 16:20:38 +0100 Subject: [PATCH] drm/i915: Split batch pool into size buckets Now with the trimmed memcpy before the command parser, we try to allocate many different sizes of batches, predominantly one or two pages. We can therefore speed up searching for a good sized batch by keeping the objects of buckets of roughly the same size. v2: Add a comment about bucket sizes Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_debugfs.c | 46 +++++++++++++------- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_gem.c | 2 +- drivers/gpu/drm/i915/i915_gem_batch_pool.c | 49 ++++++++++++++-------- drivers/gpu/drm/i915/i915_gem_batch_pool.h | 2 +- 5 files changed, 64 insertions(+), 37 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index f610a2cd2088..11eebc28775a 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -378,15 +378,17 @@ static void print_batch_pool_stats(struct seq_file *m, struct drm_i915_gem_object *obj; struct file_stats stats; struct intel_engine_cs *ring; - int i; + int i, j; memset(&stats, 0, sizeof(stats)); for_each_ring(ring, dev_priv, i) { - list_for_each_entry(obj, - &ring->batch_pool.cache_list, - batch_pool_list) - per_file_stats(0, obj, &stats); + for (j = 0; j < ARRAY_SIZE(ring->batch_pool.cache_list); j++) { + list_for_each_entry(obj, + &ring->batch_pool.cache_list[j], + batch_pool_link) + per_file_stats(0, obj, &stats); + } } print_file_stats(m, "batch pool", stats); @@ -618,26 +620,38 @@ static int i915_gem_batch_pool_info(struct seq_file *m, void *data) struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_gem_object *obj; struct intel_engine_cs *ring; - int count = 0; - int ret, i; + int total = 0; + int ret, i, j; ret = mutex_lock_interruptible(&dev->struct_mutex); if (ret) return ret; for_each_ring(ring, dev_priv, i) { - seq_printf(m, "%s cache:\n", ring->name); - list_for_each_entry(obj, - &ring->batch_pool.cache_list, - batch_pool_list) { - seq_puts(m, " "); - describe_obj(m, obj); - seq_putc(m, '\n'); - count++; + for (j = 0; j < ARRAY_SIZE(ring->batch_pool.cache_list); j++) { + int count; + + count = 0; + list_for_each_entry(obj, + &ring->batch_pool.cache_list[j], + batch_pool_link) + count++; + seq_printf(m, "%s cache[%d]: %d objects\n", + ring->name, j, count); + + list_for_each_entry(obj, + &ring->batch_pool.cache_list[j], + batch_pool_link) { + seq_puts(m, " "); + describe_obj(m, obj); + seq_putc(m, '\n'); + } + + total += count; } } - seq_printf(m, "total: %d\n", count); + seq_printf(m, "total: %d\n", total); mutex_unlock(&dev->struct_mutex); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index c839b5771a83..6e0a0ae34a3f 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1910,7 +1910,7 @@ struct drm_i915_gem_object { /** Used in execbuf to temporarily hold a ref */ struct list_head obj_exec_link; - struct list_head batch_pool_list; + struct list_head batch_pool_link; /** * This is set if the object is on the active lists (has pending diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 4ec5d7e010db..8feafe9be1f0 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4409,7 +4409,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, INIT_LIST_HEAD(&obj->ring_list); INIT_LIST_HEAD(&obj->obj_exec_link); INIT_LIST_HEAD(&obj->vma_list); - INIT_LIST_HEAD(&obj->batch_pool_list); + INIT_LIST_HEAD(&obj->batch_pool_link); obj->ops = ops; diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c index 1287abf55b84..7bf2f3f2968e 100644 --- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c @@ -47,8 +47,12 @@ void i915_gem_batch_pool_init(struct drm_device *dev, struct i915_gem_batch_pool *pool) { + int n; + pool->dev = dev; - INIT_LIST_HEAD(&pool->cache_list); + + for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) + INIT_LIST_HEAD(&pool->cache_list[n]); } /** @@ -59,16 +63,20 @@ void i915_gem_batch_pool_init(struct drm_device *dev, */ void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool) { + int n; + WARN_ON(!mutex_is_locked(&pool->dev->struct_mutex)); - while (!list_empty(&pool->cache_list)) { - struct drm_i915_gem_object *obj = - list_first_entry(&pool->cache_list, - struct drm_i915_gem_object, - batch_pool_list); + for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) { + while (!list_empty(&pool->cache_list[n])) { + struct drm_i915_gem_object *obj = + list_first_entry(&pool->cache_list[n], + struct drm_i915_gem_object, + batch_pool_link); - list_del(&obj->batch_pool_list); - drm_gem_object_unreference(&obj->base); + list_del(&obj->batch_pool_link); + drm_gem_object_unreference(&obj->base); + } } } @@ -91,28 +99,33 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, { struct drm_i915_gem_object *obj = NULL; struct drm_i915_gem_object *tmp, *next; + struct list_head *list; + int n; WARN_ON(!mutex_is_locked(&pool->dev->struct_mutex)); - list_for_each_entry_safe(tmp, next, - &pool->cache_list, batch_pool_list) { + /* Compute a power-of-two bucket, but throw everything greater than + * 16KiB into the same bucket: i.e. the the buckets hold objects of + * (1 page, 2 pages, 4 pages, 8+ pages). + */ + n = fls(size >> PAGE_SHIFT) - 1; + if (n >= ARRAY_SIZE(pool->cache_list)) + n = ARRAY_SIZE(pool->cache_list) - 1; + list = &pool->cache_list[n]; + + list_for_each_entry_safe(tmp, next, list, batch_pool_link) { /* The batches are strictly LRU ordered */ if (tmp->active) break; /* While we're looping, do some clean up */ if (tmp->madv == __I915_MADV_PURGED) { - list_del(&tmp->batch_pool_list); + list_del(&tmp->batch_pool_link); drm_gem_object_unreference(&tmp->base); continue; } - /* - * Select a buffer that is at least as big as needed - * but not 'too much' bigger. A better way to do this - * might be to bucket the pool objects based on size. - */ - if (tmp->base.size >= size && tmp->base.size <= 2 * size) { + if (tmp->base.size >= size) { obj = tmp; break; } @@ -132,7 +145,7 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, obj->madv = I915_MADV_DONTNEED; } - list_move_tail(&obj->batch_pool_list, &pool->cache_list); + list_move_tail(&obj->batch_pool_link, list); i915_gem_object_pin_pages(obj); return obj; } diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.h b/drivers/gpu/drm/i915/i915_gem_batch_pool.h index 5ed70ef6a887..848e90703eed 100644 --- a/drivers/gpu/drm/i915/i915_gem_batch_pool.h +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.h @@ -29,7 +29,7 @@ struct i915_gem_batch_pool { struct drm_device *dev; - struct list_head cache_list; + struct list_head cache_list[4]; }; /* i915_gem_batch_pool.c */ -- 2.34.1