From 5d481f497559245ecfb1b95cafe39bfbf037fda5 Mon Sep 17 00:00:00 2001 From: Sonic Zhang Date: Sat, 19 Jul 2008 14:51:31 +0800 Subject: [PATCH] Blackfin arch: change L1 malloc to base on slab cache and lists. Remove the sram piece limitation and improve the performance to alloc/free sram piece data. Signed-off-by: Sonic Zhang Signed-off-by: Bryan Wu --- arch/blackfin/mm/blackfin_sram.c | 395 +++++++++++++++++++------------ arch/blackfin/mm/blackfin_sram.h | 4 +- arch/blackfin/mm/init.c | 12 +- 3 files changed, 248 insertions(+), 163 deletions(-) diff --git a/arch/blackfin/mm/blackfin_sram.c b/arch/blackfin/mm/blackfin_sram.c index 8f6fdc245330..b58cf196d7cc 100644 --- a/arch/blackfin/mm/blackfin_sram.c +++ b/arch/blackfin/mm/blackfin_sram.c @@ -41,215 +41,276 @@ #include #include "blackfin_sram.h" -spinlock_t l1sram_lock, l1_data_sram_lock, l1_inst_sram_lock; - -#if CONFIG_L1_MAX_PIECE < 16 -#undef CONFIG_L1_MAX_PIECE -#define CONFIG_L1_MAX_PIECE 16 -#endif - -#if CONFIG_L1_MAX_PIECE > 1024 -#undef CONFIG_L1_MAX_PIECE -#define CONFIG_L1_MAX_PIECE 1024 -#endif - -#define SRAM_SLT_NULL 0 -#define SRAM_SLT_FREE 1 -#define SRAM_SLT_ALLOCATED 2 +static spinlock_t l1sram_lock, l1_data_sram_lock, l1_inst_sram_lock; /* the data structure for L1 scratchpad and DATA SRAM */ -struct l1_sram_piece { +struct sram_piece { void *paddr; int size; - int flag; pid_t pid; + struct sram_piece *next; }; -static struct l1_sram_piece l1_ssram[CONFIG_L1_MAX_PIECE]; +static struct sram_piece free_l1_ssram_head, used_l1_ssram_head; #if L1_DATA_A_LENGTH != 0 -static struct l1_sram_piece l1_data_A_sram[CONFIG_L1_MAX_PIECE]; +static struct sram_piece free_l1_data_A_sram_head, used_l1_data_A_sram_head; #endif #if L1_DATA_B_LENGTH != 0 -static struct l1_sram_piece l1_data_B_sram[CONFIG_L1_MAX_PIECE]; +static struct sram_piece free_l1_data_B_sram_head, used_l1_data_B_sram_head; #endif #if L1_CODE_LENGTH != 0 -static struct l1_sram_piece l1_inst_sram[CONFIG_L1_MAX_PIECE]; +static struct sram_piece free_l1_inst_sram_head, used_l1_inst_sram_head; #endif +static struct kmem_cache *sram_piece_cache; + /* L1 Scratchpad SRAM initialization function */ -void __init l1sram_init(void) +static void __init l1sram_init(void) { - printk(KERN_INFO "Blackfin Scratchpad data SRAM: %d KB\n", - L1_SCRATCH_LENGTH >> 10); + free_l1_ssram_head.next = + kmem_cache_alloc(sram_piece_cache, GFP_KERNEL); + if (!free_l1_ssram_head.next) { + printk(KERN_INFO"Fail to initialize Scratchpad data SRAM.\n"); + return; + } - memset(&l1_ssram, 0x00, sizeof(l1_ssram)); - l1_ssram[0].paddr = (void *)L1_SCRATCH_START; - l1_ssram[0].size = L1_SCRATCH_LENGTH; - l1_ssram[0].flag = SRAM_SLT_FREE; + free_l1_ssram_head.next->paddr = (void *)L1_SCRATCH_START; + free_l1_ssram_head.next->size = L1_SCRATCH_LENGTH; + free_l1_ssram_head.next->pid = 0; + free_l1_ssram_head.next->next = NULL; + + used_l1_ssram_head.next = NULL; /* mutex initialize */ spin_lock_init(&l1sram_lock); + + printk(KERN_INFO "Blackfin Scratchpad data SRAM: %d KB\n", + L1_SCRATCH_LENGTH >> 10); } -void __init l1_data_sram_init(void) +static void __init l1_data_sram_init(void) { #if L1_DATA_A_LENGTH != 0 - memset(&l1_data_A_sram, 0x00, sizeof(l1_data_A_sram)); - l1_data_A_sram[0].paddr = (void *)L1_DATA_A_START + - (_ebss_l1 - _sdata_l1); - l1_data_A_sram[0].size = L1_DATA_A_LENGTH - (_ebss_l1 - _sdata_l1); - l1_data_A_sram[0].flag = SRAM_SLT_FREE; + free_l1_data_A_sram_head.next = + kmem_cache_alloc(sram_piece_cache, GFP_KERNEL); + if (!free_l1_data_A_sram_head.next) { + printk(KERN_INFO"Fail to initialize Data A SRAM.\n"); + return; + } + + free_l1_data_A_sram_head.next->paddr = + (void *)L1_DATA_A_START + (_ebss_l1 - _sdata_l1); + free_l1_data_A_sram_head.next->size = + L1_DATA_A_LENGTH - (_ebss_l1 - _sdata_l1); + free_l1_data_A_sram_head.next->pid = 0; + free_l1_data_A_sram_head.next->next = NULL; + + used_l1_data_A_sram_head.next = NULL; printk(KERN_INFO "Blackfin Data A SRAM: %d KB (%d KB free)\n", - L1_DATA_A_LENGTH >> 10, l1_data_A_sram[0].size >> 10); + L1_DATA_A_LENGTH >> 10, + free_l1_data_A_sram_head.next->size >> 10); #endif #if L1_DATA_B_LENGTH != 0 - memset(&l1_data_B_sram, 0x00, sizeof(l1_data_B_sram)); - l1_data_B_sram[0].paddr = (void *)L1_DATA_B_START + - (_ebss_b_l1 - _sdata_b_l1); - l1_data_B_sram[0].size = L1_DATA_B_LENGTH - (_ebss_b_l1 - _sdata_b_l1); - l1_data_B_sram[0].flag = SRAM_SLT_FREE; + free_l1_data_B_sram_head.next = + kmem_cache_alloc(sram_piece_cache, GFP_KERNEL); + if (!free_l1_data_B_sram_head.next) { + printk(KERN_INFO"Fail to initialize Data B SRAM.\n"); + return; + } + + free_l1_data_B_sram_head.next->paddr = + (void *)L1_DATA_B_START + (_ebss_b_l1 - _sdata_b_l1); + free_l1_data_B_sram_head.next->size = + L1_DATA_B_LENGTH - (_ebss_b_l1 - _sdata_b_l1); + free_l1_data_B_sram_head.next->pid = 0; + free_l1_data_B_sram_head.next->next = NULL; + + used_l1_data_B_sram_head.next = NULL; printk(KERN_INFO "Blackfin Data B SRAM: %d KB (%d KB free)\n", - L1_DATA_B_LENGTH >> 10, l1_data_B_sram[0].size >> 10); + L1_DATA_B_LENGTH >> 10, + free_l1_data_B_sram_head.next->size >> 10); #endif /* mutex initialize */ spin_lock_init(&l1_data_sram_lock); } -void __init l1_inst_sram_init(void) +static void __init l1_inst_sram_init(void) { #if L1_CODE_LENGTH != 0 - memset(&l1_inst_sram, 0x00, sizeof(l1_inst_sram)); - l1_inst_sram[0].paddr = (void *)L1_CODE_START + (_etext_l1 - _stext_l1); - l1_inst_sram[0].size = L1_CODE_LENGTH - (_etext_l1 - _stext_l1); - l1_inst_sram[0].flag = SRAM_SLT_FREE; + free_l1_inst_sram_head.next = + kmem_cache_alloc(sram_piece_cache, GFP_KERNEL); + if (!free_l1_inst_sram_head.next) { + printk(KERN_INFO"Fail to initialize Instruction SRAM.\n"); + return; + } + + free_l1_inst_sram_head.next->paddr = + (void *)L1_CODE_START + (_etext_l1 - _stext_l1); + free_l1_inst_sram_head.next->size = + L1_CODE_LENGTH - (_etext_l1 - _stext_l1); + free_l1_inst_sram_head.next->pid = 0; + free_l1_inst_sram_head.next->next = NULL; + + used_l1_inst_sram_head.next = NULL; printk(KERN_INFO "Blackfin Instruction SRAM: %d KB (%d KB free)\n", - L1_CODE_LENGTH >> 10, l1_inst_sram[0].size >> 10); + L1_CODE_LENGTH >> 10, + free_l1_inst_sram_head.next->size >> 10); #endif /* mutex initialize */ spin_lock_init(&l1_inst_sram_lock); } +void __init bfin_sram_init(void) +{ + sram_piece_cache = kmem_cache_create("sram_piece_cache", + sizeof(struct sram_piece), + 0, SLAB_PANIC, NULL); + + l1sram_init(); + l1_data_sram_init(); + l1_inst_sram_init(); +} + /* L1 memory allocate function */ -static void *_l1_sram_alloc(size_t size, struct l1_sram_piece *pfree, int count) +static void *_l1_sram_alloc(size_t size, struct sram_piece *pfree_head, + struct sram_piece *pused_head) { - int i, index = 0; - void *addr = NULL; + struct sram_piece *pslot, *plast, *pavail; - if (size <= 0) + if (size <= 0 || !pfree_head || !pused_head) return NULL; /* Align the size */ size = (size + 3) & ~3; - /* not use the good method to match the best slot !!! */ - /* search an available memory slot */ - for (i = 0; i < count; i++) { - if ((pfree[i].flag == SRAM_SLT_FREE) - && (pfree[i].size >= size)) { - addr = pfree[i].paddr; - pfree[i].flag = SRAM_SLT_ALLOCATED; - pfree[i].pid = current->pid; - index = i; - break; - } + pslot = pfree_head->next; + plast = pfree_head; + + /* search an available piece slot */ + while (pslot != NULL && size > pslot->size) { + plast = pslot; + pslot = pslot->next; } - if (i >= count) + + if (!pslot) return NULL; - /* updated the NULL memory slot !!! */ - if (pfree[i].size > size) { - for (i = 0; i < count; i++) { - if (pfree[i].flag == SRAM_SLT_NULL) { - pfree[i].pid = 0; - pfree[i].flag = SRAM_SLT_FREE; - pfree[i].paddr = addr + size; - pfree[i].size = pfree[index].size - size; - pfree[index].size = size; - break; - } - } + if (pslot->size == size) { + plast->next = pslot->next; + pavail = pslot; + } else { + pavail = kmem_cache_alloc(sram_piece_cache, GFP_KERNEL); + + if (!pavail) + return NULL; + + pavail->paddr = pslot->paddr; + pavail->size = size; + pslot->paddr += size; + pslot->size -= size; } - return addr; + pavail->pid = current->pid; + + pslot = pused_head->next; + plast = pused_head; + + /* insert new piece into used piece list !!! */ + while (pslot != NULL && pavail->paddr < pslot->paddr) { + plast = pslot; + pslot = pslot->next; + } + + pavail->next = pslot; + plast->next = pavail; + + return pavail->paddr; } /* Allocate the largest available block. */ -static void *_l1_sram_alloc_max(struct l1_sram_piece *pfree, int count, +static void *_l1_sram_alloc_max(struct sram_piece *pfree_head, + struct sram_piece *pused_head, unsigned long *psize) { - unsigned long best = 0; - int i, index = -1; - void *addr = NULL; + struct sram_piece *pslot, *pmax; + + if (!pfree_head || !pused_head) + return NULL; + + pmax = pslot = pfree_head->next; - /* search an available memory slot */ - for (i = 0; i < count; i++) { - if (pfree[i].flag == SRAM_SLT_FREE && pfree[i].size > best) { - addr = pfree[i].paddr; - index = i; - best = pfree[i].size; - } + /* search an available piece slot */ + while (pslot != NULL) { + if (pslot->size > pmax->size) + pmax = pslot; + pslot = pslot->next; } - if (index < 0) + + if (!pmax) return NULL; - *psize = best; - pfree[index].pid = current->pid; - pfree[index].flag = SRAM_SLT_ALLOCATED; - return addr; + *psize = pmax->size; + + return _l1_sram_alloc(*psize, pfree_head, pused_head); } /* L1 memory free function */ static int _l1_sram_free(const void *addr, - struct l1_sram_piece *pfree, - int count) + struct sram_piece *pfree_head, + struct sram_piece *pused_head) { - int i, index = 0; + struct sram_piece *pslot, *plast, *pavail; + + if (!pfree_head || !pused_head) + return -1; /* search the relevant memory slot */ - for (i = 0; i < count; i++) { - if (pfree[i].paddr == addr) { - if (pfree[i].flag != SRAM_SLT_ALLOCATED) { - /* error log */ - return -1; - } - index = i; - break; - } + pslot = pused_head->next; + plast = pused_head; + + /* search an available piece slot */ + while (pslot != NULL && pslot->paddr != addr) { + plast = pslot; + pslot = pslot->next; } - if (i >= count) + + if (!pslot) return -1; - pfree[index].pid = 0; - pfree[index].flag = SRAM_SLT_FREE; - - /* link the next address slot */ - for (i = 0; i < count; i++) { - if (((pfree[index].paddr + pfree[index].size) == pfree[i].paddr) - && (pfree[i].flag == SRAM_SLT_FREE)) { - pfree[i].pid = 0; - pfree[i].flag = SRAM_SLT_NULL; - pfree[index].size += pfree[i].size; - pfree[index].flag = SRAM_SLT_FREE; - break; - } + plast->next = pslot->next; + pavail = pslot; + pavail->pid = 0; + + /* insert free pieces back to the free list */ + pslot = pfree_head->next; + plast = pfree_head; + + while (pslot != NULL && addr > pslot->paddr) { + plast = pslot; + pslot = pslot->next; + } + + if (plast != pfree_head && plast->paddr + plast->size == pavail->paddr) { + plast->size += pavail->size; + kmem_cache_free(sram_piece_cache, pavail); + } else { + pavail->next = plast; + plast->next = pavail; + plast = pavail; } - /* link the last address slot */ - for (i = 0; i < count; i++) { - if (((pfree[i].paddr + pfree[i].size) == pfree[index].paddr) && - (pfree[i].flag == SRAM_SLT_FREE)) { - pfree[index].flag = SRAM_SLT_NULL; - pfree[i].size += pfree[index].size; - break; - } + if (pslot && plast->paddr + plast->size == pslot->paddr) { + plast->size += pslot->size; + plast->next = pslot->next; + kmem_cache_free(sram_piece_cache, pslot); } return 0; @@ -287,7 +348,8 @@ void *l1_data_A_sram_alloc(size_t size) spin_lock_irqsave(&l1_data_sram_lock, flags); #if L1_DATA_A_LENGTH != 0 - addr = _l1_sram_alloc(size, l1_data_A_sram, ARRAY_SIZE(l1_data_A_sram)); + addr = _l1_sram_alloc(size, &free_l1_data_A_sram_head, + &used_l1_data_A_sram_head); #endif /* add mutex operation */ @@ -309,8 +371,8 @@ int l1_data_A_sram_free(const void *addr) spin_lock_irqsave(&l1_data_sram_lock, flags); #if L1_DATA_A_LENGTH != 0 - ret = _l1_sram_free(addr, - l1_data_A_sram, ARRAY_SIZE(l1_data_A_sram)); + ret = _l1_sram_free(addr, &free_l1_data_A_sram_head, + &used_l1_data_A_sram_head); #else ret = -1; #endif @@ -331,7 +393,8 @@ void *l1_data_B_sram_alloc(size_t size) /* add mutex operation */ spin_lock_irqsave(&l1_data_sram_lock, flags); - addr = _l1_sram_alloc(size, l1_data_B_sram, ARRAY_SIZE(l1_data_B_sram)); + addr = _l1_sram_alloc(size, &free_l1_data_B_sram_head, + &used_l1_data_B_sram_head); /* add mutex operation */ spin_unlock_irqrestore(&l1_data_sram_lock, flags); @@ -355,7 +418,8 @@ int l1_data_B_sram_free(const void *addr) /* add mutex operation */ spin_lock_irqsave(&l1_data_sram_lock, flags); - ret = _l1_sram_free(addr, l1_data_B_sram, ARRAY_SIZE(l1_data_B_sram)); + ret = _l1_sram_free(addr, &free_l1_data_B_sram_head, + &used_l1_data_B_sram_head); /* add mutex operation */ spin_unlock_irqrestore(&l1_data_sram_lock, flags); @@ -408,7 +472,8 @@ void *l1_inst_sram_alloc(size_t size) /* add mutex operation */ spin_lock_irqsave(&l1_inst_sram_lock, flags); - addr = _l1_sram_alloc(size, l1_inst_sram, ARRAY_SIZE(l1_inst_sram)); + addr = _l1_sram_alloc(size, &free_l1_inst_sram_head, + &used_l1_inst_sram_head); /* add mutex operation */ spin_unlock_irqrestore(&l1_inst_sram_lock, flags); @@ -432,7 +497,8 @@ int l1_inst_sram_free(const void *addr) /* add mutex operation */ spin_lock_irqsave(&l1_inst_sram_lock, flags); - ret = _l1_sram_free(addr, l1_inst_sram, ARRAY_SIZE(l1_inst_sram)); + ret = _l1_sram_free(addr, &free_l1_inst_sram_head, + &used_l1_inst_sram_head); /* add mutex operation */ spin_unlock_irqrestore(&l1_inst_sram_lock, flags); @@ -453,7 +519,8 @@ void *l1sram_alloc(size_t size) /* add mutex operation */ spin_lock_irqsave(&l1sram_lock, flags); - addr = _l1_sram_alloc(size, l1_ssram, ARRAY_SIZE(l1_ssram)); + addr = _l1_sram_alloc(size, &free_l1_ssram_head, + &used_l1_ssram_head); /* add mutex operation */ spin_unlock_irqrestore(&l1sram_lock, flags); @@ -470,7 +537,8 @@ void *l1sram_alloc_max(size_t *psize) /* add mutex operation */ spin_lock_irqsave(&l1sram_lock, flags); - addr = _l1_sram_alloc_max(l1_ssram, ARRAY_SIZE(l1_ssram), psize); + addr = _l1_sram_alloc_max(&free_l1_ssram_head, + &used_l1_ssram_head, psize); /* add mutex operation */ spin_unlock_irqrestore(&l1sram_lock, flags); @@ -487,7 +555,8 @@ int l1sram_free(const void *addr) /* add mutex operation */ spin_lock_irqsave(&l1sram_lock, flags); - ret = _l1_sram_free(addr, l1_ssram, ARRAY_SIZE(l1_ssram)); + ret = _l1_sram_free(addr, &free_l1_ssram_head, + &used_l1_ssram_head); /* add mutex operation */ spin_unlock_irqrestore(&l1sram_lock, flags); @@ -553,28 +622,38 @@ EXPORT_SYMBOL(sram_alloc_with_lsl); * (including newline). */ static int _l1sram_proc_read(char *buf, int *len, int count, const char *desc, - struct l1_sram_piece *pfree, const int array_size) + struct sram_piece *pfree_head, + struct sram_piece *pused_head) { - int i; + struct sram_piece *pslot; + + if (!pfree_head || !pused_head) + return -1; *len += sprintf(&buf[*len], "--- L1 %-14s Size PID State \n", desc); - for (i = 0; i < array_size && *len < count; ++i) { - const char *alloc_type; - switch (pfree[i].flag) { - case SRAM_SLT_NULL: alloc_type = "NULL"; break; - case SRAM_SLT_FREE: alloc_type = "FREE"; break; - case SRAM_SLT_ALLOCATED: alloc_type = "ALLOCATED"; break; - default: alloc_type = "????"; break; - } - /* if we've got a lot of space to cover, omit things */ - if ((PAGE_SIZE - 1024) < (CONFIG_L1_MAX_PIECE + 1) * 4 * 44 && - pfree[i].size == 0) - continue; + + /* search the relevant memory slot */ + pslot = pused_head->next; + + while (pslot != NULL) { *len += sprintf(&buf[*len], "%p-%p %8i %5i %-10s\n", - pfree[i].paddr, pfree[i].paddr + pfree[i].size, - pfree[i].size, pfree[i].pid, alloc_type); + pslot->paddr, pslot->paddr + pslot->size, + pslot->size, pslot->pid, "ALLOCATED"); + + pslot = pslot->next; + } + + pslot = pfree_head->next; + + while (pslot != NULL) { + *len += sprintf(&buf[*len], "%p-%p %8i %5i %-10s\n", + pslot->paddr, pslot->paddr + pslot->size, + pslot->size, pslot->pid, "FREE"); + + pslot = pslot->next; } - return (i != array_size); + + return 0; } static int l1sram_proc_read(char *buf, char **start, off_t offset, int count, int *eof, void *data) @@ -582,21 +661,23 @@ static int l1sram_proc_read(char *buf, char **start, off_t offset, int count, int len = 0; if (_l1sram_proc_read(buf, &len, count, "Scratchpad", - l1_ssram, ARRAY_SIZE(l1_ssram))) + &free_l1_ssram_head, &used_l1_ssram_head)) goto not_done; #if L1_DATA_A_LENGTH != 0 if (_l1sram_proc_read(buf, &len, count, "Data A", - l1_data_A_sram, ARRAY_SIZE(l1_data_A_sram))) + &free_l1_data_A_sram_head, + &used_l1_data_A_sram_head)) goto not_done; #endif #if L1_DATA_B_LENGTH != 0 if (_l1sram_proc_read(buf, &len, count, "Data B", - l1_data_B_sram, ARRAY_SIZE(l1_data_B_sram))) + &free_l1_data_B_sram_head, + &used_l1_data_B_sram_head)) goto not_done; #endif #if L1_CODE_LENGTH != 0 if (_l1sram_proc_read(buf, &len, count, "Instruction", - l1_inst_sram, ARRAY_SIZE(l1_inst_sram))) + &free_l1_inst_sram_head, &used_l1_inst_sram_head)) goto not_done; #endif diff --git a/arch/blackfin/mm/blackfin_sram.h b/arch/blackfin/mm/blackfin_sram.h index 0fb73b78dd60..8cb0945563f9 100644 --- a/arch/blackfin/mm/blackfin_sram.h +++ b/arch/blackfin/mm/blackfin_sram.h @@ -30,9 +30,7 @@ #ifndef __BLACKFIN_SRAM_H__ #define __BLACKFIN_SRAM_H__ -extern void l1sram_init(void); -extern void l1_inst_sram_init(void); -extern void l1_data_sram_init(void); +extern void bfin_sram_init(void); extern void *l1sram_alloc(size_t); #endif diff --git a/arch/blackfin/mm/init.c b/arch/blackfin/mm/init.c index ec3141fefd20..4aab21f44096 100644 --- a/arch/blackfin/mm/init.c +++ b/arch/blackfin/mm/init.c @@ -164,11 +164,14 @@ void __init mem_init(void) "(%uk init code, %uk kernel code, %uk data, %uk dma, %uk reserved)\n", (unsigned long) freepages << (PAGE_SHIFT-10), _ramend >> 10, initk, codek, datak, DMA_UNCACHED_REGION >> 10, (reservedpages << (PAGE_SHIFT-10))); +} + +static int __init sram_init(void) +{ + unsigned long tmp; /* Initialize the blackfin L1 Memory. */ - l1sram_init(); - l1_data_sram_init(); - l1_inst_sram_init(); + bfin_sram_init(); /* Allocate this once; never free it. We assume this gives us a pointer to the start of L1 scratchpad memory; panic if it @@ -179,7 +182,10 @@ void __init mem_init(void) tmp, (unsigned long)L1_SCRATCH_TASK_INFO); panic("No L1, time to give up\n"); } + + return 0; } +pure_initcall(sram_init); static void __init free_init_pages(const char *what, unsigned long begin, unsigned long end) { -- 2.34.1