2 * In-kernel transcendent memory (generic implementation)
4 * Copyright (c) 2009-2012, Dan Magenheimer, Oracle Corp.
6 * The primary purpose of Transcendent Memory ("tmem") is to map object-oriented
7 * "handles" (triples containing a pool id, an object id, and an index), to
8 * pages in a page-accessible memory (PAM). Tmem references the PAM pages via
9 * an abstract "pampd" (PAM page-descriptor), which can be operated on by a
10 * set of functions (pamops). Each pampd contains some representation of
11 * PAGE_SIZE bytes worth of data. For those familiar with key-value stores,
12 * the tmem handle is a three-level hierarchical key, and the value is always
13 * reconstituted (but not necessarily stored) as PAGE_SIZE bytes and is
14 * referenced in the datastore by the pampd. The hierarchy is required
15 * to ensure that certain invalidation functions can be performed efficiently
16 * (i.e. flush all indexes associated with this object_id, or
17 * flush all objects associated with this pool).
19 * Tmem must support potentially millions of pages and must be able to insert,
20 * find, and delete these pages at a potential frequency of thousands per
21 * second concurrently across many CPUs, (and, if used with KVM, across many
22 * vcpus across many guests). Tmem is tracked with a hierarchy of data
23 * structures, organized by the elements in the handle-tuple: pool_id,
24 * object_id, and page index. One or more "clients" (e.g. guests) each
25 * provide one or more tmem_pools. Each pool, contains a hash table of
26 * rb_trees of tmem_objs. Each tmem_obj contains a radix-tree-like tree
27 * of pointers, with intermediate nodes called tmem_objnodes. Each leaf
28 * pointer in this tree points to a pampd, which is accessible only through
29 * a small set of callbacks registered by the PAM implementation (see
30 * tmem_register_pamops). Tmem only needs to do memory allocation for objs
31 * and objnodes and this is done via a set of callbacks that must be
32 * registered by the tmem host implementation (e.g. see tmem_register_hostops).
35 #include <linux/list.h>
36 #include <linux/spinlock.h>
37 #include <linux/atomic.h>
38 #include <linux/export.h>
39 #if defined(CONFIG_RAMSTER) || defined(CONFIG_RAMSTER_MODULE)
40 #include <linux/delay.h>
45 /* data structure sentinels used for debugging... see tmem.h */
46 #define POOL_SENTINEL 0x87658765
47 #define OBJ_SENTINEL 0x12345678
48 #define OBJNODE_SENTINEL 0xfedcba09
51 * A tmem host implementation must use this function to register callbacks
52 * for memory allocation.
54 static struct tmem_hostops tmem_hostops;
56 static void tmem_objnode_tree_init(void);
58 void tmem_register_hostops(struct tmem_hostops *m)
60 tmem_objnode_tree_init();
65 * A tmem host implementation must use this function to register
66 * callbacks for a page-accessible memory (PAM) implementation.
68 static struct tmem_pamops tmem_pamops;
70 void tmem_register_pamops(struct tmem_pamops *m)
76 * Oid's are potentially very sparse and tmem_objs may have an indeterminately
77 * short life, being added and deleted at a relatively high frequency.
78 * So an rb_tree is an ideal data structure to manage tmem_objs. But because
79 * of the potentially huge number of tmem_objs, each pool manages a hashtable
80 * of rb_trees to reduce search, insert, delete, and rebalancing time.
81 * Each hashbucket also has a lock to manage concurrent access and no
82 * searches, inserts, or deletions can be performed unless the lock is held.
83 * As a result, care must be taken to ensure tmem routines are not called
84 * recursively; the vast majority of the time, a recursive call may work
85 * but a deadlock will occur a small fraction of the time due to the
88 * The following routines manage tmem_objs. In all of these routines,
89 * the hashbucket lock is already held.
92 /* Search for object==oid in pool, returns object if found. */
93 static struct tmem_obj *__tmem_obj_find(struct tmem_hashbucket *hb,
94 struct tmem_oid *oidp,
95 struct rb_node **parent,
96 struct rb_node ***link)
98 struct rb_node *_parent = NULL, **rbnode;
99 struct tmem_obj *obj = NULL;
101 rbnode = &hb->obj_rb_root.rb_node;
103 BUG_ON(RB_EMPTY_NODE(*rbnode));
105 obj = rb_entry(*rbnode, struct tmem_obj,
107 switch (tmem_oid_compare(oidp, &obj->oid)) {
111 rbnode = &(*rbnode)->rb_left;
114 rbnode = &(*rbnode)->rb_right;
128 static struct tmem_obj *tmem_obj_find(struct tmem_hashbucket *hb,
129 struct tmem_oid *oidp)
131 return __tmem_obj_find(hb, oidp, NULL, NULL);
134 static void tmem_pampd_destroy_all_in_obj(struct tmem_obj *, bool);
136 /* Free an object that has no more pampds in it. */
137 static void tmem_obj_free(struct tmem_obj *obj, struct tmem_hashbucket *hb)
139 struct tmem_pool *pool;
142 ASSERT_SENTINEL(obj, OBJ);
143 BUG_ON(obj->pampd_count > 0);
145 BUG_ON(pool == NULL);
146 if (obj->objnode_tree_root != NULL) /* may be "stump" with no leaves */
147 tmem_pampd_destroy_all_in_obj(obj, false);
148 BUG_ON(obj->objnode_tree_root != NULL);
149 BUG_ON((long)obj->objnode_count != 0);
150 atomic_dec(&pool->obj_count);
151 BUG_ON(atomic_read(&pool->obj_count) < 0);
152 INVERT_SENTINEL(obj, OBJ);
154 tmem_oid_set_invalid(&obj->oid);
155 rb_erase(&obj->rb_tree_node, &hb->obj_rb_root);
159 * Initialize, and insert an tmem_object_root (called only if find failed).
161 static void tmem_obj_init(struct tmem_obj *obj, struct tmem_hashbucket *hb,
162 struct tmem_pool *pool,
163 struct tmem_oid *oidp)
165 struct rb_root *root = &hb->obj_rb_root;
166 struct rb_node **new = NULL, *parent = NULL;
168 BUG_ON(pool == NULL);
169 atomic_inc(&pool->obj_count);
170 obj->objnode_tree_height = 0;
171 obj->objnode_tree_root = NULL;
174 obj->objnode_count = 0;
175 obj->pampd_count = 0;
176 #ifdef CONFIG_RAMSTER
177 if (tmem_pamops.new_obj != NULL)
178 (*tmem_pamops.new_obj)(obj);
180 SET_SENTINEL(obj, OBJ);
182 if (__tmem_obj_find(hb, oidp, &parent, &new))
185 rb_link_node(&obj->rb_tree_node, parent, new);
186 rb_insert_color(&obj->rb_tree_node, root);
190 * Tmem is managed as a set of tmem_pools with certain attributes, such as
191 * "ephemeral" vs "persistent". These attributes apply to all tmem_objs
192 * and all pampds that belong to a tmem_pool. A tmem_pool is created
193 * or deleted relatively rarely (for example, when a filesystem is
194 * mounted or unmounted).
197 /* flush all data from a pool and, optionally, free it */
198 static void tmem_pool_flush(struct tmem_pool *pool, bool destroy)
200 struct rb_node *rbnode;
201 struct tmem_obj *obj;
202 struct tmem_hashbucket *hb = &pool->hashbucket[0];
205 BUG_ON(pool == NULL);
206 for (i = 0; i < TMEM_HASH_BUCKETS; i++, hb++) {
207 spin_lock(&hb->lock);
208 rbnode = rb_first(&hb->obj_rb_root);
209 while (rbnode != NULL) {
210 obj = rb_entry(rbnode, struct tmem_obj, rb_tree_node);
211 rbnode = rb_next(rbnode);
212 tmem_pampd_destroy_all_in_obj(obj, true);
213 tmem_obj_free(obj, hb);
214 (*tmem_hostops.obj_free)(obj, pool);
216 spin_unlock(&hb->lock);
219 list_del(&pool->pool_list);
223 * A tmem_obj contains a radix-tree-like tree in which the intermediate
224 * nodes are called tmem_objnodes. (The kernel lib/radix-tree.c implementation
225 * is very specialized and tuned for specific uses and is not particularly
226 * suited for use from this code, though some code from the core algorithms has
227 * been reused, thus the copyright notices below). Each tmem_objnode contains
228 * a set of pointers which point to either a set of intermediate tmem_objnodes
229 * or a set of pampds.
231 * Portions Copyright (C) 2001 Momchil Velikov
232 * Portions Copyright (C) 2001 Christoph Hellwig
233 * Portions Copyright (C) 2005 SGI, Christoph Lameter <clameter@sgi.com>
236 struct tmem_objnode_tree_path {
237 struct tmem_objnode *objnode;
241 /* objnode height_to_maxindex translation */
242 static unsigned long tmem_objnode_tree_h2max[OBJNODE_TREE_MAX_PATH + 1];
244 static void tmem_objnode_tree_init(void)
246 unsigned int ht, tmp;
248 for (ht = 0; ht < ARRAY_SIZE(tmem_objnode_tree_h2max); ht++) {
249 tmp = ht * OBJNODE_TREE_MAP_SHIFT;
250 if (tmp >= OBJNODE_TREE_INDEX_BITS)
251 tmem_objnode_tree_h2max[ht] = ~0UL;
253 tmem_objnode_tree_h2max[ht] =
254 (~0UL >> (OBJNODE_TREE_INDEX_BITS - tmp - 1)) >> 1;
258 static struct tmem_objnode *tmem_objnode_alloc(struct tmem_obj *obj)
260 struct tmem_objnode *objnode;
262 ASSERT_SENTINEL(obj, OBJ);
263 BUG_ON(obj->pool == NULL);
264 ASSERT_SENTINEL(obj->pool, POOL);
265 objnode = (*tmem_hostops.objnode_alloc)(obj->pool);
266 if (unlikely(objnode == NULL))
269 SET_SENTINEL(objnode, OBJNODE);
270 memset(&objnode->slots, 0, sizeof(objnode->slots));
271 objnode->slots_in_use = 0;
272 obj->objnode_count++;
277 static void tmem_objnode_free(struct tmem_objnode *objnode)
279 struct tmem_pool *pool;
282 BUG_ON(objnode == NULL);
283 for (i = 0; i < OBJNODE_TREE_MAP_SIZE; i++)
284 BUG_ON(objnode->slots[i] != NULL);
285 ASSERT_SENTINEL(objnode, OBJNODE);
286 INVERT_SENTINEL(objnode, OBJNODE);
287 BUG_ON(objnode->obj == NULL);
288 ASSERT_SENTINEL(objnode->obj, OBJ);
289 pool = objnode->obj->pool;
290 BUG_ON(pool == NULL);
291 ASSERT_SENTINEL(pool, POOL);
292 objnode->obj->objnode_count--;
294 (*tmem_hostops.objnode_free)(objnode, pool);
298 * Lookup index in object and return associated pampd (or NULL if not found).
300 static void **__tmem_pampd_lookup_in_obj(struct tmem_obj *obj, uint32_t index)
302 unsigned int height, shift;
303 struct tmem_objnode **slot = NULL;
306 ASSERT_SENTINEL(obj, OBJ);
307 BUG_ON(obj->pool == NULL);
308 ASSERT_SENTINEL(obj->pool, POOL);
310 height = obj->objnode_tree_height;
311 if (index > tmem_objnode_tree_h2max[obj->objnode_tree_height])
313 if (height == 0 && obj->objnode_tree_root) {
314 slot = &obj->objnode_tree_root;
317 shift = (height-1) * OBJNODE_TREE_MAP_SHIFT;
318 slot = &obj->objnode_tree_root;
322 slot = (struct tmem_objnode **)
324 ((index >> shift) & OBJNODE_TREE_MAP_MASK));
325 shift -= OBJNODE_TREE_MAP_SHIFT;
329 return slot != NULL ? (void **)slot : NULL;
332 static void *tmem_pampd_lookup_in_obj(struct tmem_obj *obj, uint32_t index)
334 struct tmem_objnode **slot;
336 slot = (struct tmem_objnode **)__tmem_pampd_lookup_in_obj(obj, index);
337 return slot != NULL ? *slot : NULL;
340 #ifdef CONFIG_RAMSTER
341 static void *tmem_pampd_replace_in_obj(struct tmem_obj *obj, uint32_t index,
342 void *new_pampd, bool no_free)
344 struct tmem_objnode **slot;
347 slot = (struct tmem_objnode **)__tmem_pampd_lookup_in_obj(obj, index);
348 if ((slot != NULL) && (*slot != NULL)) {
349 void *old_pampd = *(void **)slot;
350 *(void **)slot = new_pampd;
352 (*tmem_pamops.free)(old_pampd, obj->pool,
360 static int tmem_pampd_add_to_obj(struct tmem_obj *obj, uint32_t index,
364 struct tmem_objnode *objnode = NULL, *newnode, *slot;
365 unsigned int height, shift;
368 /* if necessary, extend the tree to be higher */
369 if (index > tmem_objnode_tree_h2max[obj->objnode_tree_height]) {
370 height = obj->objnode_tree_height + 1;
371 if (index > tmem_objnode_tree_h2max[height])
372 while (index > tmem_objnode_tree_h2max[height])
374 if (obj->objnode_tree_root == NULL) {
375 obj->objnode_tree_height = height;
379 newnode = tmem_objnode_alloc(obj);
384 newnode->slots[0] = obj->objnode_tree_root;
385 newnode->slots_in_use = 1;
386 obj->objnode_tree_root = newnode;
387 obj->objnode_tree_height++;
388 } while (height > obj->objnode_tree_height);
391 slot = obj->objnode_tree_root;
392 height = obj->objnode_tree_height;
393 shift = (height-1) * OBJNODE_TREE_MAP_SHIFT;
396 /* add a child objnode. */
397 slot = tmem_objnode_alloc(obj);
404 objnode->slots[offset] = slot;
405 objnode->slots_in_use++;
407 obj->objnode_tree_root = slot;
409 /* go down a level */
410 offset = (index >> shift) & OBJNODE_TREE_MAP_MASK;
412 slot = objnode->slots[offset];
413 shift -= OBJNODE_TREE_MAP_SHIFT;
416 BUG_ON(slot != NULL);
418 objnode->slots_in_use++;
419 objnode->slots[offset] = pampd;
421 obj->objnode_tree_root = pampd;
427 static void *tmem_pampd_delete_from_obj(struct tmem_obj *obj, uint32_t index)
429 struct tmem_objnode_tree_path path[OBJNODE_TREE_MAX_PATH + 1];
430 struct tmem_objnode_tree_path *pathp = path;
431 struct tmem_objnode *slot = NULL;
432 unsigned int height, shift;
436 ASSERT_SENTINEL(obj, OBJ);
437 BUG_ON(obj->pool == NULL);
438 ASSERT_SENTINEL(obj->pool, POOL);
439 height = obj->objnode_tree_height;
440 if (index > tmem_objnode_tree_h2max[height])
442 slot = obj->objnode_tree_root;
443 if (height == 0 && obj->objnode_tree_root) {
444 obj->objnode_tree_root = NULL;
447 shift = (height - 1) * OBJNODE_TREE_MAP_SHIFT;
448 pathp->objnode = NULL;
453 offset = (index >> shift) & OBJNODE_TREE_MAP_MASK;
454 pathp->offset = offset;
455 pathp->objnode = slot;
456 slot = slot->slots[offset];
457 shift -= OBJNODE_TREE_MAP_SHIFT;
459 } while (height > 0);
462 while (pathp->objnode) {
463 pathp->objnode->slots[pathp->offset] = NULL;
464 pathp->objnode->slots_in_use--;
465 if (pathp->objnode->slots_in_use) {
466 if (pathp->objnode == obj->objnode_tree_root) {
467 while (obj->objnode_tree_height > 0 &&
468 obj->objnode_tree_root->slots_in_use == 1 &&
469 obj->objnode_tree_root->slots[0]) {
470 struct tmem_objnode *to_free =
471 obj->objnode_tree_root;
473 obj->objnode_tree_root =
475 obj->objnode_tree_height--;
476 to_free->slots[0] = NULL;
477 to_free->slots_in_use = 0;
478 tmem_objnode_free(to_free);
483 tmem_objnode_free(pathp->objnode); /* 0 slots used, free it */
486 obj->objnode_tree_height = 0;
487 obj->objnode_tree_root = NULL;
492 BUG_ON(obj->pampd_count < 0);
496 /* Recursively walk the objnode_tree destroying pampds and objnodes. */
497 static void tmem_objnode_node_destroy(struct tmem_obj *obj,
498 struct tmem_objnode *objnode,
505 for (i = 0; i < OBJNODE_TREE_MAP_SIZE; i++) {
506 if (objnode->slots[i]) {
509 (*tmem_pamops.free)(objnode->slots[i],
510 obj->pool, NULL, 0, true);
511 objnode->slots[i] = NULL;
514 tmem_objnode_node_destroy(obj, objnode->slots[i], ht-1);
515 tmem_objnode_free(objnode->slots[i]);
516 objnode->slots[i] = NULL;
521 static void tmem_pampd_destroy_all_in_obj(struct tmem_obj *obj,
524 if (obj->objnode_tree_root == NULL)
526 if (obj->objnode_tree_height == 0) {
528 (*tmem_pamops.free)(obj->objnode_tree_root,
529 obj->pool, NULL, 0, true);
531 tmem_objnode_node_destroy(obj, obj->objnode_tree_root,
532 obj->objnode_tree_height);
533 tmem_objnode_free(obj->objnode_tree_root);
534 obj->objnode_tree_height = 0;
536 obj->objnode_tree_root = NULL;
537 #ifdef CONFIG_RAMSTER
538 if (tmem_pamops.free_obj != NULL)
539 (*tmem_pamops.free_obj)(obj->pool, obj, pool_destroy);
544 * Tmem is operated on by a set of well-defined actions:
545 * "put", "get", "flush", "flush_object", "new pool" and "destroy pool".
546 * (The tmem ABI allows for subpages and exchanges but these operations
547 * are not included in this implementation.)
549 * These "tmem core" operations are implemented in the following functions.
553 * "Put" a page, e.g. associate the passed pampd with the passed handle.
554 * Tmem_put is complicated by a corner case: What if a page with matching
555 * handle already exists in tmem? To guarantee coherency, one of two
556 * actions is necessary: Either the data for the page must be overwritten,
557 * or the page must be "flushed" so that the data is not accessible to a
558 * subsequent "get". Since these "duplicate puts" are relatively rare,
559 * this implementation always flushes for simplicity.
561 int tmem_put(struct tmem_pool *pool, struct tmem_oid *oidp, uint32_t index,
562 bool raw, void *pampd_to_use)
564 struct tmem_obj *obj = NULL, *objfound = NULL, *objnew = NULL;
565 void *pampd = NULL, *pampd_del = NULL;
567 struct tmem_hashbucket *hb;
569 hb = &pool->hashbucket[tmem_oid_hash(oidp)];
570 spin_lock(&hb->lock);
571 obj = objfound = tmem_obj_find(hb, oidp);
573 pampd = tmem_pampd_lookup_in_obj(objfound, index);
575 /* if found, is a dup put, flush the old one */
576 pampd_del = tmem_pampd_delete_from_obj(obj, index);
577 BUG_ON(pampd_del != pampd);
578 (*tmem_pamops.free)(pampd, pool, oidp, index, true);
579 if (obj->pampd_count == 0) {
586 obj = objnew = (*tmem_hostops.obj_alloc)(pool);
587 if (unlikely(obj == NULL)) {
591 tmem_obj_init(obj, hb, pool, oidp);
594 BUG_ON(((objnew != obj) && (objfound != obj)) || (objnew == objfound));
595 pampd = pampd_to_use;
596 BUG_ON(pampd_to_use == NULL);
597 ret = tmem_pampd_add_to_obj(obj, index, pampd);
598 if (unlikely(ret == -ENOMEM))
599 /* may have partially built objnode tree ("stump") */
600 goto delete_and_free;
601 (*tmem_pamops.create_finish)(pampd, is_ephemeral(pool));
605 (void)tmem_pampd_delete_from_obj(obj, index);
607 (*tmem_pamops.free)(pampd, pool, NULL, 0, true);
609 tmem_obj_free(objnew, hb);
610 (*tmem_hostops.obj_free)(objnew, pool);
613 spin_unlock(&hb->lock);
617 #ifdef CONFIG_RAMSTER
619 * For ramster only: The following routines provide a two-step sequence
620 * to allow the caller to replace a pampd in the tmem data structures with
621 * another pampd. Here, we lookup the passed handle and, if found, return the
622 * associated pampd and object, leaving the hashbucket locked and returning
623 * a reference to it. The caller is expected to immediately call the
624 * matching tmem_localify_finish routine which will handles the replacement
625 * and unlocks the hashbucket.
627 void *tmem_localify_get_pampd(struct tmem_pool *pool, struct tmem_oid *oidp,
628 uint32_t index, struct tmem_obj **ret_obj,
631 struct tmem_hashbucket *hb;
632 struct tmem_obj *obj = NULL;
635 hb = &pool->hashbucket[tmem_oid_hash(oidp)];
636 spin_lock(&hb->lock);
637 obj = tmem_obj_find(hb, oidp);
638 if (likely(obj != NULL))
639 pampd = tmem_pampd_lookup_in_obj(obj, index);
641 *saved_hb = (void *)hb;
642 /* note, hashbucket remains locked */
645 EXPORT_SYMBOL_GPL(tmem_localify_get_pampd);
647 void tmem_localify_finish(struct tmem_obj *obj, uint32_t index,
648 void *pampd, void *saved_hb, bool delete)
650 struct tmem_hashbucket *hb = (struct tmem_hashbucket *)saved_hb;
652 BUG_ON(!spin_is_locked(&hb->lock));
655 (void)tmem_pampd_replace_in_obj(obj, index, pampd, 1);
656 (*tmem_pamops.create_finish)(pampd, is_ephemeral(obj->pool));
659 (void)tmem_pampd_delete_from_obj(obj, index);
661 spin_unlock(&hb->lock);
663 EXPORT_SYMBOL_GPL(tmem_localify_finish);
666 * For ramster only. Helper function to support asynchronous tmem_get.
668 static int tmem_repatriate(void **ppampd, struct tmem_hashbucket *hb,
669 struct tmem_pool *pool, struct tmem_oid *oidp,
670 uint32_t index, bool free, char *data)
672 void *old_pampd = *ppampd, *new_pampd = NULL;
673 bool intransit = false;
676 if (!is_ephemeral(pool))
677 new_pampd = (*tmem_pamops.repatriate_preload)(
678 old_pampd, pool, oidp, index, &intransit);
681 else if (new_pampd != NULL)
683 /* must release the hb->lock else repatriate can't sleep */
684 spin_unlock(&hb->lock);
686 ret = (*tmem_pamops.repatriate)(old_pampd, new_pampd, pool,
687 oidp, index, free, data);
688 if (ret == -EAGAIN) {
689 /* rare I think, but should cond_resched()??? */
690 usleep_range(10, 1000);
691 } else if (ret == -ENOTCONN || ret == -EHOSTDOWN) {
693 } else if (ret != 0 && ret != -ENOENT) {
696 /* note hb->lock has now been unlocked */
701 * For ramster only. If a page in tmem matches the handle, replace the
702 * page so that any subsequent "get" gets the new page. Returns 0 if
703 * there was a page to replace, else returns -1.
705 int tmem_replace(struct tmem_pool *pool, struct tmem_oid *oidp,
706 uint32_t index, void *new_pampd)
708 struct tmem_obj *obj;
710 struct tmem_hashbucket *hb;
712 hb = &pool->hashbucket[tmem_oid_hash(oidp)];
713 spin_lock(&hb->lock);
714 obj = tmem_obj_find(hb, oidp);
717 new_pampd = tmem_pampd_replace_in_obj(obj, index, new_pampd, 0);
718 /* if we bug here, pamops wasn't properly set up for ramster */
719 BUG_ON(tmem_pamops.replace_in_obj == NULL);
720 ret = (*tmem_pamops.replace_in_obj)(new_pampd, obj);
722 spin_unlock(&hb->lock);
725 EXPORT_SYMBOL_GPL(tmem_replace);
729 * "Get" a page, e.g. if a pampd can be found matching the passed handle,
730 * use a pamops callback to recreate the page from the pampd with the
731 * matching handle. By tmem definition, when a "get" is successful on
732 * an ephemeral page, the page is "flushed", and when a "get" is successful
733 * on a persistent page, the page is retained in tmem. Note that to preserve
734 * coherency, "get" can never be skipped if tmem contains the data.
735 * That is, if a get is done with a certain handle and fails, any
736 * subsequent "get" must also fail (unless of course there is a
737 * "put" done with the same handle).
739 int tmem_get(struct tmem_pool *pool, struct tmem_oid *oidp, uint32_t index,
740 char *data, size_t *sizep, bool raw, int get_and_free)
742 struct tmem_obj *obj;
744 bool ephemeral = is_ephemeral(pool);
746 struct tmem_hashbucket *hb;
747 bool free = (get_and_free == 1) || ((get_and_free == 0) && ephemeral);
748 bool lock_held = false;
752 hb = &pool->hashbucket[tmem_oid_hash(oidp)];
753 spin_lock(&hb->lock);
755 obj = tmem_obj_find(hb, oidp);
758 ppampd = __tmem_pampd_lookup_in_obj(obj, index);
761 #ifdef CONFIG_RAMSTER
762 if ((tmem_pamops.is_remote != NULL) &&
763 tmem_pamops.is_remote(*ppampd)) {
764 ret = tmem_repatriate(ppampd, hb, pool, oidp,
766 /* tmem_repatriate releases hb->lock */
773 } while (ret == -EAGAIN);
775 pampd = tmem_pampd_delete_from_obj(obj, index);
777 pampd = tmem_pampd_lookup_in_obj(obj, index);
781 if (obj->pampd_count == 0) {
782 tmem_obj_free(obj, hb);
783 (*tmem_hostops.obj_free)(obj, pool);
788 ret = (*tmem_pamops.get_data_and_free)(
789 data, sizep, raw, pampd, pool, oidp, index);
791 ret = (*tmem_pamops.get_data)(
792 data, sizep, raw, pampd, pool, oidp, index);
798 spin_unlock(&hb->lock);
803 * If a page in tmem matches the handle, "flush" this page from tmem such
804 * that any subsequent "get" does not succeed (unless, of course, there
805 * was another "put" with the same handle).
807 int tmem_flush_page(struct tmem_pool *pool,
808 struct tmem_oid *oidp, uint32_t index)
810 struct tmem_obj *obj;
813 struct tmem_hashbucket *hb;
815 hb = &pool->hashbucket[tmem_oid_hash(oidp)];
816 spin_lock(&hb->lock);
817 obj = tmem_obj_find(hb, oidp);
820 pampd = tmem_pampd_delete_from_obj(obj, index);
823 (*tmem_pamops.free)(pampd, pool, oidp, index, true);
824 if (obj->pampd_count == 0) {
825 tmem_obj_free(obj, hb);
826 (*tmem_hostops.obj_free)(obj, pool);
831 spin_unlock(&hb->lock);
836 * "Flush" all pages in tmem matching this oid.
838 int tmem_flush_object(struct tmem_pool *pool, struct tmem_oid *oidp)
840 struct tmem_obj *obj;
841 struct tmem_hashbucket *hb;
844 hb = &pool->hashbucket[tmem_oid_hash(oidp)];
845 spin_lock(&hb->lock);
846 obj = tmem_obj_find(hb, oidp);
849 tmem_pampd_destroy_all_in_obj(obj, false);
850 tmem_obj_free(obj, hb);
851 (*tmem_hostops.obj_free)(obj, pool);
855 spin_unlock(&hb->lock);
860 * "Flush" all pages (and tmem_objs) from this tmem_pool and disable
861 * all subsequent access to this tmem_pool.
863 int tmem_destroy_pool(struct tmem_pool *pool)
869 tmem_pool_flush(pool, 1);
875 static LIST_HEAD(tmem_global_pool_list);
878 * Create a new tmem_pool with the provided flag and return
879 * a pool id provided by the tmem host implementation.
881 void tmem_new_pool(struct tmem_pool *pool, uint32_t flags)
883 int persistent = flags & TMEM_POOL_PERSIST;
884 int shared = flags & TMEM_POOL_SHARED;
885 struct tmem_hashbucket *hb = &pool->hashbucket[0];
888 for (i = 0; i < TMEM_HASH_BUCKETS; i++, hb++) {
889 hb->obj_rb_root = RB_ROOT;
890 spin_lock_init(&hb->lock);
892 INIT_LIST_HEAD(&pool->pool_list);
893 atomic_set(&pool->obj_count, 0);
894 SET_SENTINEL(pool, POOL);
895 list_add_tail(&pool->pool_list, &tmem_global_pool_list);
896 pool->persistent = persistent;
897 pool->shared = shared;