/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <linux/bpf.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/anon_inodes.h>
#include <linux/file.h>

static LIST_HEAD(bpf_map_types);

static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
{
        struct bpf_map_type_list *tl;
        struct bpf_map *map;

        list_for_each_entry(tl, &bpf_map_types, list_node) {
                if (tl->type == attr->map_type) {
                        map = tl->ops->map_alloc(attr);
                        if (IS_ERR(map))
                                return map;
                        map->ops = tl->ops;
                        map->map_type = attr->map_type;
                        return map;
                }
        }
        return ERR_PTR(-EINVAL);
}

/* boot time registration of different map implementations */
void bpf_register_map_type(struct bpf_map_type_list *tl)
{
        list_add(&tl->list_node, &bpf_map_types);
}
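
/* Illustrative sketch (not part of the original file): a map implementation
 * registers itself at boot time roughly as shown below. The callback names
 * are the ones this file invokes through tl->ops and map->ops; the
 * 'bpf_map_ops' struct name, the BPF_MAP_TYPE_EXAMPLE constant and the
 * example_* helpers are assumed placeholders, not definitions from this
 * excerpt.
 *
 *      static const struct bpf_map_ops example_map_ops = {
 *              .map_alloc = example_map_alloc,
 *              .map_free = example_map_free,
 *              .map_get_next_key = example_map_get_next_key,
 *              .map_lookup_elem = example_map_lookup_elem,
 *              .map_update_elem = example_map_update_elem,
 *              .map_delete_elem = example_map_delete_elem,
 *      };
 *
 *      static struct bpf_map_type_list example_map_type __read_mostly = {
 *              .ops = &example_map_ops,
 *              .type = BPF_MAP_TYPE_EXAMPLE,
 *      };
 *
 *      static int __init register_example_map(void)
 *      {
 *              bpf_register_map_type(&example_map_type);
 *              return 0;
 *      }
 *      late_initcall(register_example_map);
 */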

/* called from workqueue */
static void bpf_map_free_deferred(struct work_struct *work)
{
        struct bpf_map *map = container_of(work, struct bpf_map, work);

        /* implementation dependent freeing */
        map->ops->map_free(map);
}

/* decrement map refcnt and schedule it for freeing via workqueue
 * (underlying map implementation ops->map_free() might sleep)
 */
void bpf_map_put(struct bpf_map *map)
{
        if (atomic_dec_and_test(&map->refcnt)) {
                INIT_WORK(&map->work, bpf_map_free_deferred);
                schedule_work(&map->work);
        }
}

static int bpf_map_release(struct inode *inode, struct file *filp)
{
        struct bpf_map *map = filp->private_data;

        bpf_map_put(map);
        return 0;
}

static const struct file_operations bpf_map_fops = {
        .release = bpf_map_release,
};

/* helper macro to check that unused fields of 'union bpf_attr' are zero */
#define CHECK_ATTR(CMD) \
        memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
                   sizeof(attr->CMD##_LAST_FIELD), 0, \
                   sizeof(*attr) - \
                   offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
                   sizeof(attr->CMD##_LAST_FIELD)) != NULL
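
/* Illustrative note (not part of the original file): with
 * BPF_MAP_CREATE_LAST_FIELD defined as max_entries just below, the macro
 * expands to a memchr_inv() scan of every byte of 'attr' that lies after
 * 'max_entries', and the command is rejected unless all of those bytes are
 * zero. User space is therefore expected to clear the whole union before
 * filling in the fields it uses, e.g.:
 *
 *      union bpf_attr attr;
 *
 *      memset(&attr, 0, sizeof(attr));
 *      // ... then set only the fields the command actually consumes
 */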

#define BPF_MAP_CREATE_LAST_FIELD max_entries
/* called via syscall */
static int map_create(union bpf_attr *attr)
{
        struct bpf_map *map;
        int err;

        err = CHECK_ATTR(BPF_MAP_CREATE);
        if (err)
                return -EINVAL;

        /* find map type and init map: hashtable vs rbtree vs bloom vs ... */
        map = find_and_alloc_map(attr);
        if (IS_ERR(map))
                return PTR_ERR(map);

        atomic_set(&map->refcnt, 1);

        err = anon_inode_getfd("bpf-map", &bpf_map_fops, map, O_RDWR | O_CLOEXEC);
        if (err < 0)
                /* failed to allocate fd */
                map->ops->map_free(map);

        return err;
}
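
/* Usage sketch from user space (illustrative, not part of this file): there
 * is no libc wrapper for the new syscall, so a map is created by zeroing the
 * attribute union, filling it in and calling syscall() directly. The
 * BPF_MAP_TYPE_HASH constant and the key_size/value_size attribute fields
 * are assumptions about the uapi header (only map_type and max_entries are
 * named in this excerpt), and __NR_bpf is assumed to be provided by the
 * installed kernel headers.
 *
 *      #include <linux/bpf.h>
 *      #include <string.h>
 *      #include <sys/syscall.h>
 *      #include <unistd.h>
 *
 *      static int example_create_map(void)
 *      {
 *              union bpf_attr attr;
 *
 *              memset(&attr, 0, sizeof(attr)); // unused fields must be zero
 *              attr.map_type = BPF_MAP_TYPE_HASH;
 *              attr.key_size = 4;
 *              attr.value_size = 8;
 *              attr.max_entries = 1024;
 *
 *              // returns a map fd on success, -1 with errno set on failure
 *              return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
 *      }
 */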

/* if error is returned, fd is released.
 * On success caller should complete fd access with matching fdput()
 */
struct bpf_map *bpf_map_get(struct fd f)
{
        struct bpf_map *map;

        if (!f.file)
                return ERR_PTR(-EBADF);

        if (f.file->f_op != &bpf_map_fops) {
                fdput(f);
                return ERR_PTR(-EINVAL);
        }

        map = f.file->private_data;
        return map;
}

/* helper to convert user pointers passed inside __aligned_u64 fields */
static void __user *u64_to_ptr(__u64 val)
{
        return (void __user *) (unsigned long) val;
}
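
/* The user-space counterpart (illustrative sketch, not part of this file):
 * pointers are widened to 64 bits before being stored in the __aligned_u64
 * attribute fields such as attr->key and attr->value, so the layout is the
 * same for 32-bit and 64-bit callers.
 *
 *      static __u64 ptr_to_u64(const void *ptr)
 *      {
 *              return (__u64) (unsigned long) ptr;
 *      }
 */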

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value

static int map_lookup_elem(union bpf_attr *attr)
{
        void __user *ukey = u64_to_ptr(attr->key);
        void __user *uvalue = u64_to_ptr(attr->value);
        int ufd = attr->map_fd;
        struct fd f = fdget(ufd);
        struct bpf_map *map;
        void *key, *value;
        int err;

        if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
                return -EINVAL;

        map = bpf_map_get(f);
        if (IS_ERR(map))
                return PTR_ERR(map);

        err = -ENOMEM;
        key = kmalloc(map->key_size, GFP_USER);
        if (!key)
                goto err_put;

        err = -EFAULT;
        if (copy_from_user(key, ukey, map->key_size) != 0)
                goto free_key;

        err = -ENOENT;
        rcu_read_lock();
        value = map->ops->map_lookup_elem(map, key);
        if (!value)
                goto err_unlock;

        err = -EFAULT;
        if (copy_to_user(uvalue, value, map->value_size) != 0)
                goto err_unlock;

        err = 0;

err_unlock:
        rcu_read_unlock();
free_key:
        kfree(key);
err_put:
        fdput(f);
        return err;
}
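
/* Usage sketch from user space (illustrative, not part of this file): a
 * lookup passes the map fd plus pointers to a key buffer and a value buffer
 * of the sizes the map was created with; the pointers travel through the
 * __aligned_u64 fields shown above. __NR_bpf is assumed to come from the
 * installed kernel headers.
 *
 *      #include <linux/bpf.h>
 *      #include <string.h>
 *      #include <sys/syscall.h>
 *      #include <unistd.h>
 *
 *      static int example_lookup_elem(int map_fd, const void *key, void *value)
 *      {
 *              union bpf_attr attr;
 *
 *              memset(&attr, 0, sizeof(attr));
 *              attr.map_fd = map_fd;
 *              attr.key = (__u64) (unsigned long) key;
 *              attr.value = (__u64) (unsigned long) value;
 *
 *              // 0 on success; -1 with errno set if the element is missing
 *              // or one of the copies fails
 *              return syscall(__NR_bpf, BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
 *      }
 */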

#define BPF_MAP_UPDATE_ELEM_LAST_FIELD value

static int map_update_elem(union bpf_attr *attr)
{
        void __user *ukey = u64_to_ptr(attr->key);
        void __user *uvalue = u64_to_ptr(attr->value);
        int ufd = attr->map_fd;
        struct fd f = fdget(ufd);
        struct bpf_map *map;
        void *key, *value;
        int err;

        if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
                return -EINVAL;

        map = bpf_map_get(f);
        if (IS_ERR(map))
                return PTR_ERR(map);

        err = -ENOMEM;
        key = kmalloc(map->key_size, GFP_USER);
        if (!key)
                goto err_put;

        err = -EFAULT;
        if (copy_from_user(key, ukey, map->key_size) != 0)
                goto free_key;

        err = -ENOMEM;
        value = kmalloc(map->value_size, GFP_USER);
        if (!value)
                goto free_key;

        err = -EFAULT;
        if (copy_from_user(value, uvalue, map->value_size) != 0)
                goto free_value;

        /* eBPF programs that use maps are running under rcu_read_lock(),
         * therefore all map accessors rely on this fact, so do the same here
         */
        rcu_read_lock();
        err = map->ops->map_update_elem(map, key, value);
        rcu_read_unlock();

free_value:
        kfree(value);
free_key:
        kfree(key);
err_put:
        fdput(f);
        return err;
}

#define BPF_MAP_DELETE_ELEM_LAST_FIELD key

static int map_delete_elem(union bpf_attr *attr)
{
        void __user *ukey = u64_to_ptr(attr->key);
        int ufd = attr->map_fd;
        struct fd f = fdget(ufd);
        struct bpf_map *map;
        void *key;
        int err;

        if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
                return -EINVAL;

        map = bpf_map_get(f);
        if (IS_ERR(map))
                return PTR_ERR(map);

        err = -ENOMEM;
        key = kmalloc(map->key_size, GFP_USER);
        if (!key)
                goto err_put;

        err = -EFAULT;
        if (copy_from_user(key, ukey, map->key_size) != 0)
                goto free_key;

        rcu_read_lock();
        err = map->ops->map_delete_elem(map, key);
        rcu_read_unlock();

free_key:
        kfree(key);
err_put:
        fdput(f);
        return err;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key

static int map_get_next_key(union bpf_attr *attr)
{
        void __user *ukey = u64_to_ptr(attr->key);
        void __user *unext_key = u64_to_ptr(attr->next_key);
        int ufd = attr->map_fd;
        struct fd f = fdget(ufd);
        struct bpf_map *map;
        void *key, *next_key;
        int err;

        if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
                return -EINVAL;

        map = bpf_map_get(f);
        if (IS_ERR(map))
                return PTR_ERR(map);

        err = -ENOMEM;
        key = kmalloc(map->key_size, GFP_USER);
        if (!key)
                goto err_put;

        err = -EFAULT;
        if (copy_from_user(key, ukey, map->key_size) != 0)
                goto free_key;

        err = -ENOMEM;
        next_key = kmalloc(map->key_size, GFP_USER);
        if (!next_key)
                goto free_key;

        rcu_read_lock();
        err = map->ops->map_get_next_key(map, key, next_key);
        rcu_read_unlock();
        if (err)
                goto free_next_key;

        err = -EFAULT;
        if (copy_to_user(unext_key, next_key, map->key_size) != 0)
                goto free_next_key;

        err = 0;

free_next_key:
        kfree(next_key);
free_key:
        kfree(key);
err_put:
        fdput(f);
        return err;
}
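
/* Usage sketch from user space (illustrative, not part of this file;
 * includes as in the earlier examples): all keys of a map can be walked by
 * repeatedly asking for the key that follows the previous one until the
 * syscall reports that there is no next key. How a key that is not present
 * in the map is treated is up to the map implementation, not this file.
 *
 *      static void example_walk_keys(int map_fd, size_t key_size,
 *                                    void *key, void *next_key)
 *      {
 *              union bpf_attr attr;
 *
 *              for (;;) {
 *                      memset(&attr, 0, sizeof(attr));
 *                      attr.map_fd = map_fd;
 *                      attr.key = (__u64) (unsigned long) key;
 *                      attr.next_key = (__u64) (unsigned long) next_key;
 *
 *                      if (syscall(__NR_bpf, BPF_MAP_GET_NEXT_KEY, &attr,
 *                                  sizeof(attr)) != 0)
 *                              break;  // no next key or an error occurred
 *
 *                      // consume next_key here, then continue the walk from it
 *                      memcpy(key, next_key, key_size);
 *              }
 *      }
 */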

SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
        union bpf_attr attr = {};
        int err;

        /* the syscall is limited to root temporarily. This restriction will be
         * lifted when security audit is clean. Note that eBPF+tracing must have
         * this restriction, since it may pass kernel data to user space
         */
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;

        if (!access_ok(VERIFY_READ, uattr, 1))
                return -EFAULT;

        if (size > PAGE_SIZE)   /* silly large */
                return -E2BIG;

        /* If we're handed a bigger struct than we know of,
         * ensure all the unknown bits are 0 - i.e. new
         * user-space does not rely on any kernel feature
         * extensions we don't know about yet.
         */
        if (size > sizeof(attr)) {
                unsigned char __user *addr;
                unsigned char __user *end;
                unsigned char val;

                addr = (void __user *)uattr + sizeof(attr);
                end  = (void __user *)uattr + size;

                for (; addr < end; addr++) {
                        err = get_user(val, addr);
                        if (err)
                                return err;
                        if (val)
                                return -E2BIG;
                }
                size = sizeof(attr);
        }

        /* copy attributes from user space, may be less than sizeof(bpf_attr) */
        if (copy_from_user(&attr, uattr, size) != 0)
                return -EFAULT;

        switch (cmd) {
        case BPF_MAP_CREATE:
                err = map_create(&attr);
                break;
        case BPF_MAP_LOOKUP_ELEM:
                err = map_lookup_elem(&attr);
                break;
        case BPF_MAP_UPDATE_ELEM:
                err = map_update_elem(&attr);
                break;
        case BPF_MAP_DELETE_ELEM:
                err = map_delete_elem(&attr);
                break;
        case BPF_MAP_GET_NEXT_KEY:
                err = map_get_next_key(&attr);