fs/btrfs/qgroup.c
/*
 * Copyright (C) 2011 STRATO.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/rbtree.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <linux/btrfs.h>

#include "ctree.h"
#include "transaction.h"
#include "disk-io.h"
#include "locking.h"
#include "ulist.h"
#include "backref.h"
#include "extent_io.h"
#include "qgroup.h"

/* TODO XXX FIXME
 *  - subvol delete -> delete when ref goes to 0? delete limits also?
 *  - reorganize keys
 *  - compressed
 *  - sync
 *  - copy also limits on subvol creation
 *  - limit
 *  - caches for ulists
 *  - performance benchmarks
 *  - check all ioctl parameters
 */

/*
 * one struct for each qgroup, organized in fs_info->qgroup_tree.
 */
struct btrfs_qgroup {
        u64 qgroupid;

        /*
         * state
         */
        u64 rfer;       /* referenced */
        u64 rfer_cmpr;  /* referenced compressed */
        u64 excl;       /* exclusive */
        u64 excl_cmpr;  /* exclusive compressed */

        /*
         * limits
         */
        u64 lim_flags;  /* which limits are set */
        u64 max_rfer;
        u64 max_excl;
        u64 rsv_rfer;
        u64 rsv_excl;

        /*
         * reservation tracking
         */
        u64 reserved;

        /*
         * lists
         */
        struct list_head groups;  /* groups this group is member of */
        struct list_head members; /* groups that are members of this group */
        struct list_head dirty;   /* dirty groups */
        struct rb_node node;      /* tree of qgroups */

        /*
         * temp variables for accounting operations
         * Refer to qgroup_shared_accounting() for details.
         */
        u64 old_refcnt;
        u64 new_refcnt;
};

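/*
 * The refcnt fields above are tracked against a per-accounting-pass
 * sequence number instead of being zeroed between passes: the update
 * helpers below first snap the counter up to @seq and then add @mod,
 * and the get helpers report the counter relative to @seq (or 0 if the
 * qgroup was not touched during the current pass).
 *
 * Illustrative example: with seq == 100, two updates with mod == 1
 * leave old_refcnt == 102, so btrfs_qgroup_get_old_refcnt() returns 2,
 * while a qgroup whose old_refcnt is still 57 from an earlier pass
 * returns 0.
 */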
static void btrfs_qgroup_update_old_refcnt(struct btrfs_qgroup *qg, u64 seq,
                                           int mod)
{
        if (qg->old_refcnt < seq)
                qg->old_refcnt = seq;
        qg->old_refcnt += mod;
}

static void btrfs_qgroup_update_new_refcnt(struct btrfs_qgroup *qg, u64 seq,
                                           int mod)
{
        if (qg->new_refcnt < seq)
                qg->new_refcnt = seq;
        qg->new_refcnt += mod;
}

static inline u64 btrfs_qgroup_get_old_refcnt(struct btrfs_qgroup *qg, u64 seq)
{
        if (qg->old_refcnt < seq)
                return 0;
        return qg->old_refcnt - seq;
}

static inline u64 btrfs_qgroup_get_new_refcnt(struct btrfs_qgroup *qg, u64 seq)
{
        if (qg->new_refcnt < seq)
                return 0;
        return qg->new_refcnt - seq;
}

/*
 * glue structure to represent the relations between qgroups.
 */
struct btrfs_qgroup_list {
        struct list_head next_group;
        struct list_head next_member;
        struct btrfs_qgroup *group;
        struct btrfs_qgroup *member;
};
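
/*
 * One btrfs_qgroup_list is allocated per (member, group) edge: next_group
 * is linked into member->groups and next_member into group->members, so a
 * relation can be walked from either side and torn down by deleting both
 * list heads and freeing the node.
 */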

#define ptr_to_u64(x) ((u64)(uintptr_t)x)
#define u64_to_ptr(x) ((struct btrfs_qgroup *)(uintptr_t)x)

static int
qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
                   int init_flags);
static void qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info);

/* must be called with qgroup_ioctl_lock held */
static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info,
                                           u64 qgroupid)
{
        struct rb_node *n = fs_info->qgroup_tree.rb_node;
        struct btrfs_qgroup *qgroup;

        while (n) {
                qgroup = rb_entry(n, struct btrfs_qgroup, node);
                if (qgroup->qgroupid < qgroupid)
                        n = n->rb_left;
                else if (qgroup->qgroupid > qgroupid)
                        n = n->rb_right;
                else
                        return qgroup;
        }
        return NULL;
}

/* must be called with qgroup_lock held */
static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info,
                                          u64 qgroupid)
{
        struct rb_node **p = &fs_info->qgroup_tree.rb_node;
        struct rb_node *parent = NULL;
        struct btrfs_qgroup *qgroup;

        while (*p) {
                parent = *p;
                qgroup = rb_entry(parent, struct btrfs_qgroup, node);

                if (qgroup->qgroupid < qgroupid)
                        p = &(*p)->rb_left;
                else if (qgroup->qgroupid > qgroupid)
                        p = &(*p)->rb_right;
                else
                        return qgroup;
        }

        qgroup = kzalloc(sizeof(*qgroup), GFP_ATOMIC);
        if (!qgroup)
                return ERR_PTR(-ENOMEM);

        qgroup->qgroupid = qgroupid;
        INIT_LIST_HEAD(&qgroup->groups);
        INIT_LIST_HEAD(&qgroup->members);
        INIT_LIST_HEAD(&qgroup->dirty);

        rb_link_node(&qgroup->node, parent, p);
        rb_insert_color(&qgroup->node, &fs_info->qgroup_tree);

        return qgroup;
}

static void __del_qgroup_rb(struct btrfs_qgroup *qgroup)
{
        struct btrfs_qgroup_list *list;

        list_del(&qgroup->dirty);
        while (!list_empty(&qgroup->groups)) {
                list = list_first_entry(&qgroup->groups,
                                        struct btrfs_qgroup_list, next_group);
                list_del(&list->next_group);
                list_del(&list->next_member);
                kfree(list);
        }

        while (!list_empty(&qgroup->members)) {
                list = list_first_entry(&qgroup->members,
                                        struct btrfs_qgroup_list, next_member);
                list_del(&list->next_group);
                list_del(&list->next_member);
                kfree(list);
        }
        kfree(qgroup);
}

/* must be called with qgroup_lock held */
static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid)
{
        struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid);

        if (!qgroup)
                return -ENOENT;

        rb_erase(&qgroup->node, &fs_info->qgroup_tree);
        __del_qgroup_rb(qgroup);
        return 0;
}

/* must be called with qgroup_lock held */
static int add_relation_rb(struct btrfs_fs_info *fs_info,
                           u64 memberid, u64 parentid)
{
        struct btrfs_qgroup *member;
        struct btrfs_qgroup *parent;
        struct btrfs_qgroup_list *list;

        member = find_qgroup_rb(fs_info, memberid);
        parent = find_qgroup_rb(fs_info, parentid);
        if (!member || !parent)
                return -ENOENT;

        list = kzalloc(sizeof(*list), GFP_ATOMIC);
        if (!list)
                return -ENOMEM;

        list->group = parent;
        list->member = member;
        list_add_tail(&list->next_group, &member->groups);
        list_add_tail(&list->next_member, &parent->members);

        return 0;
}

/* must be called with qgroup_lock held */
static int del_relation_rb(struct btrfs_fs_info *fs_info,
                           u64 memberid, u64 parentid)
{
        struct btrfs_qgroup *member;
        struct btrfs_qgroup *parent;
        struct btrfs_qgroup_list *list;

        member = find_qgroup_rb(fs_info, memberid);
        parent = find_qgroup_rb(fs_info, parentid);
        if (!member || !parent)
                return -ENOENT;

        list_for_each_entry(list, &member->groups, next_group) {
                if (list->group == parent) {
                        list_del(&list->next_group);
                        list_del(&list->next_member);
                        kfree(list);
                        return 0;
                }
        }
        return -ENOENT;
}

#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid,
                               u64 rfer, u64 excl)
{
        struct btrfs_qgroup *qgroup;

        qgroup = find_qgroup_rb(fs_info, qgroupid);
        if (!qgroup)
                return -EINVAL;
        if (qgroup->rfer != rfer || qgroup->excl != excl)
                return -EINVAL;
        return 0;
}
#endif

/*
 * The full config is read in one go; this is only called from open_ctree().
 * It doesn't use any locking, as at this point we're still single-threaded.
 */
int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
{
        struct btrfs_key key;
        struct btrfs_key found_key;
        struct btrfs_root *quota_root = fs_info->quota_root;
        struct btrfs_path *path = NULL;
        struct extent_buffer *l;
        int slot;
        int ret = 0;
        u64 flags = 0;
        u64 rescan_progress = 0;

        if (!fs_info->quota_enabled)
                return 0;

        fs_info->qgroup_ulist = ulist_alloc(GFP_NOFS);
        if (!fs_info->qgroup_ulist) {
                ret = -ENOMEM;
                goto out;
        }

        path = btrfs_alloc_path();
        if (!path) {
                ret = -ENOMEM;
                goto out;
        }

        /* default this to quota off, in case no status key is found */
        fs_info->qgroup_flags = 0;

        /*
         * pass 1: read status, all qgroup infos and limits
         */
        key.objectid = 0;
        key.type = 0;
        key.offset = 0;
        ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 1);
        if (ret)
                goto out;

        while (1) {
                struct btrfs_qgroup *qgroup;

                slot = path->slots[0];
                l = path->nodes[0];
                btrfs_item_key_to_cpu(l, &found_key, slot);

                if (found_key.type == BTRFS_QGROUP_STATUS_KEY) {
                        struct btrfs_qgroup_status_item *ptr;

                        ptr = btrfs_item_ptr(l, slot,
                                             struct btrfs_qgroup_status_item);

                        if (btrfs_qgroup_status_version(l, ptr) !=
                            BTRFS_QGROUP_STATUS_VERSION) {
                                btrfs_err(fs_info,
                                 "old qgroup version, quota disabled");
                                goto out;
                        }
                        if (btrfs_qgroup_status_generation(l, ptr) !=
                            fs_info->generation) {
                                flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
                                btrfs_err(fs_info,
                                        "qgroup generation mismatch, "
                                        "marked as inconsistent");
                        }
                        fs_info->qgroup_flags = btrfs_qgroup_status_flags(l,
                                                                          ptr);
                        rescan_progress = btrfs_qgroup_status_rescan(l, ptr);
                        goto next1;
                }

                if (found_key.type != BTRFS_QGROUP_INFO_KEY &&
                    found_key.type != BTRFS_QGROUP_LIMIT_KEY)
                        goto next1;

                qgroup = find_qgroup_rb(fs_info, found_key.offset);
                if ((qgroup && found_key.type == BTRFS_QGROUP_INFO_KEY) ||
                    (!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) {
                        btrfs_err(fs_info, "inconsistent qgroup config");
                        flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
                }
                if (!qgroup) {
                        qgroup = add_qgroup_rb(fs_info, found_key.offset);
                        if (IS_ERR(qgroup)) {
                                ret = PTR_ERR(qgroup);
                                goto out;
                        }
                }
                switch (found_key.type) {
                case BTRFS_QGROUP_INFO_KEY: {
                        struct btrfs_qgroup_info_item *ptr;

                        ptr = btrfs_item_ptr(l, slot,
                                             struct btrfs_qgroup_info_item);
                        qgroup->rfer = btrfs_qgroup_info_rfer(l, ptr);
                        qgroup->rfer_cmpr = btrfs_qgroup_info_rfer_cmpr(l, ptr);
                        qgroup->excl = btrfs_qgroup_info_excl(l, ptr);
                        qgroup->excl_cmpr = btrfs_qgroup_info_excl_cmpr(l, ptr);
                        /* generation currently unused */
                        break;
                }
                case BTRFS_QGROUP_LIMIT_KEY: {
                        struct btrfs_qgroup_limit_item *ptr;

                        ptr = btrfs_item_ptr(l, slot,
                                             struct btrfs_qgroup_limit_item);
                        qgroup->lim_flags = btrfs_qgroup_limit_flags(l, ptr);
                        qgroup->max_rfer = btrfs_qgroup_limit_max_rfer(l, ptr);
                        qgroup->max_excl = btrfs_qgroup_limit_max_excl(l, ptr);
                        qgroup->rsv_rfer = btrfs_qgroup_limit_rsv_rfer(l, ptr);
                        qgroup->rsv_excl = btrfs_qgroup_limit_rsv_excl(l, ptr);
                        break;
                }
                }
next1:
                ret = btrfs_next_item(quota_root, path);
                if (ret < 0)
                        goto out;
                if (ret)
                        break;
        }
        btrfs_release_path(path);

        /*
         * pass 2: read all qgroup relations
         */
        key.objectid = 0;
        key.type = BTRFS_QGROUP_RELATION_KEY;
        key.offset = 0;
        ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 0);
        if (ret)
                goto out;
        while (1) {
                slot = path->slots[0];
                l = path->nodes[0];
                btrfs_item_key_to_cpu(l, &found_key, slot);

                if (found_key.type != BTRFS_QGROUP_RELATION_KEY)
                        goto next2;

                if (found_key.objectid > found_key.offset) {
                        /* parent <- member, not needed to build config */
                        /* FIXME should we omit the key completely? */
                        goto next2;
                }

                ret = add_relation_rb(fs_info, found_key.objectid,
                                      found_key.offset);
                if (ret == -ENOENT) {
                        btrfs_warn(fs_info,
                                "orphan qgroup relation 0x%llx->0x%llx",
                                found_key.objectid, found_key.offset);
                        ret = 0;        /* ignore the error */
                }
                if (ret)
                        goto out;
next2:
                ret = btrfs_next_item(quota_root, path);
                if (ret < 0)
                        goto out;
                if (ret)
                        break;
        }
out:
        fs_info->qgroup_flags |= flags;
        if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) {
                fs_info->quota_enabled = 0;
                fs_info->pending_quota_state = 0;
        } else if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN &&
                   ret >= 0) {
                ret = qgroup_rescan_init(fs_info, rescan_progress, 0);
        }
        btrfs_free_path(path);

        if (ret < 0) {
                ulist_free(fs_info->qgroup_ulist);
                fs_info->qgroup_ulist = NULL;
                fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
        }

        return ret < 0 ? ret : 0;
}

/*
 * This is called from close_ctree(), open_ctree() or btrfs_quota_disable();
 * the first two are single-threaded paths. For the third, we have already
 * set quota_root to NULL with qgroup_lock held, so it is safe to clean up
 * the in-memory structures without taking qgroup_lock.
 */
void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info)
{
        struct rb_node *n;
        struct btrfs_qgroup *qgroup;

        while ((n = rb_first(&fs_info->qgroup_tree))) {
                qgroup = rb_entry(n, struct btrfs_qgroup, node);
                rb_erase(n, &fs_info->qgroup_tree);
                __del_qgroup_rb(qgroup);
        }
        /*
         * We call btrfs_free_qgroup_config() when unmounting the
         * filesystem and when disabling quota, so set qgroup_ulist
         * to NULL here to avoid a double free.
         */
        ulist_free(fs_info->qgroup_ulist);
        fs_info->qgroup_ulist = NULL;
}

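/*
 * Insert a zero-sized BTRFS_QGROUP_RELATION_KEY item (objectid == src,
 * offset == dst) into the quota tree. Callers record each relation in both
 * directions, once as src->dst and once as dst->src.
 */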
static int add_qgroup_relation_item(struct btrfs_trans_handle *trans,
                                    struct btrfs_root *quota_root,
                                    u64 src, u64 dst)
{
        int ret;
        struct btrfs_path *path;
        struct btrfs_key key;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        key.objectid = src;
        key.type = BTRFS_QGROUP_RELATION_KEY;
        key.offset = dst;

        ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 0);

        btrfs_mark_buffer_dirty(path->nodes[0]);

        btrfs_free_path(path);
        return ret;
}

static int del_qgroup_relation_item(struct btrfs_trans_handle *trans,
                                    struct btrfs_root *quota_root,
                                    u64 src, u64 dst)
{
        int ret;
        struct btrfs_path *path;
        struct btrfs_key key;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        key.objectid = src;
        key.type = BTRFS_QGROUP_RELATION_KEY;
        key.offset = dst;

        ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
        if (ret < 0)
                goto out;

        if (ret > 0) {
                ret = -ENOENT;
                goto out;
        }

        ret = btrfs_del_item(trans, quota_root, path);
out:
        btrfs_free_path(path);
        return ret;
}

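/*
 * Create the on-disk state for a new qgroup: a BTRFS_QGROUP_INFO_KEY item
 * and a BTRFS_QGROUP_LIMIT_KEY item, both keyed by the qgroup id and
 * initialized to zero.
 */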
static int add_qgroup_item(struct btrfs_trans_handle *trans,
                           struct btrfs_root *quota_root, u64 qgroupid)
{
        int ret;
        struct btrfs_path *path;
        struct btrfs_qgroup_info_item *qgroup_info;
        struct btrfs_qgroup_limit_item *qgroup_limit;
        struct extent_buffer *leaf;
        struct btrfs_key key;

        if (btrfs_test_is_dummy_root(quota_root))
                return 0;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        key.objectid = 0;
        key.type = BTRFS_QGROUP_INFO_KEY;
        key.offset = qgroupid;

        /*
         * Avoid a transaction abort by catching -EEXIST here. In that
         * case, we proceed by re-initializing the existing structure
         * on disk.
         */

        ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
                                      sizeof(*qgroup_info));
        if (ret && ret != -EEXIST)
                goto out;

        leaf = path->nodes[0];
        qgroup_info = btrfs_item_ptr(leaf, path->slots[0],
                                 struct btrfs_qgroup_info_item);
        btrfs_set_qgroup_info_generation(leaf, qgroup_info, trans->transid);
        btrfs_set_qgroup_info_rfer(leaf, qgroup_info, 0);
        btrfs_set_qgroup_info_rfer_cmpr(leaf, qgroup_info, 0);
        btrfs_set_qgroup_info_excl(leaf, qgroup_info, 0);
        btrfs_set_qgroup_info_excl_cmpr(leaf, qgroup_info, 0);

        btrfs_mark_buffer_dirty(leaf);

        btrfs_release_path(path);

        key.type = BTRFS_QGROUP_LIMIT_KEY;
        ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
                                      sizeof(*qgroup_limit));
        if (ret && ret != -EEXIST)
                goto out;

        leaf = path->nodes[0];
        qgroup_limit = btrfs_item_ptr(leaf, path->slots[0],
                                  struct btrfs_qgroup_limit_item);
        btrfs_set_qgroup_limit_flags(leaf, qgroup_limit, 0);
        btrfs_set_qgroup_limit_max_rfer(leaf, qgroup_limit, 0);
        btrfs_set_qgroup_limit_max_excl(leaf, qgroup_limit, 0);
        btrfs_set_qgroup_limit_rsv_rfer(leaf, qgroup_limit, 0);
        btrfs_set_qgroup_limit_rsv_excl(leaf, qgroup_limit, 0);

        btrfs_mark_buffer_dirty(leaf);

        ret = 0;
out:
        btrfs_free_path(path);
        return ret;
}

static int del_qgroup_item(struct btrfs_trans_handle *trans,
                           struct btrfs_root *quota_root, u64 qgroupid)
{
        int ret;
        struct btrfs_path *path;
        struct btrfs_key key;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        key.objectid = 0;
        key.type = BTRFS_QGROUP_INFO_KEY;
        key.offset = qgroupid;
        ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
        if (ret < 0)
                goto out;

        if (ret > 0) {
                ret = -ENOENT;
                goto out;
        }

        ret = btrfs_del_item(trans, quota_root, path);
        if (ret)
                goto out;

        btrfs_release_path(path);

        key.type = BTRFS_QGROUP_LIMIT_KEY;
        ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
        if (ret < 0)
                goto out;

        if (ret > 0) {
                ret = -ENOENT;
                goto out;
        }

        ret = btrfs_del_item(trans, quota_root, path);

out:
        btrfs_free_path(path);
        return ret;
}

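/*
 * Write the in-memory limit configuration of @qgroup (lim_flags plus the
 * max_rfer/max_excl and rsv_rfer/rsv_excl values) back to its
 * BTRFS_QGROUP_LIMIT_KEY item; returns -ENOENT if the item is missing.
 */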
static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
                                    struct btrfs_root *root,
                                    struct btrfs_qgroup *qgroup)
{
        struct btrfs_path *path;
        struct btrfs_key key;
        struct extent_buffer *l;
        struct btrfs_qgroup_limit_item *qgroup_limit;
        int ret;
        int slot;

        key.objectid = 0;
        key.type = BTRFS_QGROUP_LIMIT_KEY;
        key.offset = qgroup->qgroupid;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
        if (ret > 0)
                ret = -ENOENT;

        if (ret)
                goto out;

        l = path->nodes[0];
        slot = path->slots[0];
        qgroup_limit = btrfs_item_ptr(l, slot, struct btrfs_qgroup_limit_item);
        btrfs_set_qgroup_limit_flags(l, qgroup_limit, qgroup->lim_flags);
        btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, qgroup->max_rfer);
        btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, qgroup->max_excl);
        btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, qgroup->rsv_rfer);
        btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, qgroup->rsv_excl);

        btrfs_mark_buffer_dirty(l);

out:
        btrfs_free_path(path);
        return ret;
}

static int update_qgroup_info_item(struct btrfs_trans_handle *trans,
                                   struct btrfs_root *root,
                                   struct btrfs_qgroup *qgroup)
{
        struct btrfs_path *path;
        struct btrfs_key key;
        struct extent_buffer *l;
        struct btrfs_qgroup_info_item *qgroup_info;
        int ret;
        int slot;

        if (btrfs_test_is_dummy_root(root))
                return 0;

        key.objectid = 0;
        key.type = BTRFS_QGROUP_INFO_KEY;
        key.offset = qgroup->qgroupid;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
        if (ret > 0)
                ret = -ENOENT;

        if (ret)
                goto out;

        l = path->nodes[0];
        slot = path->slots[0];
        qgroup_info = btrfs_item_ptr(l, slot, struct btrfs_qgroup_info_item);
        btrfs_set_qgroup_info_generation(l, qgroup_info, trans->transid);
        btrfs_set_qgroup_info_rfer(l, qgroup_info, qgroup->rfer);
        btrfs_set_qgroup_info_rfer_cmpr(l, qgroup_info, qgroup->rfer_cmpr);
        btrfs_set_qgroup_info_excl(l, qgroup_info, qgroup->excl);
        btrfs_set_qgroup_info_excl_cmpr(l, qgroup_info, qgroup->excl_cmpr);

        btrfs_mark_buffer_dirty(l);

out:
        btrfs_free_path(path);
        return ret;
}

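/*
 * Write the current qgroup status (flags, generation and rescan progress)
 * back to the BTRFS_QGROUP_STATUS_KEY item in the quota tree.
 */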
static int update_qgroup_status_item(struct btrfs_trans_handle *trans,
                                     struct btrfs_fs_info *fs_info,
                                     struct btrfs_root *root)
{
        struct btrfs_path *path;
        struct btrfs_key key;
        struct extent_buffer *l;
        struct btrfs_qgroup_status_item *ptr;
        int ret;
        int slot;

        key.objectid = 0;
        key.type = BTRFS_QGROUP_STATUS_KEY;
        key.offset = 0;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
        if (ret > 0)
                ret = -ENOENT;

        if (ret)
                goto out;

        l = path->nodes[0];
        slot = path->slots[0];
        ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item);
        btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags);
        btrfs_set_qgroup_status_generation(l, ptr, trans->transid);
        btrfs_set_qgroup_status_rescan(l, ptr,
                                fs_info->qgroup_rescan_progress.objectid);

        btrfs_mark_buffer_dirty(l);

out:
        btrfs_free_path(path);
        return ret;
}

/*
 * called with qgroup_lock held
 */
static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans,
                                  struct btrfs_root *root)
{
        struct btrfs_path *path;
        struct btrfs_key key;
        struct extent_buffer *leaf = NULL;
        int ret;
        int nr = 0;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        path->leave_spinning = 1;

        key.objectid = 0;
        key.offset = 0;
        key.type = 0;

        while (1) {
                ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
                if (ret < 0)
                        goto out;
                leaf = path->nodes[0];
                nr = btrfs_header_nritems(leaf);
                if (!nr)
                        break;
                /*
                 * delete the leaves one by one, since the
                 * whole tree is going to be deleted.
                 */
                path->slots[0] = 0;
                ret = btrfs_del_items(trans, root, path, 0, nr);
                if (ret)
                        goto out;

                btrfs_release_path(path);
        }
        ret = 0;
out:
        root->fs_info->pending_quota_state = 0;
        btrfs_free_path(path);
        return ret;
}

int btrfs_quota_enable(struct btrfs_trans_handle *trans,
                       struct btrfs_fs_info *fs_info)
{
        struct btrfs_root *quota_root;
        struct btrfs_root *tree_root = fs_info->tree_root;
        struct btrfs_path *path = NULL;
        struct btrfs_qgroup_status_item *ptr;
        struct extent_buffer *leaf;
        struct btrfs_key key;
        struct btrfs_key found_key;
        struct btrfs_qgroup *qgroup = NULL;
        int ret = 0;
        int slot;

        mutex_lock(&fs_info->qgroup_ioctl_lock);
        if (fs_info->quota_root) {
                fs_info->pending_quota_state = 1;
                goto out;
        }

        fs_info->qgroup_ulist = ulist_alloc(GFP_NOFS);
        if (!fs_info->qgroup_ulist) {
                ret = -ENOMEM;
                goto out;
        }

        /*
         * initially create the quota tree
         */
        quota_root = btrfs_create_tree(trans, fs_info,
                                       BTRFS_QUOTA_TREE_OBJECTID);
        if (IS_ERR(quota_root)) {
                ret = PTR_ERR(quota_root);
                goto out;
        }

        path = btrfs_alloc_path();
        if (!path) {
                ret = -ENOMEM;
                goto out_free_root;
        }

        key.objectid = 0;
        key.type = BTRFS_QGROUP_STATUS_KEY;
        key.offset = 0;

        ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
                                      sizeof(*ptr));
        if (ret)
                goto out_free_path;

        leaf = path->nodes[0];
        ptr = btrfs_item_ptr(leaf, path->slots[0],
                                 struct btrfs_qgroup_status_item);
        btrfs_set_qgroup_status_generation(leaf, ptr, trans->transid);
        btrfs_set_qgroup_status_version(leaf, ptr, BTRFS_QGROUP_STATUS_VERSION);
        fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON |
                                BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
        btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags);
        btrfs_set_qgroup_status_rescan(leaf, ptr, 0);

        btrfs_mark_buffer_dirty(leaf);

        key.objectid = 0;
        key.type = BTRFS_ROOT_REF_KEY;
        key.offset = 0;

        btrfs_release_path(path);
        ret = btrfs_search_slot_for_read(tree_root, &key, path, 1, 0);
        if (ret > 0)
                goto out_add_root;
        if (ret < 0)
                goto out_free_path;

        while (1) {
                slot = path->slots[0];
                leaf = path->nodes[0];
                btrfs_item_key_to_cpu(leaf, &found_key, slot);

                if (found_key.type == BTRFS_ROOT_REF_KEY) {
                        ret = add_qgroup_item(trans, quota_root,
                                              found_key.offset);
                        if (ret)
                                goto out_free_path;

                        qgroup = add_qgroup_rb(fs_info, found_key.offset);
                        if (IS_ERR(qgroup)) {
                                ret = PTR_ERR(qgroup);
                                goto out_free_path;
                        }
                }
                ret = btrfs_next_item(tree_root, path);
                if (ret < 0)
                        goto out_free_path;
                if (ret)
                        break;
        }

out_add_root:
        btrfs_release_path(path);
        ret = add_qgroup_item(trans, quota_root, BTRFS_FS_TREE_OBJECTID);
        if (ret)
                goto out_free_path;

        qgroup = add_qgroup_rb(fs_info, BTRFS_FS_TREE_OBJECTID);
        if (IS_ERR(qgroup)) {
                ret = PTR_ERR(qgroup);
                goto out_free_path;
        }
        spin_lock(&fs_info->qgroup_lock);
        fs_info->quota_root = quota_root;
        fs_info->pending_quota_state = 1;
        spin_unlock(&fs_info->qgroup_lock);
out_free_path:
        btrfs_free_path(path);
out_free_root:
        if (ret) {
                free_extent_buffer(quota_root->node);
                free_extent_buffer(quota_root->commit_root);
                kfree(quota_root);
        }
out:
        if (ret) {
                ulist_free(fs_info->qgroup_ulist);
                fs_info->qgroup_ulist = NULL;
        }
        mutex_unlock(&fs_info->qgroup_ioctl_lock);
        return ret;
}

int btrfs_quota_disable(struct btrfs_trans_handle *trans,
                        struct btrfs_fs_info *fs_info)
{
        struct btrfs_root *tree_root = fs_info->tree_root;
        struct btrfs_root *quota_root;
        int ret = 0;

        mutex_lock(&fs_info->qgroup_ioctl_lock);
        if (!fs_info->quota_root)
                goto out;
        spin_lock(&fs_info->qgroup_lock);
        fs_info->quota_enabled = 0;
        fs_info->pending_quota_state = 0;
        quota_root = fs_info->quota_root;
        fs_info->quota_root = NULL;
        fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
        spin_unlock(&fs_info->qgroup_lock);

        btrfs_free_qgroup_config(fs_info);

        ret = btrfs_clean_quota_tree(trans, quota_root);
        if (ret)
                goto out;

        ret = btrfs_del_root(trans, tree_root, &quota_root->root_key);
        if (ret)
                goto out;

        list_del(&quota_root->dirty_list);

        btrfs_tree_lock(quota_root->node);
        clean_tree_block(trans, tree_root->fs_info, quota_root->node);
        btrfs_tree_unlock(quota_root->node);
        btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1);

        free_extent_buffer(quota_root->node);
        free_extent_buffer(quota_root->commit_root);
        kfree(quota_root);
out:
        mutex_unlock(&fs_info->qgroup_ioctl_lock);
        return ret;
}

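/*
 * Queue @qgroup on fs_info->dirty_qgroups so its on-disk items get written
 * back later; a qgroup that is already on the dirty list is left alone.
 */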
static void qgroup_dirty(struct btrfs_fs_info *fs_info,
                         struct btrfs_qgroup *qgroup)
{
        if (list_empty(&qgroup->dirty))
                list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
}

/*
 * The easy accounting: if we are adding/removing the only ref for an extent
 * then this qgroup and all of the parent qgroups get their reference and
 * exclusive counts adjusted.
 *
 * Caller should hold fs_info->qgroup_lock.
 */
static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
                                    struct ulist *tmp, u64 ref_root,
                                    u64 num_bytes, int sign)
{
        struct btrfs_qgroup *qgroup;
        struct btrfs_qgroup_list *glist;
        struct ulist_node *unode;
        struct ulist_iterator uiter;
        int ret = 0;

        qgroup = find_qgroup_rb(fs_info, ref_root);
        if (!qgroup)
                goto out;

        qgroup->rfer += sign * num_bytes;
        qgroup->rfer_cmpr += sign * num_bytes;

        WARN_ON(sign < 0 && qgroup->excl < num_bytes);
        qgroup->excl += sign * num_bytes;
        qgroup->excl_cmpr += sign * num_bytes;
        if (sign > 0)
                qgroup->reserved -= num_bytes;

        qgroup_dirty(fs_info, qgroup);

        /* Get all of the parent groups that contain this qgroup */
        list_for_each_entry(glist, &qgroup->groups, next_group) {
                ret = ulist_add(tmp, glist->group->qgroupid,
                                ptr_to_u64(glist->group), GFP_ATOMIC);
                if (ret < 0)
                        goto out;
        }

        /* Iterate all of the parents and adjust their reference counts */
        ULIST_ITER_INIT(&uiter);
        while ((unode = ulist_next(tmp, &uiter))) {
                qgroup = u64_to_ptr(unode->aux);
                qgroup->rfer += sign * num_bytes;
                qgroup->rfer_cmpr += sign * num_bytes;
                WARN_ON(sign < 0 && qgroup->excl < num_bytes);
                qgroup->excl += sign * num_bytes;
                if (sign > 0)
                        qgroup->reserved -= num_bytes;
                qgroup->excl_cmpr += sign * num_bytes;
                qgroup_dirty(fs_info, qgroup);

                /* Add any parents of the parents */
                list_for_each_entry(glist, &qgroup->groups, next_group) {
                        ret = ulist_add(tmp, glist->group->qgroupid,
                                        ptr_to_u64(glist->group), GFP_ATOMIC);
                        if (ret < 0)
                                goto out;
                }
        }
        ret = 0;
out:
        return ret;
}
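
/*
 * Illustrative example: when a subvolume qgroup gains the only reference
 * to a 16KiB extent, __qgroup_excl_accounting() runs with sign == 1 and
 * bumps both rfer and excl by 16KiB for that qgroup and every parent
 * above it; dropping the last reference repeats the walk with sign == -1.
 */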

/*
 * Quick path for updating a qgroup with only excl refs.
 *
 * In that case, just updating all parents is enough.
 * Otherwise we need to do a full rescan.
 * Caller should also hold fs_info->qgroup_lock.
 *
 * Return 0 for a quick update; return >0 when a full rescan is needed,
 * in which case the INCONSISTENT flag is also set.
 * Return <0 for other errors.
 */
static int quick_update_accounting(struct btrfs_fs_info *fs_info,
                                   struct ulist *tmp, u64 src, u64 dst,
                                   int sign)
{
        struct btrfs_qgroup *qgroup;
        int ret = 1;
        int err = 0;

        qgroup = find_qgroup_rb(fs_info, src);
        if (!qgroup)
                goto out;
        if (qgroup->excl == qgroup->rfer) {
                ret = 0;
                err = __qgroup_excl_accounting(fs_info, tmp, dst,
                                               qgroup->excl, sign);
                if (err < 0) {
                        ret = err;
                        goto out;
                }
        }
out:
        if (ret)
                fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
        return ret;
}

int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
                              struct btrfs_fs_info *fs_info, u64 src, u64 dst)
{
        struct btrfs_root *quota_root;
        struct btrfs_qgroup *parent;
        struct btrfs_qgroup *member;
        struct btrfs_qgroup_list *list;
        struct ulist *tmp;
        int ret = 0;

        /* Check the level of src and dst first */
        if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst))
                return -EINVAL;

        tmp = ulist_alloc(GFP_NOFS);
        if (!tmp)
                return -ENOMEM;

        mutex_lock(&fs_info->qgroup_ioctl_lock);
        quota_root = fs_info->quota_root;
        if (!quota_root) {
                ret = -EINVAL;
                goto out;
        }
        member = find_qgroup_rb(fs_info, src);
        parent = find_qgroup_rb(fs_info, dst);
        if (!member || !parent) {
                ret = -EINVAL;
                goto out;
        }

        /* check if such a qgroup relation exists first */
        list_for_each_entry(list, &member->groups, next_group) {
                if (list->group == parent) {
                        ret = -EEXIST;
                        goto out;
                }
        }

        ret = add_qgroup_relation_item(trans, quota_root, src, dst);
        if (ret)
                goto out;

        ret = add_qgroup_relation_item(trans, quota_root, dst, src);
        if (ret) {
                del_qgroup_relation_item(trans, quota_root, src, dst);
                goto out;
        }

        spin_lock(&fs_info->qgroup_lock);
        ret = add_relation_rb(quota_root->fs_info, src, dst);
        if (ret < 0) {
                spin_unlock(&fs_info->qgroup_lock);
                goto out;
        }
        ret = quick_update_accounting(fs_info, tmp, src, dst, 1);
        spin_unlock(&fs_info->qgroup_lock);
out:
        mutex_unlock(&fs_info->qgroup_ioctl_lock);
        ulist_free(tmp);
        return ret;
}

int __del_qgroup_relation(struct btrfs_trans_handle *trans,
                          struct btrfs_fs_info *fs_info, u64 src, u64 dst)
{
        struct btrfs_root *quota_root;
        struct btrfs_qgroup *parent;
        struct btrfs_qgroup *member;
        struct btrfs_qgroup_list *list;
        struct ulist *tmp;
        int ret = 0;
        int err;

        tmp = ulist_alloc(GFP_NOFS);
        if (!tmp)
                return -ENOMEM;

        quota_root = fs_info->quota_root;
        if (!quota_root) {
                ret = -EINVAL;
                goto out;
        }

        member = find_qgroup_rb(fs_info, src);
        parent = find_qgroup_rb(fs_info, dst);
        if (!member || !parent) {
                ret = -EINVAL;
                goto out;
        }

        /* check if such a qgroup relation exists first */
        list_for_each_entry(list, &member->groups, next_group) {
                if (list->group == parent)
                        goto exist;
        }
        ret = -ENOENT;
        goto out;
exist:
        ret = del_qgroup_relation_item(trans, quota_root, src, dst);
        err = del_qgroup_relation_item(trans, quota_root, dst, src);
        if (err && !ret)
                ret = err;

        spin_lock(&fs_info->qgroup_lock);
        del_relation_rb(fs_info, src, dst);
        ret = quick_update_accounting(fs_info, tmp, src, dst, -1);
        spin_unlock(&fs_info->qgroup_lock);
out:
        ulist_free(tmp);
        return ret;
}

int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
                              struct btrfs_fs_info *fs_info, u64 src, u64 dst)
{
        int ret = 0;

        mutex_lock(&fs_info->qgroup_ioctl_lock);
        ret = __del_qgroup_relation(trans, fs_info, src, dst);
        mutex_unlock(&fs_info->qgroup_ioctl_lock);

        return ret;
}

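/*
 * Create a new qgroup: add its items to the quota tree and mirror it in
 * the in-memory rbtree. Fails with -EEXIST if the qgroup already exists.
 */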
int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
                        struct btrfs_fs_info *fs_info, u64 qgroupid)
{
        struct btrfs_root *quota_root;
        struct btrfs_qgroup *qgroup;
        int ret = 0;

        mutex_lock(&fs_info->qgroup_ioctl_lock);
        quota_root = fs_info->quota_root;
        if (!quota_root) {
                ret = -EINVAL;
                goto out;
        }
        qgroup = find_qgroup_rb(fs_info, qgroupid);
        if (qgroup) {
                ret = -EEXIST;
                goto out;
        }

        ret = add_qgroup_item(trans, quota_root, qgroupid);
        if (ret)
                goto out;

        spin_lock(&fs_info->qgroup_lock);
        qgroup = add_qgroup_rb(fs_info, qgroupid);
        spin_unlock(&fs_info->qgroup_lock);

        if (IS_ERR(qgroup))
                ret = PTR_ERR(qgroup);
out:
        mutex_unlock(&fs_info->qgroup_ioctl_lock);
        return ret;
}

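/*
 * Remove a qgroup: refuse with -EBUSY while other qgroups are still
 * members of it, otherwise delete its items, drop every relation it is a
 * member of and remove it from the in-memory rbtree.
 */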
int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
                        struct btrfs_fs_info *fs_info, u64 qgroupid)
{
        struct btrfs_root *quota_root;
        struct btrfs_qgroup *qgroup;
        struct btrfs_qgroup_list *list;
        int ret = 0;

        mutex_lock(&fs_info->qgroup_ioctl_lock);
        quota_root = fs_info->quota_root;
        if (!quota_root) {
                ret = -EINVAL;
                goto out;
        }

        qgroup = find_qgroup_rb(fs_info, qgroupid);
        if (!qgroup) {
                ret = -ENOENT;
                goto out;
        } else {
                /* check if there are no children of this qgroup */
                if (!list_empty(&qgroup->members)) {
                        ret = -EBUSY;
                        goto out;
                }
        }
        ret = del_qgroup_item(trans, quota_root, qgroupid);

        while (!list_empty(&qgroup->groups)) {
                list = list_first_entry(&qgroup->groups,
                                        struct btrfs_qgroup_list, next_group);
                ret = __del_qgroup_relation(trans, fs_info,
                                            qgroupid,
                                            list->group->qgroupid);
                if (ret)
                        goto out;
        }

        spin_lock(&fs_info->qgroup_lock);
        del_qgroup_rb(quota_root->fs_info, qgroupid);
        spin_unlock(&fs_info->qgroup_lock);
out:
        mutex_unlock(&fs_info->qgroup_ioctl_lock);
        return ret;
}

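/*
 * Apply the limits from @limit to the in-memory qgroup (only the fields
 * selected by limit->flags) and write the result to the on-disk limit
 * item. On failure, the qgroup state is flagged inconsistent.
 */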
int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
                       struct btrfs_fs_info *fs_info, u64 qgroupid,
                       struct btrfs_qgroup_limit *limit)
{
        struct btrfs_root *quota_root;
        struct btrfs_qgroup *qgroup;
        int ret = 0;

        mutex_lock(&fs_info->qgroup_ioctl_lock);
        quota_root = fs_info->quota_root;
        if (!quota_root) {
                ret = -EINVAL;
                goto out;
        }

        qgroup = find_qgroup_rb(fs_info, qgroupid);
        if (!qgroup) {
                ret = -ENOENT;
                goto out;
        }

        spin_lock(&fs_info->qgroup_lock);
        if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_RFER)
                qgroup->max_rfer = limit->max_rfer;
        if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_EXCL)
                qgroup->max_excl = limit->max_excl;
        if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_RFER)
                qgroup->rsv_rfer = limit->rsv_rfer;
        if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_EXCL)
                qgroup->rsv_excl = limit->rsv_excl;
        qgroup->lim_flags |= limit->flags;

        spin_unlock(&fs_info->qgroup_lock);

        ret = update_qgroup_limit_item(trans, quota_root, qgroup);
        if (ret) {
                fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
                btrfs_info(fs_info, "unable to update quota limit for %llu",
                       qgroupid);
        }

out:
        mutex_unlock(&fs_info->qgroup_ioctl_lock);
        return ret;
}

static int comp_oper_exist(struct btrfs_qgroup_operation *oper1,
                           struct btrfs_qgroup_operation *oper2)
{
        /*
         * Ignore seq and type here, we're looking for any operation
         * at all related to this extent on that root.
         */
        if (oper1->bytenr < oper2->bytenr)
                return -1;
        if (oper1->bytenr > oper2->bytenr)
                return 1;
        if (oper1->ref_root < oper2->ref_root)
                return -1;
        if (oper1->ref_root > oper2->ref_root)
                return 1;
        return 0;
}

static int qgroup_oper_exists(struct btrfs_fs_info *fs_info,
                              struct btrfs_qgroup_operation *oper)
{
        struct rb_node *n;
        struct btrfs_qgroup_operation *cur;
        int cmp;

        spin_lock(&fs_info->qgroup_op_lock);
        n = fs_info->qgroup_op_tree.rb_node;
        while (n) {
                cur = rb_entry(n, struct btrfs_qgroup_operation, n);
                cmp = comp_oper_exist(cur, oper);
                if (cmp < 0) {
                        n = n->rb_right;
                } else if (cmp) {
                        n = n->rb_left;
                } else {
                        spin_unlock(&fs_info->qgroup_op_lock);
                        return -EEXIST;
                }
        }
        spin_unlock(&fs_info->qgroup_op_lock);
        return 0;
}

static int comp_oper(struct btrfs_qgroup_operation *oper1,
                     struct btrfs_qgroup_operation *oper2)
{
        if (oper1->bytenr < oper2->bytenr)
                return -1;
        if (oper1->bytenr > oper2->bytenr)
                return 1;
        if (oper1->ref_root < oper2->ref_root)
                return -1;
        if (oper1->ref_root > oper2->ref_root)
                return 1;
        if (oper1->seq < oper2->seq)
                return -1;
        if (oper1->seq > oper2->seq)
                return 1;
        if (oper1->type < oper2->type)
                return -1;
        if (oper1->type > oper2->type)
                return 1;
        return 0;
}

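/*
 * Link @oper into fs_info->qgroup_op_tree, which is ordered by
 * (bytenr, ref_root, seq, type); -EEXIST means an identical operation
 * has already been queued.
 */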
static int insert_qgroup_oper(struct btrfs_fs_info *fs_info,
                              struct btrfs_qgroup_operation *oper)
{
        struct rb_node **p;
        struct rb_node *parent = NULL;
        struct btrfs_qgroup_operation *cur;
        int cmp;

        spin_lock(&fs_info->qgroup_op_lock);
        p = &fs_info->qgroup_op_tree.rb_node;
        while (*p) {
                parent = *p;
                cur = rb_entry(parent, struct btrfs_qgroup_operation, n);
                cmp = comp_oper(cur, oper);
                if (cmp < 0) {
                        p = &(*p)->rb_right;
                } else if (cmp) {
                        p = &(*p)->rb_left;
                } else {
                        spin_unlock(&fs_info->qgroup_op_lock);
                        return -EEXIST;
                }
        }
        rb_link_node(&oper->n, parent, p);
        rb_insert_color(&oper->n, &fs_info->qgroup_op_tree);
        spin_unlock(&fs_info->qgroup_op_lock);
        return 0;
}

/*
 * Record a quota operation for processing later on.
 * @trans: the transaction we are adding the delayed op to.
 * @fs_info: the fs_info for this fs.
 * @ref_root: the root of the reference we are acting on.
 * @bytenr: the bytenr we are acting on.
 * @num_bytes: the number of bytes in the reference.
 * @type: the type of operation this is.
 * @mod_seq: do we need to get a sequence number for looking up roots.
 *
 * We just add it to our trans qgroup_ref_list and carry on and process these
 * operations in order at some later point.  If the reference root isn't a fs
 * root then we don't bother with doing anything.
 *
 * MUST BE HOLDING THE REF LOCK.
 */
int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
                            struct btrfs_fs_info *fs_info, u64 ref_root,
                            u64 bytenr, u64 num_bytes,
                            enum btrfs_qgroup_operation_type type, int mod_seq)
{
        struct btrfs_qgroup_operation *oper;
        int ret;

        if (!is_fstree(ref_root) || !fs_info->quota_enabled)
                return 0;

        oper = kmalloc(sizeof(*oper), GFP_NOFS);
        if (!oper)
                return -ENOMEM;

        oper->ref_root = ref_root;
        oper->bytenr = bytenr;
        oper->num_bytes = num_bytes;
        oper->type = type;
        oper->seq = atomic_inc_return(&fs_info->qgroup_op_seq);
        INIT_LIST_HEAD(&oper->elem.list);
        oper->elem.seq = 0;

        trace_btrfs_qgroup_record_ref(oper);

        if (type == BTRFS_QGROUP_OPER_SUB_SUBTREE) {
                /*
                 * If any operation for this bytenr/ref_root combo
                 * exists, then we know it's not exclusively owned and
                 * shouldn't be queued up.
                 *
                 * This also catches the case where we have a cloned
                 * extent that gets queued up multiple times during
                 * drop snapshot.
                 */
                if (qgroup_oper_exists(fs_info, oper)) {
                        kfree(oper);
                        return 0;
                }
        }

        ret = insert_qgroup_oper(fs_info, oper);
        if (ret) {
                /* Shouldn't happen so have an assert for developers */
                ASSERT(0);
                kfree(oper);
                return ret;
        }
        list_add_tail(&oper->list, &trans->qgroup_ref_list);

        if (mod_seq)
                btrfs_get_tree_mod_seq(fs_info, &oper->elem);

        return 0;
}

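/*
 * Process an ADD_EXCL/SUB_EXCL operation: map the operation type to a
 * sign of +1 or -1 and let __qgroup_excl_accounting() adjust the counts
 * of the owning qgroup and all of its parents.
 */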
1556 static int qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
1557                                   struct btrfs_qgroup_operation *oper)
1558 {
1559         struct ulist *tmp;
1560         int sign = 0;
1561         int ret = 0;
1562
1563         tmp = ulist_alloc(GFP_NOFS);
1564         if (!tmp)
1565                 return -ENOMEM;
1566
1567         spin_lock(&fs_info->qgroup_lock);
1568         if (!fs_info->quota_root)
1569                 goto out;
1570
1571         switch (oper->type) {
1572         case BTRFS_QGROUP_OPER_ADD_EXCL:
1573                 sign = 1;
1574                 break;
1575         case BTRFS_QGROUP_OPER_SUB_EXCL:
1576                 sign = -1;
1577                 break;
1578         default:
1579                 ASSERT(0);
1580         }
1581         ret = __qgroup_excl_accounting(fs_info, tmp, oper->ref_root,
1582                                        oper->num_bytes, sign);
1583 out:
1584         spin_unlock(&fs_info->qgroup_lock);
1585         ulist_free(tmp);
1586         return ret;
1587 }
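/*
 * Note: the exclusive ops handled above come from extents owned by a single
 * root, so the whole adjustment is one signed pass (+1 on add, -1 on sub)
 * up the owning qgroup's hierarchy via __qgroup_excl_accounting().
 */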
1588
1589 /*
1590  * Walk all of the roots that pointed to our bytenr and adjust their
1591  * refcnts properly.
1592  */
1593 static int qgroup_calc_old_refcnt(struct btrfs_fs_info *fs_info,
1594                                   u64 root_to_skip, struct ulist *tmp,
1595                                   struct ulist *roots, struct ulist *qgroups,
1596                                   u64 seq, int *old_roots, int rescan)
1597 {
1598         struct ulist_node *unode;
1599         struct ulist_iterator uiter;
1600         struct ulist_node *tmp_unode;
1601         struct ulist_iterator tmp_uiter;
1602         struct btrfs_qgroup *qg;
1603         int ret;
1604
1605         ULIST_ITER_INIT(&uiter);
1606         while ((unode = ulist_next(roots, &uiter))) {
1607                 /* We don't count our current root here */
1608                 if (unode->val == root_to_skip)
1609                         continue;
1610                 qg = find_qgroup_rb(fs_info, unode->val);
1611                 if (!qg)
1612                         continue;
1613                 /*
1614                  * We could have a pending removal of this same ref so we may
1615                  * not have actually found our ref root when doing
1616                  * btrfs_find_all_roots, so we need to keep track of how many
1617                  * old roots we find in case we removed ours and added a
1618                  * different one at the same time.  I don't think this could
1619                  * happen in practice but that sort of thinking leads to pain
1620                  * and suffering and to the dark side.
1621                  */
1622                 (*old_roots)++;
1623
1624                 ulist_reinit(tmp);
1625                 ret = ulist_add(qgroups, qg->qgroupid, ptr_to_u64(qg),
1626                                 GFP_ATOMIC);
1627                 if (ret < 0)
1628                         return ret;
1629                 ret = ulist_add(tmp, qg->qgroupid, ptr_to_u64(qg), GFP_ATOMIC);
1630                 if (ret < 0)
1631                         return ret;
1632                 ULIST_ITER_INIT(&tmp_uiter);
1633                 while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
1634                         struct btrfs_qgroup_list *glist;
1635                         int mod;
1636
1637                         qg = u64_to_ptr(tmp_unode->aux);
1638                         /*
1639                          * We use this sequence number to keep from having to
1640                          * run the whole list and 0 out the refcnt every time.
1641                          * We basically use sequence as the known 0 count and
1642                          * then add 1 every time we see a qgroup.  This is how we
1643                          * get how many of the roots actually point up to the
1644                          * upper level qgroups in order to determine exclusive
1645                          * counts.
1646                          *
1647                          * For rescan, none of the extents were recorded
1648                          * before, so we just don't add to old_refcnt.
1649                          */
1650                         if (rescan)
1651                                 mod = 0;
1652                         else
1653                                 mod = 1;
1654                         btrfs_qgroup_update_old_refcnt(qg, seq, mod);
1655                         btrfs_qgroup_update_new_refcnt(qg, seq, 1);
1656                         list_for_each_entry(glist, &qg->groups, next_group) {
1657                                 ret = ulist_add(qgroups, glist->group->qgroupid,
1658                                                 ptr_to_u64(glist->group),
1659                                                 GFP_ATOMIC);
1660                                 if (ret < 0)
1661                                         return ret;
1662                                 ret = ulist_add(tmp, glist->group->qgroupid,
1663                                                 ptr_to_u64(glist->group),
1664                                                 GFP_ATOMIC);
1665                                 if (ret < 0)
1666                                         return ret;
1667                         }
1668                 }
1669         }
1670         return 0;
1671 }
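/*
 * Worked example of the seq trick (a sketch using the helpers above): with
 * seq == 100 and two roots pointing at the extent, each hit first raises
 * old_refcnt to seq and then adds 1, so a qgroup reached from both roots
 * ends at 102 and btrfs_qgroup_get_old_refcnt() reports 102 - 100 = 2.  A
 * qgroup not touched this round stays below seq and reports 0, which is why
 * nothing has to be zeroed between rounds.
 */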
1672
1673 /*
1674  * We need to walk forward in our operation tree and account for any roots that
1675  * were deleted after we made this operation.
1676  */
1677 static int qgroup_account_deleted_refs(struct btrfs_fs_info *fs_info,
1678                                        struct btrfs_qgroup_operation *oper,
1679                                        struct ulist *tmp,
1680                                        struct ulist *qgroups, u64 seq,
1681                                        int *old_roots)
1682 {
1683         struct ulist_node *unode;
1684         struct ulist_iterator uiter;
1685         struct btrfs_qgroup *qg;
1686         struct btrfs_qgroup_operation *tmp_oper;
1687         struct rb_node *n;
1688         int ret;
1689
1690         ulist_reinit(tmp);
1691
1692         /*
1693          * We only walk forward in the tree since we're only interested in
1694          * removals that happened _after_ our operation.
1695          */
1696         spin_lock(&fs_info->qgroup_op_lock);
1697         n = rb_next(&oper->n);
1698         spin_unlock(&fs_info->qgroup_op_lock);
1699         if (!n)
1700                 return 0;
1701         tmp_oper = rb_entry(n, struct btrfs_qgroup_operation, n);
1702         while (tmp_oper->bytenr == oper->bytenr) {
1703                 /*
1704                  * If it's not a removal we don't care, additions work out
1705                  * properly with our refcnt tracking.
1706                  */
1707                 if (tmp_oper->type != BTRFS_QGROUP_OPER_SUB_SHARED &&
1708                     tmp_oper->type != BTRFS_QGROUP_OPER_SUB_EXCL)
1709                         goto next;
1710                 qg = find_qgroup_rb(fs_info, tmp_oper->ref_root);
1711                 if (!qg)
1712                         goto next;
1713                 ret = ulist_add(qgroups, qg->qgroupid, ptr_to_u64(qg),
1714                                 GFP_ATOMIC);
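                /*
                 * ulist_add() returns 1 if the node was newly added, 0 if it
                 * was already present and < 0 on allocation failure.
                 */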
1715                 if (ret) {
1716                         if (ret < 0)
1717                                 return ret;
1718                         /*
1719                          * We only want to increase old_roots if this qgroup is
1720                          * not already in the list of qgroups.  If it is already
1721                          * there then that means it must have been re-added or
1722                          * the delete will be discarded because we had an
1723                          * existing ref that we haven't looked up yet.  In this
1724                          * case we don't want to increase old_roots.  So if ret
1725                          * == 1 then we know that this is the first time we've
1726                          * seen this qgroup and we can bump the old_roots.
1727                          */
1728                         (*old_roots)++;
1729                         ret = ulist_add(tmp, qg->qgroupid, ptr_to_u64(qg),
1730                                         GFP_ATOMIC);
1731                         if (ret < 0)
1732                                 return ret;
1733                 }
1734 next:
1735                 spin_lock(&fs_info->qgroup_op_lock);
1736                 n = rb_next(&tmp_oper->n);
1737                 spin_unlock(&fs_info->qgroup_op_lock);
1738                 if (!n)
1739                         break;
1740                 tmp_oper = rb_entry(n, struct btrfs_qgroup_operation, n);
1741         }
1742
1743         /* Ok now process the qgroups we found */
1744         ULIST_ITER_INIT(&uiter);
1745         while ((unode = ulist_next(tmp, &uiter))) {
1746                 struct btrfs_qgroup_list *glist;
1747
1748                 qg = u64_to_ptr(unode->aux);
1749                 btrfs_qgroup_update_old_refcnt(qg, seq, 1);
1750                 btrfs_qgroup_update_new_refcnt(qg, seq, 1);
1751                 list_for_each_entry(glist, &qg->groups, next_group) {
1752                         ret = ulist_add(qgroups, glist->group->qgroupid,
1753                                         ptr_to_u64(glist->group), GFP_ATOMIC);
1754                         if (ret < 0)
1755                                 return ret;
1756                         ret = ulist_add(tmp, glist->group->qgroupid,
1757                                         ptr_to_u64(glist->group), GFP_ATOMIC);
1758                         if (ret < 0)
1759                                 return ret;
1760                 }
1761         }
1762         return 0;
1763 }
1764
1765 /* Add refcnt for the newly added reference. */
1766 static int qgroup_calc_new_refcnt(struct btrfs_fs_info *fs_info,
1767                                   struct btrfs_qgroup_operation *oper,
1768                                   struct btrfs_qgroup *qgroup,
1769                                   struct ulist *tmp, struct ulist *qgroups,
1770                                   u64 seq)
1771 {
1772         struct ulist_node *unode;
1773         struct ulist_iterator uiter;
1774         struct btrfs_qgroup *qg;
1775         int ret;
1776
1777         ulist_reinit(tmp);
1778         ret = ulist_add(qgroups, qgroup->qgroupid, ptr_to_u64(qgroup),
1779                         GFP_ATOMIC);
1780         if (ret < 0)
1781                 return ret;
1782         ret = ulist_add(tmp, qgroup->qgroupid, ptr_to_u64(qgroup),
1783                         GFP_ATOMIC);
1784         if (ret < 0)
1785                 return ret;
1786         ULIST_ITER_INIT(&uiter);
1787         while ((unode = ulist_next(tmp, &uiter))) {
1788                 struct btrfs_qgroup_list *glist;
1789
1790                 qg = u64_to_ptr(unode->aux);
1791                 if (oper->type == BTRFS_QGROUP_OPER_ADD_SHARED)
1792                         btrfs_qgroup_update_new_refcnt(qg, seq, 1);
1793                 else
1794                         btrfs_qgroup_update_old_refcnt(qg, seq, 1);
1795                 list_for_each_entry(glist, &qg->groups, next_group) {
1796                         ret = ulist_add(tmp, glist->group->qgroupid,
1797                                         ptr_to_u64(glist->group), GFP_ATOMIC);
1798                         if (ret < 0)
1799                                 return ret;
1800                         ret = ulist_add(qgroups, glist->group->qgroupid,
1801                                         ptr_to_u64(glist->group), GFP_ATOMIC);
1802                         if (ret < 0)
1803                                 return ret;
1804                 }
1805         }
1806         return 0;
1807 }
1808
1809 #define UPDATE_NEW      0
1810 #define UPDATE_OLD      1
1811 /*
1812  * Walk all of the roots that point to the bytenr and adjust their refcnts.
1813  */
1814 static int qgroup_update_refcnt(struct btrfs_fs_info *fs_info,
1815                                 struct ulist *roots, struct ulist *tmp,
1816                                 struct ulist *qgroups, u64 seq, int update_old)
1817 {
1818         struct ulist_node *unode;
1819         struct ulist_iterator uiter;
1820         struct ulist_node *tmp_unode;
1821         struct ulist_iterator tmp_uiter;
1822         struct btrfs_qgroup *qg;
1823         int ret = 0;
1824
1825         if (!roots)
1826                 return 0;
1827         ULIST_ITER_INIT(&uiter);
1828         while ((unode = ulist_next(roots, &uiter))) {
1829                 qg = find_qgroup_rb(fs_info, unode->val);
1830                 if (!qg)
1831                         continue;
1832
1833                 ulist_reinit(tmp);
1834                 ret = ulist_add(qgroups, qg->qgroupid, ptr_to_u64(qg),
1835                                 GFP_ATOMIC);
1836                 if (ret < 0)
1837                         return ret;
1838                 ret = ulist_add(tmp, qg->qgroupid, ptr_to_u64(qg), GFP_ATOMIC);
1839                 if (ret < 0)
1840                         return ret;
1841                 ULIST_ITER_INIT(&tmp_uiter);
1842                 while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
1843                         struct btrfs_qgroup_list *glist;
1844
1845                         qg = u64_to_ptr(tmp_unode->aux);
1846                         if (update_old)
1847                                 btrfs_qgroup_update_old_refcnt(qg, seq, 1);
1848                         else
1849                                 btrfs_qgroup_update_new_refcnt(qg, seq, 1);
1850                         list_for_each_entry(glist, &qg->groups, next_group) {
1851                                 ret = ulist_add(qgroups, glist->group->qgroupid,
1852                                                 ptr_to_u64(glist->group),
1853                                                 GFP_ATOMIC);
1854                                 if (ret < 0)
1855                                         return ret;
1856                                 ret = ulist_add(tmp, glist->group->qgroupid,
1857                                                 ptr_to_u64(glist->group),
1858                                                 GFP_ATOMIC);
1859                                 if (ret < 0)
1860                                         return ret;
1861                         }
1862                 }
1863         }
1864         return 0;
1865 }
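/*
 * Usage sketch (assumed pairing, showing how the helper above is meant to
 * fit together with the counter update): the roots before a change are
 * walked with UPDATE_OLD and the roots after it with UPDATE_NEW,
 *
 *	qgroup_update_refcnt(fs_info, old_roots, tmp, qgroups, seq, UPDATE_OLD);
 *	qgroup_update_refcnt(fs_info, new_roots, tmp, qgroups, seq, UPDATE_NEW);
 *
 * after which qgroup_update_counters() below compares the two counts per
 * qgroup.
 */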
1866
1867 /*
1868  * Update qgroup rfer/excl counters.
1869  * Rfer update is easy, the code explains itself.
1870  * Excl update is tricky, so the update is split into 2 parts.
1871  * Part 1: Possible exclusive <-> sharing detection:
1872  *      |       A       |       !A      |
1873  *  -------------------------------------
1874  *  B   |       *       |       -       |
1875  *  -------------------------------------
1876  *  !B  |       +       |       **      |
1877  *  -------------------------------------
1878  *
1879  * Conditions:
1880  * A:   cur_old_roots < nr_old_roots    (not exclusive before)
1881  * !A:  cur_old_roots == nr_old_roots   (possible exclusive before)
1882  * B:   cur_new_roots < nr_new_roots    (not exclusive now)
1883  * !B:  cur_new_roots == nr_new_roots   (possible exclusive now)
1884  *
1885  * Results:
1886  * +: Possible sharing -> exclusive     -: Possible exclusive -> sharing
1887  * *: Definitely not changed.           **: Possibly unchanged.
1888  *
1889  * For the !A and !B conditions, the exception is the cur_old/new_roots == 0
1890  * case.
1891  *
1892  * To make the logic clear, we first use conditions A and B to split the
1893  * combinations into 4 results.
1894  *
1895  * Then, for the "+" and "-" results, check the old/new_roots == 0 case, as
1896  * in each of them only one variant may be 0.
1897  *
1898  * Lastly, check result **; since 2 variants may be 0 there, split it again
1899  * (2x2).  This time we don't need to consider anything else, and the code
1900  * and logic are easy to follow.
1901  */
1902 static int qgroup_update_counters(struct btrfs_fs_info *fs_info,
1903                                   struct ulist *qgroups,
1904                                   u64 nr_old_roots,
1905                                   u64 nr_new_roots,
1906                                   u64 num_bytes, u64 seq)
1907 {
1908         struct ulist_node *unode;
1909         struct ulist_iterator uiter;
1910         struct btrfs_qgroup *qg;
1911         u64 cur_new_count, cur_old_count;
1912
1913         ULIST_ITER_INIT(&uiter);
1914         while ((unode = ulist_next(qgroups, &uiter))) {
1915                 bool dirty = false;
1916
1917                 qg = u64_to_ptr(unode->aux);
1918                 cur_old_count = btrfs_qgroup_get_old_refcnt(qg, seq);
1919                 cur_new_count = btrfs_qgroup_get_new_refcnt(qg, seq);
1920
1921                 /* Rfer update part */
1922                 if (cur_old_count == 0 && cur_new_count > 0) {
1923                         qg->rfer += num_bytes;
1924                         qg->rfer_cmpr += num_bytes;
1925                         dirty = true;
1926                 }
1927                 if (cur_old_count > 0 && cur_new_count == 0) {
1928                         qg->rfer -= num_bytes;
1929                         qg->rfer_cmpr -= num_bytes;
1930                         dirty = true;
1931                 }
1932
1933                 /* Excl update part */
1934                 /* Exclusive/none -> shared case */
1935                 if (cur_old_count == nr_old_roots &&
1936                     cur_new_count < nr_new_roots) {
1937                         /* Exclusive -> shared */
1938                         if (cur_old_count != 0) {
1939                                 qg->excl -= num_bytes;
1940                                 qg->excl_cmpr -= num_bytes;
1941                                 dirty = true;
1942                         }
1943                 }
1944
1945                 /* Shared -> exclusive/none case */
1946                 if (cur_old_count < nr_old_roots &&
1947                     cur_new_count == nr_new_roots) {
1948                         /* Shared->exclusive */
1949                         if (cur_new_count != 0) {
1950                                 qg->excl += num_bytes;
1951                                 qg->excl_cmpr += num_bytes;
1952                                 dirty = true;
1953                         }
1954                 }
1955
1956                 /* Exclusive/none -> exclusive/none case */
1957                 if (cur_old_count == nr_old_roots &&
1958                     cur_new_count == nr_new_roots) {
1959                         if (cur_old_count == 0) {
1960                                 /* None -> exclusive/none */
1961
1962                                 if (cur_new_count != 0) {
1963                                         /* None -> exclusive */
1964                                         qg->excl += num_bytes;
1965                                         qg->excl_cmpr += num_bytes;
1966                                         dirty = true;
1967                                 }
1968                                 /* None -> none, nothing changed */
1969                         } else {
1970                                 /* Exclusive -> exclusive/none */
1971
1972                                 if (cur_new_count == 0) {
1973                                         /* Exclusive -> none */
1974                                         qg->excl -= num_bytes;
1975                                         qg->excl_cmpr -= num_bytes;
1976                                         dirty = true;
1977                                 }
1978                                 /* Exclusive -> exclusive, nothing changed */
1979                         }
1980                 }
1981                 if (dirty)
1982                         qgroup_dirty(fs_info, qg);
1983         }
1984         return 0;
1985 }
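/*
 * Worked example (a sketch): a 16K extent is shared by two roots and one of
 * them drops its ref, so nr_old_roots == 2 and nr_new_roots == 1.  The
 * surviving root's qgroup sees cur_old_count == 1 < nr_old_roots and
 * cur_new_count == 1 == nr_new_roots, takes the shared -> exclusive branch
 * and gains 16K of excl.  The dropping root's qgroup sees
 * cur_new_count == 0 and only loses 16K of rfer.
 */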
1986
1987 /*
1988  * This adjusts the counters for all referenced qgroups if need be.
1989  */
1990 static int qgroup_adjust_counters(struct btrfs_fs_info *fs_info,
1991                                   u64 root_to_skip, u64 num_bytes,
1992                                   struct ulist *qgroups, u64 seq,
1993                                   int old_roots, int new_roots, int rescan)
1994 {
1995         struct ulist_node *unode;
1996         struct ulist_iterator uiter;
1997         struct btrfs_qgroup *qg;
1998         u64 cur_new_count, cur_old_count;
1999
2000         ULIST_ITER_INIT(&uiter);
2001         while ((unode = ulist_next(qgroups, &uiter))) {
2002                 bool dirty = false;
2003
2004                 qg = u64_to_ptr(unode->aux);
2005                 cur_old_count = btrfs_qgroup_get_old_refcnt(qg, seq);
2006                 cur_new_count = btrfs_qgroup_get_new_refcnt(qg, seq);
2007
2008                 /*
2009                  * Wasn't referenced before but is now, add to the reference
2010                  * counters.
2011                  */
2012                 if (cur_old_count == 0 && cur_new_count > 0) {
2013                         qg->rfer += num_bytes;
2014                         qg->rfer_cmpr += num_bytes;
2015                         dirty = true;
2016                 }
2017
2018                 /*
2019                  * Was referenced before but isn't now, subtract from the
2020                  * reference counters.
2021                  */
2022                 if (cur_old_count > 0 && cur_new_count == 0) {
2023                         qg->rfer -= num_bytes;
2024                         qg->rfer_cmpr -= num_bytes;
2025                         dirty = true;
2026                 }
2027
2028                 /*
2029                  * If our refcount was the same as the roots previously but our
2030                  * new count isn't the same as the number of roots now then we
2031                  * went from having an exclusive reference on this range to not.
2032                  */
2033                 if (old_roots && cur_old_count == old_roots &&
2034                     (cur_new_count != new_roots || new_roots == 0)) {
2035                         WARN_ON(cur_new_count != new_roots && new_roots == 0);
2036                         qg->excl -= num_bytes;
2037                         qg->excl_cmpr -= num_bytes;
2038                         dirty = true;
2039                 }
2040
2041                 /*
2042                  * If we didn't reference all the roots before but now we do we
2043                  * have an exclusive reference to this range.
2044                  */
2045                 if ((!old_roots || cur_old_count != old_roots) &&
2046                     cur_new_count == new_roots) {
2047                         qg->excl += num_bytes;
2048                         qg->excl_cmpr += num_bytes;
2049                         dirty = true;
2050                 }
2051
2052                 if (dirty)
2053                         qgroup_dirty(fs_info, qg);
2054         }
2055         return 0;
2056 }
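/*
 * Rescan note: during rescan nothing was accounted before, so
 * qgroup_rescan_leaf() calls this with old_roots == 0.  Every referencing
 * qgroup then gains rfer, and those whose cur_new_count matches new_roots
 * pick the extent up as exclusive as well.
 */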
2057
2058 /*
2059  * If we removed a data extent and there were other references for that bytenr
2060  * then we need to look up all referencing roots to make sure we don't still
2061  * reference this bytenr.  If we do then we can just discard this operation.
2062  */
2063 static int check_existing_refs(struct btrfs_trans_handle *trans,
2064                                struct btrfs_fs_info *fs_info,
2065                                struct btrfs_qgroup_operation *oper)
2066 {
2067         struct ulist *roots = NULL;
2068         struct ulist_node *unode;
2069         struct ulist_iterator uiter;
2070         int ret = 0;
2071
2072         ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr,
2073                                    oper->elem.seq, &roots);
2074         if (ret < 0)
2075                 return ret;
2076         ret = 0;
2077
2078         ULIST_ITER_INIT(&uiter);
2079         while ((unode = ulist_next(roots, &uiter))) {
2080                 if (unode->val == oper->ref_root) {
2081                         ret = 1;
2082                         break;
2083                 }
2084         }
2085         ulist_free(roots);
2086         btrfs_put_tree_mod_seq(fs_info, &oper->elem);
2087
2088         return ret;
2089 }
2090
2091 /*
2092  * If we share a reference across multiple roots then we may need to adjust
2093  * various qgroups' referenced and exclusive counters.  The basic premise is this:
2094  *
2095  * 1) We have seq to represent a 0 count.  Instead of looping through all of the
2096  * qgroups and resetting their refcount to 0 we just constantly bump this
2097  * sequence number to act as the base reference count.  This means that if
2098  * anybody is equal to or below this sequence they were never referenced.  We
2099  * jack this sequence up by the number of roots we found each time in order to
2100  * make sure we don't have any overlap.
2101  *
2102  * 2) We first search all the roots that reference the area _except_ the root
2103  * we're acting on currently.  This makes up the old_refcnt of all the qgroups
2104  * before.
2105  *
2106  * 3) We walk all of the qgroups referenced by the root we are currently acting
2107  * on, and will either adjust old_refcnt in the case of a removal or the
2108  * new_refcnt in the case of an addition.
2109  *
2110  * 4) Finally we walk all the qgroups that are referenced by this range
2111  * including the root we are acting on currently.  We will adjust the counters
2112  * based on the number of roots we had and will have after this operation.
2113  *
2114  * Take this example as an illustration
2115  *
2116  *                      [qgroup 1/0]
2117  *                   /         |          \
2118  *              [qg 0/0]   [qg 0/1]     [qg 0/2]
2119  *                 \          |            /
2120  *                [        extent           ]
2121  *
2122  * Say we are adding a reference that is covered by qg 0/0.  The first step
2123  * would give a refcnt of 1 to qg 0/1 and 0/2 and a refcnt of 2 to qg 1/0 with
2124  * old_roots being 2.  Because this is an addition, new_roots will be 1.  We then go
2125  * through qg 0/0 which will get the new_refcnt set to 1 and add 1 to qg 1/0's
2126  * new_refcnt, bringing it to 3.  We then walk through all of the qgroups, we
2127  * notice that the old refcnt for qg 0/0 < the new refcnt, so we added a
2128  * reference and thus must add the size to the referenced bytes.  Everything
2129  * else is the same so nothing else changes.
2130  */
2131 static int qgroup_shared_accounting(struct btrfs_trans_handle *trans,
2132                                     struct btrfs_fs_info *fs_info,
2133                                     struct btrfs_qgroup_operation *oper)
2134 {
2135         struct ulist *roots = NULL;
2136         struct ulist *qgroups, *tmp;
2137         struct btrfs_qgroup *qgroup;
2138         struct seq_list elem = SEQ_LIST_INIT(elem);
2139         u64 seq;
2140         int old_roots = 0;
2141         int new_roots = 0;
2142         int ret = 0;
2143
2144         if (oper->elem.seq) {
2145                 ret = check_existing_refs(trans, fs_info, oper);
2146                 if (ret < 0)
2147                         return ret;
2148                 if (ret)
2149                         return 0;
2150         }
2151
2152         qgroups = ulist_alloc(GFP_NOFS);
2153         if (!qgroups)
2154                 return -ENOMEM;
2155
2156         tmp = ulist_alloc(GFP_NOFS);
2157         if (!tmp) {
2158                 ulist_free(qgroups);
2159                 return -ENOMEM;
2160         }
2161
2162         btrfs_get_tree_mod_seq(fs_info, &elem);
2163         ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr, elem.seq,
2164                                    &roots);
2165         btrfs_put_tree_mod_seq(fs_info, &elem);
2166         if (ret < 0) {
2167                 ulist_free(qgroups);
2168                 ulist_free(tmp);
2169                 return ret;
2170         }
2171         spin_lock(&fs_info->qgroup_lock);
2172         qgroup = find_qgroup_rb(fs_info, oper->ref_root);
2173         if (!qgroup)
2174                 goto out;
2175         seq = fs_info->qgroup_seq;
2176
2177         /*
2178          * So roots is the list of all the roots currently pointing at the
2179          * bytenr, including the ref we are adding if we are adding, or not if
2180          * we are removing a ref.  So we pass in the ref_root to skip that root
2181          * in our calculations.  We set old_refnct and new_refcnt cause who the
2182          * in our calculations.  We set old_refcnt and new_refcnt because who the
2183          * hell knows what everything looked like before, and it doesn't matter
2184          */
2185         ret = qgroup_calc_old_refcnt(fs_info, oper->ref_root, tmp, roots, qgroups,
2186                                      seq, &old_roots, 0);
2187         if (ret < 0)
2188                 goto out;
2189
2190         /*
2191          * Now adjust the refcounts of the qgroups that care about this
2192          * reference, either the old_count in the case of removal or new_count
2193          * in the case of an addition.
2194          */
2195         ret = qgroup_calc_new_refcnt(fs_info, oper, qgroup, tmp, qgroups,
2196                                      seq);
2197         if (ret < 0)
2198                 goto out;
2199
2200         /*
2201          * ...in the case of removals.  If we had a removal before we got around
2202          * to processing this operation then we need to find that guy and count
2203          * his references as if they really existed so we don't end up screwing
2204          * up the exclusive counts.  Then whenever we go to process the delete
2205          * everything will be grand and we can account for whatever exclusive
2206          * changes need to be made there.  We also have to pass in old_roots so
2207          * we have an accurate count of the roots as it pertains to this
2208          * operations view of the world.
2209          * operation's view of the world.
2210         ret = qgroup_account_deleted_refs(fs_info, oper, tmp, qgroups, seq,
2211                                           &old_roots);
2212         if (ret < 0)
2213                 goto out;
2214
2215         /*
2216          * We are adding our root, need to adjust up the number of roots,
2217          * otherwise old_roots is the number of roots we want.
2218          */
2219         if (oper->type == BTRFS_QGROUP_OPER_ADD_SHARED) {
2220                 new_roots = old_roots + 1;
2221         } else {
2222                 new_roots = old_roots;
2223                 old_roots++;
2224         }
2225
2226         /*
2227          * Bump qgroup_seq to avoid seq overlap
2228          * XXX: This makes qgroup_seq mismatch with oper->seq.
2229          */
2230         fs_info->qgroup_seq += old_roots + 1;
2231
2232
2233         /*
2234          * And now the magic happens, bless Arne for having a pretty elegant
2235          * solution for this.
2236          */
2237         qgroup_adjust_counters(fs_info, oper->ref_root, oper->num_bytes,
2238                                qgroups, seq, old_roots, new_roots, 0);
2239 out:
2240         spin_unlock(&fs_info->qgroup_lock);
2241         ulist_free(qgroups);
2242         ulist_free(roots);
2243         ulist_free(tmp);
2244         return ret;
2245 }
2246
2247 /*
2248  * Process a reference to a shared subtree. This type of operation is
2249  * queued during snapshot removal when we encounter extents which are
2250  * shared between more than one root.
2251  */
2252 static int qgroup_subtree_accounting(struct btrfs_trans_handle *trans,
2253                                      struct btrfs_fs_info *fs_info,
2254                                      struct btrfs_qgroup_operation *oper)
2255 {
2256         struct ulist *roots = NULL;
2257         struct ulist_node *unode;
2258         struct ulist_iterator uiter;
2259         struct btrfs_qgroup_list *glist;
2260         struct ulist *parents;
2261         int ret = 0;
2262         int err;
2263         struct btrfs_qgroup *qg;
2264         u64 root_obj = 0;
2265         struct seq_list elem = SEQ_LIST_INIT(elem);
2266
2267         parents = ulist_alloc(GFP_NOFS);
2268         if (!parents)
2269                 return -ENOMEM;
2270
2271         btrfs_get_tree_mod_seq(fs_info, &elem);
2272         ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr,
2273                                    elem.seq, &roots);
2274         btrfs_put_tree_mod_seq(fs_info, &elem);
2275         if (ret < 0)
2276                 goto out;
2277
2278         if (roots->nnodes != 1)
2279                 goto out;
2280
2281         ULIST_ITER_INIT(&uiter);
2282         unode = ulist_next(roots, &uiter); /* Only want 1 so no need to loop */
2283         /*
2284          * If we find our ref root then that means all refs
2285          * this extent has to the root have not yet been
2286          * deleted. In that case, we do nothing and let the
2287          * last ref for this bytenr drive our update.
2288          *
2289          * This can happen for example if an extent is
2290          * referenced multiple times in a snapshot (clone,
2291          * etc). If we are in the middle of snapshot removal,
2292          * queued updates for such an extent will find the
2293          * root if we have not yet finished removing the
2294          * snapshot.
2295          */
2296         if (unode->val == oper->ref_root)
2297                 goto out;
2298
2299         root_obj = unode->val;
2300         BUG_ON(!root_obj);
2301
2302         spin_lock(&fs_info->qgroup_lock);
2303         qg = find_qgroup_rb(fs_info, root_obj);
2304         if (!qg)
2305                 goto out_unlock;
2306
2307         qg->excl += oper->num_bytes;
2308         qg->excl_cmpr += oper->num_bytes;
2309         qgroup_dirty(fs_info, qg);
2310
2311         /*
2312          * Adjust counts for parent groups. First we find all
2313          * parents, then in the 2nd loop we do the adjustment
2314          * while adding parents of the parents to our ulist.
2315          */
2316         list_for_each_entry(glist, &qg->groups, next_group) {
2317                 err = ulist_add(parents, glist->group->qgroupid,
2318                                 ptr_to_u64(glist->group), GFP_ATOMIC);
2319                 if (err < 0) {
2320                         ret = err;
2321                         goto out_unlock;
2322                 }
2323         }
2324
2325         ULIST_ITER_INIT(&uiter);
2326         while ((unode = ulist_next(parents, &uiter))) {
2327                 qg = u64_to_ptr(unode->aux);
2328                 qg->excl += oper->num_bytes;
2329                 qg->excl_cmpr += oper->num_bytes;
2330                 qgroup_dirty(fs_info, qg);
2331
2332                 /* Add any parents of the parents */
2333                 list_for_each_entry(glist, &qg->groups, next_group) {
2334                         err = ulist_add(parents, glist->group->qgroupid,
2335                                         ptr_to_u64(glist->group), GFP_ATOMIC);
2336                         if (err < 0) {
2337                                 ret = err;
2338                                 goto out_unlock;
2339                         }
2340                 }
2341         }
2342
2343 out_unlock:
2344         spin_unlock(&fs_info->qgroup_lock);
2345
2346 out:
2347         ulist_free(roots);
2348         ulist_free(parents);
2349         return ret;
2350 }
2351
2352 /*
2353  * btrfs_qgroup_account is called for every operation that was recorded when a
2354  * ref was added to or deleted from the fs.  First, all roots referencing the
2355  * extent are searched, and then the space is accounted to the different roots
2356  * accordingly, dispatching to the type-specific helpers above.
2357  */
2358 static int btrfs_qgroup_account(struct btrfs_trans_handle *trans,
2359                                 struct btrfs_fs_info *fs_info,
2360                                 struct btrfs_qgroup_operation *oper)
2361 {
2362         int ret = 0;
2363
2364         if (!fs_info->quota_enabled)
2365                 return 0;
2366
2367         BUG_ON(!fs_info->quota_root);
2368
2369         mutex_lock(&fs_info->qgroup_rescan_lock);
2370         if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
2371                 if (fs_info->qgroup_rescan_progress.objectid <= oper->bytenr) {
2372                         mutex_unlock(&fs_info->qgroup_rescan_lock);
2373                         return 0;
2374                 }
2375         }
2376         mutex_unlock(&fs_info->qgroup_rescan_lock);
2377
2378         ASSERT(is_fstree(oper->ref_root));
2379
2380         trace_btrfs_qgroup_account(oper);
2381
2382         switch (oper->type) {
2383         case BTRFS_QGROUP_OPER_ADD_EXCL:
2384         case BTRFS_QGROUP_OPER_SUB_EXCL:
2385                 ret = qgroup_excl_accounting(fs_info, oper);
2386                 break;
2387         case BTRFS_QGROUP_OPER_ADD_SHARED:
2388         case BTRFS_QGROUP_OPER_SUB_SHARED:
2389                 ret = qgroup_shared_accounting(trans, fs_info, oper);
2390                 break;
2391         case BTRFS_QGROUP_OPER_SUB_SUBTREE:
2392                 ret = qgroup_subtree_accounting(trans, fs_info, oper);
2393                 break;
2394         default:
2395                 ASSERT(0);
2396         }
2397         return ret;
2398 }
2399
2400 /*
2401  * Needs to be called every time we run delayed refs, even if there was an
2402  * error, in order to clean up outstanding operations.
2403  */
2404 int btrfs_delayed_qgroup_accounting(struct btrfs_trans_handle *trans,
2405                                     struct btrfs_fs_info *fs_info)
2406 {
2407         struct btrfs_qgroup_operation *oper;
2408         int ret = 0;
2409
2410         while (!list_empty(&trans->qgroup_ref_list)) {
2411                 oper = list_first_entry(&trans->qgroup_ref_list,
2412                                         struct btrfs_qgroup_operation, list);
2413                 list_del_init(&oper->list);
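                /*
                 * Keep draining the list to free each operation, but stop
                 * accounting once an error occurs or the transaction aborts.
                 */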
2414                 if (!ret && !trans->aborted)
2415                         ret = btrfs_qgroup_account(trans, fs_info, oper);
2416                 spin_lock(&fs_info->qgroup_op_lock);
2417                 rb_erase(&oper->n, &fs_info->qgroup_op_tree);
2418                 spin_unlock(&fs_info->qgroup_op_lock);
2419                 btrfs_put_tree_mod_seq(fs_info, &oper->elem);
2420                 kfree(oper);
2421         }
2422         return ret;
2423 }
2424
2425 /*
2426  * called from commit_transaction. Writes all changed qgroups to disk.
2427  */
2428 int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
2429                       struct btrfs_fs_info *fs_info)
2430 {
2431         struct btrfs_root *quota_root = fs_info->quota_root;
2432         int ret = 0;
2433         int start_rescan_worker = 0;
2434
2435         if (!quota_root)
2436                 goto out;
2437
2438         if (!fs_info->quota_enabled && fs_info->pending_quota_state)
2439                 start_rescan_worker = 1;
2440
2441         fs_info->quota_enabled = fs_info->pending_quota_state;
2442
2443         spin_lock(&fs_info->qgroup_lock);
2444         while (!list_empty(&fs_info->dirty_qgroups)) {
2445                 struct btrfs_qgroup *qgroup;
2446                 qgroup = list_first_entry(&fs_info->dirty_qgroups,
2447                                           struct btrfs_qgroup, dirty);
2448                 list_del_init(&qgroup->dirty);
2449                 spin_unlock(&fs_info->qgroup_lock);
2450                 ret = update_qgroup_info_item(trans, quota_root, qgroup);
2451                 if (ret)
2452                         fs_info->qgroup_flags |=
2453                                         BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2454                 ret = update_qgroup_limit_item(trans, quota_root, qgroup);
2455                 if (ret)
2456                         fs_info->qgroup_flags |=
2457                                         BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2458                 spin_lock(&fs_info->qgroup_lock);
2459         }
2460         if (fs_info->quota_enabled)
2461                 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_ON;
2462         else
2463                 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
2464         spin_unlock(&fs_info->qgroup_lock);
2465
2466         ret = update_qgroup_status_item(trans, fs_info, quota_root);
2467         if (ret)
2468                 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2469
2470         if (!ret && start_rescan_worker) {
2471                 ret = qgroup_rescan_init(fs_info, 0, 1);
2472                 if (!ret) {
2473                         qgroup_rescan_zero_tracking(fs_info);
2474                         btrfs_queue_work(fs_info->qgroup_rescan_workers,
2475                                          &fs_info->qgroup_rescan_work);
2476                 }
2477                 ret = 0;
2478         }
2479
2480 out:
2481
2482         return ret;
2483 }
2484
2485 /*
2486  * Copy the accounting information between qgroups.  This is necessary when a
2487  * snapshot or a subvolume is created
2488  */
2489 int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
2490                          struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
2491                          struct btrfs_qgroup_inherit *inherit)
2492 {
2493         int ret = 0;
2494         int i;
2495         u64 *i_qgroups;
2496         struct btrfs_root *quota_root = fs_info->quota_root;
2497         struct btrfs_qgroup *srcgroup;
2498         struct btrfs_qgroup *dstgroup;
2499         u32 level_size = 0;
2500         u64 nums;
2501
2502         mutex_lock(&fs_info->qgroup_ioctl_lock);
2503         if (!fs_info->quota_enabled)
2504                 goto out;
2505
2506         if (!quota_root) {
2507                 ret = -EINVAL;
2508                 goto out;
2509         }
2510
2511         if (inherit) {
2512                 i_qgroups = (u64 *)(inherit + 1);
2513                 nums = inherit->num_qgroups + 2 * inherit->num_ref_copies +
2514                        2 * inherit->num_excl_copies;
2515                 for (i = 0; i < nums; ++i) {
2516                         srcgroup = find_qgroup_rb(fs_info, *i_qgroups);
2517                         if (!srcgroup) {
2518                                 ret = -EINVAL;
2519                                 goto out;
2520                         }
2521
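                        /*
                         * Qgroup ids encode the level in the high 16 bits
                         * (e.g. qgroup 1/100 is (1ULL << 48) | 100), so this
                         * only allows inheriting from a higher level qgroup.
                         */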
2522                         if ((srcgroup->qgroupid >> 48) <= (objectid >> 48)) {
2523                                 ret = -EINVAL;
2524                                 goto out;
2525                         }
2526                         ++i_qgroups;
2527                 }
2528         }
2529
2530         /*
2531          * create a tracking group for the subvol itself
2532          */
2533         ret = add_qgroup_item(trans, quota_root, objectid);
2534         if (ret)
2535                 goto out;
2536
2537         if (srcid) {
2538                 struct btrfs_root *srcroot;
2539                 struct btrfs_key srckey;
2540
2541                 srckey.objectid = srcid;
2542                 srckey.type = BTRFS_ROOT_ITEM_KEY;
2543                 srckey.offset = (u64)-1;
2544                 srcroot = btrfs_read_fs_root_no_name(fs_info, &srckey);
2545                 if (IS_ERR(srcroot)) {
2546                         ret = PTR_ERR(srcroot);
2547                         goto out;
2548                 }
2549
2550                 rcu_read_lock();
2551                 level_size = srcroot->nodesize;
2552                 rcu_read_unlock();
2553         }
2554
2555         /*
2556          * add qgroup to all inherited groups
2557          */
2558         if (inherit) {
2559                 i_qgroups = (u64 *)(inherit + 1);
2560                 for (i = 0; i < inherit->num_qgroups; ++i) {
2561                         ret = add_qgroup_relation_item(trans, quota_root,
2562                                                        objectid, *i_qgroups);
2563                         if (ret)
2564                                 goto out;
2565                         ret = add_qgroup_relation_item(trans, quota_root,
2566                                                        *i_qgroups, objectid);
2567                         if (ret)
2568                                 goto out;
2569                         ++i_qgroups;
2570                 }
2571         }
2572
2573
2574         spin_lock(&fs_info->qgroup_lock);
2575
2576         dstgroup = add_qgroup_rb(fs_info, objectid);
2577         if (IS_ERR(dstgroup)) {
2578                 ret = PTR_ERR(dstgroup);
2579                 goto unlock;
2580         }
2581
2582         if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) {
2583                 dstgroup->lim_flags = inherit->lim.flags;
2584                 dstgroup->max_rfer = inherit->lim.max_rfer;
2585                 dstgroup->max_excl = inherit->lim.max_excl;
2586                 dstgroup->rsv_rfer = inherit->lim.rsv_rfer;
2587                 dstgroup->rsv_excl = inherit->lim.rsv_excl;
2588
2589                 ret = update_qgroup_limit_item(trans, quota_root, dstgroup);
2590                 if (ret) {
2591                         fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2592                         btrfs_info(fs_info, "unable to update quota limit for %llu",
2593                                dstgroup->qgroupid);
2594                         goto unlock;
2595                 }
2596         }
2597
2598         if (srcid) {
2599                 srcgroup = find_qgroup_rb(fs_info, srcid);
2600                 if (!srcgroup)
2601                         goto unlock;
2602
2603                 /*
2604                  * We call inherit after we clone the root in order to make sure
2605                  * our counts don't go crazy, so at this point the only
2606                  * difference between the two roots should be the root node.
2607                  */
2608                 dstgroup->rfer = srcgroup->rfer;
2609                 dstgroup->rfer_cmpr = srcgroup->rfer_cmpr;
2610                 dstgroup->excl = level_size;
2611                 dstgroup->excl_cmpr = level_size;
2612                 srcgroup->excl = level_size;
2613                 srcgroup->excl_cmpr = level_size;
2614
2615                 /* inherit the limit info */
2616                 dstgroup->lim_flags = srcgroup->lim_flags;
2617                 dstgroup->max_rfer = srcgroup->max_rfer;
2618                 dstgroup->max_excl = srcgroup->max_excl;
2619                 dstgroup->rsv_rfer = srcgroup->rsv_rfer;
2620                 dstgroup->rsv_excl = srcgroup->rsv_excl;
2621
2622                 qgroup_dirty(fs_info, dstgroup);
2623                 qgroup_dirty(fs_info, srcgroup);
2624         }
2625
2626         if (!inherit)
2627                 goto unlock;
2628
2629         i_qgroups = (u64 *)(inherit + 1);
2630         for (i = 0; i < inherit->num_qgroups; ++i) {
2631                 ret = add_relation_rb(quota_root->fs_info, objectid,
2632                                       *i_qgroups);
2633                 if (ret)
2634                         goto unlock;
2635                 ++i_qgroups;
2636         }
2637
2638         for (i = 0; i < inherit->num_ref_copies; ++i) {
2639                 struct btrfs_qgroup *src;
2640                 struct btrfs_qgroup *dst;
2641
2642                 src = find_qgroup_rb(fs_info, i_qgroups[0]);
2643                 dst = find_qgroup_rb(fs_info, i_qgroups[1]);
2644
2645                 if (!src || !dst) {
2646                         ret = -EINVAL;
2647                         goto unlock;
2648                 }
2649
2650                 dst->rfer = src->rfer - level_size;
2651                 dst->rfer_cmpr = src->rfer_cmpr - level_size;
2652                 i_qgroups += 2;
2653         }
2654         for (i = 0; i < inherit->num_excl_copies; ++i) {
2655                 struct btrfs_qgroup *src;
2656                 struct btrfs_qgroup *dst;
2657
2658                 src = find_qgroup_rb(fs_info, i_qgroups[0]);
2659                 dst = find_qgroup_rb(fs_info, i_qgroups[1]);
2660
2661                 if (!src || !dst) {
2662                         ret = -EINVAL;
2663                         goto unlock;
2664                 }
2665
2666                 dst->excl = src->excl + level_size;
2667                 dst->excl_cmpr = src->excl_cmpr + level_size;
2668                 i_qgroups += 2;
2669         }
2670
2671 unlock:
2672         spin_unlock(&fs_info->qgroup_lock);
2673 out:
2674         mutex_unlock(&fs_info->qgroup_ioctl_lock);
2675         return ret;
2676 }
2677
2678 int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
2679 {
2680         struct btrfs_root *quota_root;
2681         struct btrfs_qgroup *qgroup;
2682         struct btrfs_fs_info *fs_info = root->fs_info;
2683         u64 ref_root = root->root_key.objectid;
2684         int ret = 0;
2685         struct ulist_node *unode;
2686         struct ulist_iterator uiter;
2687
2688         if (!is_fstree(ref_root))
2689                 return 0;
2690
2691         if (num_bytes == 0)
2692                 return 0;
2693
2694         spin_lock(&fs_info->qgroup_lock);
2695         quota_root = fs_info->quota_root;
2696         if (!quota_root)
2697                 goto out;
2698
2699         qgroup = find_qgroup_rb(fs_info, ref_root);
2700         if (!qgroup)
2701                 goto out;
2702
2703         /*
2704          * In a first step, we check all affected qgroups to see if any limits
2705          * would be exceeded.
2706          */
2707         ulist_reinit(fs_info->qgroup_ulist);
2708         ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
2709                         (uintptr_t)qgroup, GFP_ATOMIC);
2710         if (ret < 0)
2711                 goto out;
2712         ULIST_ITER_INIT(&uiter);
2713         while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
2714                 struct btrfs_qgroup *qg;
2715                 struct btrfs_qgroup_list *glist;
2716
2717                 qg = u64_to_ptr(unode->aux);
2718
2719                 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
2720                     qg->reserved + (s64)qg->rfer + num_bytes >
2721                     qg->max_rfer) {
2722                         ret = -EDQUOT;
2723                         goto out;
2724                 }
2725
2726                 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) &&
2727                     qg->reserved + (s64)qg->excl + num_bytes >
2728                     qg->max_excl) {
2729                         ret = -EDQUOT;
2730                         goto out;
2731                 }
2732
2733                 list_for_each_entry(glist, &qg->groups, next_group) {
2734                         ret = ulist_add(fs_info->qgroup_ulist,
2735                                         glist->group->qgroupid,
2736                                         (uintptr_t)glist->group, GFP_ATOMIC);
2737                         if (ret < 0)
2738                                 goto out;
2739                 }
2740         }
2741         ret = 0;
2742         /*
2743          * no limits exceeded, now record the reservation into all qgroups
2744          */
2745         ULIST_ITER_INIT(&uiter);
2746         while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
2747                 struct btrfs_qgroup *qg;
2748
2749                 qg = u64_to_ptr(unode->aux);
2750
2751                 qg->reserved += num_bytes;
2752         }
2753
2754 out:
2755         spin_unlock(&fs_info->qgroup_lock);
2756         return ret;
2757 }
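/*
 * Reserve/free pairing (an illustrative sketch, not a real call site):
 *
 *	ret = btrfs_qgroup_reserve(root, num_bytes);
 *	if (ret)
 *		return ret;	(-EDQUOT if a limit would be exceeded)
 *	...
 *	btrfs_qgroup_free(root, num_bytes);	(on the error path)
 *
 * so that the qgroups' reserved counters never leak when the write that
 * needed the space is never performed.
 */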
2758
2759 void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes)
2760 {
2761         struct btrfs_root *quota_root;
2762         struct btrfs_qgroup *qgroup;
2763         struct btrfs_fs_info *fs_info = root->fs_info;
2764         struct ulist_node *unode;
2765         struct ulist_iterator uiter;
2766         u64 ref_root = root->root_key.objectid;
2767         int ret = 0;
2768
2769         if (!is_fstree(ref_root))
2770                 return;
2771
2772         if (num_bytes == 0)
2773                 return;
2774
2775         spin_lock(&fs_info->qgroup_lock);
2776
2777         quota_root = fs_info->quota_root;
2778         if (!quota_root)
2779                 goto out;
2780
2781         qgroup = find_qgroup_rb(fs_info, ref_root);
2782         if (!qgroup)
2783                 goto out;
2784
2785         ulist_reinit(fs_info->qgroup_ulist);
2786         ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
2787                         (uintptr_t)qgroup, GFP_ATOMIC);
2788         if (ret < 0)
2789                 goto out;
2790         ULIST_ITER_INIT(&uiter);
2791         while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
2792                 struct btrfs_qgroup *qg;
2793                 struct btrfs_qgroup_list *glist;
2794
2795                 qg = u64_to_ptr(unode->aux);
2796
2797                 qg->reserved -= num_bytes;
2798
2799                 list_for_each_entry(glist, &qg->groups, next_group) {
2800                         ret = ulist_add(fs_info->qgroup_ulist,
2801                                         glist->group->qgroupid,
2802                                         (uintptr_t)glist->group, GFP_ATOMIC);
2803                         if (ret < 0)
2804                                 goto out;
2805                 }
2806         }
2807
2808 out:
2809         spin_unlock(&fs_info->qgroup_lock);
2810 }
2811
2812 void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
2813 {
2814         if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq)
2815                 return;
2816         btrfs_err(trans->root->fs_info,
2817                 "qgroups not uptodate in trans handle %p:  list is%s empty, "
2818                 "seq is %#x.%x",
2819                 trans, list_empty(&trans->qgroup_ref_list) ? "" : " not",
2820                 (u32)(trans->delayed_ref_elem.seq >> 32),
2821                 (u32)trans->delayed_ref_elem.seq);
2822         BUG();
2823 }
2824
2825 /*
2826  * returns < 0 on error, 0 when more leaves are to be scanned.
2827  * returns 1 when done.
2828  */
2829 static int
2830 qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
2831                    struct btrfs_trans_handle *trans, struct ulist *qgroups,
2832                    struct ulist *tmp, struct extent_buffer *scratch_leaf)
2833 {
2834         struct btrfs_key found;
2835         struct ulist *roots = NULL;
2836         struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem);
2837         u64 num_bytes;
2838         u64 seq;
2839         int new_roots;
2840         int slot;
2841         int ret;
2842
2843         path->leave_spinning = 1;
2844         mutex_lock(&fs_info->qgroup_rescan_lock);
2845         ret = btrfs_search_slot_for_read(fs_info->extent_root,
2846                                          &fs_info->qgroup_rescan_progress,
2847                                          path, 1, 0);
2848
2849         pr_debug("current progress key (%llu %u %llu), search_slot ret %d\n",
2850                  fs_info->qgroup_rescan_progress.objectid,
2851                  fs_info->qgroup_rescan_progress.type,
2852                  fs_info->qgroup_rescan_progress.offset, ret);
2853
2854         if (ret) {
2855                 /*
2856                  * The rescan is about to end, we will not be scanning any
2857                  * further blocks. We cannot unset the RESCAN flag here, because
2858                  * we want to commit the transaction if everything went well.
2859                  * To make the live accounting work in this phase, we set our
2860                  * scan progress pointer such that every real extent objectid
2861                  * will be smaller.
2862                  */
2863                 fs_info->qgroup_rescan_progress.objectid = (u64)-1;
2864                 btrfs_release_path(path);
2865                 mutex_unlock(&fs_info->qgroup_rescan_lock);
2866                 return ret;
2867         }
2868
2869         btrfs_item_key_to_cpu(path->nodes[0], &found,
2870                               btrfs_header_nritems(path->nodes[0]) - 1);
2871         fs_info->qgroup_rescan_progress.objectid = found.objectid + 1;
2872
2873         btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
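        /*
         * Shallow-copy the leaf: the extent_buffer struct, including its page
         * pointers, is duplicated so we can keep reading items after dropping
         * the path and locks; the underlying pages stay shared.
         */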
2874         memcpy(scratch_leaf, path->nodes[0], sizeof(*scratch_leaf));
2875         slot = path->slots[0];
2876         btrfs_release_path(path);
2877         mutex_unlock(&fs_info->qgroup_rescan_lock);
2878
2879         for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) {
2880                 btrfs_item_key_to_cpu(scratch_leaf, &found, slot);
2881                 if (found.type != BTRFS_EXTENT_ITEM_KEY &&
2882                     found.type != BTRFS_METADATA_ITEM_KEY)
2883                         continue;
2884                 if (found.type == BTRFS_METADATA_ITEM_KEY)
2885                         num_bytes = fs_info->extent_root->nodesize;
2886                 else
2887                         num_bytes = found.offset;
2888
2889                 ulist_reinit(qgroups);
2890                 ret = btrfs_find_all_roots(NULL, fs_info, found.objectid, 0,
2891                                            &roots);
2892                 if (ret < 0)
2893                         goto out;
2894                 spin_lock(&fs_info->qgroup_lock);
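                /*
                 * Reserve a window of sequence numbers for this extent: each
                 * of the nnodes roots can raise a qgroup's refcnt by at most
                 * one, so bumping qgroup_seq by nnodes + 1 keeps this round's
                 * refcnts from colliding with the next one's.
                 */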
2895                 seq = fs_info->qgroup_seq;
2896                 fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */
2897
2898                 new_roots = 0;
2899                 ret = qgroup_calc_old_refcnt(fs_info, 0, tmp, roots, qgroups,
2900                                              seq, &new_roots, 1);
2901                 if (ret < 0) {
2902                         spin_unlock(&fs_info->qgroup_lock);
2903                         ulist_free(roots);
2904                         goto out;
2905                 }
2906
2907                 ret = qgroup_adjust_counters(fs_info, 0, num_bytes, qgroups,
2908                                              seq, 0, new_roots, 1);
2909                 if (ret < 0) {
2910                         spin_unlock(&fs_info->qgroup_lock);
2911                         ulist_free(roots);
2912                         goto out;
2913                 }
2914                 spin_unlock(&fs_info->qgroup_lock);
2915                 ulist_free(roots);
2916         }
2917 out:
2918         btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
2919
2920         return ret;
2921 }
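/*
 * Illustrative sketch only (not part of the original file): the return
 * convention documented above implies a driver loop of the following
 * shape, which is what btrfs_qgroup_rescan_worker() below implements,
 * one transaction per scanned leaf:
 *
 *	int err = 0;
 *
 *	while (!err)			// 0 means "more leaves to scan"
 *		err = qgroup_rescan_leaf(fs_info, path, trans,
 *					 qgroups, tmp, scratch_leaf);
 *	if (err < 0)
 *		handle_error(err);	// hypothetical error handler;
 *					// err == 1 means the scan completed
 */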
2922
2923 static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
2924 {
2925         struct btrfs_fs_info *fs_info = container_of(work, struct btrfs_fs_info,
2926                                                      qgroup_rescan_work);
2927         struct btrfs_path *path;
2928         struct btrfs_trans_handle *trans = NULL;
2929         struct ulist *tmp = NULL, *qgroups = NULL;
2930         struct extent_buffer *scratch_leaf = NULL;
2931         int err = -ENOMEM;
2932         int ret = 0;
2933
2934         path = btrfs_alloc_path();
2935         if (!path)
2936                 goto out;
2937         qgroups = ulist_alloc(GFP_NOFS);
2938         if (!qgroups)
2939                 goto out;
2940         tmp = ulist_alloc(GFP_NOFS);
2941         if (!tmp)
2942                 goto out;
2943         scratch_leaf = kmalloc(sizeof(*scratch_leaf), GFP_NOFS);
2944         if (!scratch_leaf)
2945                 goto out;
2946
2947         err = 0;
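        /*
         * One leaf per transaction: qgroup_rescan_leaf() returns 0 to keep
         * going, 1 once the whole extent tree has been scanned, and a
         * negative errno on error; anything non-zero ends the loop.
         */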
2948         while (!err) {
2949                 trans = btrfs_start_transaction(fs_info->fs_root, 0);
2950                 if (IS_ERR(trans)) {
2951                         err = PTR_ERR(trans);
2952                         break;
2953                 }
2954                 if (!fs_info->quota_enabled) {
2955                         err = -EINTR;
2956                 } else {
2957                         err = qgroup_rescan_leaf(fs_info, path, trans,
2958                                                  qgroups, tmp, scratch_leaf);
2959                 }
2960                 if (err > 0)
2961                         btrfs_commit_transaction(trans, fs_info->fs_root);
2962                 else
2963                         btrfs_end_transaction(trans, fs_info->fs_root);
2964         }
2965
2966 out:
2967         kfree(scratch_leaf);
2968         ulist_free(qgroups);
2969         ulist_free(tmp);
2970         btrfs_free_path(path);
2971
2972         mutex_lock(&fs_info->qgroup_rescan_lock);
2973         fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2974
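        /*
         * err > 0 means the scan ran to completion, so any previously
         * recorded inconsistency has now been repaired; err < 0 means the
         * counters can no longer be trusted.
         */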
2975         if (err > 0 &&
2976             fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) {
2977                 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2978         } else if (err < 0) {
2979                 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2980         }
2981         mutex_unlock(&fs_info->qgroup_rescan_lock);
2982
2983         /*
2984          * Only update the status item here; the loop above has already
2985          * updated the qgroup info itself.
2986          */
2987         trans = btrfs_start_transaction(fs_info->quota_root, 1);
2988         if (IS_ERR(trans)) {
2989                 err = PTR_ERR(trans);
2990                 btrfs_err(fs_info,
2991                           "failed to start transaction for status update: %d",
2992                           err);
2993                 goto done;
2994         }
2995         ret = update_qgroup_status_item(trans, fs_info, fs_info->quota_root);
2996         if (ret < 0) {
2997                 err = ret;
2998                 btrfs_err(fs_info, "failed to update qgroup status: %d", err);
2999         }
3000         btrfs_end_transaction(trans, fs_info->quota_root);
3001
3002         if (err >= 0) {
3003                 btrfs_info(fs_info, "qgroup scan completed%s",
3004                         err > 0 ? " (inconsistency flag cleared)" : "");
3005         } else {
3006                 btrfs_err(fs_info, "qgroup scan failed with %d", err);
3007         }
3008
3009 done:
3010         complete_all(&fs_info->qgroup_rescan_completion);
3011 }
3012
3013 /*
3014  * Checks that (a) no rescan is running and (b) quota is enabled. Allocates all
3015  * memory required for the rescan context.
3016  */
3017 static int
3018 qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
3019                    int init_flags)
3020 {
3021         int ret = 0;
3022
3023         if (!init_flags &&
3024             (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) ||
3025              !(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))) {
3026                 ret = -EINVAL;
3027                 goto err;
3028         }
3029
3030         mutex_lock(&fs_info->qgroup_rescan_lock);
3031         spin_lock(&fs_info->qgroup_lock);
3032
3033         if (init_flags) {
3034                 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
3035                         ret = -EINPROGRESS;
3036                 else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
3037                         ret = -EINVAL;
3038
3039                 if (ret) {
3040                         spin_unlock(&fs_info->qgroup_lock);
3041                         mutex_unlock(&fs_info->qgroup_rescan_lock);
3042                         goto err;
3043                 }
3044                 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN;
3045         }
3046
3047         memset(&fs_info->qgroup_rescan_progress, 0,
3048                 sizeof(fs_info->qgroup_rescan_progress));
3049         fs_info->qgroup_rescan_progress.objectid = progress_objectid;
3050
3051         spin_unlock(&fs_info->qgroup_lock);
3052         mutex_unlock(&fs_info->qgroup_rescan_lock);
3053
3054         init_completion(&fs_info->qgroup_rescan_completion);
3055
3056         memset(&fs_info->qgroup_rescan_work, 0,
3057                sizeof(fs_info->qgroup_rescan_work));
3058         btrfs_init_work(&fs_info->qgroup_rescan_work,
3059                         btrfs_qgroup_rescan_helper,
3060                         btrfs_qgroup_rescan_worker, NULL, NULL);
3061
3062         if (ret) {
3063 err:
3064                 btrfs_info(fs_info, "qgroup_rescan_init failed with %d", ret);
3065                 return ret;
3066         }
3067
3068         return 0;
3069 }
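/*
 * Illustrative sketch only (not part of the original file): the two ways
 * this initializer is reached.  A fresh rescan passes init_flags != 0 so
 * the RESCAN flag gets set and an already-running scan is rejected, while
 * resuming after a mount passes 0 and merely validates the flags read
 * from disk:
 *
 *	qgroup_rescan_init(fs_info, 0, 1);			// new scan
 *	qgroup_rescan_init(fs_info, progress_objectid, 0);	// resume
 */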
3070
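/*
 * Reset every qgroup's usage counters to zero so the rescan can rebuild
 * them from scratch; limits and reservations are deliberately left
 * untouched.
 */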
3071 static void
3072 qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info)
3073 {
3074         struct rb_node *n;
3075         struct btrfs_qgroup *qgroup;
3076
3077         spin_lock(&fs_info->qgroup_lock);
3078         /* clear all current qgroup tracking information */
3079         for (n = rb_first(&fs_info->qgroup_tree); n; n = rb_next(n)) {
3080                 qgroup = rb_entry(n, struct btrfs_qgroup, node);
3081                 qgroup->rfer = 0;
3082                 qgroup->rfer_cmpr = 0;
3083                 qgroup->excl = 0;
3084                 qgroup->excl_cmpr = 0;
3085         }
3086         spin_unlock(&fs_info->qgroup_lock);
3087 }
3088
3089 int
3090 btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
3091 {
3092         int ret = 0;
3093         struct btrfs_trans_handle *trans;
3094
3095         ret = qgroup_rescan_init(fs_info, 0, 1);
3096         if (ret)
3097                 return ret;
3098
3099         /*
3100          * We have set the rescan_progress to 0, which means no more
3101          * delayed refs will be accounted by btrfs_qgroup_account_ref.
3102          * However, a call to btrfs_qgroup_account_ref may already be past
3103          * its btrfs_find_all_roots call, in which case it would still do
3104          * the accounting.
3105          * To solve this, we're committing the transaction, which will
3106          * ensure we run all delayed refs and only after that, we are
3107          * going to clear all tracking information for a clean start.
3108          */
3109
3110         trans = btrfs_join_transaction(fs_info->fs_root);
3111         if (IS_ERR(trans)) {
3112                 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
3113                 return PTR_ERR(trans);
3114         }
3115         ret = btrfs_commit_transaction(trans, fs_info->fs_root);
3116         if (ret) {
3117                 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
3118                 return ret;
3119         }
3120
3121         qgroup_rescan_zero_tracking(fs_info);
3122
3123         btrfs_queue_work(fs_info->qgroup_rescan_workers,
3124                          &fs_info->qgroup_rescan_work);
3125
3126         return 0;
3127 }
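/*
 * Illustrative caller sketch only (an assumption, not taken from this
 * file): an ioctl handler would typically kick off the scan and, when
 * userspace asked to block, wait for the worker to finish:
 *
 *	ret = btrfs_qgroup_rescan(fs_info);
 *	if (!ret && wait_for_it)		// hypothetical flag
 *		ret = btrfs_qgroup_wait_for_completion(fs_info);
 */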
3128
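/*
 * Wait for a rescan worker in flight to finish.  Returns 0 when no scan
 * is running or the wait completed normally, or -ERESTARTSYS when the
 * sleeping task was interrupted by a signal.
 */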
3129 int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info)
3130 {
3131         int running;
3132         int ret = 0;
3133
3134         mutex_lock(&fs_info->qgroup_rescan_lock);
3135         spin_lock(&fs_info->qgroup_lock);
3136         running = fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN;
3137         spin_unlock(&fs_info->qgroup_lock);
3138         mutex_unlock(&fs_info->qgroup_rescan_lock);
3139
3140         if (running)
3141                 ret = wait_for_completion_interruptible(
3142                                         &fs_info->qgroup_rescan_completion);
3143
3144         return ret;
3145 }
3146
3147 /*
3148  * this is only called from open_ctree where we're still single-threaded, thus
3149  * locking is omitted here.
3150  */
3151 void
3152 btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info)
3153 {
3154         if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
3155                 btrfs_queue_work(fs_info->qgroup_rescan_workers,
3156                                  &fs_info->qgroup_rescan_work);
3157 }