#include <asm/div64.h>
#include <linux/blkdev.h> /* sector_div */
#include <linux/pid_namespace.h>
+#include <linux/fs_pin.h>
/*
* These constants control the amount of freespace that suspend and
static void do_acct_process(struct bsd_acct_struct *acct);
struct bsd_acct_struct {
- long count;
+ struct fs_pin pin;
struct mutex lock;
int active;
unsigned long needcheck;
struct file *file;
struct pid_namespace *ns;
- struct list_head list;
+ struct work_struct work;
+ struct completion done;
};
-static DEFINE_SPINLOCK(acct_lock);
-static LIST_HEAD(acct_list);
-
/*
* Check the amount of free space and suspend/resume accordingly.
*/
do_div(suspend, 100);
if (sbuf.f_bavail <= suspend) {
acct->active = 0;
- printk(KERN_INFO "Process accounting paused\n");
+ pr_info("Process accounting paused\n");
}
} else {
u64 resume = sbuf.f_blocks * RESUME;
do_div(resume, 100);
if (sbuf.f_bavail >= resume) {
acct->active = 1;
- printk(KERN_INFO "Process accounting resumed\n");
+ pr_info("Process accounting resumed\n");
}
}
return acct->active;
}
-static void acct_put(struct bsd_acct_struct *p)
-{
- spin_lock(&acct_lock);
- if (!--p->count)
- kfree(p);
- spin_unlock(&acct_lock);
-}
-
-static struct bsd_acct_struct *acct_get(struct bsd_acct_struct **p)
+static struct bsd_acct_struct *acct_get(struct pid_namespace *ns)
{
struct bsd_acct_struct *res;
- spin_lock(&acct_lock);
again:
- res = *p;
- if (res)
- res->count++;
- spin_unlock(&acct_lock);
- if (res) {
- mutex_lock(&res->lock);
- if (!res->ns) {
- mutex_unlock(&res->lock);
- spin_lock(&acct_lock);
- if (!--res->count)
- kfree(res);
- goto again;
- }
+ smp_rmb();
+ rcu_read_lock();
+ res = ACCESS_ONCE(ns->bacct);
+ if (!res) {
+ rcu_read_unlock();
+ return NULL;
+ }
+ if (!atomic_long_inc_not_zero(&res->pin.count)) {
+ rcu_read_unlock();
+ cpu_relax();
+ goto again;
+ }
+ rcu_read_unlock();
+ mutex_lock(&res->lock);
+ if (!res->ns) {
+ mutex_unlock(&res->lock);
+ pin_put(&res->pin);
+ goto again;
}
return res;
}
+static void close_work(struct work_struct *work)
+{
+ struct bsd_acct_struct *acct = container_of(work, struct bsd_acct_struct, work);
+ struct file *file = acct->file;
+ if (file->f_op->flush)
+ file->f_op->flush(file, NULL);
+ __fput_sync(file);
+ complete(&acct->done);
+}
+
static void acct_kill(struct bsd_acct_struct *acct,
struct bsd_acct_struct *new)
{
if (acct) {
- struct file *file = acct->file;
struct pid_namespace *ns = acct->ns;
- spin_lock(&acct_lock);
- list_del(&acct->list);
- mnt_unpin(file->f_path.mnt);
- spin_unlock(&acct_lock);
do_acct_process(acct);
- filp_close(file, NULL);
- spin_lock(&acct_lock);
+ INIT_WORK(&acct->work, close_work);
+ init_completion(&acct->done);
+ schedule_work(&acct->work);
+ wait_for_completion(&acct->done);
+ pin_remove(&acct->pin);
ns->bacct = new;
- if (new) {
- mnt_pin(new->file->f_path.mnt);
- list_add(&new->list, &acct_list);
- }
acct->ns = NULL;
+ atomic_long_dec(&acct->pin.count);
mutex_unlock(&acct->lock);
- if (!(acct->count -= 2))
- kfree(acct);
- spin_unlock(&acct_lock);
+ pin_put(&acct->pin);
}
}
+static void acct_pin_kill(struct fs_pin *pin)
+{
+ struct bsd_acct_struct *acct;
+ acct = container_of(pin, struct bsd_acct_struct, pin);
+ mutex_lock(&acct->lock);
+ if (!acct->ns) {
+ mutex_unlock(&acct->lock);
+ pin_put(pin);
+ acct = NULL;
+ }
+ acct_kill(acct, NULL);
+}
+
static int acct_on(struct filename *pathname)
{
struct file *file;
- struct vfsmount *mnt;
+ struct vfsmount *mnt, *internal;
struct pid_namespace *ns = task_active_pid_ns(current);
struct bsd_acct_struct *acct, *old;
+ int err;
acct = kzalloc(sizeof(struct bsd_acct_struct), GFP_KERNEL);
if (!acct)
filp_close(file, NULL);
return -EIO;
}
+ internal = mnt_clone_internal(&file->f_path);
+ if (IS_ERR(internal)) {
+ kfree(acct);
+ filp_close(file, NULL);
+ return PTR_ERR(internal);
+ }
+ err = mnt_want_write(internal);
+ if (err) {
+ mntput(internal);
+ kfree(acct);
+ filp_close(file, NULL);
+ return err;
+ }
+ mnt = file->f_path.mnt;
+ file->f_path.mnt = internal;
- acct->count = 1;
+ atomic_long_set(&acct->pin.count, 1);
+ acct->pin.kill = acct_pin_kill;
acct->file = file;
acct->needcheck = jiffies;
acct->ns = ns;
mutex_init(&acct->lock);
- mnt = file->f_path.mnt;
+ mutex_lock_nested(&acct->lock, 1); /* nobody has seen it yet */
+ pin_insert(&acct->pin, mnt);
- old = acct_get(&ns->bacct);
- if (old) {
+ old = acct_get(ns);
+ if (old)
acct_kill(old, acct);
- } else {
- spin_lock(&acct_lock);
+ else
ns->bacct = acct;
- mnt_pin(mnt);
- list_add(&acct->list, &acct_list);
- spin_unlock(&acct_lock);
- }
- mntput(mnt); /* it's pinned, now give up active reference */
+ mutex_unlock(&acct->lock);
+ mnt_drop_write(mnt);
+ mntput(mnt);
return 0;
}
if (name) {
struct filename *tmp = getname(name);
+
if (IS_ERR(tmp))
return PTR_ERR(tmp);
mutex_lock(&acct_on_mutex);
mutex_unlock(&acct_on_mutex);
putname(tmp);
} else {
- acct_kill(acct_get(&task_active_pid_ns(current)->bacct), NULL);
+ acct_kill(acct_get(task_active_pid_ns(current)), NULL);
}
return error;
}
-/**
- * acct_auto_close - turn off a filesystem's accounting if it is on
- * @m: vfsmount being shut down
- *
- * If the accounting is turned on for a file in the subtree pointed to
- * to by m, turn accounting off. Done when m is about to die.
- */
-void acct_auto_close_mnt(struct vfsmount *m)
-{
- struct bsd_acct_struct *acct;
-
- spin_lock(&acct_lock);
-restart:
- list_for_each_entry(acct, &acct_list, list)
- if (acct->file->f_path.mnt == m) {
- acct->count++;
- spin_unlock(&acct_lock);
- mutex_lock(&acct->lock);
- if (!acct->ns) {
- mutex_unlock(&acct->lock);
- spin_lock(&acct_lock);
- if (!--acct->count)
- kfree(acct);
- goto restart;
- }
- acct_kill(acct, NULL);
- spin_lock(&acct_lock);
- goto restart;
- }
- spin_unlock(&acct_lock);
-}
-
-/**
- * acct_auto_close - turn off a filesystem's accounting if it is on
- * @sb: super block for the filesystem
- *
- * If the accounting is turned on for a file in the filesystem pointed
- * to by sb, turn accounting off.
- */
-void acct_auto_close(struct super_block *sb)
-{
- struct bsd_acct_struct *acct;
-
- spin_lock(&acct_lock);
-restart:
- list_for_each_entry(acct, &acct_list, list)
- if (acct->file->f_path.dentry->d_sb == sb) {
- acct->count++;
- spin_unlock(&acct_lock);
- mutex_lock(&acct->lock);
- if (!acct->ns) {
- mutex_unlock(&acct->lock);
- spin_lock(&acct_lock);
- if (!--acct->count)
- kfree(acct);
- goto restart;
- }
- acct_kill(acct, NULL);
- spin_lock(&acct_lock);
- goto restart;
- }
- spin_unlock(&acct_lock);
-}
-
void acct_exit_ns(struct pid_namespace *ns)
{
- acct_kill(acct_get(&ns->bacct), NULL);
+ acct_kill(acct_get(ns), NULL);
}
/*
return exp;
}
-#if ACCT_VERSION==1 || ACCT_VERSION==2
+#if ACCT_VERSION == 1 || ACCT_VERSION == 2
/*
* encode an u64 into a comp2_t (24 bits)
*
#define MANTSIZE2 20 /* 20 bit mantissa. */
#define EXPSIZE2 5 /* 5 bit base 2 exponent. */
#define MAXFRACT2 ((1ul << MANTSIZE2) - 1) /* Maximum fractional value. */
-#define MAXEXP2 ((1 <<EXPSIZE2) - 1) /* Maximum exponent. */
+#define MAXEXP2 ((1 << EXPSIZE2) - 1) /* Maximum exponent. */
static comp2_t encode_comp2_t(u64 value)
{
}
#endif
-#if ACCT_VERSION==3
+#if ACCT_VERSION == 3
/*
* encode an u64 into a 32 bit IEEE float
*/
unsigned exp = 190;
unsigned u;
- if (value==0) return 0;
- while ((s64)value > 0){
+ if (value == 0)
+ return 0;
+ while ((s64)value > 0) {
value <<= 1;
exp--;
}
run_time -= current->group_leader->start_time;
/* convert nsec -> AHZ */
elapsed = nsec_to_AHZ(run_time);
-#if ACCT_VERSION==3
+#if ACCT_VERSION == 3
ac->ac_etime = encode_float(elapsed);
#else
ac->ac_etime = encode_comp_t(elapsed < (unsigned long) -1l ?
- (unsigned long) elapsed : (unsigned long) -1l);
+ (unsigned long) elapsed : (unsigned long) -1l);
#endif
-#if ACCT_VERSION==1 || ACCT_VERSION==2
+#if ACCT_VERSION == 1 || ACCT_VERSION == 2
{
/* new enlarged etime field */
comp2_t etime = encode_comp2_t(elapsed);
+
ac->ac_etime_hi = etime >> 16;
ac->ac_etime_lo = (u16) etime;
}
/* we really need to bite the bullet and change layout */
ac.ac_uid = from_kuid_munged(file->f_cred->user_ns, orig_cred->uid);
ac.ac_gid = from_kgid_munged(file->f_cred->user_ns, orig_cred->gid);
-#if ACCT_VERSION==1 || ACCT_VERSION==2
+#if ACCT_VERSION == 1 || ACCT_VERSION == 2
/* backward-compatible 16 bit fields */
ac.ac_uid16 = ac.ac_uid;
ac.ac_gid16 = ac.ac_gid;
#endif
-#if ACCT_VERSION==3
+#if ACCT_VERSION == 3
ac.ac_pid = task_tgid_nr_ns(current, ns);
rcu_read_lock();
ac.ac_ppid = task_tgid_nr_ns(rcu_dereference(current->real_parent), ns);
if (group_dead && current->mm) {
struct vm_area_struct *vma;
+
down_read(¤t->mm->mmap_sem);
vma = current->mm->mmap;
while (vma) {
static void slow_acct_process(struct pid_namespace *ns)
{
for ( ; ns; ns = ns->parent) {
- struct bsd_acct_struct *acct = acct_get(&ns->bacct);
+ struct bsd_acct_struct *acct = acct_get(ns);
if (acct) {
do_acct_process(acct);
mutex_unlock(&acct->lock);
- acct_put(acct);
+ pin_put(&acct->pin);
}
}
}