#include <linux/atomic.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
+#include <linux/dax.h>
#include <linux/fs.h>
#include <linux/genhd.h>
#include <linux/highmem.h>
#include <linux/memcontrol.h>
#include <linux/mm.h>
#include <linux/mutex.h>
+#include <linux/pmem.h>
#include <linux/sched.h>
#include <linux/uio.h>
#include <linux/vmstat.h>
might_sleep();
do {
- void *addr;
+ void __pmem *addr;
unsigned long pfn;
long count;
unsigned pgsz = PAGE_SIZE - offset_in_page(addr);
if (pgsz > count)
pgsz = count;
- if (pgsz < PAGE_SIZE)
- memset(addr, 0, pgsz);
- else
- clear_page(addr);
+ clear_pmem(addr, pgsz);
addr += pgsz;
size -= pgsz;
count -= pgsz;
}
} while (size);
+ wmb_pmem();
return 0;
}
EXPORT_SYMBOL_GPL(dax_clear_blocks);
-static long dax_get_addr(struct buffer_head *bh, void **addr, unsigned blkbits)
+static long dax_get_addr(struct buffer_head *bh, void __pmem **addr,
+ unsigned blkbits)
{
unsigned long pfn;
sector_t sector = bh->b_blocknr << (blkbits - 9);
return bdev_direct_access(bh->b_bdev, sector, addr, &pfn, bh->b_size);
}
-static void dax_new_buf(void *addr, unsigned size, unsigned first, loff_t pos,
- loff_t end)
+/* the clear_pmem() calls are ordered by a wmb_pmem() in the caller */
+static void dax_new_buf(void __pmem *addr, unsigned size, unsigned first,
+ loff_t pos, loff_t end)
{
loff_t final = end - pos + first; /* The final byte of the buffer */
if (first > 0)
- memset(addr, 0, first);
+ clear_pmem(addr, first);
if (final < size)
- memset(addr + final, 0, size - final);
+ clear_pmem(addr + final, size - final);
}
static bool buffer_written(struct buffer_head *bh)
loff_t pos = start;
loff_t max = start;
loff_t bh_max = start;
- void *addr;
+ void __pmem *addr;
bool hole = false;
+ bool need_wmb = false;
if (iov_iter_rw(iter) != WRITE)
end = min(end, i_size_read(inode));
while (pos < end) {
- unsigned len;
+ size_t len;
if (pos == max) {
unsigned blkbits = inode->i_blkbits;
- sector_t block = pos >> blkbits;
+ long page = pos >> PAGE_SHIFT;
+ sector_t block = page << (PAGE_SHIFT - blkbits);
unsigned first = pos - (block << blkbits);
long size;
retval = dax_get_addr(bh, &addr, blkbits);
if (retval < 0)
break;
- if (buffer_unwritten(bh) || buffer_new(bh))
+ if (buffer_unwritten(bh) || buffer_new(bh)) {
dax_new_buf(addr, retval, first, pos,
end);
+ need_wmb = true;
+ }
addr += first;
size = retval - first;
}
max = min(pos + size, end);
}
- if (iov_iter_rw(iter) == WRITE)
- len = copy_from_iter_nocache(addr, max - pos, iter);
- else if (!hole)
- len = copy_to_iter(addr, max - pos, iter);
+ if (iov_iter_rw(iter) == WRITE) {
+ len = copy_from_iter_pmem(addr, max - pos, iter);
+ need_wmb = true;
+ } else if (!hole)
+ len = copy_to_iter((void __force *)addr, max - pos,
+ iter);
else
len = iov_iter_zero(max - pos, iter);
addr += len;
}
+ if (need_wmb)
+ wmb_pmem();
+
return (pos == start) ? retval : pos - start;
}
static int copy_user_bh(struct page *to, struct buffer_head *bh,
unsigned blkbits, unsigned long vaddr)
{
- void *vfrom, *vto;
+ void __pmem *vfrom;
+ void *vto;
+
if (dax_get_addr(bh, &vfrom, blkbits) < 0)
return -EIO;
vto = kmap_atomic(to);
- copy_user_page(vto, vfrom, vaddr, to);
+ copy_user_page(vto, (void __force *)vfrom, vaddr, to);
kunmap_atomic(vto);
return 0;
}
{
sector_t sector = bh->b_blocknr << (inode->i_blkbits - 9);
unsigned long vaddr = (unsigned long)vmf->virtual_address;
- void *addr;
+ void __pmem *addr;
unsigned long pfn;
pgoff_t size;
int error;
goto out;
}
- if (buffer_unwritten(bh) || buffer_new(bh))
- clear_page(addr);
+ if (buffer_unwritten(bh) || buffer_new(bh)) {
+ clear_pmem(addr, PAGE_SIZE);
+ wmb_pmem();
+ }
error = vm_insert_mixed(vma, vaddr, pfn);
unsigned long pmd_addr = address & PMD_MASK;
bool write = flags & FAULT_FLAG_WRITE;
long length;
- void *kaddr;
+ void __pmem *kaddr;
pgoff_t size, pgoff;
sector_t block, sector;
unsigned long pfn;
if ((pmd_addr + PMD_SIZE) > vma->vm_end)
return VM_FAULT_FALLBACK;
- pgoff = ((pmd_addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
+ pgoff = linear_page_index(vma, pmd_addr);
size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
if (pgoff >= size)
return VM_FAULT_SIGBUS;
if (!buffer_size_valid(&bh) || bh.b_size < PMD_SIZE)
goto fallback;
+ if (buffer_unwritten(&bh) || buffer_new(&bh)) {
+ int i;
+ for (i = 0; i < PTRS_PER_PMD; i++)
+ clear_pmem(kaddr + i * PAGE_SIZE, PAGE_SIZE);
+ wmb_pmem();
+ count_vm_event(PGMAJFAULT);
+ mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
+ result |= VM_FAULT_MAJOR;
+ }
+
+ /*
+ * If we allocated new storage, make sure no process has any
+ * zero pages covering this hole
+ */
+ if (buffer_new(&bh)) {
+ i_mmap_unlock_write(mapping);
+ unmap_mapping_range(mapping, pgoff << PAGE_SHIFT, PMD_SIZE, 0);
+ i_mmap_lock_write(mapping);
+ }
+
/*
* If a truncate happened while we were allocating blocks, we may
* leave blocks allocated to the file that are beyond EOF. We can't
if ((pgoff | PG_PMD_COLOUR) >= size)
goto fallback;
- if (is_huge_zero_pmd(*pmd))
- unmap_mapping_range(mapping, pgoff << PAGE_SHIFT, PMD_SIZE, 0);
-
if (!write && !buffer_mapped(&bh) && buffer_uptodate(&bh)) {
- bool set;
spinlock_t *ptl;
- struct mm_struct *mm = vma->vm_mm;
+ pmd_t entry;
struct page *zero_page = get_huge_zero_page();
+
if (unlikely(!zero_page))
goto fallback;
- ptl = pmd_lock(mm, pmd);
- set = set_huge_zero_page(NULL, mm, vma, pmd_addr, pmd,
- zero_page);
- spin_unlock(ptl);
+ ptl = pmd_lock(vma->vm_mm, pmd);
+ if (!pmd_none(*pmd)) {
+ spin_unlock(ptl);
+ goto fallback;
+ }
+
+ entry = mk_pmd(zero_page, vma->vm_page_prot);
+ entry = pmd_mkhuge(entry);
+ set_pmd_at(vma->vm_mm, pmd_addr, pmd, entry);
result = VM_FAULT_NOPAGE;
+ spin_unlock(ptl);
} else {
sector = bh.b_blocknr << (blkbits - 9);
length = bdev_direct_access(bh.b_bdev, sector, &kaddr, &pfn,
if ((length < PMD_SIZE) || (pfn & PG_PMD_COLOUR))
goto fallback;
- if (buffer_unwritten(&bh) || buffer_new(&bh)) {
- int i;
- for (i = 0; i < PTRS_PER_PMD; i++)
- clear_page(kaddr + i * PAGE_SIZE);
- count_vm_event(PGMAJFAULT);
- mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
- result |= VM_FAULT_MAJOR;
- }
-
result |= vmf_insert_pfn_pmd(vma, address, pmd, pfn, write);
}
if (err < 0)
return err;
if (buffer_written(&bh)) {
- void *addr;
+ void __pmem *addr;
err = dax_get_addr(&bh, &addr, inode->i_blkbits);
if (err < 0)
return err;
- memset(addr + offset, 0, length);
+ clear_pmem(addr + offset, length);
+ wmb_pmem();
}
return 0;