anolis: mm: add switch for file zero page

ANBZ: #2510

On top of filling file holes with the zero page, add a switch so the
feature can be turned on and off at runtime. When it is turned off, all
existing zero page mappings are evicted to ensure correctness.

Signed-off-by: Kaihao Bai <carlo.bai@linux.alibaba.com>
Reviewed-by: zhong jiang <zhongjiang-ali@linux.alibaba.com>
Link: https://gitee.com/anolis/cloud-kernel/pulls/785
Reviewed-by: Xu Yu <xuyu@linux.alibaba.com>
include/linux/file_zeropage.h (new file)

@@ -0,0 +1,26 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_FILE_ZEROPAGE_H_
#define _LINUX_FILE_ZEROPAGE_H_

#include <linux/types.h>
#include <linux/jump_label.h>

DECLARE_STATIC_KEY_FALSE(file_zeropage_enabled_key);
static inline bool file_zeropage_enabled(void)
{
	return static_branch_unlikely(&file_zeropage_enabled_key);
}

extern struct page *__alloc_zeropage(struct vm_area_struct *vma, struct vm_fault *vmf);

static inline struct page *alloc_zeropage(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	if (file_zeropage_enabled())
		return __alloc_zeropage(vma, vmf);
	return NULL;
}

inline void unmap_zeropage(struct page *page, struct vm_area_struct *vma,
			   struct address_space *mapping, struct vm_fault *vmf);

#endif /* _LINUX_FILE_ZEROPAGE_H_ */
include/linux/mm.h

@@ -1742,6 +1742,11 @@ extern int mlock_fixup(struct vm_area_struct *vma,
 		       struct vm_area_struct **prev,
 		       unsigned long start, unsigned long end,
 		       vm_flags_t newflags);
+/*
+ * Zap flag definitions
+ * @ZAP_ZEROPAGE: Only unmap the zeropages in the indicated range.
+ */
+#define ZAP_ZEROPAGE	0x01
 
 /*
  * Parameter block passed down to zap_pte_range in exceptional cases.
@@ -1751,6 +1756,7 @@ struct zap_details {
 	pgoff_t first_index;			/* Lowest page->index to unmap */
 	pgoff_t last_index;			/* Highest page->index to unmap */
 	struct page *single_page;		/* Locked page to be unmapped */
+	unsigned int flags;			/* Flags to indicate pages to unmap */
 };
 
 struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
@@ -1801,6 +1807,7 @@ extern int fixup_user_fault(struct mm_struct *mm,
 void unmap_mapping_page(struct page *page);
 void unmap_mapping_pages(struct address_space *mapping,
 		pgoff_t start, pgoff_t nr, bool even_cows);
+void unmap_mapping_zeropages(struct address_space *mapping);
 void unmap_mapping_range(struct address_space *mapping,
 		loff_t const holebegin, loff_t const holelen, int even_cows);
 #else
@@ -1822,6 +1829,7 @@ static inline int fixup_user_fault(struct mm_struct *mm, unsigned long address,
 static inline void unmap_mapping_page(struct page *page) { }
 static inline void unmap_mapping_pages(struct address_space *mapping,
 		pgoff_t start, pgoff_t nr, bool even_cows) { }
+static inline void unmap_mapping_zeropages(struct address_space *mapping) { }
 static inline void unmap_mapping_range(struct address_space *mapping,
 		loff_t const holebegin, loff_t const holelen, int even_cows) { }
 #endif
include/linux/pagemap.h

@@ -30,6 +30,7 @@ enum mapping_flags {
 	/* writeback related tags are not used */
 	AS_NO_WRITEBACK_TAGS = 5,
 	AS_THP_SUPPORT = 6,		/* THPs supported */
+	AS_ZEROPAGE = 7,		/* Filled file hole with zero page */
 };
 
 /**
@@ -126,6 +127,21 @@ static inline bool mapping_thp_support(struct address_space *mapping)
 	return test_bit(AS_THP_SUPPORT, &mapping->flags);
 }
 
+static inline void mapping_set_zeropage(struct address_space *mapping)
+{
+	test_and_set_bit(AS_ZEROPAGE, &mapping->flags);
+}
+
+static inline void mapping_clear_zeropage(struct address_space *mapping)
+{
+	clear_bit(AS_ZEROPAGE, &mapping->flags);
+}
+
+static inline bool mapping_zeropage(struct address_space *mapping)
+{
+	return test_bit(AS_ZEROPAGE, &mapping->flags);
+}
+
 static inline int filemap_nr_thps(struct address_space *mapping)
 {
 #ifdef CONFIG_READ_ONLY_THP_FOR_FS
mm/Makefile

@@ -66,7 +66,7 @@ obj-y += init-mm.o
 obj-y += memblock.o
 obj-y += dma_page_copy.o
 obj-y += $(memory-hotplug-y)
 
+obj-y += file_zeropage.o
 ifdef CONFIG_MMU
 obj-$(CONFIG_ADVISE_SYSCALLS) += madvise.o
 endif
mm/file_zeropage.c (new file)

@@ -0,0 +1,147 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/sysfs.h>
#include <linux/kobject.h>
#include <linux/mm.h>
#include <linux/mm_types.h>
#include <linux/rmap.h>
#include <linux/sched.h>
#include <linux/sched/signal.h>
#include <linux/sched/mm.h>
#include <linux/file_zeropage.h>
#include <linux/pagemap.h>

#include <linux/pgtable.h>

DEFINE_STATIC_KEY_FALSE(file_zeropage_enabled_key);

struct page *__alloc_zeropage(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct page *page = NULL;

	if (vmf && !mm_forbids_zeropage(vma->vm_mm) &&
	    !(vma->vm_flags & VM_SHARED) &&
	    !(vmf->flags & FAULT_FLAG_NONZEROPAGE)) {
		page = ZERO_PAGE(0);
		get_page(page);
	}

	return page;
}

inline void unmap_zeropage(struct page *page, struct vm_area_struct *vma,
			   struct address_space *mapping, struct vm_fault *vmf)
{
	if (mapping_zeropage(mapping) && page && vmf && (vma->vm_flags & VM_SHARED))
		try_to_unmap_zeropage(page, TTU_ZEROPAGE);
}

static void iterate_unmap_mapping(struct mm_struct *mm)
{
	struct vm_area_struct *vma = mm->mmap;

	while (vma) {
		/* Only evict the file mappings that are mapped MAP_PRIVATE */
		if (vma->vm_file && !(vma->vm_flags & VM_SHARED)) {
			struct address_space *mapping = vma->vm_file->f_mapping;
			/*
			 * If filling zero pages is disabled, evict all zero
			 * pages mapped in the vma before actually doing the
			 * page fault.
			 */
			if (mapping_zeropage(mapping)) {
				unmap_mapping_zeropages(mapping);
				/*
				 * Clear the flag because the corresponding zero
				 * page has been unmapped.
				 */
				mapping_clear_zeropage(mapping);
			}
		}

		vma = vma->vm_next;
	}
}

static int __init setup_file_zeropage(char *s)
{
	if (!strcmp(s, "1"))
		static_branch_enable(&file_zeropage_enabled_key);
	else if (!strcmp(s, "0"))
		static_branch_disable(&file_zeropage_enabled_key);
	return 1;
}
__setup("file_zeropage=", setup_file_zeropage);

static ssize_t file_zeropage_enabled_show(struct kobject *kobj,
					  struct kobj_attribute *attr, char *buf)
{
	return sprintf(buf, "%d\n", !!static_branch_unlikely(&file_zeropage_enabled_key));
}

static ssize_t file_zeropage_enabled_store(struct kobject *kobj,
					   struct kobj_attribute *attr,
					   const char *buf, size_t count)
{
	static DEFINE_MUTEX(mutex);
	struct task_struct *p;
	struct mm_struct *mm;
	ssize_t ret = count;

	mutex_lock(&mutex);

	if (!strncmp(buf, "1", 1))
		static_branch_enable(&file_zeropage_enabled_key);
	else if (!strncmp(buf, "0", 1)) {
		static_branch_disable(&file_zeropage_enabled_key);
		/*
		 * Evict all zero pages mapped at file holes.
		 *
		 * Hold the mmap semaphore of each mm whose VMAs are mapped
		 * MAP_SHARED, so that a MAP_SHARED VMA cannot take a page
		 * fault and insert a page into the page cache while the same
		 * offset is still filled by the zero page in other processes.
		 */
		read_lock(&tasklist_lock);
		for_each_process(p) {
			/* Iterate the mm of each task */
			mm = get_task_mm(p);
			if (mm) {
				iterate_unmap_mapping(mm);
				mmput(mm);
			}
		}
		read_unlock(&tasklist_lock);
	} else
		ret = -EINVAL;

	mutex_unlock(&mutex);
	return ret;
}

static struct kobj_attribute file_zeropage_enabled_attr =
	__ATTR(enabled, 0644, file_zeropage_enabled_show,
	       file_zeropage_enabled_store);

static struct attribute *file_zeropage_attrs[] = {
	&file_zeropage_enabled_attr.attr,
	NULL,
};

static const struct attribute_group file_zeropage_attr_group = {
	.attrs = file_zeropage_attrs,
	.name = "file_zeropage",
};

static int __init file_zeropage_init(void)
{
	int err;

	err = sysfs_create_group(mm_kobj, &file_zeropage_attr_group);
	if (err) {
		pr_err("file_zeropage: register sysfs failed\n");
		return err;
	}
	return 0;
}
subsys_initcall(file_zeropage_init);
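The switch can be set at boot via the file_zeropage= kernel parameter registered above, or flipped at runtime through the sysfs attribute. Below is a minimal userspace sketch, assuming the attribute ends up at /sys/kernel/mm/file_zeropage/enabled (derived from mm_kobj plus the "file_zeropage" group and "enabled" attribute names above); the program is illustrative only and is not part of the patch.

/* toggle_file_zeropage.c - hypothetical userspace sketch, not part of the patch */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	/* Path assumed from mm_kobj + the "file_zeropage"/"enabled" names above. */
	const char *path = "/sys/kernel/mm/file_zeropage/enabled";
	const char *val = (argc > 1 && !strcmp(argv[1], "off")) ? "0" : "1";
	int fd = open(path, O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* "1" enables the feature; "0" disables it and, per the store handler
	 * above, evicts existing zero page mappings before returning. */
	if (write(fd, val, 1) != 1)
		perror("write");
	close(fd);
	return 0;
}

Writing "0" takes the slow path in file_zeropage_enabled_store(): every process's mm is walked and existing zero page mappings are unmapped before the write completes.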
mm/memory.c

@@ -1263,12 +1263,23 @@ again:
 				if (details->check_mapping &&
 				    details->check_mapping != page_rmapping(page))
 					continue;
+
+				/*
+				 * unmap_mapping_zeropages() only unmaps zero
+				 * pages filled in the VMA. Page cache should
+				 * not be unmapped.
+				 */
+				if (unlikely(details->flags & ZAP_ZEROPAGE))
+					continue;
 			}
 			ptent = ptep_get_and_clear_full(mm, addr, pte,
 							tlb->fullmm);
 			tlb_remove_tlb_entry(tlb, pte, addr);
-			if (unlikely(!page))
+			if (unlikely(!page)) {
+				if (unlikely(details && (details->flags & ZAP_ZEROPAGE)))
+					force_flush = 1;
 				continue;
+			}
 
 			if (!PageAnon(page)) {
 				if (pte_dirty(ptent)) {
@@ -3310,6 +3321,30 @@ void unmap_mapping_pages(struct address_space *mapping, pgoff_t start,
 	i_mmap_unlock_write(mapping);
 }
 
+/**
+ * unmap_mapping_zeropages() - Unmap zeropages from processes.
+ * @mapping: The address space containing pages to be unmapped.
+ * @start: Index of first page to be unmapped.
+ * @nr: Number of pages to be unmapped. 0 to unmap to end of file.
+ *
+ * Unmap the zero pages in this address space from any userspace process which
+ * has them mmaped.
+ */
+void unmap_mapping_zeropages(struct address_space *mapping)
+{
+	struct zap_details details = { };
+
+	details.check_mapping = mapping;
+	details.first_index = 0;
+	details.last_index = ULONG_MAX;
+	details.flags = ZAP_ZEROPAGE;
+
+	i_mmap_lock_write(mapping);
+	if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)))
+		unmap_mapping_range_tree(&mapping->i_mmap, &details);
+	i_mmap_unlock_write(mapping);
+}
+
 /**
  * unmap_mapping_range - unmap the portion of all mmaps in the specified
  * address_space corresponding to the specified byte range in the underlying
mm/shmem.c

@@ -38,6 +38,7 @@
 #include <linux/hugetlb.h>
 #include <linux/frontswap.h>
 #include <linux/fs_parser.h>
+#include <linux/file_zeropage.h>
 
 #include <asm/tlbflush.h> /* for arch/microblaze update_mmu_cache() */
 
@@ -1898,13 +1899,10 @@ alloc_huge:
 	page = shmem_alloc_and_acct_page(gfp, inode, index, true);
 	if (IS_ERR(page)) {
 alloc_nohuge:
-		if (vmf && !mm_forbids_zeropage(vma->vm_mm) &&
-		    !(vma->vm_flags & VM_SHARED) &&
-		    !(vmf->flags & FAULT_FLAG_NONZEROPAGE)) {
-			page = ZERO_PAGE(0);
-			get_page(page);
+		page = alloc_zeropage(vma, vmf);
+		if (page)
 			goto out;
-		}
+
 		page = shmem_alloc_and_acct_page(gfp, inode,
 						 index, false);
 	}
@@ -2007,13 +2005,13 @@ clear:
 		error = -EINVAL;
 		goto unlock;
 	}
 
 	/*
 	 * If the VMA that the faulting page belongs to is VM_SHARED, we should
 	 * unmap all zero page mappings so that MAP_PRIVATE VMAs take the page
	 * fault again and pick up the page cache page.
 	 */
-	if (page && vmf && (vma->vm_flags & VM_SHARED))
-		try_to_unmap_zeropage(page, TTU_ZEROPAGE);
+	unmap_zeropage(page, vma, mapping, vmf);
 
 out:
 	*pagep = page + index - hindex;
@@ -2031,8 +2029,7 @@ unacct:
 		goto alloc_nohuge;
 	}
 unlock:
-	if (page && vmf && (vma->vm_flags & VM_SHARED))
-		try_to_unmap_zeropage(page, TTU_ZEROPAGE);
+	unmap_zeropage(page, vma, mapping, vmf);
 
 	if (page) {
 		unlock_page(page);
@@ -2141,6 +2138,10 @@ static vm_fault_t shmem_fault(struct vm_fault *vmf)
 				  gfp, vma, vmf, &ret);
 	if (err)
 		return vmf_error(err);
+
+	if (is_zero_page(vmf->page))
+		mapping_set_zeropage(inode->i_mapping);
+
 	return ret;
 }
 
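For context, a sketch of the access pattern this feature targets: a MAP_PRIVATE read fault over a hole in a tmpfs-backed file, which the shmem_fault() path above can satisfy with the shared zero page while the switch is enabled. The program below only triggers that scenario and does not verify which physical page backs the mapping; the /dev/shm path and sizes are assumptions about the test system, not part of the patch.

/* zeropage_demo.c - hypothetical userspace sketch, not part of the patch */
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	const char *path = "/dev/shm/zeropage_demo";	/* assumes /dev/shm is tmpfs */
	size_t len = 4096 * 16;
	int fd = open(path, O_RDWR | O_CREAT | O_TRUNC, 0600);

	if (fd < 0 || ftruncate(fd, len) < 0) {		/* sparse file: all holes */
		perror("setup");
		return 1;
	}

	unsigned char *p = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/* Each read fault below hits a file hole; with file_zeropage enabled
	 * the kernel may map the shared zero page instead of allocating and
	 * inserting a page cache page. */
	unsigned long sum = 0;
	for (size_t off = 0; off < len; off += 4096)
		sum += p[off];
	printf("sum = %lu (expected 0)\n", sum);

	munmap(p, len);
	close(fd);
	unlink(path);
	return 0;
}

Disabling the switch via sysfs while such a mapping exists is exactly the case the eviction path in mm/file_zeropage.c above handles.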