anolis: mm: add switch for file zero page

ANBZ: #2510

On top of filling file holes with the zero page, add a switch to turn
the feature on and off at runtime. When it is turned off, all zero-page
mappings are evicted to ensure correctness.
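
For context, a rough user-space sketch of the access pattern this targets
(illustrative only, not part of the patch; the tmpfs path and file name are
assumptions):

#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	/* Create a one-page hole in a tmpfs file (path is illustrative). */
	int fd = open("/dev/shm/zeropage-demo", O_CREAT | O_RDWR, 0600);

	ftruncate(fd, 4096);

	/* A private, read-only mapping of the hole. */
	char *p = mmap(NULL, 4096, PROT_READ, MAP_PRIVATE, fd, 0);

	/*
	 * With the switch enabled, this read fault can be served by the
	 * shared zero page instead of allocating a new page-cache page.
	 */
	printf("%d\n", p[0]);

	munmap(p, 4096);
	close(fd);
	unlink("/dev/shm/zeropage-demo");
	return 0;
}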

Signed-off-by: Kaihao Bai <carlo.bai@linux.alibaba.com>
Reviewed-by: zhong jiang <zhongjiang-ali@linux.alibaba.com>
Link: https://gitee.com/anolis/cloud-kernel/pulls/785
Reviewed-by: Xu Yu <xuyu@linux.alibaba.com>
Author: Kaihao Bai
Date: 2022-08-29 11:26:42 +08:00
Committed by: 小龙
Parent commit: 3d89f15277
Commit: 09797fd32d
7 changed files with 245 additions and 12 deletions

include/linux/file_zeropage.h (new file)

@@ -0,0 +1,26 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_FILE_ZEROPAGE_H_
#define _LINUX_FILE_ZEROPAGE_H_
#include <linux/types.h>
#include <linux/jump_label.h>
DECLARE_STATIC_KEY_FALSE(file_zeropage_enabled_key);
static inline bool file_zeropage_enabled(void)
{
return static_branch_unlikely(&file_zeropage_enabled_key);
}
extern struct page *__alloc_zeropage(struct vm_area_struct *vma, struct vm_fault *vmf);
static inline struct page *alloc_zeropage(struct vm_area_struct *vma, struct vm_fault *vmf)
{
if (file_zeropage_enabled())
return __alloc_zeropage(vma, vmf);
return NULL;
}
inline void unmap_zeropage(struct page *page, struct vm_area_struct *vma,
struct address_space *mapping, struct vm_fault *vmf);
#endif /* _LINUX_FILE_ZEROPAGE_H_ */

include/linux/mm.h

@@ -1742,6 +1742,11 @@ extern int mlock_fixup(struct vm_area_struct *vma,
struct vm_area_struct **prev,
unsigned long start, unsigned long end,
vm_flags_t newflags);
/*
* Zap flag definitions
* @ZAP_ZEROPAGE: Only unmap the zeropages in the indicated range.
*/
#define ZAP_ZEROPAGE 0x01
/*
* Parameter block passed down to zap_pte_range in exceptional cases.
@@ -1751,6 +1756,7 @@ struct zap_details {
pgoff_t first_index; /* Lowest page->index to unmap */
pgoff_t last_index; /* Highest page->index to unmap */
struct page *single_page; /* Locked page to be unmapped */
unsigned int flags; /* Flags to indicate pages to unmap */
};
struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
@@ -1801,6 +1807,7 @@ extern int fixup_user_fault(struct mm_struct *mm,
void unmap_mapping_page(struct page *page);
void unmap_mapping_pages(struct address_space *mapping,
pgoff_t start, pgoff_t nr, bool even_cows);
void unmap_mapping_zeropages(struct address_space *mapping);
void unmap_mapping_range(struct address_space *mapping,
loff_t const holebegin, loff_t const holelen, int even_cows);
#else
@@ -1822,6 +1829,7 @@ static inline int fixup_user_fault(struct mm_struct *mm, unsigned long address,
static inline void unmap_mapping_page(struct page *page) { }
static inline void unmap_mapping_pages(struct address_space *mapping,
pgoff_t start, pgoff_t nr, bool even_cows) { }
static inline void unmap_mapping_zeropages(struct address_space *mapping) { }
static inline void unmap_mapping_range(struct address_space *mapping,
loff_t const holebegin, loff_t const holelen, int even_cows) { }
#endif

include/linux/pagemap.h

@@ -30,6 +30,7 @@ enum mapping_flags {
/* writeback related tags are not used */
AS_NO_WRITEBACK_TAGS = 5,
AS_THP_SUPPORT = 6, /* THPs supported */
AS_ZEROPAGE = 7, /* Filled file hole with zero page */
};
/**
@@ -126,6 +127,21 @@ static inline bool mapping_thp_support(struct address_space *mapping)
return test_bit(AS_THP_SUPPORT, &mapping->flags);
}
static inline void mapping_set_zeropage(struct address_space *mapping)
{
test_and_set_bit(AS_ZEROPAGE, &mapping->flags);
}
static inline void mapping_clear_zeropage(struct address_space *mapping)
{
clear_bit(AS_ZEROPAGE, &mapping->flags);
}
static inline bool mapping_zeropage(struct address_space *mapping)
{
return test_bit(AS_ZEROPAGE, &mapping->flags);
}
static inline int filemap_nr_thps(struct address_space *mapping)
{
#ifdef CONFIG_READ_ONLY_THP_FOR_FS

mm/Makefile

@@ -66,7 +66,7 @@ obj-y += init-mm.o
obj-y += memblock.o
obj-y += dma_page_copy.o
obj-y += $(memory-hotplug-y)
obj-y += file_zeropage.o
ifdef CONFIG_MMU
obj-$(CONFIG_ADVISE_SYSCALLS) += madvise.o
endif

mm/file_zeropage.c (new file, 147 lines)

@@ -0,0 +1,147 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/sysfs.h>
#include <linux/kobject.h>
#include <linux/mm.h>
#include <linux/mm_types.h>
#include <linux/rmap.h>
#include <linux/sched.h>
#include <linux/sched/signal.h>
#include <linux/sched/mm.h>
#include <linux/file_zeropage.h>
#include <linux/pagemap.h>
#include <linux/pgtable.h>
DEFINE_STATIC_KEY_FALSE(file_zeropage_enabled_key);
struct page *__alloc_zeropage(struct vm_area_struct *vma, struct vm_fault *vmf)
{
struct page *page = NULL;
if (vmf && !mm_forbids_zeropage(vma->vm_mm) &&
!(vma->vm_flags & VM_SHARED) &&
!(vmf->flags & FAULT_FLAG_NONZEROPAGE)) {
page = ZERO_PAGE(0);
get_page(page);
}
return page;
}
inline void unmap_zeropage(struct page *page, struct vm_area_struct *vma,
struct address_space *mapping, struct vm_fault *vmf)
{
if (mapping_zeropage(mapping) && page && vmf && (vma->vm_flags & VM_SHARED))
try_to_unmap_zeropage(page, TTU_ZEROPAGE);
}
static void iterate_unmap_mapping(struct mm_struct *mm)
{
struct vm_area_struct *vma = mm->mmap;
while (vma) {
/* Only evict file mappings that are mapped MAP_PRIVATE */
if (vma->vm_file && !(vma->vm_flags & VM_SHARED)) {
struct address_space *mapping = vma->vm_file->f_mapping;
/*
* If filling zero pages is disabled, all zero pages mapped in the VMA
* should be evicted before the page fault is actually handled.
*/
if (mapping_zeropage(mapping)) {
unmap_mapping_zeropages(mapping);
/*
* Clear the flag because the corresponding zero
* page has been unmapped.
*/
mapping_clear_zeropage(mapping);
}
}
vma = vma->vm_next;
}
}
static int __init setup_file_zeropage(char *s)
{
if (!strcmp(s, "1"))
static_branch_enable(&file_zeropage_enabled_key);
else if (!strcmp(s, "0"))
static_branch_disable(&file_zeropage_enabled_key);
return 1;
}
__setup("file_zeropage=", setup_file_zeropage);
static ssize_t file_zeropage_enabled_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
return sprintf(buf, "%d\n", !!static_branch_unlikely(&file_zeropage_enabled_key));
}
static ssize_t file_zeropage_enabled_store(struct kobject *kobj,
struct kobj_attribute *attr,
const char *buf, size_t count)
{
static DEFINE_MUTEX(mutex);
struct task_struct *p;
struct mm_struct *mm;
ssize_t ret = count;
mutex_lock(&mutex);
if (!strncmp(buf, "1", 1))
static_branch_enable(&file_zeropage_enabled_key);
else if (!strncmp(buf, "0", 1)) {
static_branch_disable(&file_zeropage_enabled_key);
/*
* Evict all zero pages that are mapped at file holes.
*
* Lock the mmap semaphore of each VMA mapped with MAP_SHARED to avoid
* a page fault on the MAP_SHARED VMA inserting a page into the page
* cache while the same offset is still filled by the zero page in
* other processes.
*/
read_lock(&tasklist_lock);
for_each_process(p) {
/* Iterate the mm of each task */
mm = get_task_mm(p);
if (mm) {
iterate_unmap_mapping(mm);
mmput(mm);
}
}
read_unlock(&tasklist_lock);
} else
ret = -EINVAL;
mutex_unlock(&mutex);
return ret;
}
static struct kobj_attribute file_zeropage_enabled_attr =
__ATTR(enabled, 0644, file_zeropage_enabled_show,
file_zeropage_enabled_store);
static struct attribute *file_zeropage_attrs[] = {
&file_zeropage_enabled_attr.attr,
NULL,
};
static const struct attribute_group file_zeropage_attr_group = {
.attrs = file_zeropage_attrs,
.name = "file_zeropage",
};
static int __init file_zeropage_init(void)
{
int err;
err = sysfs_create_group(mm_kobj, &file_zeropage_attr_group);
if (err) {
pr_err("file_zeropage: register sysfs failed\n");
return err;
}
return 0;
}
subsys_initcall(file_zeropage_init);
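
As a follow-up note (not part of the diff): with the attribute group above
registered on mm_kobj, the switch should surface as
/sys/kernel/mm/file_zeropage/enabled, and the __setup() hook accepts
file_zeropage=0/1 on the kernel command line. A minimal user-space sketch of
toggling it at runtime, assuming that sysfs path:

#include <fcntl.h>
#include <string.h>
#include <unistd.h>

/* Write "1" to enable, "0" to disable; disabling triggers the
 * zero-page eviction pass in file_zeropage_enabled_store(). */
static int set_file_zeropage(const char *val)
{
	int fd = open("/sys/kernel/mm/file_zeropage/enabled", O_WRONLY);
	int ret = 0;

	if (fd < 0)
		return -1;
	if (write(fd, val, strlen(val)) != (ssize_t)strlen(val))
		ret = -1;
	close(fd);
	return ret;
}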

mm/memory.c

@@ -1263,12 +1263,23 @@ again:
if (details->check_mapping &&
details->check_mapping != page_rmapping(page))
continue;
/*
* unmap_mapping_zeropages() only unmaps zero
* pages filled in the VMA. Page cache should
* not be unmapped.
*/
if (unlikely(details->flags & ZAP_ZEROPAGE))
continue;
}
ptent = ptep_get_and_clear_full(mm, addr, pte,
tlb->fullmm);
tlb_remove_tlb_entry(tlb, pte, addr);
if (unlikely(!page))
if (unlikely(!page)) {
if (unlikely(details && (details->flags & ZAP_ZEROPAGE)))
force_flush = 1;
continue;
}
if (!PageAnon(page)) {
if (pte_dirty(ptent)) {
@@ -3310,6 +3321,30 @@ void unmap_mapping_pages(struct address_space *mapping, pgoff_t start,
i_mmap_unlock_write(mapping);
}
/**
* unmap_mapping_zeropages() - Unmap zero pages from processes.
* @mapping: The address space whose zero-page mappings are to be removed.
*
* Unmap the zero pages filled into this address space from any userspace
* process which has them mmapped.
*/
void unmap_mapping_zeropages(struct address_space *mapping)
{
struct zap_details details = { };
details.check_mapping = mapping;
details.first_index = 0;
details.last_index = ULONG_MAX;
details.flags = ZAP_ZEROPAGE;
i_mmap_lock_write(mapping);
if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)))
unmap_mapping_range_tree(&mapping->i_mmap, &details);
i_mmap_unlock_write(mapping);
}
/**
* unmap_mapping_range - unmap the portion of all mmaps in the specified
* address_space corresponding to the specified byte range in the underlying

mm/shmem.c

@@ -38,6 +38,7 @@
#include <linux/hugetlb.h>
#include <linux/frontswap.h>
#include <linux/fs_parser.h>
#include <linux/file_zeropage.h>
#include <asm/tlbflush.h> /* for arch/microblaze update_mmu_cache() */
@@ -1898,13 +1899,10 @@ alloc_huge:
page = shmem_alloc_and_acct_page(gfp, inode, index, true);
if (IS_ERR(page)) {
alloc_nohuge:
if (vmf && !mm_forbids_zeropage(vma->vm_mm) &&
!(vma->vm_flags & VM_SHARED) &&
!(vmf->flags & FAULT_FLAG_NONZEROPAGE)) {
page = ZERO_PAGE(0);
get_page(page);
page = alloc_zeropage(vma, vmf);
if (page)
goto out;
}
page = shmem_alloc_and_acct_page(gfp, inode,
index, false);
}
@@ -2007,13 +2005,13 @@ clear:
error = -EINVAL;
goto unlock;
}
/*
* If the VMA that the faulting page belongs to is VM_SHARED, unmap all
* zero page mappings so that MAP_PRIVATE VMAs fault again and pick up
* the page cache.
*/
if (page && vmf && (vma->vm_flags & VM_SHARED))
try_to_unmap_zeropage(page, TTU_ZEROPAGE);
unmap_zeropage(page, vma, mapping, vmf);
out:
*pagep = page + index - hindex;
@@ -2031,8 +2029,7 @@ unacct:
goto alloc_nohuge;
}
unlock:
if (page && vmf && (vma->vm_flags & VM_SHARED))
try_to_unmap_zeropage(page, TTU_ZEROPAGE);
unmap_zeropage(page, vma, mapping, vmf);
if (page) {
unlock_page(page);
@@ -2141,6 +2138,10 @@ static vm_fault_t shmem_fault(struct vm_fault *vmf)
gfp, vma, vmf, &ret);
if (err)
return vmf_error(err);
if (is_zero_page(vmf->page))
mapping_set_zeropage(inode->i_mapping);
return ret;
}