anolis: mm: pgtable_share: redesign page fault of shared vmas
ANBZ: #6632

There are three issues with the current page fault path of shared VMAs: TLB flushing, memcg charging against the shadow mm, and page table statistics. We make the following modifications to address them:

1. Move the copying of the shadow pmd into pgtable_share_fault().
2. Use the shadow vma directly instead of pairing the original vma with the shadow mm (which is unsafe).

The first modification prepares for the next step: doing the TLB flush and memcg charge against the original mm.

Signed-off-by: Xin Hao <xhao@linux.alibaba.com>
Signed-off-by: Rongwei Wang <rongwei.wang@linux.alibaba.com>
Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Reviewed-by: Xu Yu <xuyu@linux.alibaba.com>
Link: https://gitee.com/anolis/cloud-kernel/pulls/2550
commit f1454a7070 (parent 27cf437e5a)
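For orientation, here is a condensed flow sketch (not compilable code) of the fault path added by the hunks below, assembled only from this diff:

/*
 * Condensed sketch of the redesigned path (see the full hunks below):
 *
 *   handle_mm_fault(vma, address, flags)
 *     -> pgtable_share_fault(vma, address, flags)          // vma_is_pgtable_shared()
 *          shadow_vma = find_vma(shadow_mm, address)
 *          pgtable_share_copy_pmd(vma, shadow_vma, address) // modification 1:
 *                                                           // share the pmd here
 *          vmf.vma = shadow_vma                             // modification 2:
 *                                                           // fault on the shadow vma
 *          handle_pte_fault(&vmf)                           // runs against shadow mm
 *          unlock orig_mm or shadow_mm, depending on VM_FAULT_RETRY
 */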
include/linux/mmap_lock.h

@@ -46,6 +46,11 @@ static inline void mmap_read_lock(struct mm_struct *mm)
        down_read(&mm->mmap_lock);
}

static inline void mmap_read_lock_nested(struct mm_struct *mm, int subclass)
{
        down_read_nested(&mm->mmap_lock, subclass);
}

static inline int mmap_read_lock_killable(struct mm_struct *mm)
{
        return down_read_killable(&mm->mmap_lock);
include/linux/pgtable_share.h

@@ -21,6 +21,9 @@ extern unsigned long pgtable_share_get_unmapped_area(struct file *filp,
                                                     unsigned long len,
                                                     unsigned long pgoff,
                                                     unsigned long flags);
extern vm_fault_t pgtable_share_copy_pmd(struct vm_area_struct *orig_vma,
                                         struct vm_area_struct *shadow_vma,
                                         unsigned long addr);

static inline bool vma_is_pgtable_shared(const struct vm_area_struct *vma)
{

@@ -64,5 +67,12 @@ static unsigned long pgtable_share_get_unmapped_area(struct file *filp,
        BUILD_BUG();
        return 0;
}

static inline vm_fault_t pgtable_share_copy_pmd(struct vm_area_struct *orig_vma,
                                                struct vm_area_struct *shadow_vma,
                                                unsigned long addr)
{
        return 0;
}
#endif
#endif
mm/memory.c
@@ -222,6 +222,7 @@ static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
        if (shared_pte) {
                tlb_flush_pmd_range(tlb, addr, PAGE_SIZE);
                tlb->freed_tables = 1;
                put_page(token);
                return;
        }
        pte_free_tlb(tlb, token, addr);
@@ -4886,6 +4887,90 @@ unlock:
        return 0;
}

#ifdef CONFIG_PAGETABLE_SHARE
static vm_fault_t pgtable_share_fault(struct vm_area_struct *vma,
                                      unsigned long address, unsigned int flags)
{
        struct vm_fault vmf = {
                .vma = vma,
                .address = address & PAGE_MASK,
                .flags = flags,
                .pgoff = linear_page_index(vma, address),
                .gfp_mask = __get_fault_gfp_mask(vma),
        };
        struct pgtable_share_struct *info;
        struct mm_struct *orig_mm, *shadow_mm;
        struct vm_area_struct *shadow_vma;
        pgd_t *pgd;
        p4d_t *p4d;
        vm_fault_t ret;

        if ((!vma->vm_file) || (!vma->vm_file->f_mapping))
                return VM_FAULT_ERROR;

        info = vma_get_pgtable_share_data(vma);
        if (!info) {
                pr_warn("VM_SHARED_PT vma with NULL pgtable_share_data");
                dump_stack_print_info(KERN_WARNING);
                return VM_FAULT_ERROR;
        }
        orig_mm = vma->vm_mm;
        shadow_mm = info->mm;
        /*
         * lock_nested() is used to avoid a possible recursive
         * locking warning.
         */
        mmap_read_lock_nested(shadow_mm, SINGLE_DEPTH_NESTING);

        shadow_vma = find_vma(shadow_mm, address);
        if (!shadow_vma) {
                ret = VM_FAULT_SIGSEGV;
                goto out;
        }

        /* Share the same pmd entry with shadow_mm. */
        ret = pgtable_share_copy_pmd(vma, shadow_vma, address);
        if (ret)
                goto out;

        /* Switch to the shadow vma and shadow mm. */
        vmf.vma = shadow_vma;

        ret = VM_FAULT_OOM;
        pgd = pgd_offset(shadow_mm, address);
        p4d = p4d_alloc(shadow_mm, pgd, address);
        if (!p4d)
                goto out;

        vmf.pud = pud_alloc(shadow_mm, p4d, address);
        if (!vmf.pud)
                goto out;

        vmf.pmd = pmd_alloc(shadow_mm, vmf.pud, address);
        if (!vmf.pmd)
                goto out;

        ret = handle_pte_fault(&vmf);

out:
        orig_mm = vma->vm_mm;
        /*
         * Release the read lock on the shared VMA's parent mm unless
         * handle_pte_fault() has already released it.
         * handle_pte_fault() sets VM_FAULT_RETRY in its return value
         * if it dropped the mmap lock. Here that means the shadow
         * mmap lock has been released, and we need to unlock the
         * original mmap lock instead.
         */
        if ((ret & VM_FAULT_RETRY) && (flags & FAULT_FLAG_ALLOW_RETRY))
                mmap_read_unlock(orig_mm);
        else
                mmap_read_unlock(shadow_mm);

        return ret;
}
#endif

/*
 * By the time we get here, we already hold the mm semaphore
 *
@@ -5100,6 +5185,10 @@ vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address,

        if (unlikely(is_vm_hugetlb_page(vma)))
                ret = hugetlb_fault(vma->vm_mm, vma, address, flags);
#ifdef CONFIG_PAGETABLE_SHARE
        else if (unlikely(vma_is_pgtable_shared(vma)))
                ret = pgtable_share_fault(vma, address, flags);
#endif
        else
                ret = __handle_mm_fault(vma, address, flags);

mm/pgtable_share.c

@@ -16,6 +16,7 @@
#include <linux/slab.h>
#include <linux/pgtable_share.h>
#include <linux/hugetlb.h>
#include <linux/mmdebug.h>

static bool vma_is_suitable_pgtable_share(struct vm_area_struct *vma)
{
@@ -127,6 +128,86 @@ int pgtable_share_insert_vma(struct mm_struct *host_mm, struct vm_area_struct *v
        return err;
}

static pmd_t *pgtable_share_create_pmd(struct mm_struct *mm, unsigned long addr,
                                       bool alloc_pte)
{
        pgd_t *pgd;
        p4d_t *p4d;
        pud_t *pud;
        pmd_t *pmd;

        pgd = pgd_offset(mm, addr);
        p4d = p4d_alloc(mm, pgd, addr);
        if (!p4d)
                goto out;

        pud = pud_alloc(mm, p4d, addr);
        if (!pud)
                goto out;

        pmd = pmd_alloc(mm, pud, addr);
        if (!pmd)
                goto out;

        if (!alloc_pte)
                return pmd;

        if (!pmd_none(*pmd) && !pmd_bad(*pmd))
                return pmd;

        if (!pte_alloc(mm, pmd))
                return pmd;
out:
        return NULL;
}

vm_fault_t pgtable_share_copy_pmd(struct vm_area_struct *orig_vma,
                                  struct vm_area_struct *shadow_vma,
                                  unsigned long addr)
{
        struct mm_struct *orig_mm = orig_vma->vm_mm;
        struct mm_struct *shadow_mm = shadow_vma->vm_mm;
        pmd_t *pmd, *shadow_pmd;
        spinlock_t *ptl;

        pmd = pgtable_share_create_pmd(orig_mm, addr, false);
        if (!pmd)
                goto out;

        shadow_pmd = pgtable_share_create_pmd(shadow_mm, addr, true);
        if (!shadow_pmd)
                goto out;

        ptl = pmd_lock(orig_mm, pmd);
        if (!pmd_none(*pmd)) {
                if (!pmd_same(*pmd, *shadow_pmd)) {
                        unsigned long pmd_aligned = (addr & PMD_MASK) >> PAGE_SHIFT;

                        /*
                         * It should be almost impossible to get here, but
                         * to be safe, print a warning and set the original
                         * pmd from the shadow pmd.
                         */
                        pr_warn("the original pmd has a different value from the shadow pmd");

                        pmd_clear(pmd);
                        flush_tlb_range(orig_vma, pmd_aligned, pmd_aligned + PMD_SIZE);
                        set_pmd_at(orig_mm, addr, pmd, *shadow_pmd);
                        spin_unlock(ptl);
                        return VM_FAULT_NOPAGE;
                }
        } else {
                pmd_populate(orig_mm, pmd, pmd_pgtable(*shadow_pmd));
                get_page(pmd_page(*shadow_pmd));
                add_mm_counter(orig_mm, MM_SHMEMPAGES, HPAGE_PMD_NR);
        }
        spin_unlock(ptl);

        return 0;
out:
        return VM_FAULT_OOM;
}

/*
 * Free the mm struct and page table data; the shadow vma
 * is also freed in mmput()->exit_mmap()->unmap_vmas().