mm: convert p[te|md]_numa users to p[te|md]_protnone
Convert existing users of pte_numa and friends to the new helpers,
pte_protnone and pmd_protnone. Note that the kernel is broken after this
patch is applied until the other page table modifiers are also altered.
The series is split this way to make review easier.
Signed-off-by: Mel Gorman <mgorman@suse.de>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Acked-by: Aneesh Kumar <aneesh.kumar@linux.vnet.ibm.com>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Tested-by: Sasha Levin <sasha.levin@oracle.com>
Cc: Dave Jones <davej@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Kirill Shutemov <kirill.shutemov@linux.intel.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Sasha Levin <sasha.levin@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 510bdfb..625407e 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -212,7 +212,7 @@
/* Look up the Linux PTE for the backing page */
pte_size = psize;
pte = lookup_linux_pte_and_update(pgdir, hva, writing, &pte_size);
- if (pte_present(pte) && !pte_numa(pte)) {
+ if (pte_present(pte) && !pte_protnone(pte)) {
if (writing && !pte_write(pte))
/* make the actual HPTE be read-only */
ptel = hpte_make_readonly(ptel);
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 6154b0a..f38327b 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -398,8 +398,6 @@
* processors use the same I/D cache coherency mechanism
* as embedded.
*/
- if (error_code & DSISR_PROTFAULT)
- goto bad_area;
#endif /* CONFIG_PPC_STD_MMU */
/*
@@ -423,9 +421,6 @@
flags |= FAULT_FLAG_WRITE;
/* a read */
} else {
- /* protection fault */
- if (error_code & 0x08000000)
- goto bad_area;
if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
goto bad_area;
}
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index c90e602..83dfcb5 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -172,9 +172,14 @@
void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
pte_t pte)
{
-#ifdef CONFIG_DEBUG_VM
- WARN_ON(pte_val(*ptep) & _PAGE_PRESENT);
-#endif
+ /*
+ * When handling numa faults, we already have the pte marked
+ * _PAGE_PRESENT, but we can be sure that it is not in hpte.
+ * Hence we can use set_pte_at for them.
+ */
+ VM_WARN_ON((pte_val(*ptep) & (_PAGE_PRESENT | _PAGE_USER)) ==
+ (_PAGE_PRESENT | _PAGE_USER));
+
/* Note: mm->context.id might not yet have been assigned as
* this context might not have been activated yet when this
* is called.
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 4fe5f64..91bb883 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -718,7 +718,8 @@
pmd_t *pmdp, pmd_t pmd)
{
#ifdef CONFIG_DEBUG_VM
- WARN_ON(pmd_val(*pmdp) & _PAGE_PRESENT);
+ WARN_ON((pmd_val(*pmdp) & (_PAGE_PRESENT | _PAGE_USER)) ==
+ (_PAGE_PRESENT | _PAGE_USER));
assert_spin_locked(&mm->page_table_lock);
WARN_ON(!pmd_trans_huge(pmd));
#endif
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index 89df70e..81bf3d2 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -84,7 +84,7 @@
struct page *page;
/* Similar to the PMD case, NUMA hinting must take slow path */
- if (pte_numa(pte)) {
+ if (pte_protnone(pte)) {
pte_unmap(ptep);
return 0;
}
@@ -178,7 +178,7 @@
* slowpath for accounting purposes and so that they
* can be serialised against THP migration.
*/
- if (pmd_numa(pmd))
+ if (pmd_protnone(pmd))
return 0;
if (!gup_huge_pmd(pmd, addr, next, write, pages, nr))
return 0;
diff --git a/include/uapi/linux/mempolicy.h b/include/uapi/linux/mempolicy.h
index 0d11c3d..9cd8b21 100644
--- a/include/uapi/linux/mempolicy.h
+++ b/include/uapi/linux/mempolicy.h
@@ -67,7 +67,7 @@
#define MPOL_F_LOCAL (1 << 1) /* preferred local allocation */
#define MPOL_F_REBINDING (1 << 2) /* identify policies in rebinding */
#define MPOL_F_MOF (1 << 3) /* this policy wants migrate on fault */
-#define MPOL_F_MORON (1 << 4) /* Migrate On pte_numa Reference On Node */
+#define MPOL_F_MORON (1 << 4) /* Migrate On protnone Reference On Node */
#endif /* _UAPI_LINUX_MEMPOLICY_H */
diff --git a/mm/gup.c b/mm/gup.c
index c2da116..51bf0b0 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -64,7 +64,7 @@
migration_entry_wait(mm, pmd, address);
goto retry;
}
- if ((flags & FOLL_NUMA) && pte_numa(pte))
+ if ((flags & FOLL_NUMA) && pte_protnone(pte))
goto no_page;
if ((flags & FOLL_WRITE) && !pte_write(pte)) {
pte_unmap_unlock(ptep, ptl);
@@ -184,7 +184,7 @@
return page;
return no_page_table(vma, flags);
}
- if ((flags & FOLL_NUMA) && pmd_numa(*pmd))
+ if ((flags & FOLL_NUMA) && pmd_protnone(*pmd))
return no_page_table(vma, flags);
if (pmd_trans_huge(*pmd)) {
if (flags & FOLL_SPLIT) {
@@ -906,10 +906,10 @@
/*
* Similar to the PMD case below, NUMA hinting must take slow
- * path
+ * path using the pte_protnone check.
*/
if (!pte_present(pte) || pte_special(pte) ||
- pte_numa(pte) || (write && !pte_write(pte)))
+ pte_protnone(pte) || (write && !pte_write(pte)))
goto pte_unmap;
VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
@@ -1104,7 +1104,7 @@
* slowpath for accounting purposes and so that they
* can be serialised against THP migration.
*/
- if (pmd_numa(pmd))
+ if (pmd_protnone(pmd))
return 0;
if (!gup_huge_pmd(pmd, pmdp, addr, next, write,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index c692136..915941c 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1211,7 +1211,7 @@
return ERR_PTR(-EFAULT);
/* Full NUMA hinting faults to serialise migration in fault paths */
- if ((flags & FOLL_NUMA) && pmd_numa(*pmd))
+ if ((flags & FOLL_NUMA) && pmd_protnone(*pmd))
goto out;
page = pmd_page(*pmd);
@@ -1342,7 +1342,7 @@
/*
* Migrate the THP to the requested node, returns with page unlocked
- * and pmd_numa cleared.
+ * and access rights restored.
*/
spin_unlock(ptl);
migrated = migrate_misplaced_transhuge_page(mm, vma,
@@ -1357,7 +1357,7 @@
BUG_ON(!PageLocked(page));
pmd = pmd_mknonnuma(pmd);
set_pmd_at(mm, haddr, pmdp, pmd);
- VM_BUG_ON(pmd_numa(*pmdp));
+ VM_BUG_ON(pmd_protnone(*pmdp));
update_mmu_cache_pmd(vma, addr, pmdp);
unlock_page(page);
out_unlock:
@@ -1483,7 +1483,7 @@
ret = 1;
if (!prot_numa) {
entry = pmdp_get_and_clear_notify(mm, addr, pmd);
- if (pmd_numa(entry))
+ if (pmd_protnone(entry))
entry = pmd_mknonnuma(entry);
entry = pmd_modify(entry, newprot);
ret = HPAGE_PMD_NR;
@@ -1499,7 +1499,7 @@
* local vs remote hits on the zero page.
*/
if (!is_huge_zero_page(page) &&
- !pmd_numa(*pmd)) {
+ !pmd_protnone(*pmd)) {
pmdp_set_numa(mm, addr, pmd);
ret = HPAGE_PMD_NR;
}
@@ -1767,9 +1767,9 @@
pte_t *pte, entry;
BUG_ON(PageCompound(page+i));
/*
- * Note that pmd_numa is not transferred deliberately
- * to avoid any possibility that pte_numa leaks to
- * a PROT_NONE VMA by accident.
+ * Note that NUMA hinting access restrictions are not
+ * transferred to avoid any possibility of altering
+ * permissions across VMAs.
*/
entry = mk_pte(page + i, vma->vm_page_prot);
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
diff --git a/mm/memory.c b/mm/memory.c
index bbe6a73..92e6a62 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3124,7 +3124,7 @@
pte, pmd, flags, entry);
}
- if (pte_numa(entry))
+ if (pte_protnone(entry))
return do_numa_page(mm, vma, address, entry, pte, pmd);
ptl = pte_lockptr(mm, pmd);
@@ -3202,7 +3202,7 @@
if (pmd_trans_splitting(orig_pmd))
return 0;
- if (pmd_numa(orig_pmd))
+ if (pmd_protnone(orig_pmd))
return do_huge_pmd_numa_page(mm, vma, address,
orig_pmd, pmd);
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 3312166..44ffa69 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -75,36 +75,18 @@
oldpte = *pte;
if (pte_present(oldpte)) {
pte_t ptent;
- bool updated = false;
- if (!prot_numa) {
- ptent = ptep_modify_prot_start(mm, addr, pte);
- if (pte_numa(ptent))
- ptent = pte_mknonnuma(ptent);
- ptent = pte_modify(ptent, newprot);
- /*
- * Avoid taking write faults for pages we
- * know to be dirty.
- */
- if (dirty_accountable && pte_dirty(ptent) &&
- (pte_soft_dirty(ptent) ||
- !(vma->vm_flags & VM_SOFTDIRTY)))
- ptent = pte_mkwrite(ptent);
- ptep_modify_prot_commit(mm, addr, pte, ptent);
- updated = true;
- } else {
- struct page *page;
+ ptent = ptep_modify_prot_start(mm, addr, pte);
+ ptent = pte_modify(ptent, newprot);
- page = vm_normal_page(vma, addr, oldpte);
- if (page && !PageKsm(page)) {
- if (!pte_numa(oldpte)) {
- ptep_set_numa(mm, addr, pte);
- updated = true;
- }
- }
+ /* Avoid taking write faults for known dirty pages */
+ if (dirty_accountable && pte_dirty(ptent) &&
+ (pte_soft_dirty(ptent) ||
+ !(vma->vm_flags & VM_SOFTDIRTY))) {
+ ptent = pte_mkwrite(ptent);
}
- if (updated)
- pages++;
+ ptep_modify_prot_commit(mm, addr, pte, ptent);
+ pages++;
} else if (IS_ENABLED(CONFIG_MIGRATION)) {
swp_entry_t entry = pte_to_swp_entry(oldpte);
diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c
index dfb79e0..4b8ad76 100644
--- a/mm/pgtable-generic.c
+++ b/mm/pgtable-generic.c
@@ -193,7 +193,7 @@
pmd_t *pmdp)
{
pmd_t entry = *pmdp;
- if (pmd_numa(entry))
+ if (pmd_protnone(entry))
entry = pmd_mknonnuma(entry);
set_pmd_at(vma->vm_mm, address, pmdp, pmd_mknotpresent(entry));
flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);