Home Home > GIT Browse > stable-xen
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohannes Thumshirn <jthumshirn@suse.de>2019-07-26 13:10:11 +0200
committerJohannes Thumshirn <jthumshirn@suse.de>2019-07-26 13:10:11 +0200
commita33063dfee684a51d328b77fe75fa5be6d0206b9 (patch)
tree9c45e5d5d36df3b7168d7f1f071e58fe9768bb73
parent2b355ab40480d8aaad7ef02afdc3fc11e4cb580c (diff)
parentf842439993980bce5cab8437aeaf77489e86c521 (diff)
Merge remote-tracking branch 'origin/users/msuchanek/SLE12-SP5/for-next' into SLE12-SP5
Pull ppc64 fixes from Michal Suchanek. suse-commit: b62b19571f0475c01379f44dd95d4d2566bdbd68
-rw-r--r--arch/powerpc/include/asm/book3s/32/pgtable.h7
-rw-r--r--arch/powerpc/include/asm/book3s/64/hugetlb.h20
-rw-r--r--arch/powerpc/include/asm/book3s/64/pgtable.h13
-rw-r--r--arch/powerpc/include/asm/book3s/64/radix.h111
-rw-r--r--arch/powerpc/include/asm/book3s/64/tlbflush-radix.h2
-rw-r--r--arch/powerpc/include/asm/book3s/64/tlbflush.h12
-rw-r--r--arch/powerpc/include/asm/cacheflush.h14
-rw-r--r--arch/powerpc/include/asm/cputable.h13
-rw-r--r--arch/powerpc/include/asm/hugetlb.h25
-rw-r--r--arch/powerpc/include/asm/mmu_context.h33
-rw-r--r--arch/powerpc/include/asm/nohash/32/pgtable.h7
-rw-r--r--arch/powerpc/include/asm/nohash/64/pgtable.h7
-rw-r--r--arch/powerpc/include/asm/paca.h5
-rw-r--r--arch/powerpc/include/asm/pgtable.h1
-rw-r--r--arch/powerpc/include/asm/tlb.h13
-rw-r--r--arch/powerpc/kernel/asm-offsets.c1
-rw-r--r--arch/powerpc/kernel/cputable.c19
-rw-r--r--arch/powerpc/kernel/dt_cpu_ftrs.c4
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S4
-rw-r--r--arch/powerpc/kernel/idle_book3s.S50
-rw-r--r--arch/powerpc/kernel/process.c11
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_radix.c15
-rw-r--r--arch/powerpc/kvm/book3s_hv.c10
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S16
-rw-r--r--arch/powerpc/kvm/book3s_xive_template.c39
-rw-r--r--arch/powerpc/mm/hash_utils_64.c30
-rw-r--r--arch/powerpc/mm/hugetlbpage.c9
-rw-r--r--arch/powerpc/mm/mem.c4
-rw-r--r--arch/powerpc/mm/mmu_context.c6
-rw-r--r--arch/powerpc/mm/mmu_context_book3s64.c12
-rw-r--r--arch/powerpc/mm/pgtable-book3s64.c12
-rw-r--r--arch/powerpc/mm/pgtable-radix.c89
-rw-r--r--arch/powerpc/mm/pgtable.c47
-rw-r--r--arch/powerpc/mm/tlb-radix.c489
-rw-r--r--arch/powerpc/perf/core-book3s.c34
-rw-r--r--arch/powerpc/perf/isa207-common.c12
-rw-r--r--arch/powerpc/perf/isa207-common.h5
-rw-r--r--arch/powerpc/perf/power9-pmu.c54
-rw-r--r--arch/powerpc/platforms/powernv/idle.c27
-rw-r--r--arch/powerpc/platforms/powernv/smp.c27
-rw-r--r--arch/powerpc/sysdev/xive/common.c8
-rw-r--r--drivers/misc/cxl/cxl.h8
-rw-r--r--drivers/misc/cxl/cxllib.c4
-rw-r--r--drivers/misc/cxl/pci.c41
44 files changed, 626 insertions, 744 deletions
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
index b651843bd2e0..86fcc93216f7 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -234,15 +234,18 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
}
-static inline void __ptep_set_access_flags(struct mm_struct *mm,
+static inline void __ptep_set_access_flags(struct vm_area_struct *vma,
pte_t *ptep, pte_t entry,
- unsigned long address)
+ unsigned long address,
+ int psize)
{
unsigned long set = pte_val(entry) &
(_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC);
unsigned long clr = ~pte_val(entry) & _PAGE_RO;
pte_update(ptep, clr, set);
+
+ flush_tlb_page(vma, address);
}
#define __HAVE_ARCH_PTE_SAME
diff --git a/arch/powerpc/include/asm/book3s/64/hugetlb.h b/arch/powerpc/include/asm/book3s/64/hugetlb.h
index 5c28bd6f2ae1..123752d6f44e 100644
--- a/arch/powerpc/include/asm/book3s/64/hugetlb.h
+++ b/arch/powerpc/include/asm/book3s/64/hugetlb.h
@@ -31,26 +31,6 @@ static inline int hstate_get_psize(struct hstate *hstate)
}
}
-#define arch_make_huge_pte arch_make_huge_pte
-static inline pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
- struct page *page, int writable)
-{
- unsigned long page_shift;
-
- if (!cpu_has_feature(CPU_FTR_POWER9_DD1))
- return entry;
-
- page_shift = huge_page_shift(hstate_vma(vma));
- /*
- * We don't support 1G hugetlb pages yet.
- */
- VM_WARN_ON(page_shift == mmu_psize_defs[MMU_PAGE_1G].shift);
- if (page_shift == mmu_psize_defs[MMU_PAGE_2M].shift)
- return __pte(pte_val(entry) | R_PAGE_LARGE);
- else
- return entry;
-}
-
#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE
static inline bool gigantic_page_supported(void)
{
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 11e86633d97c..3ec6504626c8 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -454,9 +454,8 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
{
if (full && radix_enabled()) {
/*
- * Let's skip the DD1 style pte update here. We know that
- * this is a full mm pte clear and hence can be sure there is
- * no parallel set_pte.
+ * We know that this is a full mm pte clear and
+ * hence can be sure there is no parallel set_pte.
*/
return radix__ptep_get_and_clear_full(mm, addr, ptep, full);
}
@@ -744,12 +743,14 @@ static inline bool check_pte_access(unsigned long access, unsigned long ptev)
* Generic functions with hash/radix callbacks
*/
-static inline void __ptep_set_access_flags(struct mm_struct *mm,
+static inline void __ptep_set_access_flags(struct vm_area_struct *vma,
pte_t *ptep, pte_t entry,
- unsigned long address)
+ unsigned long address,
+ int psize)
{
if (radix_enabled())
- return radix__ptep_set_access_flags(mm, ptep, entry, address);
+ return radix__ptep_set_access_flags(vma, ptep, entry,
+ address, psize);
return hash__ptep_set_access_flags(ptep, entry);
}
diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
index 072c8c4e98aa..f344ed25b929 100644
--- a/arch/powerpc/include/asm/book3s/64/radix.h
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -11,12 +11,6 @@
#include <asm/book3s/64/radix-4k.h>
#endif
-/*
- * For P9 DD1 only, we need to track whether the pte's huge.
- */
-#define R_PAGE_LARGE _RPAGE_RSV1
-
-
#ifndef __ASSEMBLY__
#include <asm/book3s/64/tlbflush-radix.h>
#include <asm/cpu_has_feature.h>
@@ -122,23 +116,28 @@
extern void radix__mark_rodata_ro(void);
#endif
+extern void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep,
+ pte_t entry, unsigned long address,
+ int psize);
+
static inline unsigned long __radix_pte_update(pte_t *ptep, unsigned long clr,
unsigned long set)
{
- pte_t pte;
- unsigned long old_pte, new_pte;
-
- do {
- pte = READ_ONCE(*ptep);
- old_pte = pte_val(pte);
- new_pte = (old_pte | set) & ~clr;
-
- } while (!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
-
- return old_pte;
+ __be64 old_be, tmp_be;
+
+ __asm__ __volatile__(
+ "1: ldarx %0,0,%3 # pte_update\n"
+ " andc %1,%0,%5 \n"
+ " or %1,%1,%4 \n"
+ " stdcx. %1,0,%3 \n"
+ " bne- 1b"
+ : "=&r" (old_be), "=&r" (tmp_be), "=m" (*ptep)
+ : "r" (ptep), "r" (cpu_to_be64(set)), "r" (cpu_to_be64(clr))
+ : "cc" );
+
+ return be64_to_cpu(old_be);
}
-
static inline unsigned long radix__pte_update(struct mm_struct *mm,
unsigned long addr,
pte_t *ptep, unsigned long clr,
@@ -147,20 +146,7 @@ static inline unsigned long radix__pte_update(struct mm_struct *mm,
{
unsigned long old_pte;
- if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
-
- unsigned long new_pte;
-
- old_pte = __radix_pte_update(ptep, ~0ul, 0);
- /*
- * new value of pte
- */
- new_pte = (old_pte | set) & ~clr;
- radix__flush_tlb_pte_p9_dd1(old_pte, mm, addr);
- if (new_pte)
- __radix_pte_update(ptep, 0, new_pte);
- } else
- old_pte = __radix_pte_update(ptep, clr, set);
+ old_pte = __radix_pte_update(ptep, clr, set);
if (!huge)
assert_pte_locked(mm, addr);
@@ -188,34 +174,6 @@ static inline pte_t radix__ptep_get_and_clear_full(struct mm_struct *mm,
return __pte(old_pte);
}
-/*
- * Set the dirty and/or accessed bits atomically in a linux PTE, this
- * function doesn't need to invalidate tlb.
- */
-static inline void radix__ptep_set_access_flags(struct mm_struct *mm,
- pte_t *ptep, pte_t entry,
- unsigned long address)
-{
-
- unsigned long set = pte_val(entry) & (_PAGE_DIRTY | _PAGE_ACCESSED |
- _PAGE_RW | _PAGE_EXEC);
-
- if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
-
- unsigned long old_pte, new_pte;
-
- old_pte = __radix_pte_update(ptep, ~0, 0);
- /*
- * new value of pte
- */
- new_pte = old_pte | set;
- radix__flush_tlb_pte_p9_dd1(old_pte, mm, address);
- __radix_pte_update(ptep, 0, new_pte);
- } else
- __radix_pte_update(ptep, 0, set);
- asm volatile("ptesync" : : : "memory");
-}
-
static inline int radix__pte_same(pte_t pte_a, pte_t pte_b)
{
return ((pte_raw(pte_a) ^ pte_raw(pte_b)) == 0);
@@ -230,7 +188,24 @@ static inline void radix__set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte, int percpu)
{
*ptep = pte;
- asm volatile("ptesync" : : : "memory");
+
+ /*
+ * The architecture suggests a ptesync after setting the pte, which
+ * orders the store that updates the pte with subsequent page table
+ * walk accesses which may load the pte. Without this it may be
+ * possible for a subsequent access to result in spurious fault.
+ *
+ * This is not necessary for correctness, because a spurious fault
+ * is tolerated by the page fault handler, and this store will
+ * eventually be seen. In testing, there was no noticable increase
+ * in user faults on POWER9. Avoiding ptesync here is a significant
+ * win for things like fork. If a future microarchitecture benefits
+ * from ptesync, it should probably go into update_mmu_cache, rather
+ * than set_pte_at (which is used to set ptes unrelated to faults).
+ *
+ * Spurious faults to vmalloc region are not tolerated, so there is
+ * a ptesync in flush_cache_vmap.
+ */
}
static inline int radix__pmd_bad(pmd_t pmd)
@@ -263,8 +238,6 @@ static inline int radix__pmd_trans_huge(pmd_t pmd)
static inline pmd_t radix__pmd_mkhuge(pmd_t pmd)
{
- if (cpu_has_feature(CPU_FTR_POWER9_DD1))
- return __pmd(pmd_val(pmd) | _PAGE_PTE | R_PAGE_LARGE);
return __pmd(pmd_val(pmd) | _PAGE_PTE);
}
static inline void radix__pmdp_huge_split_prepare(struct vm_area_struct *vma,
@@ -301,18 +274,14 @@ static inline unsigned long radix__get_tree_size(void)
unsigned long rts_field;
/*
* We support 52 bits, hence:
- * DD1 52-28 = 24, 0b11000
- * Others 52-31 = 21, 0b10101
+ * bits 52 - 31 = 21, 0b10101
* RTS encoding details
* bits 0 - 3 of rts -> bits 6 - 8 unsigned long
* bits 4 - 5 of rts -> bits 62 - 63 of unsigned long
*/
- if (cpu_has_feature(CPU_FTR_POWER9_DD1))
- rts_field = (0x3UL << 61);
- else {
- rts_field = (0x5UL << 5); /* 6 - 8 bits */
- rts_field |= (0x2UL << 61);
- }
+ rts_field = (0x5UL << 5); /* 6 - 8 bits */
+ rts_field |= (0x2UL << 61);
+
return rts_field;
}
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
index 90e0de96376d..fba8c4b2015c 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
@@ -43,7 +43,5 @@ extern void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmad
extern void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr);
extern void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr);
extern void radix__flush_tlb_all(void);
-extern void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm,
- unsigned long address);
#endif
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h
index d9f3dc35d8fd..244a46f6533b 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h
@@ -3,7 +3,7 @@
#define MMU_NO_CONTEXT ~0UL
-
+#include <linux/mm_types.h>
#include <asm/book3s/64/tlbflush-hash.h>
#include <asm/book3s/64/tlbflush-radix.h>
@@ -132,6 +132,16 @@ static inline void flush_all_mm(struct mm_struct *mm)
#define flush_tlb_page(vma, addr) local_flush_tlb_page(vma, addr)
#define flush_all_mm(mm) local_flush_all_mm(mm)
#endif /* CONFIG_SMP */
+
+#define flush_tlb_fix_spurious_fault flush_tlb_fix_spurious_fault
+static inline void flush_tlb_fix_spurious_fault(struct vm_area_struct *vma,
+ unsigned long address)
+{
+ /* See ptep_set_access_flags comment */
+ if (atomic_read(&vma->vm_mm->context.copros) > 0)
+ flush_tlb_page(vma, address);
+}
+
/*
* flush the page walk cache for the address
*/
diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h
index 11843e37d9cf..0d72ec75da63 100644
--- a/arch/powerpc/include/asm/cacheflush.h
+++ b/arch/powerpc/include/asm/cacheflush.h
@@ -23,9 +23,21 @@
#define flush_cache_range(vma, start, end) do { } while (0)
#define flush_cache_page(vma, vmaddr, pfn) do { } while (0)
#define flush_icache_page(vma, page) do { } while (0)
-#define flush_cache_vmap(start, end) do { } while (0)
#define flush_cache_vunmap(start, end) do { } while (0)
+#ifdef CONFIG_PPC_BOOK3S_64
+/*
+ * Book3s has no ptesync after setting a pte, so without this ptesync it's
+ * possible for a kernel virtual mapping access to return a spurious fault
+ * if it's accessed right after the pte is set. The page fault handler does
+ * not expect this type of fault. flush_cache_vmap is not exactly the right
+ * place to put this, but it seems to work well enough.
+ */
+#define flush_cache_vmap(start, end) do { asm volatile("ptesync" ::: "memory"); } while (0)
+#else
+#define flush_cache_vmap(start, end) do { } while (0)
+#endif
+
#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
extern void flush_dcache_page(struct page *page);
#define flush_dcache_mmap_lock(mapping) do { } while (0)
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index 7262a1b8d1a1..e4527faf3853 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -209,7 +209,6 @@ static inline void cpu_feature_keys_init(void) { }
#define CPU_FTR_DAWR LONG_ASM_CONST(0x0000008000000000)
#define CPU_FTR_DABRX LONG_ASM_CONST(0x0000010000000000)
#define CPU_FTR_PMAO_BUG LONG_ASM_CONST(0x0000020000000000)
-#define CPU_FTR_POWER9_DD1 LONG_ASM_CONST(0x0000040000000000)
#define CPU_FTR_POWER9_DD2_1 LONG_ASM_CONST(0x0000080000000000)
#define CPU_FTR_P9_TM_HV_ASSIST LONG_ASM_CONST(0x0000100000000000)
#define CPU_FTR_P9_TM_XER_SO_BUG LONG_ASM_CONST(0x0000200000000000)
@@ -462,8 +461,6 @@ static inline void cpu_feature_keys_init(void) { }
CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_ARCH_207S | \
CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | CPU_FTR_PKEY | \
CPU_FTR_P9_TLBIE_BUG)
-#define CPU_FTRS_POWER9_DD1 ((CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD1) & \
- (~CPU_FTR_SAO))
#define CPU_FTRS_POWER9_DD2_0 CPU_FTRS_POWER9
#define CPU_FTRS_POWER9_DD2_1 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1)
#define CPU_FTRS_POWER9_DD2_2 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1 | \
@@ -487,16 +484,14 @@ static inline void cpu_feature_keys_init(void) { }
#define CPU_FTRS_POSSIBLE \
(CPU_FTRS_POWER7 | CPU_FTRS_POWER8E | CPU_FTRS_POWER8 | \
CPU_FTRS_POWER8_DD1 | CPU_FTR_ALTIVEC_COMP | CPU_FTR_VSX_COMP | \
- CPU_FTRS_POWER9 | CPU_FTRS_POWER9_DD1 | CPU_FTRS_POWER9_DD2_1 | \
- CPU_FTRS_POWER9_DD2_2)
+ CPU_FTRS_POWER9 | CPU_FTRS_POWER9_DD2_1 | CPU_FTRS_POWER9_DD2_2)
#else
#define CPU_FTRS_POSSIBLE \
(CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | \
CPU_FTRS_POWER6 | CPU_FTRS_POWER7 | CPU_FTRS_POWER8E | \
CPU_FTRS_POWER8 | CPU_FTRS_POWER8_DD1 | CPU_FTRS_CELL | \
CPU_FTRS_PA6T | CPU_FTR_VSX_COMP | CPU_FTR_ALTIVEC_COMP | \
- CPU_FTRS_POWER9 | CPU_FTRS_POWER9_DD1 | CPU_FTRS_POWER9_DD2_1 | \
- CPU_FTRS_POWER9_DD2_2)
+ CPU_FTRS_POWER9 | CPU_FTRS_POWER9_DD2_1 | CPU_FTRS_POWER9_DD2_2)
#endif /* CONFIG_CPU_LITTLE_ENDIAN */
#endif
#else
@@ -565,7 +560,7 @@ enum {
#define CPU_FTRS_ALWAYS \
(CPU_FTRS_POSSIBLE & ~CPU_FTR_HVMODE & CPU_FTRS_POWER7 & \
CPU_FTRS_POWER8E & CPU_FTRS_POWER8 & CPU_FTRS_POWER8_DD1 & \
- CPU_FTRS_POWER9 & CPU_FTRS_POWER9_DD1 & CPU_FTRS_POWER9_DD2_1 & \
+ CPU_FTRS_POWER9 & CPU_FTRS_POWER9_DD2_1 & \
CPU_FTRS_DT_CPU_BASE)
#else
#define CPU_FTRS_ALWAYS \
@@ -573,7 +568,7 @@ enum {
CPU_FTRS_POWER6 & CPU_FTRS_POWER7 & CPU_FTRS_CELL & \
CPU_FTRS_PA6T & CPU_FTRS_POWER8 & CPU_FTRS_POWER8E & \
CPU_FTRS_POWER8_DD1 & ~CPU_FTR_HVMODE & CPU_FTRS_POSSIBLE & \
- CPU_FTRS_POWER9 & CPU_FTRS_POWER9_DD1 & CPU_FTRS_POWER9_DD2_1 & \
+ CPU_FTRS_POWER9 & CPU_FTRS_POWER9_DD2_1 & \
CPU_FTRS_DT_CPU_BASE)
#endif /* CONFIG_CPU_LITTLE_ENDIAN */
#endif
diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index 339717304865..1d31bfba5326 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -40,12 +40,6 @@ static inline void flush_hugetlb_page(struct vm_area_struct *vma,
return radix__flush_hugetlb_page(vma, vmaddr);
}
-static inline void __local_flush_hugetlb_page(struct vm_area_struct *vma,
- unsigned long vmaddr)
-{
- if (radix_enabled())
- return radix__local_flush_hugetlb_page(vma, vmaddr);
-}
#else
static inline pte_t *hugepd_page(hugepd_t hpd)
@@ -172,22 +166,9 @@ static inline pte_t huge_pte_wrprotect(pte_t pte)
return pte_wrprotect(pte);
}
-static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
- unsigned long addr, pte_t *ptep,
- pte_t pte, int dirty)
-{
-#ifdef HUGETLB_NEED_PRELOAD
- /*
- * The "return 1" forces a call of update_mmu_cache, which will write a
- * TLB entry. Without this, platforms that don't do a write of the TLB
- * entry in the TLB miss handler asm will fault ad infinitum.
- */
- ptep_set_access_flags(vma, addr, ptep, pte, dirty);
- return 1;
-#else
- return ptep_set_access_flags(vma, addr, ptep, pte, dirty);
-#endif
-}
+extern int huge_ptep_set_access_flags(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ pte_t pte, int dirty);
static inline pte_t huge_ptep_get(pte_t *ptep)
{
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index d477a133c6bd..34de6b909d5c 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -143,24 +143,33 @@ static inline void mm_context_remove_copro(struct mm_struct *mm)
{
int c;
- c = atomic_dec_if_positive(&mm->context.copros);
-
- /* Detect imbalance between add and remove */
- WARN_ON(c < 0);
-
/*
- * Need to broadcast a global flush of the full mm before
- * decrementing active_cpus count, as the next TLBI may be
- * local and the nMMU and/or PSL need to be cleaned up.
- * Should be rare enough so that it's acceptable.
+ * When removing the last copro, we need to broadcast a global
+ * flush of the full mm, as the next TLBI may be local and the
+ * nMMU and/or PSL need to be cleaned up.
+ *
+ * Both the 'copros' and 'active_cpus' counts are looked at in
+ * flush_all_mm() to determine the scope (local/global) of the
+ * TLBIs, so we need to flush first before decrementing
+ * 'copros'. If this API is used by several callers for the
+ * same context, it can lead to over-flushing. It's hopefully
+ * not common enough to be a problem.
*
* Skip on hash, as we don't know how to do the proper flush
* for the time being. Invalidations will remain global if
- * used on hash.
+ * used on hash. Note that we can't drop 'copros' either, as
+ * it could make some invalidations local with no flush
+ * in-between.
*/
- if (c == 0 && radix_enabled()) {
+ if (radix_enabled()) {
flush_all_mm(mm);
- dec_mm_active_cpus(mm);
+
+ c = atomic_dec_if_positive(&mm->context.copros);
+ /* Detect imbalance between add and remove */
+ WARN_ON(c < 0);
+
+ if (c == 0)
+ dec_mm_active_cpus(mm);
}
}
#else
diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h
index cf2a2a12f6a9..a83633dc31fe 100644
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -276,15 +276,18 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
}
-static inline void __ptep_set_access_flags(struct mm_struct *mm,
+static inline void __ptep_set_access_flags(struct vm_area_struct *vma,
pte_t *ptep, pte_t entry,
- unsigned long address)
+ unsigned long address,
+ int psize)
{
unsigned long set = pte_val(entry) &
(_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC);
unsigned long clr = ~pte_val(entry) & _PAGE_RO;
pte_update(ptep, clr, set);
+
+ flush_tlb_page(vma, address);
}
#define __HAVE_ARCH_PTE_SAME
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
index c3a1482736c0..c9ed6065b416 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -284,9 +284,10 @@ static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
/* Set the dirty and/or accessed bits atomically in a linux PTE, this
* function doesn't need to flush the hash entry
*/
-static inline void __ptep_set_access_flags(struct mm_struct *mm,
+static inline void __ptep_set_access_flags(struct vm_area_struct *vma,
pte_t *ptep, pte_t entry,
- unsigned long address)
+ unsigned long address,
+ int psize)
{
unsigned long bits = pte_val(entry) &
(_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC);
@@ -308,6 +309,8 @@ static inline void __ptep_set_access_flags(struct mm_struct *mm,
unsigned long old = pte_val(*ptep);
*ptep = __pte(old | bits);
#endif
+
+ flush_tlb_page(vma, address);
}
#define __HAVE_ARCH_PTE_SAME
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 9beb9b7c1f5d..b10f6a6305d5 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -189,11 +189,6 @@ struct paca_struct {
u8 subcore_sibling_mask;
/* Flag to request this thread not to stop */
atomic_t dont_stop;
- /*
- * Pointer to an array which contains pointer
- * to the sibling threads' paca.
- */
- struct paca_struct **thread_sibling_pacas;
/* The PSSCR value that the kernel requested before going to stop */
u64 requested_psscr;
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index b4e538f3e6fd..b167758bf9c1 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -7,6 +7,7 @@
#include <asm/processor.h> /* For TASK_SIZE */
#include <asm/mmu.h>
#include <asm/page.h>
+#include <asm/tlbflush.h>
struct mm_struct;
diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h
index a7eabff27a0f..9138baccebb0 100644
--- a/arch/powerpc/include/asm/tlb.h
+++ b/arch/powerpc/include/asm/tlb.h
@@ -76,6 +76,19 @@ static inline int mm_is_thread_local(struct mm_struct *mm)
return false;
return cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm));
}
+static inline void mm_reset_thread_local(struct mm_struct *mm)
+{
+ WARN_ON(atomic_read(&mm->context.copros) > 0);
+ /*
+ * It's possible for mm_access to take a reference on mm_users to
+ * access the remote mm from another thread, but it's not allowed
+ * to set mm_cpumask, so mm_users may be > 1 here.
+ */
+ WARN_ON(current->mm != mm);
+ atomic_set(&mm->context.active_cpus, 1);
+ cpumask_clear(mm_cpumask(mm));
+ cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
+}
#else /* CONFIG_PPC_BOOK3S_64 */
static inline int mm_is_thread_local(struct mm_struct *mm)
{
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index aae1592d7c8a..eb6ef2f4c6e4 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -759,7 +759,6 @@ int main(void)
OFFSET(PACA_THREAD_IDLE_STATE, paca_struct, thread_idle_state);
OFFSET(PACA_THREAD_MASK, paca_struct, thread_mask);
OFFSET(PACA_SUBCORE_SIBLING_MASK, paca_struct, subcore_sibling_mask);
- OFFSET(PACA_SIBLING_PACA_PTRS, paca_struct, thread_sibling_pacas);
OFFSET(PACA_REQ_PSSCR, paca_struct, requested_psscr);
OFFSET(PACA_DONT_STOP, paca_struct, dont_stop);
#define STOP_SPR(x, f) OFFSET(x, paca_struct, stop_sprs.f)
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index df354b63d483..2a1d4ca5e42f 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -485,25 +485,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check_early = __machine_check_early_realmode_p8,
.platform = "power8",
},
- { /* Power9 DD1*/
- .pvr_mask = 0xffffff00,
- .pvr_value = 0x004e0100,
- .cpu_name = "POWER9 (raw)",
- .cpu_features = CPU_FTRS_POWER9_DD1,
- .cpu_user_features = COMMON_USER_POWER9,
- .cpu_user_features2 = COMMON_USER2_POWER9,
- .mmu_features = MMU_FTRS_POWER9,
- .icache_bsize = 128,
- .dcache_bsize = 128,
- .num_pmcs = 6,
- .pmc_type = PPC_PMC_IBM,
- .oprofile_cpu_type = "ppc64/power9",
- .oprofile_type = PPC_OPROFILE_INVALID,
- .cpu_setup = __setup_cpu_power9,
- .cpu_restore = __restore_cpu_power9,
- .machine_check_early = __machine_check_early_realmode_p9,
- .platform = "power9",
- },
{ /* Power9 DD2.0 */
.pvr_mask = 0xffffefff,
.pvr_value = 0x004e0200,
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index 685e6c8fc71e..7719896657a7 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -697,9 +697,7 @@ static __init void cpufeatures_cpu_quirks(void)
/*
* Not all quirks can be derived from the cpufeatures device tree.
*/
- if ((version & 0xffffff00) == 0x004e0100)
- cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD1;
- else if ((version & 0xffffefff) == 0x004e0200)
+ if ((version & 0xffffefff) == 0x004e0200)
; /* DD2.0 has no feature flag */
else if ((version & 0xffffefff) == 0x004e0201)
cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1;
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index b23fd2bcb1d1..a46edeef61fd 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -219,9 +219,7 @@ TRAMP_REAL_BEGIN(machine_check_common_early)
*
* This interrupt can wake directly from idle. If that is the case,
* the machine check is handled then the idle wakeup code is called
- * to restore state. In that case, the POWER9 DD1 idle PACA workaround
- * is not applied in the early machine check code, which will cause
- * bugs.
+ * to restore state.
*/
mr r11,r1 /* Save r1 */
lhz r10,PACA_IN_MCE(r13)
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index 1c17d88db288..c6091ff46ccf 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -467,43 +467,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_XER_SO_BUG)
#endif
/*
- * On waking up from stop 0,1,2 with ESL=1 on POWER9 DD1,
- * HSPRG0 will be set to the HSPRG0 value of one of the
- * threads in this core. Thus the value we have in r13
- * may not be this thread's paca pointer.
- *
- * Fortunately, the TIR remains invariant. Since this thread's
- * paca pointer is recorded in all its sibling's paca, we can
- * correctly recover this thread's paca pointer if we
- * know the index of this thread in the core.
- *
- * This index can be obtained from the TIR.
- *
- * i.e, thread's position in the core = TIR.
- * If this value is i, then this thread's paca is
- * paca->thread_sibling_pacas[i].
- */
-power9_dd1_recover_paca:
- mfspr r4, SPRN_TIR
- /*
- * Since each entry in thread_sibling_pacas is 8 bytes
- * we need to left-shift by 3 bits. Thus r4 = i * 8
- */
- sldi r4, r4, 3
- /* Get &paca->thread_sibling_pacas[0] in r5 */
- ld r5, PACA_SIBLING_PACA_PTRS(r13)
- /* Load paca->thread_sibling_pacas[i] into r13 */
- ldx r13, r4, r5
- SET_PACA(r13)
- /*
- * Indicate that we have lost NVGPR state
- * which needs to be restored from the stack.
- */
- li r3, 1
- stb r3,PACA_NAPSTATELOST(r13)
- blr
-
-/*
* Called from machine check handler for powersave wakeups.
* Low level machine check processing has already been done. Now just
* go through the wake up path to get everything in order.
@@ -537,9 +500,6 @@ pnv_powersave_wakeup:
ld r2, PACATOC(r13)
BEGIN_FTR_SECTION
-BEGIN_FTR_SECTION_NESTED(70)
- bl power9_dd1_recover_paca
-END_FTR_SECTION_NESTED_IFSET(CPU_FTR_POWER9_DD1, 70)
bl pnv_restore_hyp_resource_arch300
FTR_SECTION_ELSE
bl pnv_restore_hyp_resource_arch207
@@ -602,22 +562,12 @@ END_FTR_SECTION_IFCLR(CPU_FTR_POWER9_DD2_1)
LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state)
ld r4,ADDROFF(pnv_first_deep_stop_state)(r5)
-BEGIN_FTR_SECTION_NESTED(71)
- /*
- * Assume that we are waking up from the state
- * same as the Requested Level (RL) in the PSSCR
- * which are Bits 60-63
- */
- ld r5,PACA_REQ_PSSCR(r13)
- rldicl r5,r5,0,60
-FTR_SECTION_ELSE_NESTED(71)
/*
* 0-3 bits correspond to Power-Saving Level Status
* which indicates the idle state we are waking up from
*/
mfspr r5, SPRN_PSSCR
rldicl r5,r5,4,60
-ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_POWER9_DD1, 71)
li r0, 0 /* clear requested_psscr to say we're awake */
std r0, PACA_REQ_PSSCR(r13)
cmpd cr4,r5,r4
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 5a24b0e3b429..ec1c2330d2ce 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1272,17 +1272,6 @@ struct task_struct *__switch_to(struct task_struct *prev,
* mappings. We don't have a VAS driver that allocates those
* yet, so no cpabort is required.
*/
- if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
- /*
- * DD1 allows paste into normal system memory, so we
- * do an unpaired copy here to clear the buffer and
- * prevent a covert channel being set up.
- *
- * cpabort is not used because it is quite expensive.
- */
- asm volatile(PPC_COPY(%0, %1)
- : : "r"(dummy_copy_buffer), "r"(0));
- }
}
#endif /* CONFIG_PPC_BOOK3S_64 */
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index f9e58efaa367..5e97d4f836ca 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -66,10 +66,7 @@ int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
bits = root & RPDS_MASK;
root = root & RPDB_MASK;
- /* P9 DD1 interprets RTS (radix tree size) differently */
offset = rts + 31;
- if (cpu_has_feature(CPU_FTR_POWER9_DD1))
- offset -= 3;
/* current implementations only support 52-bit space */
if (offset != 52)
@@ -180,17 +177,7 @@ static unsigned long kvmppc_radix_update_pte(struct kvm *kvm, pte_t *ptep,
unsigned long clr, unsigned long set,
unsigned long addr, unsigned int shift)
{
- unsigned long old = 0;
-
- if (!(clr & _PAGE_PRESENT) && cpu_has_feature(CPU_FTR_POWER9_DD1) &&
- pte_present(*ptep)) {
- /* have to invalidate it first */
- old = __radix_pte_update(ptep, _PAGE_PRESENT, 0);
- kvmppc_radix_tlbie_page(kvm, addr, shift);
- set |= _PAGE_PRESENT;
- old &= _PAGE_PRESENT;
- }
- return __radix_pte_update(ptep, clr, set) | old;
+ return __radix_pte_update(ptep, clr, set);
}
void kvmppc_radix_set_pte_at(struct kvm *kvm, unsigned long addr,
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 4e3b91196dfa..284e61ddb9ed 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1695,14 +1695,6 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
r = set_vpa(vcpu, &vcpu->arch.dtl, addr, len);
break;
case KVM_REG_PPC_TB_OFFSET:
- /*
- * POWER9 DD1 has an erratum where writing TBU40 causes
- * the timebase to lose ticks. So we don't let the
- * timebase offset be changed on P9 DD1. (It is
- * initialized to zero.)
- */
- if (cpu_has_feature(CPU_FTR_POWER9_DD1))
- break;
/* round up to multiple of 2^24 */
vcpu->arch.vcore->tb_offset =
ALIGN(set_reg_val(id, *val), 1UL << 24);
@@ -2028,8 +2020,6 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
/*
* Set the default HFSCR for the guest from the host value.
* This value is only used on POWER9.
- * On POWER9 DD1, TM doesn't work, so we make sure to
- * prevent the guest from using it.
* On POWER9, we want to virtualize the doorbell facility, so we
* turn off the HFSCR bit, which causes those instructions to trap.
*/
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 2282c8f15f00..f270c06c35a1 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -926,9 +926,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_DAWR)
mtspr SPRN_PID, r7
mtspr SPRN_WORT, r8
BEGIN_FTR_SECTION
- PPC_INVALIDATE_ERAT
-END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1)
-BEGIN_FTR_SECTION
/* POWER8-only registers */
ld r5, VCPU_TCSCR(r4)
ld r6, VCPU_ACOP(r4)
@@ -1922,7 +1919,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
ld r5, VCPU_KVM(r9)
lbz r0, KVM_RADIX(r5)
cmpwi cr2, r0, 0
- beq cr2, 4f
+ beq cr2, 2f
/*
* Radix: do eieio; tlbsync; ptesync sequence in case we
@@ -1962,11 +1959,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
bdnz 1b
ptesync
-2: /* Flush the ERAT on radix P9 DD1 guest exit */
-BEGIN_FTR_SECTION
- PPC_INVALIDATE_ERAT
-END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1)
-4:
+2:
#endif /* CONFIG_PPC_RADIX_MMU */
/*
@@ -3377,11 +3370,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
mtspr SPRN_CIABR, r0
mtspr SPRN_DAWRX, r0
- /* Flush the ERAT on radix P9 DD1 guest exit */
-BEGIN_FTR_SECTION
- PPC_INVALIDATE_ERAT
-END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1)
-
BEGIN_MMU_FTR_SECTION
b 4f
END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c
index 6e41ba7ec8f4..4171ede8722b 100644
--- a/arch/powerpc/kvm/book3s_xive_template.c
+++ b/arch/powerpc/kvm/book3s_xive_template.c
@@ -25,18 +25,6 @@ static void GLUE(X_PFX,ack_pending)(struct kvmppc_xive_vcpu *xc)
*/
eieio();
- /*
- * DD1 bug workaround: If PIPR is less favored than CPPR
- * ignore the interrupt or we might incorrectly lose an IPB
- * bit.
- */
- if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
- __be64 qw1 = __x_readq(__x_tima + TM_QW1_OS);
- u8 pipr = be64_to_cpu(qw1) & 0xff;
- if (pipr >= xc->hw_cppr)
- return;
- }
-
/* Perform the acknowledge OS to register cycle. */
ack = be16_to_cpu(__x_readw(__x_tima + TM_SPC_ACK_OS_REG));
@@ -89,8 +77,15 @@ static void GLUE(X_PFX,source_eoi)(u32 hw_irq, struct xive_irq_data *xd)
/* If the XIVE supports the new "store EOI facility, use it */
if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
__x_writeq(0, __x_eoi_page(xd) + XIVE_ESB_STORE_EOI);
- else if (hw_irq && xd->flags & XIVE_IRQ_FLAG_EOI_FW) {
+ else if (hw_irq && xd->flags & XIVE_IRQ_FLAG_EOI_FW)
opal_int_eoi(hw_irq);
+ else if (xd->flags & XIVE_IRQ_FLAG_LSI) {
+ /*
+ * For LSIs the HW EOI cycle is used rather than PQ bits,
+ * as they are automatically re-triggred in HW when still
+ * pending.
+ */
+ __x_readq(__x_eoi_page(xd) + XIVE_ESB_LOAD_EOI);
} else {
uint64_t eoi_val;
@@ -102,20 +97,12 @@ static void GLUE(X_PFX,source_eoi)(u32 hw_irq, struct xive_irq_data *xd)
*
* This allows us to then do a re-trigger if Q was set
* rather than synthetizing an interrupt in software
- *
- * For LSIs, using the HW EOI cycle works around a problem
- * on P9 DD1 PHBs where the other ESB accesses don't work
- * properly.
*/
- if (xd->flags & XIVE_IRQ_FLAG_LSI)
- __x_readq(__x_eoi_page(xd) + XIVE_ESB_LOAD_EOI);
- else {
- eoi_val = GLUE(X_PFX,esb_load)(xd, XIVE_ESB_SET_PQ_00);
-
- /* Re-trigger if needed */
- if ((eoi_val & 1) && __x_trig_page(xd))
- __x_writeq(0, __x_trig_page(xd));
- }
+ eoi_val = GLUE(X_PFX,esb_load)(xd, XIVE_ESB_SET_PQ_00);
+
+ /* Re-trigger if needed */
+ if ((eoi_val & 1) && __x_trig_page(xd))
+ __x_writeq(0, __x_trig_page(xd));
}
}
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 16bb7a1cbfcc..b734050c7268 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -806,31 +806,6 @@ int hash__remove_section_mapping(unsigned long start, unsigned long end)
}
#endif /* CONFIG_MEMORY_HOTPLUG */
-static void update_hid_for_hash(void)
-{
- unsigned long hid0;
- unsigned long rb = 3UL << PPC_BITLSHIFT(53); /* IS = 3 */
-
- asm volatile("ptesync": : :"memory");
- /* prs = 0, ric = 2, rs = 0, r = 1 is = 3 */
- asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
- : : "r"(rb), "i"(0), "i"(0), "i"(2), "r"(0) : "memory");
- asm volatile("eieio; tlbsync; ptesync; isync; slbia": : :"memory");
- trace_tlbie(0, 0, rb, 0, 2, 0, 0);
-
- /*
- * now switch the HID
- */
- hid0 = mfspr(SPRN_HID0);
- hid0 &= ~HID0_POWER9_RADIX;
- mtspr(SPRN_HID0, hid0);
- asm volatile("isync": : :"memory");
-
- /* Wait for it to happen */
- while ((mfspr(SPRN_HID0) & HID0_POWER9_RADIX))
- cpu_relax();
-}
-
static void __init hash_init_partition_table(phys_addr_t hash_table,
unsigned long htab_size)
{
@@ -843,8 +818,6 @@ static void __init hash_init_partition_table(phys_addr_t hash_table,
htab_size = __ilog2(htab_size) - 18;
mmu_partition_table_set_entry(0, hash_table | htab_size, 0);
pr_info("Partition table %p\n", partition_tb);
- if (cpu_has_feature(CPU_FTR_POWER9_DD1))
- update_hid_for_hash();
}
static void __init htab_initialize(void)
@@ -1074,9 +1047,6 @@ void hash__early_init_mmu_secondary(void)
/* Initialize hash table for that CPU */
if (!firmware_has_feature(FW_FEATURE_LPAR)) {
- if (cpu_has_feature(CPU_FTR_POWER9_DD1))
- update_hid_for_hash();
-
if (!cpu_has_feature(CPU_FTR_ARCH_300))
mtspr(SPRN_SDR1, _SDR1);
else
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index c6b5ab185643..822edbab026e 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -764,15 +764,12 @@ static int __init add_huge_page_size(unsigned long long size)
* firmware we only add hugetlb support for page sizes that can be
* supported by linux page table layout.
* For now we have
- * Radix: 2M
+ * Radix: 2M and 1G
* Hash: 16M and 16G
*/
if (radix_enabled()) {
- if (mmu_psize != MMU_PAGE_2M) {
- if (cpu_has_feature(CPU_FTR_POWER9_DD1) ||
- (mmu_psize != MMU_PAGE_1G))
- return -EINVAL;
- }
+ if (mmu_psize != MMU_PAGE_2M && mmu_psize != MMU_PAGE_1G)
+ return -EINVAL;
} else {
if (mmu_psize != MMU_PAGE_16M && mmu_psize != MMU_PAGE_16G)
return -EINVAL;
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 76f2b313c73f..5912d0458a6d 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -516,8 +516,10 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
*/
unsigned long access, trap;
- if (radix_enabled())
+ if (radix_enabled()) {
+ prefetch((void *)address);
return;
+ }
/* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */
if (!pte_young(*ptep) || address >= TASK_SIZE)
diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c
index d60a62bf4fc7..4cde5fd68932 100644
--- a/arch/powerpc/mm/mmu_context.c
+++ b/arch/powerpc/mm/mmu_context.c
@@ -56,8 +56,10 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
* in switch_slb(), and/or the store of paca->mm_ctx_id in
* copy_mm_to_paca().
*
- * On the read side the barrier is in pte_xchg(), which orders
- * the store to the PTE vs the load of mm_cpumask.
+ * On the other side, the barrier is in mm/tlb-radix.c for
+ * radix which orders earlier stores to clear the PTEs vs
+ * the load of mm_cpumask. And pte_xchg which does the same
+ * thing for hash.
*/
smp_mb();
diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c
index f7090b277027..c2a8f15a7972 100644
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/mmu_context_book3s64.c
@@ -296,15 +296,7 @@ void arch_exit_mmap(struct mm_struct *mm)
#ifdef CONFIG_PPC_RADIX_MMU
void radix__switch_mmu_context(struct mm_struct *prev, struct mm_struct *next)
{
-
- if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
- isync();
- mtspr(SPRN_PID, next->context.id);
- isync();
- asm volatile(PPC_INVALIDATE_ERAT : : :"memory");
- } else {
- mtspr(SPRN_PID, next->context.id);
- isync();
- }
+ mtspr(SPRN_PID, next->context.id);
+ isync();
}
#endif
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
index 31eed8fa8e99..6014e6768630 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -37,9 +37,12 @@ int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
#endif
changed = !pmd_same(*(pmdp), entry);
if (changed) {
- __ptep_set_access_flags(vma->vm_mm, pmdp_ptep(pmdp),
- pmd_pte(entry), address);
- flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
+ /*
+ * We can use MMU_PAGE_2M here, because only radix
+ * path look at the psize.
+ */
+ __ptep_set_access_flags(vma, pmdp_ptep(pmdp),
+ pmd_pte(entry), address, MMU_PAGE_2M);
}
return changed;
}
@@ -116,7 +119,8 @@ pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
pmd_t *pmd)
{
- return;
+ if (radix_enabled())
+ prefetch((void *)addr);
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index 47fb36b8d77e..ad57798e9cdc 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -163,16 +163,6 @@ void radix__mark_rodata_ro(void)
{
unsigned long start, end;
- /*
- * mark_rodata_ro() will mark itself as !writable at some point.
- * Due to DD1 workaround in radix__pte_update(), we'll end up with
- * an invalid pte and the system will crash quite severly.
- */
- if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
- pr_warn("Warning: Unable to mark rodata read only on P9 DD1\n");
- return;
- }
-
start = (unsigned long)_stext;
end = (unsigned long)__init_begin;
@@ -459,35 +449,6 @@ found:
return;
}
-static void update_hid_for_radix(void)
-{
- unsigned long hid0;
- unsigned long rb = 3UL << PPC_BITLSHIFT(53); /* IS = 3 */
-
- asm volatile("ptesync": : :"memory");
- /* prs = 0, ric = 2, rs = 0, r = 1 is = 3 */
- asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
- : : "r"(rb), "i"(1), "i"(0), "i"(2), "r"(0) : "memory");
- /* prs = 1, ric = 2, rs = 0, r = 1 is = 3 */
- asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
- : : "r"(rb), "i"(1), "i"(1), "i"(2), "r"(0) : "memory");
- asm volatile("eieio; tlbsync; ptesync; isync; slbia": : :"memory");
- trace_tlbie(0, 0, rb, 0, 2, 0, 1);
- trace_tlbie(0, 0, rb, 0, 2, 1, 1);
-
- /*
- * now switch the HID
- */
- hid0 = mfspr(SPRN_HID0);
- hid0 |= HID0_POWER9_RADIX;
- mtspr(SPRN_HID0, hid0);
- asm volatile("isync": : :"memory");
-
- /* Wait for it to happen */
- while (!(mfspr(SPRN_HID0) & HID0_POWER9_RADIX))
- cpu_relax();
-}
-
static void radix_init_amor(void)
{
/*
@@ -502,22 +463,12 @@ static void radix_init_amor(void)
static void radix_init_iamr(void)
{
- unsigned long iamr;
-
- /*
- * The IAMR should set to 0 on DD1.
- */
- if (cpu_has_feature(CPU_FTR_POWER9_DD1))
- iamr = 0;
- else
- iamr = (1ul << 62);
-
/*
* Radix always uses key0 of the IAMR to determine if an access is
* allowed. We set bit 0 (IBM bit 1) of key0, to prevent instruction
* fetch.
*/
- mtspr(SPRN_IAMR, iamr);
+ mtspr(SPRN_IAMR, (1ul << 62));
}
void __init radix__early_init_mmu(void)
@@ -569,8 +520,6 @@ void __init radix__early_init_mmu(void)
if (!firmware_has_feature(FW_FEATURE_LPAR)) {
radix_init_native();
- if (cpu_has_feature(CPU_FTR_POWER9_DD1))
- update_hid_for_radix();
lpcr = mfspr(SPRN_LPCR);
mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);
radix_init_partition_table();
@@ -596,10 +545,6 @@ void radix__early_init_mmu_secondary(void)
* update partition table control register and UPRT
*/
if (!firmware_has_feature(FW_FEATURE_LPAR)) {
-
- if (cpu_has_feature(CPU_FTR_POWER9_DD1))
- update_hid_for_radix();
-
lpcr = mfspr(SPRN_LPCR);
mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);
@@ -1009,3 +954,35 @@ int radix__has_transparent_hugepage(void)
return 0;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep,
+ pte_t entry, unsigned long address, int psize)
+{
+ struct mm_struct *mm = vma->vm_mm;
+ unsigned long set = pte_val(entry) & (_PAGE_DIRTY | _PAGE_ACCESSED |
+ _PAGE_RW | _PAGE_EXEC);
+ /*
+ * To avoid NMMU hang while relaxing access, we need mark
+ * the pte invalid in between.
+ */
+ if (atomic_read(&mm->context.copros) > 0) {
+ unsigned long old_pte, new_pte;
+
+ old_pte = __radix_pte_update(ptep, ~0, 0);
+ /*
+ * new value of pte
+ */
+ new_pte = old_pte | set;
+ radix__flush_tlb_page_psize(mm, address, psize);
+ __radix_pte_update(ptep, 0, new_pte);
+ } else {
+ __radix_pte_update(ptep, 0, set);
+ /*
+ * Book3S does not require a TLB flush when relaxing access
+ * restrictions when the address space is not attached to a
+ * NMMU, because the core MMU will reload the pte after taking
+ * an access fault, which is defined by the architectue.
+ */
+ }
+ /* See ptesync comment in radix__set_pte_at */
+}
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index a03ff3d99e0c..2aecc3d2749d 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -220,14 +220,53 @@ int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address,
entry = set_access_flags_filter(entry, vma, dirty);
changed = !pte_same(*(ptep), entry);
if (changed) {
- if (!is_vm_hugetlb_page(vma))
- assert_pte_locked(vma->vm_mm, address);
- __ptep_set_access_flags(vma->vm_mm, ptep, entry, address);
- flush_tlb_page(vma, address);
+ assert_pte_locked(vma->vm_mm, address);
+ __ptep_set_access_flags(vma, ptep, entry,
+ address, mmu_virtual_psize);
}
return changed;
}
+#ifdef CONFIG_HUGETLB_PAGE
+extern int huge_ptep_set_access_flags(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ pte_t pte, int dirty)
+{
+#ifdef HUGETLB_NEED_PRELOAD
+ /*
+ * The "return 1" forces a call of update_mmu_cache, which will write a
+ * TLB entry. Without this, platforms that don't do a write of the TLB
+ * entry in the TLB miss handler asm will fault ad infinitum.
+ */
+ ptep_set_access_flags(vma, addr, ptep, pte, dirty);
+ return 1;
+#else
+ int changed, psize;
+
+ pte = set_access_flags_filter(pte, vma, dirty);
+ changed = !pte_same(*(ptep), pte);
+ if (changed) {
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ struct hstate *hstate = hstate_file(vma->vm_file);
+ psize = hstate_get_psize(hstate);
+#else
+ /*
+ * Not used on non book3s64 platforms. But 8xx
+ * can possibly use tsize derived from hstate.
+ */
+ psize = 0;
+#endif
+#ifdef CONFIG_DEBUG_VM
+ assert_spin_locked(&vma->vm_mm->page_table_lock);
+#endif
+ __ptep_set_access_flags(vma, ptep, pte, addr, psize);
+ }
+ return changed;
+#endif
+}
+#endif /* CONFIG_HUGETLB_PAGE */
+
#ifdef CONFIG_DEBUG_VM
void assert_pte_locked(struct mm_struct *mm, unsigned long addr)
{
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
index ae587f4240a9..9625267f030b 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -12,6 +12,8 @@
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/memblock.h>
+#include <linux/mmu_context.h>
+#include <linux/sched/mm.h>
#include <asm/ppc-opcode.h>
#include <asm/tlb.h>
@@ -104,6 +106,20 @@ static inline void __tlbiel_pid(unsigned long pid, int set,
trace_tlbie(0, 1, rb, rs, ric, prs, r);
}
+static inline void __tlbie_pid(unsigned long pid, unsigned long ric)
+{
+ unsigned long rb,rs,prs,r;
+
+ rb = PPC_BIT(53); /* IS = 1 */
+ rs = pid << PPC_BITLSHIFT(31);
+ prs = 1; /* process scoped */
+ r = 1; /* radix format */
+
+ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+ : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+ trace_tlbie(0, 0, rb, rs, ric, prs, r);
+}
+
static inline void __tlbiel_va(unsigned long va, unsigned long pid,
unsigned long ap, unsigned long ric)
{
@@ -178,38 +194,93 @@ static inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
{
- unsigned long rb,rs,prs,r;
-
- rb = PPC_BIT(53); /* IS = 1 */
- rs = pid << PPC_BITLSHIFT(31);
- prs = 1; /* process scoped */
- r = 1; /* radix format */
-
asm volatile("ptesync": : :"memory");
- asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
- : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+
+ /*
+ * Workaround the fact that the "ric" argument to __tlbie_pid
+ * must be a compile-time contraint to match the "i" constraint
+ * in the asm statement.
+ */
+ switch (ric) {
+ case RIC_FLUSH_TLB:
+ __tlbie_pid(pid, RIC_FLUSH_TLB);
+ break;
+ case RIC_FLUSH_PWC:
+ __tlbie_pid(pid, RIC_FLUSH_PWC);
+ break;
+ case RIC_FLUSH_ALL:
+ default:
+ __tlbie_pid(pid, RIC_FLUSH_ALL);
+ }
fixup_tlbie();
asm volatile("eieio; tlbsync; ptesync": : :"memory");
- trace_tlbie(0, 0, rb, rs, ric, prs, r);
+}
+
+static inline void __tlbiel_va_range(unsigned long start, unsigned long end,
+ unsigned long pid, unsigned long page_size,
+ unsigned long psize)
+{
+ unsigned long addr;
+ unsigned long ap = mmu_get_ap(psize);
+
+ for (addr = start; addr < end; addr += page_size)
+ __tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
}
static inline void _tlbiel_va(unsigned long va, unsigned long pid,
- unsigned long ap, unsigned long ric)
+ unsigned long psize, unsigned long ric)
{
+ unsigned long ap = mmu_get_ap(psize);
+
asm volatile("ptesync": : :"memory");
__tlbiel_va(va, pid, ap, ric);
asm volatile("ptesync": : :"memory");
}
+static inline void _tlbiel_va_range(unsigned long start, unsigned long end,
+ unsigned long pid, unsigned long page_size,
+ unsigned long psize, bool also_pwc)
+{
+ asm volatile("ptesync": : :"memory");
+ if (also_pwc)
+ __tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
+ __tlbiel_va_range(start, end, pid, page_size, psize);
+ asm volatile("ptesync": : :"memory");
+}
+
+static inline void __tlbie_va_range(unsigned long start, unsigned long end,
+ unsigned long pid, unsigned long page_size,
+ unsigned long psize)
+{
+ unsigned long addr;
+ unsigned long ap = mmu_get_ap(psize);
+
+ for (addr = start; addr < end; addr += page_size)
+ __tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
+}
+
static inline void _tlbie_va(unsigned long va, unsigned long pid,
- unsigned long ap, unsigned long ric)
+ unsigned long psize, unsigned long ric)
{
+ unsigned long ap = mmu_get_ap(psize);
+
asm volatile("ptesync": : :"memory");
__tlbie_va(va, pid, ap, ric);
fixup_tlbie();
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
+static inline void _tlbie_va_range(unsigned long start, unsigned long end,
+ unsigned long pid, unsigned long page_size,
+ unsigned long psize, bool also_pwc)
+{
+ asm volatile("ptesync": : :"memory");
+ if (also_pwc)
+ __tlbie_pid(pid, RIC_FLUSH_PWC);
+ __tlbie_va_range(start, end, pid, page_size, psize);
+ fixup_tlbie();
+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
+}
/*
* Base TLB flushing operations:
@@ -252,12 +323,11 @@ void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmadd
int psize)
{
unsigned long pid;
- unsigned long ap = mmu_get_ap(psize);
preempt_disable();
- pid = mm ? mm->context.id : 0;
+ pid = mm->context.id;
if (pid != MMU_NO_CONTEXT)
- _tlbiel_va(vmaddr, pid, ap, RIC_FLUSH_TLB);
+ _tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
preempt_enable();
}
@@ -265,14 +335,22 @@ void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmadd
{
#ifdef CONFIG_HUGETLB_PAGE
/* need the return fix for nohash.c */
- if (vma && is_vm_hugetlb_page(vma))
- return __local_flush_hugetlb_page(vma, vmaddr);
+ if (is_vm_hugetlb_page(vma))
+ return radix__local_flush_hugetlb_page(vma, vmaddr);
#endif
- radix__local_flush_tlb_page_psize(vma ? vma->vm_mm : NULL, vmaddr,
- mmu_virtual_psize);
+ radix__local_flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
}
EXPORT_SYMBOL(radix__local_flush_tlb_page);
+static bool mm_is_singlethreaded(struct mm_struct *mm)
+{
+ if (atomic_read(&mm->context.copros) > 0)
+ return false;
+ if (atomic_read(&mm->mm_users) <= 1 && current->mm == mm)
+ return true;
+ return false;
+}
+
static bool mm_needs_flush_escalation(struct mm_struct *mm)
{
/*
@@ -280,47 +358,107 @@ static bool mm_needs_flush_escalation(struct mm_struct *mm)
* caching PTEs and not flushing them properly when
* RIC = 0 for a PID/LPID invalidate
*/
- return atomic_read(&mm->context.copros) != 0;
+ if (atomic_read(&mm->context.copros) > 0)
+ return true;
+ return false;
}
#ifdef CONFIG_SMP
+static void do_exit_flush_lazy_tlb(void *arg)
+{
+ struct mm_struct *mm = arg;
+ unsigned long pid = mm->context.id;
+
+ if (current->mm == mm)
+ return; /* Local CPU */
+
+ if (current->active_mm == mm) {
+ /*
+ * Must be a kernel thread because sender is single-threaded.
+ */
+ BUG_ON(current->mm);
+ mmgrab(&init_mm);
+ switch_mm(mm, &init_mm, current);
+ current->active_mm = &init_mm;
+ mmdrop(mm);
+ }
+ _tlbiel_pid(pid, RIC_FLUSH_ALL);
+}
+
+static void exit_flush_lazy_tlbs(struct mm_struct *mm)
+{
+ /*
+ * Would be nice if this was async so it could be run in
+ * parallel with our local flush, but generic code does not
+ * give a good API for it. Could extend the generic code or
+ * make a special powerpc IPI for flushing TLBs.
+ * For now it's not too performance critical.
+ */
+ smp_call_function_many(mm_cpumask(mm), do_exit_flush_lazy_tlb,
+ (void *)mm, 1);
+ mm_reset_thread_local(mm);
+}
+
void radix__flush_tlb_mm(struct mm_struct *mm)
{
unsigned long pid;
- preempt_disable();
pid = mm->context.id;
if (unlikely(pid == MMU_NO_CONTEXT))
- goto no_context;
+ return;
+ preempt_disable();
+ /*
+ * Order loads of mm_cpumask vs previous stores to clear ptes before
+ * the invalidate. See barrier in switch_mm_irqs_off
+ */
+ smp_mb();
if (!mm_is_thread_local(mm)) {
+ if (unlikely(mm_is_singlethreaded(mm))) {
+ exit_flush_lazy_tlbs(mm);
+ goto local;
+ }
+
if (mm_needs_flush_escalation(mm))
_tlbie_pid(pid, RIC_FLUSH_ALL);
else
_tlbie_pid(pid, RIC_FLUSH_TLB);
- } else
+ } else {
+local:
_tlbiel_pid(pid, RIC_FLUSH_TLB);
-no_context:
+ }
preempt_enable();
}
EXPORT_SYMBOL(radix__flush_tlb_mm);
-void radix__flush_all_mm(struct mm_struct *mm)
+static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
{
unsigned long pid;
- preempt_disable();
pid = mm->context.id;
if (unlikely(pid == MMU_NO_CONTEXT))
- goto no_context;
+ return;
- if (!mm_is_thread_local(mm))
+ preempt_disable();
+ smp_mb(); /* see radix__flush_tlb_mm */
+ if (!mm_is_thread_local(mm)) {
+ if (unlikely(mm_is_singlethreaded(mm))) {
+ if (!fullmm) {
+ exit_flush_lazy_tlbs(mm);
+ goto local;
+ }
+ }
_tlbie_pid(pid, RIC_FLUSH_ALL);
- else
+ } else {
+local:
_tlbiel_pid(pid, RIC_FLUSH_ALL);
-no_context:
+ }
preempt_enable();
}
+void radix__flush_all_mm(struct mm_struct *mm)
+{
+ __flush_all_mm(mm, false);
+}
EXPORT_SYMBOL(radix__flush_all_mm);
void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr)
@@ -333,28 +471,33 @@ void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
int psize)
{
unsigned long pid;
- unsigned long ap = mmu_get_ap(psize);
- preempt_disable();
- pid = mm ? mm->context.id : 0;
+ pid = mm->context.id;
if (unlikely(pid == MMU_NO_CONTEXT))
- goto bail;
- if (!mm_is_thread_local(mm))
- _tlbie_va(vmaddr, pid, ap, RIC_FLUSH_TLB);
- else
- _tlbiel_va(vmaddr, pid, ap, RIC_FLUSH_TLB);
-bail:
+ return;
+
+ preempt_disable();
+ smp_mb(); /* see radix__flush_tlb_mm */
+ if (!mm_is_thread_local(mm)) {
+ if (unlikely(mm_is_singlethreaded(mm))) {
+ exit_flush_lazy_tlbs(mm);
+ goto local;
+ }
+ _tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
+ } else {
+local:
+ _tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
+ }
preempt_enable();
}
void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
#ifdef CONFIG_HUGETLB_PAGE
- if (vma && is_vm_hugetlb_page(vma))
- return flush_hugetlb_page(vma, vmaddr);
+ if (is_vm_hugetlb_page(vma))
+ return radix__flush_hugetlb_page(vma, vmaddr);
#endif
- radix__flush_tlb_page_psize(vma ? vma->vm_mm : NULL, vmaddr,
- mmu_virtual_psize);
+ radix__flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
}
EXPORT_SYMBOL(radix__flush_tlb_page);
@@ -368,17 +511,99 @@ void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end)
}
EXPORT_SYMBOL(radix__flush_tlb_kernel_range);
+#define TLB_FLUSH_ALL -1UL
+
/*
- * Currently, for range flushing, we just do a full mm flush. Because
- * we use this in code path where we don' track the page size.
+ * Number of pages above which we invalidate the entire PID rather than
+ * flush individual pages, for local and global flushes respectively.
+ *
+ * tlbie goes out to the interconnect and individual ops are more costly.
+ * It also does not iterate over sets like the local tlbiel variant when
+ * invalidating a full PID, so it has a far lower threshold to change from
+ * individual page flushes to full-pid flushes.
*/
+static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
+static unsigned long tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2;
+
void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
unsigned long end)
{
struct mm_struct *mm = vma->vm_mm;
+ unsigned long pid;
+ unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift;
+ unsigned long page_size = 1UL << page_shift;
+ unsigned long nr_pages = (end - start) >> page_shift;
+ bool local, full;
+
+#ifdef CONFIG_HUGETLB_PAGE
+ if (is_vm_hugetlb_page(vma))
+ return radix__flush_hugetlb_tlb_range(vma, start, end);
+#endif
+
+ pid = mm->context.id;
+ if (unlikely(pid == MMU_NO_CONTEXT))
+ return;
+
+ preempt_disable();
+ smp_mb(); /* see radix__flush_tlb_mm */
+ if (!mm_is_thread_local(mm)) {
+ if (unlikely(mm_is_singlethreaded(mm))) {
+ if (end != TLB_FLUSH_ALL) {
+ exit_flush_lazy_tlbs(mm);
+ goto is_local;
+ }
+ }
+ local = false;
+ full = (end == TLB_FLUSH_ALL ||
+ nr_pages > tlb_single_page_flush_ceiling);
+ } else {
+is_local:
+ local = true;
+ full = (end == TLB_FLUSH_ALL ||
+ nr_pages > tlb_local_single_page_flush_ceiling);
+ }
+
+ if (full) {
+ if (local) {
+ _tlbiel_pid(pid, RIC_FLUSH_TLB);
+ } else {
+ if (mm_needs_flush_escalation(mm))
+ _tlbie_pid(pid, RIC_FLUSH_ALL);
+ else
+ _tlbie_pid(pid, RIC_FLUSH_TLB);
+ }
+ } else {
+ bool hflush = false;
+ unsigned long hstart, hend;
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ hstart = (start + HPAGE_PMD_SIZE - 1) >> HPAGE_PMD_SHIFT;
+ hend = end >> HPAGE_PMD_SHIFT;
+ if (hstart < hend) {
+ hstart <<= HPAGE_PMD_SHIFT;
+ hend <<= HPAGE_PMD_SHIFT;
+ hflush = true;
+ }
+#endif
- radix__flush_tlb_mm(mm);
+ asm volatile("ptesync": : :"memory");
+ if (local) {
+ __tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize);
+ if (hflush)
+ __tlbiel_va_range(hstart, hend, pid,
+ HPAGE_PMD_SIZE, MMU_PAGE_2M);
+ asm volatile("ptesync": : :"memory");
+ } else {
+ __tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize);
+ if (hflush)
+ __tlbie_va_range(hstart, hend, pid,
+ HPAGE_PMD_SIZE, MMU_PAGE_2M);
+ fixup_tlbie();
+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
+ }
+ }
+ preempt_enable();
}
EXPORT_SYMBOL(radix__flush_tlb_range);
@@ -397,13 +622,15 @@ static int radix_get_mmu_psize(int page_size)
return psize;
}
+static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
+ unsigned long end, int psize);
+
void radix__tlb_flush(struct mmu_gather *tlb)
{
int psize = 0;
struct mm_struct *mm = tlb->mm;
int page_size = tlb->page_size;
- psize = radix_get_mmu_psize(page_size);
/*
* if page size is not something we understand, do a full mm flush
*
@@ -411,108 +638,120 @@ void radix__tlb_flush(struct mmu_gather *tlb)
* that flushes the process table entry cache upon process teardown.
* See the comment for radix in arch_exit_mmap().
*/
- if (psize != -1 && !tlb->fullmm && !tlb->need_flush_all)
- radix__flush_tlb_range_psize(mm, tlb->start, tlb->end, psize);
- else if (tlb->fullmm || tlb->need_flush_all) {
- tlb->need_flush_all = 0;
- radix__flush_all_mm(mm);
- } else
- radix__flush_tlb_mm(mm);
-}
+ if (tlb->fullmm) {
+ __flush_all_mm(mm, true);
+ } else if ( (psize = radix_get_mmu_psize(page_size)) == -1) {
+ if (!tlb->need_flush_all)
+ radix__flush_tlb_mm(mm);
+ else
+ radix__flush_all_mm(mm);
+ } else {
+ unsigned long start = tlb->start;
+ unsigned long end = tlb->end;
-#define TLB_FLUSH_ALL -1UL
-/*
- * Number of pages above which we will do a bcast tlbie. Just a
- * number at this point copied from x86
- */
-static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
+ if (!tlb->need_flush_all)
+ radix__flush_tlb_range_psize(mm, start, end, psize);
+ else
+ radix__flush_tlb_pwc_range_psize(mm, start, end, psize);
+ }
+ tlb->need_flush_all = 0;
+}
-void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
- unsigned long end, int psize)
+static inline void __radix__flush_tlb_range_psize(struct mm_struct *mm,
+ unsigned long start, unsigned long end,
+ int psize, bool also_pwc)
{
unsigned long pid;
- unsigned long addr;
- int local = mm_is_thread_local(mm);
- unsigned long ap = mmu_get_ap(psize);
- unsigned long page_size = 1UL << mmu_psize_defs[psize].shift;
+ unsigned int page_shift = mmu_psize_defs[psize].shift;
+ unsigned long page_size = 1UL << page_shift;
+ unsigned long nr_pages = (end - start) >> page_shift;
+ bool local, full;
+ pid = mm->context.id;
+ if (unlikely(pid == MMU_NO_CONTEXT))
+ return;
preempt_disable();
- pid = mm ? mm->context.id : 0;
- if (unlikely(pid == MMU_NO_CONTEXT))
- goto err_out;
+ smp_mb(); /* see radix__flush_tlb_mm */
+ if (!mm_is_thread_local(mm)) {
+ if (unlikely(mm_is_singlethreaded(mm))) {
+ if (end != TLB_FLUSH_ALL) {
+ exit_flush_lazy_tlbs(mm);
+ goto is_local;
+ }
+ }
+ local = false;
+ full = (end == TLB_FLUSH_ALL ||
+ nr_pages > tlb_single_page_flush_ceiling);
+ } else {
+is_local:
+ local = true;
+ full = (end == TLB_FLUSH_ALL ||
+ nr_pages > tlb_local_single_page_flush_ceiling);
+ }
- if (end == TLB_FLUSH_ALL ||
- (end - start) > tlb_single_page_flush_ceiling * page_size) {
+ if (full) {
if (local) {
- _tlbiel_pid(pid, RIC_FLUSH_TLB);
+ _tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
} else {
if (mm_needs_flush_escalation(mm))
- _tlbie_pid(pid, RIC_FLUSH_ALL);
- else
- _tlbie_pid(pid, RIC_FLUSH_TLB);
+ also_pwc = true;
+
+ _tlbie_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
}
- goto err_out;
- }
- asm volatile("ptesync": : :"memory");
- for (addr = start; addr < end; addr += page_size) {
+ } else {
if (local)
- __tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
- else {
- __tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
- fixup_tlbie();
- }
+ _tlbiel_va_range(start, end, pid, page_size, psize, also_pwc);
+ else
+ _tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
}
- if (local)
- asm volatile("ptesync": : :"memory");
- else
- asm volatile("eieio; tlbsync; ptesync": : :"memory");
-err_out:
preempt_enable();
}
+void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
+ unsigned long end, int psize)
+{
+ return __radix__flush_tlb_range_psize(mm, start, end, psize, false);
+}
+
+static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
+ unsigned long end, int psize)
+{
+ __radix__flush_tlb_range_psize(mm, start, end, psize, true);
+}
+
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
{
- int local = mm_is_thread_local(mm);
- unsigned long ap = mmu_get_ap(mmu_virtual_psize);
unsigned long pid, end;
-
- pid = mm ? mm->context.id : 0;
- preempt_disable();
+ pid = mm->context.id;
if (unlikely(pid == MMU_NO_CONTEXT))
- goto no_context;
+ return;
/* 4k page size, just blow the world */
if (PAGE_SIZE == 0x1000) {
radix__flush_all_mm(mm);
- preempt_enable();
return;
}
- /* Otherwise first do the PWC */
- if (local)
- _tlbiel_pid(pid, RIC_FLUSH_PWC);
- else
- _tlbie_pid(pid, RIC_FLUSH_PWC);
-
- /* Then iterate the pages */
- asm volatile("ptesync": : :"memory");
end = addr + HPAGE_PMD_SIZE;
- for (; addr < end; addr += PAGE_SIZE) {
- if (local)
- _tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
- else
- _tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
- }
- if (local)
- asm volatile("ptesync": : :"memory");
- else
- asm volatile("eieio; tlbsync; ptesync": : :"memory");
+ /* Otherwise first do the PWC, then iterate the pages. */
+ preempt_disable();
+ smp_mb(); /* see radix__flush_tlb_mm */
+ if (!mm_is_thread_local(mm)) {
+ if (unlikely(mm_is_singlethreaded(mm))) {
+ exit_flush_lazy_tlbs(mm);
+ goto local;
+ }
+ _tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
+ goto local;
+ } else {
+local:
+ _tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
+ }
-no_context:
preempt_enable();
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
@@ -548,24 +787,6 @@ void radix__flush_tlb_all(void)
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
-void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm,
- unsigned long address)
-{
- /*
- * We track page size in pte only for DD1, So we can
- * call this only on DD1.
- */
- if (!cpu_has_feature(CPU_FTR_POWER9_DD1)) {
- VM_WARN_ON(1);
- return;
- }
-
- if (old_pte & R_PAGE_LARGE)
- radix__flush_tlb_page_psize(mm, address, MMU_PAGE_2M);
- else
- radix__flush_tlb_page_psize(mm, address, mmu_virtual_psize);
-}
-
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
{
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 1ef0a4a4471d..b999daaf3918 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -128,10 +128,6 @@ static inline void power_pmu_bhrb_disable(struct perf_event *event) {}
static void power_pmu_sched_task(struct perf_event_context *ctx, bool sched_in) {}
static inline void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) {}
static void pmao_restore_workaround(bool ebb) { }
-static bool use_ic(u64 event)
-{
- return false;
-}
#endif /* CONFIG_PPC32 */
static bool regs_use_siar(struct pt_regs *regs)
@@ -714,14 +710,6 @@ static void pmao_restore_workaround(bool ebb)
mtspr(SPRN_PMC6, pmcs[5]);
}
-static bool use_ic(u64 event)
-{
- if (cpu_has_feature(CPU_FTR_POWER9_DD1) &&
- (event == 0x200f2 || event == 0x300f2))
- return true;
-
- return false;
-}
#endif /* CONFIG_PPC64 */
static void perf_event_interrupt(struct pt_regs *regs);
@@ -1046,7 +1034,6 @@ static u64 check_and_compute_delta(u64 prev, u64 val)
static void power_pmu_read(struct perf_event *event)
{
s64 val, delta, prev;
- struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
if (event->hw.state & PERF_HES_STOPPED)
return;
@@ -1056,13 +1043,6 @@ static void power_pmu_read(struct perf_event *event)
if (is_ebb_event(event)) {
val = read_pmc(event->hw.idx);
- if (use_ic(event->attr.config)) {
- val = mfspr(SPRN_IC);
- if (val > cpuhw->ic_init)
- val = val - cpuhw->ic_init;
- else
- val = val + (0 - cpuhw->ic_init);
- }
local64_set(&event->hw.prev_count, val);
return;
}
@@ -1076,13 +1056,6 @@ static void power_pmu_read(struct perf_event *event)
prev = local64_read(&event->hw.prev_count);
barrier();
val = read_pmc(event->hw.idx);
- if (use_ic(event->attr.config)) {
- val = mfspr(SPRN_IC);
- if (val > cpuhw->ic_init)
- val = val - cpuhw->ic_init;
- else
- val = val + (0 - cpuhw->ic_init);
- }
delta = check_and_compute_delta(prev, val);
if (!delta)
return;
@@ -1535,13 +1508,6 @@ nocheck:
event->attr.branch_sample_type);
}
- /*
- * Workaround for POWER9 DD1 to use the Instruction Counter
- * register value for instruction counting
- */
- if (use_ic(event->attr.config))
- cpuhw->ic_init = mfspr(SPRN_IC);
-
perf_pmu_enable(event->pmu);
local_irq_restore(flags);
return ret;
diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c
index 9fd2e5c7a063..11cf9915738f 100644
--- a/arch/powerpc/perf/isa207-common.c
+++ b/arch/powerpc/perf/isa207-common.c
@@ -59,7 +59,7 @@ static bool is_event_valid(u64 event)
{
u64 valid_mask = EVENT_VALID_MASK;
- if (cpu_has_feature(CPU_FTR_ARCH_300) && !cpu_has_feature(CPU_FTR_POWER9_DD1))
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
valid_mask = p9_EVENT_VALID_MASK;
return !(event & ~valid_mask);
@@ -86,8 +86,6 @@ static void mmcra_sdar_mode(u64 event, unsigned long *mmcra)
* Incase of Power9:
* Marked event: MMCRA[SDAR_MODE] will be set to 0b00 ('No Updates'),
* or if group already have any marked events.
- * Non-Marked events (for DD1):
- * MMCRA[SDAR_MODE] will be set to 0b01
* For rest
* MMCRA[SDAR_MODE] will be set from event code.
* If sdar_mode from event is zero, default to 0b01. Hardware
@@ -96,7 +94,7 @@ static void mmcra_sdar_mode(u64 event, unsigned long *mmcra)
if (cpu_has_feature(CPU_FTR_ARCH_300)) {
if (is_event_marked(event) || (*mmcra & MMCRA_SAMPLE_ENABLE))
*mmcra &= MMCRA_SDAR_MODE_NO_UPDATES;
- else if (!cpu_has_feature(CPU_FTR_POWER9_DD1) && p9_SDAR_MODE(event))
+ else if (p9_SDAR_MODE(event))
*mmcra |= p9_SDAR_MODE(event) << MMCRA_SDAR_MODE_SHIFT;
else
*mmcra |= MMCRA_SDAR_MODE_DCACHE;
@@ -106,7 +104,7 @@ static void mmcra_sdar_mode(u64 event, unsigned long *mmcra)
static u64 thresh_cmp_val(u64 value)
{
- if (cpu_has_feature(CPU_FTR_ARCH_300) && !cpu_has_feature(CPU_FTR_POWER9_DD1))
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
return value << p9_MMCRA_THR_CMP_SHIFT;
return value << MMCRA_THR_CMP_SHIFT;
@@ -114,7 +112,7 @@ static u64 thresh_cmp_val(u64 value)
static unsigned long combine_from_event(u64 event)
{
- if (cpu_has_feature(CPU_FTR_ARCH_300) && !cpu_has_feature(CPU_FTR_POWER9_DD1))
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
return p9_EVENT_COMBINE(event);
return EVENT_COMBINE(event);
@@ -122,7 +120,7 @@ static unsigned long combine_from_event(u64 event)
static unsigned long combine_shift(unsigned long pmc)
{
- if (cpu_has_feature(CPU_FTR_ARCH_300) && !cpu_has_feature(CPU_FTR_POWER9_DD1))
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
return p9_MMCR1_COMBINE_SHIFT(pmc);
return MMCR1_COMBINE_SHIFT(pmc);
diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h
index cf28aef9ade3..e5a621699a6d 100644
--- a/arch/powerpc/perf/isa207-common.h
+++ b/arch/powerpc/perf/isa207-common.h
@@ -158,11 +158,6 @@
CNST_PMC_VAL(1) | CNST_PMC_VAL(2) | CNST_PMC_VAL(3) | \
CNST_PMC_VAL(4) | CNST_PMC_VAL(5) | CNST_PMC_VAL(6) | CNST_NC_VAL
-/*
- * Lets restrict use of PMC5 for instruction counting.
- */
-#define P9_DD1_TEST_ADDER (ISA207_TEST_ADDER | CNST_PMC_VAL(5))
-
/* Bits in MMCR1 for PowerISA v2.07 */
#define MMCR1_UNIT_SHIFT(pmc) (60 - (4 * ((pmc) - 1)))
#define MMCR1_COMBINE_SHIFT(pmc) (35 - ((pmc) - 1))
diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c
index d0280c020751..2af1f7706261 100644
--- a/arch/powerpc/perf/power9-pmu.c
+++ b/arch/powerpc/perf/power9-pmu.c
@@ -220,12 +220,6 @@ static struct attribute_group power9_pmu_events_group = {
.attrs = power9_events_attr,
};
-static const struct attribute_group *power9_isa207_pmu_attr_groups[] = {
- &isa207_pmu_format_group,
- &power9_pmu_events_group,
- NULL,
-};
-
PMU_FORMAT_ATTR(event, "config:0-51");
PMU_FORMAT_ATTR(pmcxsel, "config:0-7");
PMU_FORMAT_ATTR(mark, "config:8");
@@ -268,17 +262,6 @@ static const struct attribute_group *power9_pmu_attr_groups[] = {
NULL,
};
-static int power9_generic_events_dd1[] = {
- [PERF_COUNT_HW_CPU_CYCLES] = PM_CYC,
- [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = PM_ICT_NOSLOT_CYC,
- [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = PM_CMPLU_STALL,
- [PERF_COUNT_HW_INSTRUCTIONS] = PM_INST_DISP,
- [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PM_BR_CMPL_ALT,
- [PERF_COUNT_HW_BRANCH_MISSES] = PM_BR_MPRED_CMPL,
- [PERF_COUNT_HW_CACHE_REFERENCES] = PM_LD_REF_L1,
- [PERF_COUNT_HW_CACHE_MISSES] = PM_LD_MISS_L1_FIN,
-};
-
static int power9_generic_events[] = {
[PERF_COUNT_HW_CPU_CYCLES] = PM_CYC,
[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = PM_ICT_NOSLOT_CYC,
@@ -442,25 +425,6 @@ static int power9_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
#undef C
-static struct power_pmu power9_isa207_pmu = {
- .name = "POWER9",
- .n_counter = MAX_PMU_COUNTERS,
- .add_fields = ISA207_ADD_FIELDS,
- .test_adder = P9_DD1_TEST_ADDER,
- .compute_mmcr = isa207_compute_mmcr,
- .config_bhrb = power9_config_bhrb,
- .bhrb_filter_map = power9_bhrb_filter_map,
- .get_constraint = isa207_get_constraint,
- .get_alternatives = power9_get_alternatives,
- .disable_pmc = isa207_disable_pmc,
- .flags = PPMU_NO_SIAR | PPMU_ARCH_207S,
- .n_generic = ARRAY_SIZE(power9_generic_events_dd1),
- .generic_events = power9_generic_events_dd1,
- .cache_events = &power9_cache_events,
- .attr_groups = power9_isa207_pmu_attr_groups,
- .bhrb_nr = 32,
-};
-
static struct power_pmu power9_pmu = {
.name = "POWER9",
.n_counter = MAX_PMU_COUNTERS,
@@ -503,23 +467,7 @@ static int __init init_power9_pmu(void)
}
}
- if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
- /*
- * Since PM_INST_CMPL may not provide right counts in all
- * sampling scenarios in power9 DD1, instead use PM_INST_DISP.
- */
- EVENT_VAR(PM_INST_CMPL, _g).id = PM_INST_DISP;
- /*
- * Power9 DD1 should use PM_BR_CMPL_ALT event code for
- * "branches" to provide correct counter value.
- */
- EVENT_VAR(PM_BR_CMPL, _g).id = PM_BR_CMPL_ALT;
- EVENT_VAR(PM_BR_CMPL, _c).id = PM_BR_CMPL_ALT;
- rc = register_power_pmu(&power9_isa207_pmu);
- } else {
- rc = register_power_pmu(&power9_pmu);
- }
-
+ rc = register_power_pmu(&power9_pmu);
if (rc)
return rc;
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index 6cfaf853ab84..cf15380c575e 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -177,11 +177,6 @@ static void pnv_alloc_idle_core_states(void)
paca[cpu].core_idle_state_ptr = core_idle_state;
paca[cpu].thread_idle_state = PNV_THREAD_RUNNING;
paca[cpu].thread_mask = 1 << j;
- if (!cpu_has_feature(CPU_FTR_POWER9_DD1))
- continue;
- paca[cpu].thread_sibling_pacas =
- kmalloc_node(paca_ptr_array_size,
- GFP_KERNEL, node);
}
}
@@ -786,28 +781,6 @@ static int __init pnv_init_idle_states(void)
pnv_alloc_idle_core_states();
- /*
- * For each CPU, record its PACA address in each of it's
- * sibling thread's PACA at the slot corresponding to this
- * CPU's index in the core.
- */
- if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
- int cpu;
-
- pr_info("powernv: idle: Saving PACA pointers of all CPUs in their thread sibling PACA\n");
- for_each_present_cpu(cpu) {
- int base_cpu = cpu_first_thread_sibling(cpu);
- int idx = cpu_thread_in_core(cpu);
- int i;
-
- for (i = 0; i < threads_per_core; i++) {
- int j = base_cpu + i;
-
- paca[j].thread_sibling_pacas[idx] = &paca[cpu];
- }
- }
- }
-
if (supported_cpuidle_states & OPAL_PM_NAP_ENABLED)
ppc_md.power_save = power7_idle;
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index ac4ed7e074b8..509ab17e0897 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -281,23 +281,6 @@ static void pnv_cause_ipi(int cpu)
ic_cause_ipi(cpu);
}
-static void pnv_p9_dd1_cause_ipi(int cpu)
-{
- int this_cpu = get_cpu();
-
- /*
- * POWER9 DD1 has a global addressed msgsnd, but for now we restrict
- * IPIs to same core, because it requires additional synchronization
- * for inter-core doorbells which we do not implement.
- */
- if (cpumask_test_cpu(cpu, cpu_sibling_mask(this_cpu)))
- doorbell_global_ipi(cpu);
- else
- ic_cause_ipi(cpu);
-
- put_cpu();
-}
-
static void __init pnv_smp_probe(void)
{
if (xive_enabled())
@@ -309,14 +292,10 @@ static void __init pnv_smp_probe(void)
ic_cause_ipi = smp_ops->cause_ipi;
WARN_ON(!ic_cause_ipi);
- if (cpu_has_feature(CPU_FTR_ARCH_300)) {
- if (cpu_has_feature(CPU_FTR_POWER9_DD1))
- smp_ops->cause_ipi = pnv_p9_dd1_cause_ipi;
- else
- smp_ops->cause_ipi = doorbell_global_ipi;
- } else {
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ smp_ops->cause_ipi = doorbell_global_ipi;
+ else
smp_ops->cause_ipi = pnv_cause_ipi;
- }
}
}
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
index 744190479411..7369f16625e9 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -319,7 +319,7 @@ void xive_do_source_eoi(u32 hw_irq, struct xive_irq_data *xd)
* The FW told us to call it. This happens for some
* interrupt sources that need additional HW whacking
* beyond the ESB manipulation. For example LPC interrupts
- * on P9 DD1.0 need a latch to be clared in the LPC bridge
+ * on P9 DD1.0 needed a latch to be clared in the LPC bridge
* itself. The Firmware will take care of it.
*/
if (WARN_ON_ONCE(!xive_ops->eoi))
@@ -337,9 +337,9 @@ void xive_do_source_eoi(u32 hw_irq, struct xive_irq_data *xd)
* This allows us to then do a re-trigger if Q was set
* rather than synthesizing an interrupt in software
*
- * For LSIs, using the HW EOI cycle works around a problem
- * on P9 DD1 PHBs where the other ESB accesses don't work
- * properly.
+ * For LSIs the HW EOI cycle is used rather than PQ bits,
+ * as they are automatically re-triggred in HW when still
+ * pending.
*/
if (xd->flags & XIVE_IRQ_FLAG_LSI)
xive_esb_read(xd, XIVE_ESB_LOAD_EOI);
diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index cfd11bc68706..8f2548d46334 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -856,14 +856,6 @@ static inline bool cxl_is_power9(void)
return false;
}
-static inline bool cxl_is_power9_dd1(void)
-{
- if ((pvr_version_is(PVR_POWER9)) &&
- cpu_has_feature(CPU_FTR_POWER9_DD1))
- return true;
- return false;
-}
-
ssize_t cxl_pci_afu_read_err_buffer(struct cxl_afu *afu, char *buf,
loff_t off, size_t count);
diff --git a/drivers/misc/cxl/cxllib.c b/drivers/misc/cxl/cxllib.c
index d8eeeb561922..d5f356c0187d 100644
--- a/drivers/misc/cxl/cxllib.c
+++ b/drivers/misc/cxl/cxllib.c
@@ -102,10 +102,6 @@ int cxllib_get_xsl_config(struct pci_dev *dev, struct cxllib_xsl_config *cfg)
rc = cxl_get_xsl9_dsnctl(dev, capp_unit_id, &cfg->dsnctl);
if (rc)
return rc;
- if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
- /* workaround for DD1 - nbwind = capiind */
- cfg->dsnctl |= ((u64)0x02 << (63-47));
- }
cfg->version = CXL_XSL_CONFIG_CURRENT_VERSION;
cfg->log_bar_size = CXL_CAPI_WINDOW_LOG_SIZE;
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 1149c3a4ac11..36fd49aca06e 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -467,23 +467,21 @@ int cxl_get_xsl9_dsnctl(struct pci_dev *dev, u64 capp_unit_id, u64 *reg)
/* nMMU_ID Defaults to: b’000001001’*/
xsl_dsnctl |= ((u64)0x09 << (63-28));
- if (!(cxl_is_power9_dd1())) {
- /*
- * Used to identify CAPI packets which should be sorted into
- * the Non-Blocking queues by the PHB. This field should match
- * the PHB PBL_NBW_CMPM register
- * nbwind=0x03, bits [57:58], must include capi indicator.
- * Not supported on P9 DD1.
- */
- xsl_dsnctl |= (nbwind << (63-55));
+ /*
+ * Used to identify CAPI packets which should be sorted into
+ * the Non-Blocking queues by the PHB. This field should match
+ * the PHB PBL_NBW_CMPM register
+ * nbwind=0x03, bits [57:58], must include capi indicator.
+ * Not supported on P9 DD1.
+ */
+ xsl_dsnctl |= (nbwind << (63-55));
- /*
- * Upper 16b address bits of ASB_Notify messages sent to the
- * system. Need to match the PHB’s ASN Compare/Mask Register.
- * Not supported on P9 DD1.
- */
- xsl_dsnctl |= asnind;
- }
+ /*
+ * Upper 16b address bits of ASB_Notify messages sent to the
+ * system. Need to match the PHB’s ASN Compare/Mask Register.
+ * Not supported on P9 DD1.
+ */
+ xsl_dsnctl |= asnind;
*reg = xsl_dsnctl;
return 0;
@@ -541,15 +539,8 @@ static int init_implementation_adapter_regs_psl9(struct cxl *adapter,
/* Snoop machines */
cxl_p1_write(adapter, CXL_PSL9_APCDEDALLOC, 0x800F000200000000ULL);
- if (cxl_is_power9_dd1()) {
- /* Disabling deadlock counter CAR */
- cxl_p1_write(adapter, CXL_PSL9_GP_CT, 0x0020000000000001ULL);
- /* Enable NORST */
- cxl_p1_write(adapter, CXL_PSL9_DEBUG, 0x8000000000000000ULL);
- } else {
- /* Enable NORST and DD2 features */
- cxl_p1_write(adapter, CXL_PSL9_DEBUG, 0xC000000000000000ULL);
- }
+ /* Enable NORST and DD2 features */
+ cxl_p1_write(adapter, CXL_PSL9_DEBUG, 0xC000000000000000ULL);
/*
* Check if PSL has data-cache. We need to flush adapter datacache