author     Petr Tesarik <ptesarik@suse.cz>    2019-07-30 17:17:36 +0200
committer  Petr Tesarik <ptesarik@suse.cz>    2019-07-30 17:29:48 +0200
commit     ea972d4b604cf3dc57f02828acea60437e99d82d (patch)
tree       cb3622e4cc2b9c797890753b86b70b2d412a6364
parent     dc1719600e621182f6649d54b91b3f0cf184520b (diff)
parent     5d42f4382f42c649d0c135842b526d9ccb286a66 (diff)
Merge branch 'SLE15_EMBARGO' into SLE15-SP1_EMBARGO
-rw-r--r--  patches.arch/0001-x86-speculation-Prepare-entry-code-for-Spectre-v1-sw.patch                 186
-rw-r--r--  patches.arch/0002-x86-speculation-Enable-Spectre-v1-swapgs-mitigations.patch                 250
-rw-r--r--  patches.arch/x86-speculation-swapgs-exclude-ATOMs-from-speculating-through-SWAPGS.patch     152
-rw-r--r--  patches.suse/0005-x86-enter-Use-IBRS-on-syscall-and-interrupts.patch                          32
-rw-r--r--  patches.suse/x86-cpufeatures-Combine-word-11-and-12-into-a-new-sc.patch                      15
-rw-r--r--  patches.suse/x86-cpufeatures-Enumerate-the-new-AVX512-BFLOAT16-in.patch                       6
-rw-r--r--  series.conf                                                                                   3
7 files changed, 619 insertions, 25 deletions
diff --git a/patches.arch/0001-x86-speculation-Prepare-entry-code-for-Spectre-v1-sw.patch b/patches.arch/0001-x86-speculation-Prepare-entry-code-for-Spectre-v1-sw.patch
new file mode 100644
index 0000000000..809d745ae1
--- /dev/null
+++ b/patches.arch/0001-x86-speculation-Prepare-entry-code-for-Spectre-v1-sw.patch
@@ -0,0 +1,186 @@
+From 5959e5a374b223ab888aa4866bd51ecc305ffdb2 Mon Sep 17 00:00:00 2001
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Mon, 8 Jul 2019 11:52:25 -0500
+Subject: [PATCH 1/2] x86/speculation: Prepare entry code for Spectre v1 swapgs
+ mitigations
+Patch-mainline: not yet, embargo
+References: bsc#1139358, CVE-2019-1125
+
+commit 18ec54fdd6d18d92025af097cd042a75cf0ea24c upstream
+
+Spectre v1 isn't only about array bounds checks. It can affect any
+conditional checks. The kernel entry code interrupt, exception, and NMI
+handlers all have conditional swapgs checks. Those may be problematic in
+the context of Spectre v1, as kernel code can speculatively run with a user
+GS.
+
+For example:
+
+ if (coming from user space)
+ swapgs
+ mov %gs:<percpu_offset>, %reg
+ mov (%reg), %reg1
+
+When coming from user space, the CPU can speculatively skip the swapgs, and
+then do a speculative percpu load using the user GS value. So the user can
+speculatively force a read of any kernel value. If a gadget exists which
+uses the percpu value as an address in another load/store, then the
+contents of the kernel value may become visible via an L1 side channel
+attack.
+
+A similar attack exists when coming from kernel space. The CPU can
+speculatively do the swapgs, causing the user GS to get used for the rest
+of the speculative window.
+
+The mitigation is similar to a traditional Spectre v1 mitigation, except:
+
+ a) index masking isn't possible because the index (percpu offset)
+ isn't user-controlled; and
+
+ b) an lfence is needed in both the "from user" swapgs path and the
+ "from kernel" non-swapgs path (because of the two attacks described
+ above).
+
+The user entry swapgs paths already have SWITCH_TO_KERNEL_CR3, which has a
+CR3 write when PTI is enabled. Since CR3 writes are serializing, the
+lfences can be skipped in those cases.
+
+On the other hand, the kernel entry swapgs paths don't depend on PTI.
+
+To avoid unnecessary lfences for the user entry case, create two separate
+features for alternative patching:
+
+ X86_FEATURE_FENCE_SWAPGS_USER
+ X86_FEATURE_FENCE_SWAPGS_KERNEL
+
+Use these features in entry code to patch in lfences where needed.
+
+The features aren't enabled yet, so there's no functional change.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Dave Hansen <dave.hansen@intel.com>
+Signed-off-by: Michal Hocko <mhocko@suse.com>
+
+---
+ arch/x86/entry/calling.h | 17 +++++++++++++++++
+ arch/x86/entry/entry_64.S | 19 +++++++++++++++++--
+ arch/x86/include/asm/cpufeatures.h | 2 ++
+ 3 files changed, 36 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/entry/calling.h
++++ b/arch/x86/entry/calling.h
+@@ -331,6 +331,23 @@ For 32-bit we have the following convent
+
+ #endif
+
++/*
++ * Mitigate Spectre v1 for conditional swapgs code paths.
++ *
++ * FENCE_SWAPGS_USER_ENTRY is used in the user entry swapgs code path, to
++ * prevent a speculative swapgs when coming from kernel space.
++ *
++ * FENCE_SWAPGS_KERNEL_ENTRY is used in the kernel entry non-swapgs code path,
++ * to prevent the swapgs from getting speculatively skipped when coming from
++ * user space.
++ */
++.macro FENCE_SWAPGS_USER_ENTRY
++ ALTERNATIVE "", "lfence", X86_FEATURE_FENCE_SWAPGS_USER
++.endm
++.macro FENCE_SWAPGS_KERNEL_ENTRY
++ ALTERNATIVE "", "lfence", X86_FEATURE_FENCE_SWAPGS_KERNEL
++.endm
++
+ #endif /* CONFIG_X86_64 */
+
+ /*
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -527,9 +527,12 @@ END(irq_entries_start)
+ testb $3, CS-ORIG_RAX(%rsp)
+ jz 1f
+ SWAPGS
++ FENCE_SWAPGS_USER_ENTRY
+ call switch_to_thread_stack
++ jmp 2f
+ 1:
+-
++ FENCE_SWAPGS_KERNEL_ENTRY
++2:
+ PUSH_AND_CLEAR_REGS
+ ENCODE_FRAME_POINTER
+
+@@ -1167,6 +1170,12 @@ ENTRY(paranoid_entry)
+
+ 1:
+ SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
++ /*
++ * The above SAVE_AND_SWITCH_TO_KERNEL_CR3 macro doesn't do an
++ * unconditional CR3 write, even in the PTI case. So do an lfence
++ * to prevent GS speculation, regardless of whether PTI is enabled.
++ */
++ FENCE_SWAPGS_KERNEL_ENTRY
+
+ ret
+ END(paranoid_entry)
+@@ -1216,6 +1225,7 @@ ENTRY(error_entry)
+ * from user mode due to an IRET fault.
+ */
+ SWAPGS
++ FENCE_SWAPGS_USER_ENTRY
+ /* We have user CR3. Change to kernel CR3. */
+ SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
+
+@@ -1237,6 +1247,8 @@ ENTRY(error_entry)
+ CALL_enter_from_user_mode
+ ret
+
++.Lerror_entry_done_lfence:
++ FENCE_SWAPGS_KERNEL_ENTRY
+ .Lerror_entry_done:
+ TRACE_IRQS_OFF
+ ret
+@@ -1255,7 +1267,7 @@ ENTRY(error_entry)
+ cmpq %rax, RIP+8(%rsp)
+ je .Lbstep_iret
+ cmpq $.Lgs_change, RIP+8(%rsp)
+- jne .Lerror_entry_done
++ jne .Lerror_entry_done_lfence
+
+ /*
+ * hack: .Lgs_change can fail with user gsbase. If this happens, fix up
+@@ -1263,6 +1275,7 @@ ENTRY(error_entry)
+ * .Lgs_change's error handler with kernel gsbase.
+ */
+ SWAPGS
++ FENCE_SWAPGS_USER_ENTRY
+ SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
+ jmp .Lerror_entry_done
+
+@@ -1277,6 +1290,7 @@ ENTRY(error_entry)
+ * gsbase and CR3. Switch to kernel gsbase and CR3:
+ */
+ SWAPGS
++ FENCE_SWAPGS_USER_ENTRY
+ SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
+
+ /*
+@@ -1368,6 +1382,7 @@ ENTRY(nmi)
+
+ swapgs
+ cld
++ FENCE_SWAPGS_USER_ENTRY
+ SWITCH_TO_KERNEL_CR3 scratch_reg=%rdx
+ movq %rsp, %rdx
+ movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -272,6 +272,8 @@
+
+ /* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (edx), word 11 */
+ #define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */
++#define X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* "" LFENCE in user entry SWAPGS path */
++#define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */
+
+ /* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */
+ #define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */
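
The FENCE_SWAPGS_USER_ENTRY/FENCE_SWAPGS_KERNEL_ENTRY macros added above expand to nothing unless the corresponding X86_FEATURE_FENCE_SWAPGS_* bit is set at boot, in which case alternative patching drops an LFENCE into the entry path. A minimal user-space sketch of that idea follows; it models the boot-time binary patching with a plain runtime flag, and the flag names and the fence_if() helper are stand-ins rather than kernel interfaces:

    /* Sketch only: models ALTERNATIVE "", "lfence", X86_FEATURE_FENCE_SWAPGS_*
     * with runtime flags instead of boot-time instruction patching. */
    #include <stdbool.h>
    #include <stdio.h>

    static bool fence_swapgs_user;          /* stands in for X86_FEATURE_FENCE_SWAPGS_USER   */
    static bool fence_swapgs_kernel = true; /* stands in for X86_FEATURE_FENCE_SWAPGS_KERNEL */

    static inline void fence_if(bool enabled)
    {
            if (enabled)                                    /* the kernel patches this choice once at boot */
                    __asm__ __volatile__("lfence" ::: "memory");
    }

    int main(void)
    {
            fence_if(fence_swapgs_user);    /* user entry path: fence not patched in */
            fence_if(fence_swapgs_kernel);  /* kernel entry path: LFENCE executes    */
            puts("speculation barriers applied per feature flag");
            return 0;
    }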
diff --git a/patches.arch/0002-x86-speculation-Enable-Spectre-v1-swapgs-mitigations.patch b/patches.arch/0002-x86-speculation-Enable-Spectre-v1-swapgs-mitigations.patch
new file mode 100644
index 0000000000..76cc777b99
--- /dev/null
+++ b/patches.arch/0002-x86-speculation-Enable-Spectre-v1-swapgs-mitigations.patch
@@ -0,0 +1,250 @@
+From d412fa2658218300263e965d3f5e23121153f391 Mon Sep 17 00:00:00 2001
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Mon, 8 Jul 2019 11:52:26 -0500
+Subject: [PATCH 2/2] x86/speculation: Enable Spectre v1 swapgs mitigations
+Patch-mainline: not yet, embargo
+References: bsc#1139358, CVE-2019-1125
+
+commit a2059825986a1c8143fd6698774fa9d83733bb11 upstream
+
+The previous commit added macro calls in the entry code which mitigate the
+Spectre v1 swapgs issue if the X86_FEATURE_FENCE_SWAPGS_* features are
+enabled. Enable those features where applicable.
+
+The mitigations may be disabled with "nospectre_v1" or "mitigations=off".
+
+There are different features which can affect the risk of attack:
+
+- When FSGSBASE is enabled, unprivileged users are able to place any
+ value in GS, using the wrgsbase instruction. This means they can
+ write a GS value which points to any value in kernel space, which can
+ be useful with the following gadget in an interrupt/exception/NMI
+ handler:
+
+ if (coming from user space)
+ swapgs
+ mov %gs:<percpu_offset>, %reg1
+ // dependent load or store based on the value of %reg1
+ // for example: mov %(reg1), %reg2
+
+ If an interrupt is coming from user space, and the entry code
+ speculatively skips the swapgs (due to user branch mistraining), it
+ may speculatively execute the GS-based load and a subsequent dependent
+ load or store, exposing the kernel data to an L1 side channel leak.
+
+ Note that, on Intel, a similar attack exists in the above gadget when
+ coming from kernel space, if the swapgs gets speculatively executed to
+ switch back to the user GS. On AMD, this variant isn't possible
+ because swapgs is serializing with respect to future GS-based
+ accesses.
+
+ NOTE: The FSGSBASE patch set hasn't been merged yet, so the above case
+ doesn't exist quite yet.
+
+- When FSGSBASE is disabled, the issue is mitigated somewhat because
+ unprivileged users must use prctl(ARCH_SET_GS) to set GS, which
+ restricts GS values to user space addresses only. That means the
+ gadget would need an additional step, since the target kernel address
+ needs to be read from user space first. Something like:
+
+ if (coming from user space)
+ swapgs
+ mov %gs:<percpu_offset>, %reg1
+ mov (%reg1), %reg2
+ // dependent load or store based on the value of %reg2
+ // for example: mov %(reg2), %reg3
+
+ It's difficult to audit for this gadget in all the handlers, so while
+ there are no known instances of it, it's entirely possible that it
+ exists somewhere (or could be introduced in the future). Without
+ tooling to analyze all such code paths, consider it vulnerable.
+
+ Effects of SMAP on the !FSGSBASE case:
+
+ - If SMAP is enabled, and the CPU reports RDCL_NO (i.e., not
+ susceptible to Meltdown), the kernel is prevented from speculatively
+ reading user space memory, even L1 cached values. This effectively
+ disables the !FSGSBASE attack vector.
+
+ - If SMAP is enabled, but the CPU *is* susceptible to Meltdown, SMAP
+ still prevents the kernel from speculatively reading user space
+ memory. But it does *not* prevent the kernel from reading the
+ user value from L1, if it has already been cached. This is probably
+ only a small hurdle for an attacker to overcome.
+
+Thanks to Dave Hansen for contributing the speculative_smap() function.
+
+Thanks to Andrew Cooper for providing the inside scoop on whether swapgs
+is serializing on AMD.
+
+[ tglx: Fixed the USER fence decision and polished the comment as suggested
+ by Dave Hansen ]
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Dave Hansen <dave.hansen@intel.com>
+Signed-off-by: Michal Hocko <mhocko@suse.com>
+
+---
+ Documentation/admin-guide/kernel-parameters.txt | 1
+ arch/x86/kernel/cpu/bugs.c | 115 ++++++++++++++++++++++--
+ 2 files changed, 107 insertions(+), 9 deletions(-)
+
+--- a/Documentation/admin-guide/kernel-parameters.txt
++++ b/Documentation/admin-guide/kernel-parameters.txt
+@@ -2392,6 +2392,7 @@
+ Equivalent to: nopti [X86,PPC]
+ nospectre_v1 [PPC]
+ nobp=0 [S390]
++ nospectre_v1 [X86]
+ nospectre_v2 [X86,PPC,S390]
+ spectre_v2_user=off [X86]
+ spec_store_bypass_disable=off [X86,PPC]
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -31,6 +31,7 @@
+ #include <asm/hypervisor.h>
+ #include <asm/e820/api.h>
+
++static void __init spectre_v1_select_mitigation(void);
+ static void __init spectre_v2_select_mitigation(void);
+ static void __init ssb_select_mitigation(void);
+ static void __init l1tf_select_mitigation(void);
+@@ -95,17 +96,11 @@ void __init check_bugs(void)
+ if (boot_cpu_has(X86_FEATURE_STIBP))
+ x86_spec_ctrl_mask |= SPEC_CTRL_STIBP;
+
+- /* Select the proper spectre mitigation before patching alternatives */
++ /* Select the proper CPU mitigations before patching alternatives: */
++ spectre_v1_select_mitigation();
+ spectre_v2_select_mitigation();
+-
+- /*
+- * Select proper mitigation for any exposure to the Speculative Store
+- * Bypass vulnerability.
+- */
+ ssb_select_mitigation();
+-
+ l1tf_select_mitigation();
+-
+ mds_select_mitigation();
+
+ arch_smt_update();
+@@ -395,6 +390,108 @@ static int __init mds_cmdline(char *str)
+ early_param("mds", mds_cmdline);
+
+ #undef pr_fmt
++#define pr_fmt(fmt) "Spectre V1 : " fmt
++
++enum spectre_v1_mitigation {
++ SPECTRE_V1_MITIGATION_NONE,
++ SPECTRE_V1_MITIGATION_AUTO,
++};
++
++static enum spectre_v1_mitigation spectre_v1_mitigation __ro_after_init =
++ SPECTRE_V1_MITIGATION_AUTO;
++
++static const char * const spectre_v1_strings[] = {
++ [SPECTRE_V1_MITIGATION_NONE] = "Vulnerable: __user pointer sanitization and usercopy barriers only; no swapgs barriers",
++ [SPECTRE_V1_MITIGATION_AUTO] = "Mitigation: usercopy/swapgs barriers and __user pointer sanitization",
++};
++
++static bool is_swapgs_serializing(void)
++{
++ /*
++ * Technically, swapgs isn't serializing on AMD (despite it previously
++ * being documented as such in the APM). But according to AMD, %gs is
++ * updated non-speculatively, and the issuing of %gs-relative memory
++ * operands will be blocked until the %gs update completes, which is
++ * good enough for our purposes.
++ */
++ return boot_cpu_data.x86_vendor == X86_VENDOR_AMD;
++}
++
++/*
++ * Does SMAP provide full mitigation against speculative kernel access to
++ * userspace?
++ */
++static bool smap_works_speculatively(void)
++{
++ if (!boot_cpu_has(X86_FEATURE_SMAP))
++ return false;
++
++ /*
++ * On CPUs which are vulnerable to Meltdown, SMAP does not
++ * prevent speculative access to user data in the L1 cache.
++ * Consider SMAP to be non-functional as a mitigation on these
++ * CPUs.
++ */
++ if (boot_cpu_has(X86_BUG_CPU_MELTDOWN))
++ return false;
++
++ return true;
++}
++
++static void __init spectre_v1_select_mitigation(void)
++{
++ if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1) || cpu_mitigations_off()) {
++ spectre_v1_mitigation = SPECTRE_V1_MITIGATION_NONE;
++ return;
++ }
++
++ if (spectre_v1_mitigation == SPECTRE_V1_MITIGATION_AUTO) {
++ /*
++ * With Spectre v1, a user can speculatively control either
++ * path of a conditional swapgs with a user-controlled GS
++ * value. The mitigation is to add lfences to both code paths.
++ *
++ * If FSGSBASE is enabled, the user can put a kernel address in
++ * GS, in which case SMAP provides no protection.
++ *
++ * [ NOTE: Don't check for X86_FEATURE_FSGSBASE until the
++ * FSGSBASE enablement patches have been merged. ]
++ *
++ * If FSGSBASE is disabled, the user can only put a user space
++ * address in GS. That makes an attack harder, but still
++ * possible if there's no SMAP protection.
++ */
++ if (!smap_works_speculatively()) {
++ /*
++ * Mitigation can be provided from SWAPGS itself or
++ * PTI as the CR3 write in the Meltdown mitigation
++ * is serializing.
++ *
++ * If neither is there, mitigate with an LFENCE.
++ */
++ if (!is_swapgs_serializing() && !boot_cpu_has(X86_FEATURE_PTI))
++ setup_force_cpu_cap(X86_FEATURE_FENCE_SWAPGS_USER);
++
++ /*
++ * Enable lfences in the kernel entry (non-swapgs)
++ * paths, to prevent user entry from speculatively
++ * skipping swapgs.
++ */
++ setup_force_cpu_cap(X86_FEATURE_FENCE_SWAPGS_KERNEL);
++ }
++ }
++
++ pr_info("%s\n", spectre_v1_strings[spectre_v1_mitigation]);
++}
++
++static int __init nospectre_v1_cmdline(char *str)
++{
++ spectre_v1_mitigation = SPECTRE_V1_MITIGATION_NONE;
++ return 0;
++}
++early_param("nospectre_v1", nospectre_v1_cmdline);
++
++#undef pr_fmt
+ #define pr_fmt(fmt) "Spectre V2 : " fmt
+
+ static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init =
+@@ -1267,7 +1364,7 @@ static ssize_t cpu_show_common(struct de
+ break;
+
+ case X86_BUG_SPECTRE_V1:
+- return sprintf(buf, "Mitigation: __user pointer sanitization\n");
++ return sprintf(buf, "%s\n", spectre_v1_strings[spectre_v1_mitigation]);
+
+ case X86_BUG_SPECTRE_V2:
+ return sprintf(buf, "%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
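
The spectre_v1_select_mitigation() logic added to bugs.c above reduces to a short decision tree: no mitigation if the CPU isn't affected or mitigations are off, nothing extra if SMAP is fully effective speculatively, otherwise always fence the kernel entry path and fence the user entry path only when neither a serializing swapgs (AMD) nor PTI's CR3 write already covers it. Below is a standalone sketch of that tree as merged here (before the later X86_BUG_SWAPGS refinement), with the kernel's boot_cpu_has*()/cpu_mitigations_off() checks replaced by plain booleans; the struct and helper names are invented for illustration:

    /* Illustration of the decision tree in spectre_v1_select_mitigation();
     * not kernel code, all inputs are plain booleans. */
    #include <stdbool.h>
    #include <stdio.h>

    struct cpu {
            bool bug_spectre_v1;
            bool mitigations_off;    /* "nospectre_v1" or "mitigations=off" */
            bool smap;
            bool bug_meltdown;
            bool swapgs_serializing; /* AMD-style non-speculative %gs update */
            bool pti;
    };

    static void select_spectre_v1(const struct cpu *c, bool *fence_user, bool *fence_kernel)
    {
            *fence_user = *fence_kernel = false;

            if (!c->bug_spectre_v1 || c->mitigations_off)
                    return;                          /* SPECTRE_V1_MITIGATION_NONE */

            /* SMAP only helps if the CPU is not Meltdown-affected. */
            bool smap_works = c->smap && !c->bug_meltdown;
            if (smap_works)
                    return;

            /* User entry: a serializing swapgs or PTI's CR3 write already
             * stops speculation; otherwise an LFENCE is patched in. */
            if (!c->swapgs_serializing && !c->pti)
                    *fence_user = true;

            /* Kernel entry never has a CR3 write, so always fence it. */
            *fence_kernel = true;
    }

    int main(void)
    {
            struct cpu intel_no_pti = { .bug_spectre_v1 = true, .smap = true,
                                        .bug_meltdown = true, .pti = false };
            bool user, kernel;

            select_spectre_v1(&intel_no_pti, &user, &kernel);
            printf("FENCE_SWAPGS_USER=%d FENCE_SWAPGS_KERNEL=%d\n", user, kernel);
            return 0;
    }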
diff --git a/patches.arch/x86-speculation-swapgs-exclude-ATOMs-from-speculating-through-SWAPGS.patch b/patches.arch/x86-speculation-swapgs-exclude-ATOMs-from-speculating-through-SWAPGS.patch
new file mode 100644
index 0000000000..b9095cc6a4
--- /dev/null
+++ b/patches.arch/x86-speculation-swapgs-exclude-ATOMs-from-speculating-through-SWAPGS.patch
@@ -0,0 +1,152 @@
+Subject: x86/speculation/swapgs: Exclude ATOMs from speculation through SWAPGS
+From: Thomas Gleixner <tglx@linutronix.de>
+Patch-mainline: not yet, embargo
+References: bsc#1139358, CVE-2019-1125
+
+Intel provided the following information:
+
+ On all current Atom processors, instructions that use a segment register
+ value (e.g. a load or store) will not speculatively execute before the
+ last writer of that segment retires. Thus they will not use a
+ speculatively written segment value.
+
+That means on ATOMs there is no speculation through SWAPGS, so the SWAPGS
+entry paths can be excluded from the extra LFENCE if PTI is disabled.
+
+Create a separate bug flag for the through SWAPGS speculation and mark all
+out-of-order ATOMs and AMD/HYGON CPUs as not affected. The in-order ATOMs
+are excluded from the whole mitigation mess anyway.
+
+Reported-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Tyler Hicks <tyhicks@canonical.com>
+Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+
+---
+V2: Reworded change log with the 'official' statement, no code changes.
+ Picked up Reviewed-by tags.
+---
+ arch/x86/include/asm/cpufeatures.h | 1
+ arch/x86/kernel/cpu/bugs.c | 18 +++------------
+ arch/x86/kernel/cpu/common.c | 44 +++++++++++++++++++++++--------------
+ 3 files changed, 33 insertions(+), 30 deletions(-)
+
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -383,4 +383,5 @@
+ #define X86_BUG_L1TF X86_BUG(18) /* CPU is affected by L1 Terminal Fault */
+ #define X86_BUG_MDS X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */
+ #define X86_BUG_MSBDS_ONLY X86_BUG(20) /* CPU is only affected by the MSDBS variant of BUG_MDS */
++#define X86_BUG_SWAPGS X86_BUG(21) /* CPU is affected by speculation through SWAPGS */
+ #endif /* _ASM_X86_CPUFEATURES_H */
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -405,18 +405,6 @@ static const char * const spectre_v1_str
+ [SPECTRE_V1_MITIGATION_AUTO] = "Mitigation: usercopy/swapgs barriers and __user pointer sanitization",
+ };
+
+-static bool is_swapgs_serializing(void)
+-{
+- /*
+- * Technically, swapgs isn't serializing on AMD (despite it previously
+- * being documented as such in the APM). But according to AMD, %gs is
+- * updated non-speculatively, and the issuing of %gs-relative memory
+- * operands will be blocked until the %gs update completes, which is
+- * good enough for our purposes.
+- */
+- return boot_cpu_data.x86_vendor == X86_VENDOR_AMD;
+-}
+-
+ /*
+ * Does SMAP provide full mitigation against speculative kernel access to
+ * userspace?
+@@ -467,9 +455,11 @@ static void __init spectre_v1_select_mit
+ * PTI as the CR3 write in the Meltdown mitigation
+ * is serializing.
+ *
+- * If neither is there, mitigate with an LFENCE.
++ * If neither is there, mitigate with an LFENCE to
++ * stop speculation through swapgs.
+ */
+- if (!is_swapgs_serializing() && !boot_cpu_has(X86_FEATURE_PTI))
++ if (boot_cpu_has_bug(X86_BUG_SWAPGS) &&
++ !boot_cpu_has(X86_FEATURE_PTI))
+ setup_force_cpu_cap(X86_FEATURE_FENCE_SWAPGS_USER);
+
+ /*
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -904,6 +904,7 @@ static void identify_cpu_without_cpuid(s
+ #define NO_L1TF BIT(3)
+ #define NO_MDS BIT(4)
+ #define MSBDS_ONLY BIT(5)
++#define NO_SWAPGS BIT(6)
+
+ #define VULNWL(_vendor, _family, _model, _whitelist) \
+ { X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist }
+@@ -930,30 +931,38 @@ static const __initconst struct x86_cpu_
+ VULNWL_INTEL(ATOM_BONNELL, NO_SPECULATION),
+ VULNWL_INTEL(ATOM_BONNELL_MID, NO_SPECULATION),
+
+- VULNWL_INTEL(ATOM_SILVERMONT, NO_SSB | NO_L1TF | MSBDS_ONLY),
+- VULNWL_INTEL(ATOM_SILVERMONT_X, NO_SSB | NO_L1TF | MSBDS_ONLY),
+- VULNWL_INTEL(ATOM_SILVERMONT_MID, NO_SSB | NO_L1TF | MSBDS_ONLY),
+- VULNWL_INTEL(ATOM_AIRMONT, NO_SSB | NO_L1TF | MSBDS_ONLY),
+- VULNWL_INTEL(XEON_PHI_KNL, NO_SSB | NO_L1TF | MSBDS_ONLY),
+- VULNWL_INTEL(XEON_PHI_KNM, NO_SSB | NO_L1TF | MSBDS_ONLY),
++ VULNWL_INTEL(ATOM_SILVERMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
++ VULNWL_INTEL(ATOM_SILVERMONT_X, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
++ VULNWL_INTEL(ATOM_SILVERMONT_MID, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
++ VULNWL_INTEL(ATOM_AIRMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
++ VULNWL_INTEL(XEON_PHI_KNL, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
++ VULNWL_INTEL(XEON_PHI_KNM, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
+
+ VULNWL_INTEL(CORE_YONAH, NO_SSB),
+
+- VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY),
++ VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
+
+- VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF),
+- VULNWL_INTEL(ATOM_GOLDMONT_X, NO_MDS | NO_L1TF),
+- VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF),
++ VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS),
++ VULNWL_INTEL(ATOM_GOLDMONT_X, NO_MDS | NO_L1TF | NO_SWAPGS),
++ VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS),
++
++ /*
++ * Technically, swapgs isn't serializing on AMD (despite it previously
++ * being documented as such in the APM). But according to AMD, %gs is
++ * updated non-speculatively, and the issuing of %gs-relative memory
++ * operands will be blocked until the %gs update completes, which is
++ * good enough for our purposes.
++ */
+
+ /* AMD Family 0xf - 0x12 */
+- VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS),
+- VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS),
+- VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS),
+- VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS),
++ VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS),
++ VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS),
++ VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS),
++ VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS),
+
+ /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */
+- VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS),
+- VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS),
++ VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS),
++ VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS),
+ {}
+ };
+
+@@ -990,6 +999,9 @@ static void __init cpu_set_bug_bits(stru
+ setup_force_cpu_bug(X86_BUG_MSBDS_ONLY);
+ }
+
++ if (!cpu_matches(NO_SWAPGS))
++ setup_force_cpu_bug(X86_BUG_SWAPGS);
++
+ if (cpu_matches(NO_MELTDOWN))
+ return;
+
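
With the patch above, the AMD special case becomes a generic whitelist bit: a CPU is only marked X86_BUG_SWAPGS when no cpu_vuln_whitelist entry carrying NO_SWAPGS matches it, and the user-entry fence is then keyed off that bug bit plus PTI being disabled. A rough sketch of the lookup semantics, using invented model strings instead of the kernel's x86_cpu_id tables:

    /* Sketch of the cpu_matches()/NO_SWAPGS semantics; table entries and
     * model names are illustrative, not the kernel's. */
    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    #define NO_SWAPGS (1u << 6)

    struct vulnwl { const char *model; unsigned int whitelist; };

    static const struct vulnwl cpu_vuln_whitelist[] = {
            { "ATOM_GOLDMONT",  NO_SWAPGS },
            { "AMD_FAMILY_ANY", NO_SWAPGS },
            { NULL, 0 },
    };

    static bool cpu_matches(const char *model, unsigned int which)
    {
            for (const struct vulnwl *e = cpu_vuln_whitelist; e->model; e++)
                    if (!strcmp(e->model, model) && (e->whitelist & which))
                            return true;
            return false;
    }

    int main(void)
    {
            const char *cpus[] = { "ATOM_GOLDMONT", "SKYLAKE" };

            for (int i = 0; i < 2; i++) {
                    /* Only CPUs not whitelisted with NO_SWAPGS get the bug bit. */
                    bool bug_swapgs = !cpu_matches(cpus[i], NO_SWAPGS);
                    printf("%-14s X86_BUG_SWAPGS=%d\n", cpus[i], bug_swapgs);
            }
            return 0;
    }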
diff --git a/patches.suse/0005-x86-enter-Use-IBRS-on-syscall-and-interrupts.patch b/patches.suse/0005-x86-enter-Use-IBRS-on-syscall-and-interrupts.patch
index ad473781a0..42380456c7 100644
--- a/patches.suse/0005-x86-enter-Use-IBRS-on-syscall-and-interrupts.patch
+++ b/patches.suse/0005-x86-enter-Use-IBRS-on-syscall-and-interrupts.patch
@@ -105,7 +105,7 @@ Signed-off-by: Jiri Slaby <jslaby@suse.cz>
/*
* We are on the trampoline stack. All regs except RDI are live.
* We can do future final exit work right here.
-@@ -610,11 +616,12 @@ GLOBAL(swapgs_restore_regs_and_return_to
+@@ -613,11 +619,12 @@ GLOBAL(swapgs_restore_regs_and_return_to
/* Push user RDI on the trampoline stack. */
pushq (%rdi)
@@ -119,7 +119,7 @@ Signed-off-by: Jiri Slaby <jslaby@suse.cz>
SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
/* Restore RDI. */
-@@ -701,6 +708,13 @@ native_irq_return_ldt:
+@@ -704,6 +711,13 @@ native_irq_return_ldt:
SWAPGS /* to kernel GS */
SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi /* to kernel CR3 */
@@ -133,7 +133,7 @@ Signed-off-by: Jiri Slaby <jslaby@suse.cz>
movq PER_CPU_VAR(espfix_waddr), %rdi
movq %rax, (0*8)(%rdi) /* user RAX */
movq (1*8)(%rsp), %rax /* user RIP */
-@@ -847,6 +861,8 @@ ENTRY(switch_to_thread_stack)
+@@ -850,6 +864,8 @@ ENTRY(switch_to_thread_stack)
SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
movq %rsp, %rdi
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
@@ -142,16 +142,16 @@ Signed-off-by: Jiri Slaby <jslaby@suse.cz>
UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI
pushq 7*8(%rdi) /* regs->ss */
-@@ -1153,6 +1169,8 @@ ENTRY(paranoid_entry)
+@@ -1170,6 +1186,8 @@ ENTRY(paranoid_entry)
1:
SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
+ /* Restrict Indirect Branch speculation */
+ RESTRICT_IB_SPEC_SAVE_AND_CLOBBER save_reg=%r13d
-
- ret
- END(paranoid_entry)
-@@ -1176,6 +1194,8 @@ ENTRY(paranoid_exit)
+ /*
+ * The above SAVE_AND_SWITCH_TO_KERNEL_CR3 macro doesn't do an
+ * unconditional CR3 write, even in the PTI case. So do an lfence
+@@ -1199,6 +1217,8 @@ ENTRY(paranoid_exit)
testl %ebx, %ebx /* swapgs needed? */
jnz .Lparanoid_exit_no_swapgs
TRACE_IRQS_IRETQ
@@ -160,8 +160,8 @@ Signed-off-by: Jiri Slaby <jslaby@suse.cz>
RESTORE_CR3 scratch_reg=%rbx save_reg=%r14
SWAPGS_UNSAFE_STACK
jmp .Lparanoid_exit_restore
-@@ -1204,6 +1224,8 @@ ENTRY(error_entry)
- SWAPGS
+@@ -1228,6 +1248,8 @@ ENTRY(error_entry)
+ FENCE_SWAPGS_USER_ENTRY
/* We have user CR3. Change to kernel CR3. */
SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
+ /* Restrict Indirect Branch Speculation */
@@ -169,25 +169,25 @@ Signed-off-by: Jiri Slaby <jslaby@suse.cz>
.Lerror_entry_from_usermode_after_swapgs:
/* Put us onto the real thread stack. */
-@@ -1250,6 +1272,8 @@ ENTRY(error_entry)
- */
+@@ -1277,6 +1299,8 @@ ENTRY(error_entry)
SWAPGS
+ FENCE_SWAPGS_USER_ENTRY
SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
+ /* Restrict Indirect Branch Speculation */
+ RESTRICT_IB_SPEC_CLOBBER
jmp .Lerror_entry_done
.Lbstep_iret:
-@@ -1264,6 +1288,8 @@ ENTRY(error_entry)
- */
+@@ -1292,6 +1316,8 @@ ENTRY(error_entry)
SWAPGS
+ FENCE_SWAPGS_USER_ENTRY
SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
+ /* Restrict Indirect Branch Speculation */
+ RESTRICT_IB_SPEC
/*
* Pretend that the exception came from user mode: set up pt_regs
-@@ -1357,6 +1383,10 @@ ENTRY(nmi)
+@@ -1386,6 +1412,10 @@ ENTRY(nmi)
SWITCH_TO_KERNEL_CR3 scratch_reg=%rdx
movq %rsp, %rdx
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
@@ -198,7 +198,7 @@ Signed-off-by: Jiri Slaby <jslaby@suse.cz>
UNWIND_HINT_IRET_REGS base=%rdx offset=8
pushq 5*8(%rdx) /* pt_regs->ss */
pushq 4*8(%rdx) /* pt_regs->rsp */
-@@ -1591,6 +1621,9 @@ end_repeat_nmi:
+@@ -1620,6 +1650,9 @@ end_repeat_nmi:
movq $-1, %rsi
call do_nmi
diff --git a/patches.suse/x86-cpufeatures-Combine-word-11-and-12-into-a-new-sc.patch b/patches.suse/x86-cpufeatures-Combine-word-11-and-12-into-a-new-sc.patch
index 537db91873..08b87c45c2 100644
--- a/patches.suse/x86-cpufeatures-Combine-word-11-and-12-into-a-new-sc.patch
+++ b/patches.suse/x86-cpufeatures-Combine-word-11-and-12-into-a-new-sc.patch
@@ -83,17 +83,12 @@ Signed-off-by: Jiri Slaby <jslaby@suse.cz>
CPUID_8000_000A_EDX,
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
-@@ -270,13 +270,16 @@
+@@ -270,16 +270,19 @@
#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 */
#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS */
-/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (edx), word 11 */
-#define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */
--
--/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */
--#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */
--#define X86_FEATURE_CQM_MBM_TOTAL (12*32+ 1) /* LLC Total MBM monitoring */
--#define X86_FEATURE_CQM_MBM_LOCAL (12*32+ 2) /* LLC Local MBM monitoring */
+/*
+ * Extended auxiliary flags: Linux defined - for features scattered in various
+ * CPUID levels like 0xf, etc.
@@ -104,9 +99,17 @@ Signed-off-by: Jiri Slaby <jslaby@suse.cz>
+#define X86_FEATURE_CQM_OCCUP_LLC (11*32+ 1) /* LLC occupancy monitoring */
+#define X86_FEATURE_CQM_MBM_TOTAL (11*32+ 2) /* LLC Total MBM monitoring */
+#define X86_FEATURE_CQM_MBM_LOCAL (11*32+ 3) /* LLC Local MBM monitoring */
+ #define X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* "" LFENCE in user entry SWAPGS path */
+ #define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */
+-/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */
+-#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */
+-#define X86_FEATURE_CQM_MBM_TOTAL (12*32+ 1) /* LLC Total MBM monitoring */
+-#define X86_FEATURE_CQM_MBM_LOCAL (12*32+ 2) /* LLC Local MBM monitoring */
+-
/* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
#define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */
+ #define X86_FEATURE_IRPERF (13*32+1) /* Instructions Retired Count */
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -770,33 +770,25 @@ static void init_speculation_control(str
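
The word-11 reshuffle above keeps the usual cpufeatures encoding: each X86_FEATURE_* value is word*32 + bit, so X86_FEATURE_FENCE_SWAPGS_USER at (11*32+ 4) is bit 4 of capability word 11. A small illustration of that encoding; the caps[] array and helpers stand in for the kernel's cpu_has() machinery and are not its API:

    /* Sketch of the x86 capability-bit encoding: feature = word * 32 + bit. */
    #include <stdint.h>
    #include <stdio.h>

    #define FEATURE(word, bit)          ((word) * 32 + (bit))
    #define FEATURE_FENCE_SWAPGS_USER   FEATURE(11, 4)
    #define FEATURE_FENCE_SWAPGS_KERNEL FEATURE(11, 5)

    static uint32_t caps[20];            /* per-CPU capability words */

    static void set_cap(int f) { caps[f / 32] |= 1u << (f % 32); }
    static int  has_cap(int f) { return (caps[f / 32] >> (f % 32)) & 1; }

    int main(void)
    {
            set_cap(FEATURE_FENCE_SWAPGS_KERNEL);
            printf("user fence: %d, kernel fence: %d\n",
                   has_cap(FEATURE_FENCE_SWAPGS_USER),
                   has_cap(FEATURE_FENCE_SWAPGS_KERNEL));
            return 0;
    }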
diff --git a/patches.suse/x86-cpufeatures-Enumerate-the-new-AVX512-BFLOAT16-in.patch b/patches.suse/x86-cpufeatures-Enumerate-the-new-AVX512-BFLOAT16-in.patch
index 37ef2ab272..652398ffd7 100644
--- a/patches.suse/x86-cpufeatures-Enumerate-the-new-AVX512-BFLOAT16-in.patch
+++ b/patches.suse/x86-cpufeatures-Enumerate-the-new-AVX512-BFLOAT16-in.patch
@@ -72,9 +72,9 @@ Signed-off-by: Jiri Slaby <jslaby@suse.cz>
CPUID_8000_000A_EDX,
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
-@@ -281,6 +281,9 @@
- #define X86_FEATURE_CQM_MBM_TOTAL (11*32+ 2) /* LLC Total MBM monitoring */
- #define X86_FEATURE_CQM_MBM_LOCAL (11*32+ 3) /* LLC Local MBM monitoring */
+@@ -283,6 +283,9 @@
+ #define X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* "" LFENCE in user entry SWAPGS path */
+ #define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */
+/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
+#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */
diff --git a/series.conf b/series.conf
index 46e7f8ecd4..b556349330 100644
--- a/series.conf
+++ b/series.conf
@@ -48229,6 +48229,9 @@
patches.kabi/kabi-kvm-mmu-Fix-overflow-on-kvm-mmu-page-limit-calculati.patch
patches.arch/mm-nvdimm-add-is_ioremap_addr-and-use-that-to-check-.patch
patches.drivers/scsi-qla2xxx-do-not-crash-on-uninitialized-pool-list.patch
+ patches.arch/0001-x86-speculation-Prepare-entry-code-for-Spectre-v1-sw.patch
+ patches.arch/0002-x86-speculation-Enable-Spectre-v1-swapgs-mitigations.patch
+ patches.arch/x86-speculation-swapgs-exclude-ATOMs-from-speculating-through-SWAPGS.patch
########################################################
# end of sorted patches