Home Home > GIT Browse
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJiri Kosina <jkosina@suse.cz>2018-05-18 16:48:02 +0200
committerJiri Kosina <jkosina@suse.cz>2018-05-18 16:48:02 +0200
commit2f331d6a7740a54be4af1372afbe1795117e92fe (patch)
tree852c9d88866101cb2d52b8f27c141252e2d5d83e
parent99406b39cb5bc8c886fb0f2edac36aae7b491a11 (diff)
parent25fd23dc2fd1ae38ac0da126f13f6ee12920ab90 (diff)
Merge remote-tracking branch 'origin/users/bpetkov/SLE15/ssb' into users/jkosina/SLE15/ssb
Pull SSBD patches from Borislav Petkov suse-commit: 93e52595dc83d0ec1b5345fdc54b969318bd7a9b
-rw-r--r--Documentation/ABI/testing/sysfs-devices-system-cpu1
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt45
-rw-r--r--Documentation/userspace-api/spec_ctrl.rst94
-rw-r--r--arch/x86/include/asm/cpufeatures.h4
-rw-r--r--arch/x86/include/asm/msr-index.h7
-rw-r--r--arch/x86/include/asm/nospec-branch.h51
-rw-r--r--arch/x86/include/asm/spec-ctrl.h38
-rw-r--r--arch/x86/include/asm/thread_info.h4
-rw-r--r--arch/x86/kernel/cpu/amd.c26
-rw-r--r--arch/x86/kernel/cpu/bugs.c381
-rw-r--r--arch/x86/kernel/cpu/common.c54
-rw-r--r--arch/x86/kernel/cpu/cpu.h2
-rw-r--r--arch/x86/kernel/cpu/intel.c1
-rw-r--r--arch/x86/kernel/process.c22
-rw-r--r--arch/x86/kvm/cpuid.c2
-rw-r--r--arch/x86/kvm/svm.c8
-rw-r--r--arch/x86/kvm/vmx.c16
-rw-r--r--drivers/base/cpu.c8
-rw-r--r--fs/proc/array.c25
-rw-r--r--include/linux/cpu.h2
-rw-r--r--include/linux/nospec.h10
-rw-r--r--include/linux/sched.h10
-rw-r--r--include/linux/seccomp.h3
-rw-r--r--include/uapi/linux/prctl.h12
-rw-r--r--include/uapi/linux/seccomp.h3
-rw-r--r--kernel/seccomp.c21
-rw-r--r--kernel/sys.c23
-rw-r--r--tools/testing/selftests/seccomp/seccomp_bpf.c6
28 files changed, 814 insertions, 65 deletions
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index 258902db14bf..8355e79350b7 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -378,6 +378,7 @@ What: /sys/devices/system/cpu/vulnerabilities
/sys/devices/system/cpu/vulnerabilities/meltdown
/sys/devices/system/cpu/vulnerabilities/spectre_v1
/sys/devices/system/cpu/vulnerabilities/spectre_v2
+ /sys/devices/system/cpu/vulnerabilities/spec_store_bypass
Date: January 2018
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
Description: Information about CPU vulnerabilities
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 2fce2a81eadc..e28a6dbce1dc 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2591,6 +2591,9 @@
allow data leaks with this option, which is equivalent
to spectre_v2=off.
+ nospec_store_bypass_disable
+ [HW] Disable all mitigations for the Speculative Store Bypass vulnerability
+
noxsave [BUGS=X86] Disables x86 extended register state save
and restore using xsave. The kernel will fallback to
enabling legacy floating-point and sse state.
@@ -3904,6 +3907,48 @@
Not specifying this option is equivalent to
spectre_v2=auto.
+ spec_store_bypass_disable=
+ [HW] Control Speculative Store Bypass (SSB) Disable mitigation
+ (Speculative Store Bypass vulnerability)
+
+ Certain CPUs are vulnerable to an exploit against a
+ a common industry wide performance optimization known
+ as "Speculative Store Bypass" in which recent stores
+ to the same memory location may not be observed by
+ later loads during speculative execution. The idea
+ is that such stores are unlikely and that they can
+ be detected prior to instruction retirement at the
+ end of a particular speculation execution window.
+
+ In vulnerable processors, the speculatively forwarded
+ store can be used in a cache side channel attack, for
+ example to read memory to which the attacker does not
+ directly have access (e.g. inside sandboxed code).
+
+ This parameter controls whether the Speculative Store
+ Bypass optimization is used.
+
+ on - Unconditionally disable Speculative Store Bypass
+ off - Unconditionally enable Speculative Store Bypass
+ auto - Kernel detects whether the CPU model contains an
+ implementation of Speculative Store Bypass and
+ picks the most appropriate mitigation. If the
+ CPU is not vulnerable, "off" is selected. If the
+ CPU is vulnerable the default mitigation is
+ architecture and Kconfig dependent. See below.
+ prctl - Control Speculative Store Bypass per thread
+ via prctl. Speculative Store Bypass is enabled
+ for a process by default. The state of the control
+ is inherited on fork.
+ seccomp - Same as "prctl" above, but all seccomp threads
+ will disable SSB unless they explicitly opt out.
+
+ Not specifying this option is equivalent to
+ spec_store_bypass_disable=auto.
+
+ Default mitigations:
+ X86: If CONFIG_SECCOMP=y "seccomp", otherwise "prctl"
+
spia_io_base= [HW,MTD]
spia_fio_base=
spia_pedr=
diff --git a/Documentation/userspace-api/spec_ctrl.rst b/Documentation/userspace-api/spec_ctrl.rst
new file mode 100644
index 000000000000..1b3690d30943
--- /dev/null
+++ b/Documentation/userspace-api/spec_ctrl.rst
@@ -0,0 +1,94 @@
+===================
+Speculation Control
+===================
+
+Quite some CPUs have speculation related misfeatures which are in fact
+vulnerabilites causing data leaks in various forms even accross privilege
+domains.
+
+The kernel provides mitigation for such vulnerabilities in various
+forms. Some of these mitigations are compile time configurable and some on
+the kernel command line.
+
+There is also a class of mitigations which are very expensive, but they can
+be restricted to a certain set of processes or tasks in controlled
+environments. The mechanism to control these mitigations is via
+:manpage:`prctl(2)`.
+
+There are two prctl options which are related to this:
+
+ * PR_GET_SPECULATION_CTRL
+
+ * PR_SET_SPECULATION_CTRL
+
+PR_GET_SPECULATION_CTRL
+-----------------------
+
+PR_GET_SPECULATION_CTRL returns the state of the speculation misfeature
+which is selected with arg2 of prctl(2). The return value uses bits 0-3 with
+the following meaning:
+
+==== ===================== ===================================================
+Bit Define Description
+==== ===================== ===================================================
+0 PR_SPEC_PRCTL Mitigation can be controlled per task by
+ PR_SET_SPECULATION_CTRL
+1 PR_SPEC_ENABLE The speculation feature is enabled, mitigation is
+ disabled
+2 PR_SPEC_DISABLE The speculation feature is disabled, mitigation is
+ enabled
+3 PR_SPEC_FORCE_DISABLE Same as PR_SPEC_DISABLE, but cannot be undone. A
+ subsequent prctl(..., PR_SPEC_ENABLE) will fail.
+==== ===================== ===================================================
+
+If all bits are 0 the CPU is not affected by the speculation misfeature.
+
+If PR_SPEC_PRCTL is set, then the per task control of the mitigation is
+available. If not set, prctl(PR_SET_SPECULATION_CTRL) for the speculation
+misfeature will fail.
+
+PR_SET_SPECULATION_CTRL
+-----------------------
+
+PR_SET_SPECULATION_CTRL allows to control the speculation misfeature, which
+is selected by arg2 of :manpage:`prctl(2)` per task. arg3 is used to hand
+in the control value, i.e. either PR_SPEC_ENABLE or PR_SPEC_DISABLE or
+PR_SPEC_FORCE_DISABLE.
+
+Common error codes
+------------------
+======= =================================================================
+Value Meaning
+======= =================================================================
+EINVAL The prctl is not implemented by the architecture or unused
+ prctl(2) arguments are not 0
+
+ENODEV arg2 is selecting a not supported speculation misfeature
+======= =================================================================
+
+PR_SET_SPECULATION_CTRL error codes
+-----------------------------------
+======= =================================================================
+Value Meaning
+======= =================================================================
+0 Success
+
+ERANGE arg3 is incorrect, i.e. it's neither PR_SPEC_ENABLE nor
+ PR_SPEC_DISABLE nor PR_SPEC_FORCE_DISABLE
+
+ENXIO Control of the selected speculation misfeature is not possible.
+ See PR_GET_SPECULATION_CTRL.
+
+EPERM Speculation was disabled with PR_SPEC_FORCE_DISABLE and caller
+ tried to enable it again.
+======= =================================================================
+
+Speculation misfeature controls
+-------------------------------
+- PR_SPEC_STORE_BYPASS: Speculative Store Bypass
+
+ Invocations:
+ * prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, 0, 0, 0);
+ * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_ENABLE, 0, 0);
+ * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_DISABLE, 0, 0);
+ * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_FORCE_DISABLE, 0, 0);
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index a67b11df51db..60e6c8effa4e 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -215,6 +215,8 @@
#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */
#define X86_FEATURE_USE_IBRS ( 7*32+23) /* "" Use IBRS for Spectre v2 safety */
+#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+24) /* "" Disable Speculative Store Bypass. */
+#define X86_FEATURE_AMD_SSBD (7*32+25) /* "" AMD SSBD implementation */
/* Virtualization flags: Linux defined, word 8 */
#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
@@ -331,6 +333,7 @@
#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */
#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */
#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
+#define X86_FEATURE_SSBD (18*32+31) /* Speculative Store Bypass Disable */
/*
* BUG word(s)
@@ -360,4 +363,5 @@
#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */
#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */
#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */
+#define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index b2c60697f9cf..4a7932ed1ca2 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -41,6 +41,8 @@
#define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */
#define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */
#define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */
+#define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */
+#define SPEC_CTRL_SSBD (1 << SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */
#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
#define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */
@@ -67,6 +69,11 @@
#define MSR_IA32_ARCH_CAPABILITIES 0x0000010a
#define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */
#define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */
+#define ARCH_CAP_SSBD_NO (1 << 4) /*
+ * Not susceptible to Speculative Store Bypass
+ * attack, so no Speculative Store Bypass
+ * control required.
+ */
#define MSR_IA32_BBL_CR_CTL 0x00000119
#define MSR_IA32_BBL_CR_CTL3 0x0000011e
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 55660055fce7..e1bbcb9e5c69 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -214,6 +214,25 @@ enum spectre_v2_mitigation {
SPECTRE_V2_IBRS,
};
+/*
+ * The Intel specification for the SPEC_CTRL MSR requires that we
+ * preserve any already set reserved bits at boot time (e.g. for
+ * future additions that this kernel is not currently aware of).
+ * We then set any additional mitigation bits that we want
+ * ourselves and always use this as the base for SPEC_CTRL.
+ * We also use this when handling guest entry/exit as below.
+ */
+extern void x86_spec_ctrl_set(u64);
+extern u64 x86_spec_ctrl_get_default(void);
+
+/* The Speculative Store Bypass disable variants */
+enum ssb_mitigation {
+ SPEC_STORE_BYPASS_NONE,
+ SPEC_STORE_BYPASS_DISABLE,
+ SPEC_STORE_BYPASS_PRCTL,
+ SPEC_STORE_BYPASS_SECCOMP,
+};
+
extern char __indirect_thunk_start[];
extern char __indirect_thunk_end[];
@@ -236,20 +255,22 @@ static inline void vmexit_fill_RSB(void)
: : "memory" );
}
-#define alternative_msr_write(_msr, _val, _feature) \
- asm volatile(ALTERNATIVE("", \
- "movl %[msr], %%ecx\n\t" \
- "movl %[val], %%eax\n\t" \
- "movl $0, %%edx\n\t" \
- "wrmsr", \
- _feature) \
- : : [msr] "i" (_msr), [val] "i" (_val) \
- : "eax", "ecx", "edx", "memory")
+static __always_inline
+void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature)
+{
+ asm volatile(ALTERNATIVE("", "wrmsr", %c[feature])
+ : : "c" (msr),
+ "a" (val),
+ "d" (val >> 32),
+ [feature] "i" (feature)
+ : "memory");
+}
static inline void indirect_branch_prediction_barrier(void)
{
- alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB,
- X86_FEATURE_USE_IBPB);
+ u64 val = PRED_CMD_IBPB;
+
+ alternative_msr_write(MSR_IA32_PRED_CMD, val, X86_FEATURE_USE_IBPB);
}
/*
@@ -296,14 +317,18 @@ static inline void unrestrict_branch_speculation(void)
*/
#define firmware_restrict_branch_speculation_start() \
do { \
+ u64 val = x86_spec_ctrl_get_default() | SPEC_CTRL_IBRS; \
+ \
preempt_disable(); \
- alternative_msr_write(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS, \
+ alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \
X86_FEATURE_USE_IBRS_FW); \
} while (0)
#define firmware_restrict_branch_speculation_end() \
do { \
- alternative_msr_write(MSR_IA32_SPEC_CTRL, 0, \
+ u64 val = x86_spec_ctrl_get_default(); \
+ \
+ alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \
X86_FEATURE_USE_IBRS_FW); \
preempt_enable(); \
} while (0)
diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h
new file mode 100644
index 000000000000..dc21209790bf
--- /dev/null
+++ b/arch/x86/include/asm/spec-ctrl.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_SPECCTRL_H_
+#define _ASM_X86_SPECCTRL_H_
+
+#include <linux/thread_info.h>
+#include <asm/nospec-branch.h>
+
+/*
+ * On VMENTER we must preserve whatever view of the SPEC_CTRL MSR
+ * the guest has, while on VMEXIT we restore the host view. This
+ * would be easier if SPEC_CTRL were architecturally maskable or
+ * shadowable for guests but this is not (currently) the case.
+ * Takes the guest view of SPEC_CTRL MSR as a parameter.
+ */
+extern void x86_spec_ctrl_set_guest(u64);
+extern void x86_spec_ctrl_restore_host(u64);
+
+/* AMD specific Speculative Store Bypass MSR data */
+extern u64 x86_amd_ls_cfg_base;
+extern u64 x86_amd_ls_cfg_ssbd_mask;
+
+/* The Intel SPEC CTRL MSR base value cache */
+extern u64 x86_spec_ctrl_base;
+
+static inline u64 ssbd_tif_to_spec_ctrl(u64 tifn)
+{
+ BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT);
+ return (tifn & _TIF_SSBD) >> (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT);
+}
+
+static inline u64 ssbd_tif_to_amd_ls_cfg(u64 tifn)
+{
+ return (tifn & _TIF_SSBD) ? x86_amd_ls_cfg_ssbd_mask : 0ULL;
+}
+
+extern void speculative_store_bypass_update(void);
+
+#endif
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index cf95135981aa..722715d40a7e 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -80,6 +80,7 @@ struct thread_info {
#define TIF_SIGPENDING 2 /* signal pending */
#define TIF_NEED_RESCHED 3 /* rescheduling necessary */
#define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/
+#define TIF_SSBD 5 /* Reduced data speculation */
#define TIF_SYSCALL_EMU 6 /* syscall emulation active */
#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
#define TIF_SECCOMP 8 /* secure computing */
@@ -105,6 +106,7 @@ struct thread_info {
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP)
+#define _TIF_SSBD (1 << TIF_SSBD)
#define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU)
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
@@ -142,7 +144,7 @@ struct thread_info {
/* flags to check in __switch_to() */
#define _TIF_WORK_CTXSW \
- (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP)
+ (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP|_TIF_SSBD)
#define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 013b0e88878e..e1ba8011dcba 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -10,6 +10,7 @@
#include <asm/processor.h>
#include <asm/apic.h>
#include <asm/cpu.h>
+#include <asm/spec-ctrl.h>
#include <asm/smp.h>
#include <asm/pci-direct.h>
#include <asm/delay.h>
@@ -553,6 +554,26 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
rdmsrl(MSR_FAM10H_NODE_ID, value);
nodes_per_socket = ((value >> 3) & 7) + 1;
}
+
+ if (c->x86 >= 0x15 && c->x86 <= 0x17) {
+ unsigned int bit;
+
+ switch (c->x86) {
+ case 0x15: bit = 54; break;
+ case 0x16: bit = 33; break;
+ case 0x17: bit = 10; break;
+ default: return;
+ }
+ /*
+ * Try to cache the base value so further operations can
+ * avoid RMW. If that faults, do not enable SSBD.
+ */
+ if (!rdmsrl_safe(MSR_AMD64_LS_CFG, &x86_amd_ls_cfg_base)) {
+ setup_force_cpu_cap(X86_FEATURE_SSBD);
+ setup_force_cpu_cap(X86_FEATURE_AMD_SSBD);
+ x86_amd_ls_cfg_ssbd_mask = 1ULL << bit;
+ }
+ }
}
static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
@@ -886,6 +907,11 @@ static void init_amd(struct cpuinfo_x86 *c)
/* AMD CPUs don't reset SS attributes on SYSRET, Xen does. */
if (!cpu_has(c, X86_FEATURE_XENPV))
set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
+
+ if (boot_cpu_has(X86_FEATURE_AMD_SSBD)) {
+ set_cpu_cap(c, X86_FEATURE_SSBD);
+ set_cpu_cap(c, X86_FEATURE_AMD_SSBD);
+ }
}
#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 803a0ca79168..7271cc15fb05 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -11,8 +11,10 @@
#include <linux/utsname.h>
#include <linux/cpu.h>
#include <linux/module.h>
+#include <linux/nospec.h>
+#include <linux/prctl.h>
-#include <asm/nospec-branch.h>
+#include <asm/spec-ctrl.h>
#include <asm/cmdline.h>
#include <asm/bugs.h>
#include <asm/processor.h>
@@ -26,6 +28,26 @@
#include <asm/intel-family.h>
static void __init spectre_v2_select_mitigation(void);
+static void __init ssb_select_mitigation(void);
+
+/*
+ * Our boot-time value of the SPEC_CTRL MSR. We read it once so that any
+ * writes to SPEC_CTRL contain whatever reserved bits have been set.
+ */
+u64 __ro_after_init x86_spec_ctrl_base;
+
+/*
+ * The vendor and possibly platform specific bits which can be modified in
+ * x86_spec_ctrl_base.
+ */
+static u64 __ro_after_init x86_spec_ctrl_mask = ~SPEC_CTRL_IBRS;
+
+/*
+ * AMD specific MSR info for Speculative Store Bypass control.
+ * x86_amd_ls_cfg_ssbd_mask is initialized in identify_boot_cpu().
+ */
+u64 __ro_after_init x86_amd_ls_cfg_base;
+u64 __ro_after_init x86_amd_ls_cfg_ssbd_mask;
void __init check_bugs(void)
{
@@ -36,9 +58,23 @@ void __init check_bugs(void)
print_cpu_info(&boot_cpu_data);
}
+ /*
+ * Read the SPEC_CTRL MSR to account for reserved bits which may
+ * have unknown values. AMD64_LS_CFG MSR is cached in the early AMD
+ * init code as it is not enumerated and depends on the family.
+ */
+ if (boot_cpu_has(X86_FEATURE_IBRS))
+ rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+
/* Select the proper spectre mitigation before patching alternatives */
spectre_v2_select_mitigation();
+ /*
+ * Select proper mitigation for any exposure to the Speculative Store
+ * Bypass vulnerability.
+ */
+ ssb_select_mitigation();
+
#ifdef CONFIG_X86_32
/*
* Check whether we are able to run this kernel safely on SMP.
@@ -94,7 +130,65 @@ static const char *spectre_v2_strings[] = {
#undef pr_fmt
#define pr_fmt(fmt) "Spectre V2 : " fmt
-static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE;
+static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init =
+ SPECTRE_V2_NONE;
+
+void x86_spec_ctrl_set(u64 val)
+{
+ if (val & x86_spec_ctrl_mask)
+ WARN_ONCE(1, "SPEC_CTRL MSR value 0x%16llx is unknown.\n", val);
+ else
+ wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base | val);
+}
+EXPORT_SYMBOL_GPL(x86_spec_ctrl_set);
+
+u64 x86_spec_ctrl_get_default(void)
+{
+ u64 msrval = x86_spec_ctrl_base;
+
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+ msrval |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags);
+ return msrval;
+}
+EXPORT_SYMBOL_GPL(x86_spec_ctrl_get_default);
+
+void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl)
+{
+ u64 host = x86_spec_ctrl_base;
+
+ if (!boot_cpu_has(X86_FEATURE_IBRS))
+ return;
+
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+ host |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags);
+
+ if (host != guest_spec_ctrl)
+ wrmsrl(MSR_IA32_SPEC_CTRL, guest_spec_ctrl);
+}
+EXPORT_SYMBOL_GPL(x86_spec_ctrl_set_guest);
+
+void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl)
+{
+ u64 host = x86_spec_ctrl_base;
+
+ if (!boot_cpu_has(X86_FEATURE_IBRS))
+ return;
+
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+ host |= ssbd_tif_to_spec_ctrl(current_thread_info()->flags);
+
+ if (host != guest_spec_ctrl)
+ wrmsrl(MSR_IA32_SPEC_CTRL, host);
+}
+EXPORT_SYMBOL_GPL(x86_spec_ctrl_restore_host);
+
+static void x86_amd_ssb_disable(void)
+{
+ u64 msrval = x86_amd_ls_cfg_base | x86_amd_ls_cfg_ssbd_mask;
+
+ if (boot_cpu_has(X86_FEATURE_AMD_SSBD))
+ wrmsrl(MSR_AMD64_LS_CFG, msrval);
+}
#ifdef RETPOLINE
static bool spectre_v2_bad_module;
@@ -330,32 +424,289 @@ retpoline_auto:
}
#undef pr_fmt
+#define pr_fmt(fmt) "Speculative Store Bypass: " fmt
+
+static enum ssb_mitigation ssb_mode __ro_after_init = SPEC_STORE_BYPASS_NONE;
+
+/* The kernel command line selection */
+enum ssb_mitigation_cmd {
+ SPEC_STORE_BYPASS_CMD_NONE,
+ SPEC_STORE_BYPASS_CMD_AUTO,
+ SPEC_STORE_BYPASS_CMD_ON,
+ SPEC_STORE_BYPASS_CMD_PRCTL,
+ SPEC_STORE_BYPASS_CMD_SECCOMP,
+};
+
+static const char *ssb_strings[] = {
+ [SPEC_STORE_BYPASS_NONE] = "Vulnerable",
+ [SPEC_STORE_BYPASS_DISABLE] = "Mitigation: Speculative Store Bypass disabled",
+ [SPEC_STORE_BYPASS_PRCTL] = "Mitigation: Speculative Store Bypass disabled via prctl",
+ [SPEC_STORE_BYPASS_SECCOMP] = "Mitigation: Speculative Store Bypass disabled via prctl and seccomp",
+};
+
+static const struct {
+ const char *option;
+ enum ssb_mitigation_cmd cmd;
+} ssb_mitigation_options[] = {
+ { "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */
+ { "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */
+ { "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */
+ { "prctl", SPEC_STORE_BYPASS_CMD_PRCTL }, /* Disable Speculative Store Bypass via prctl */
+ { "seccomp", SPEC_STORE_BYPASS_CMD_SECCOMP }, /* Disable Speculative Store Bypass via prctl and seccomp */
+};
+
+static enum ssb_mitigation_cmd __init ssb_parse_cmdline(void)
+{
+ enum ssb_mitigation_cmd cmd = SPEC_STORE_BYPASS_CMD_AUTO;
+ char arg[20];
+ int ret, i;
+
+ if (cmdline_find_option_bool(boot_command_line, "nospec_store_bypass_disable")) {
+ return SPEC_STORE_BYPASS_CMD_NONE;
+ } else {
+ ret = cmdline_find_option(boot_command_line, "spec_store_bypass_disable",
+ arg, sizeof(arg));
+ if (ret < 0)
+ return SPEC_STORE_BYPASS_CMD_AUTO;
+
+ for (i = 0; i < ARRAY_SIZE(ssb_mitigation_options); i++) {
+ if (!match_option(arg, ret, ssb_mitigation_options[i].option))
+ continue;
+
+ cmd = ssb_mitigation_options[i].cmd;
+ break;
+ }
+
+ if (i >= ARRAY_SIZE(ssb_mitigation_options)) {
+ pr_err("unknown option (%s). Switching to AUTO select\n", arg);
+ return SPEC_STORE_BYPASS_CMD_AUTO;
+ }
+ }
+
+ return cmd;
+}
+
+static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void)
+{
+ enum ssb_mitigation mode = SPEC_STORE_BYPASS_NONE;
+ enum ssb_mitigation_cmd cmd;
+
+ if (!boot_cpu_has(X86_FEATURE_SSBD))
+ return mode;
+
+ cmd = ssb_parse_cmdline();
+ if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS) &&
+ (cmd == SPEC_STORE_BYPASS_CMD_NONE ||
+ cmd == SPEC_STORE_BYPASS_CMD_AUTO))
+ return mode;
+
+ switch (cmd) {
+ case SPEC_STORE_BYPASS_CMD_AUTO:
+ case SPEC_STORE_BYPASS_CMD_SECCOMP:
+ /*
+ * Choose prctl+seccomp as the default mode if seccomp is
+ * enabled.
+ */
+ if (IS_ENABLED(CONFIG_SECCOMP))
+ mode = SPEC_STORE_BYPASS_SECCOMP;
+ else
+ mode = SPEC_STORE_BYPASS_PRCTL;
+ break;
+ case SPEC_STORE_BYPASS_CMD_ON:
+ mode = SPEC_STORE_BYPASS_DISABLE;
+ break;
+ case SPEC_STORE_BYPASS_CMD_PRCTL:
+ mode = SPEC_STORE_BYPASS_PRCTL;
+ break;
+ case SPEC_STORE_BYPASS_CMD_NONE:
+ break;
+ }
+
+ /*
+ * We have three CPU feature flags that are in play here:
+ * - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible.
+ * - X86_FEATURE_SSBD - CPU is able to turn off speculative store bypass
+ * - X86_FEATURE_SPEC_STORE_BYPASS_DISABLE - engage the mitigation
+ */
+ if (mode == SPEC_STORE_BYPASS_DISABLE) {
+ setup_force_cpu_cap(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE);
+ /*
+ * Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD uses
+ * a completely different MSR and bit dependent on family.
+ */
+ switch (boot_cpu_data.x86_vendor) {
+ case X86_VENDOR_INTEL:
+ x86_spec_ctrl_base |= SPEC_CTRL_SSBD;
+ x86_spec_ctrl_mask &= ~SPEC_CTRL_SSBD;
+ x86_spec_ctrl_set(SPEC_CTRL_SSBD);
+ break;
+ case X86_VENDOR_AMD:
+ x86_amd_ssb_disable();
+ break;
+ }
+ }
+
+ return mode;
+}
+
+static void ssb_select_mitigation()
+{
+ ssb_mode = __ssb_select_mitigation();
+
+ if (boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS))
+ pr_info("%s\n", ssb_strings[ssb_mode]);
+}
+
+#undef pr_fmt
+#define pr_fmt(fmt) "Speculation prctl: " fmt
+
+static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl)
+{
+ bool update;
+
+ if (ssb_mode != SPEC_STORE_BYPASS_PRCTL &&
+ ssb_mode != SPEC_STORE_BYPASS_SECCOMP)
+ return -ENXIO;
+
+ switch (ctrl) {
+ case PR_SPEC_ENABLE:
+ /* If speculation is force disabled, enable is not allowed */
+ if (task_spec_ssb_force_disable(task))
+ return -EPERM;
+ task_clear_spec_ssb_disable(task);
+ update = test_and_clear_tsk_thread_flag(task, TIF_SSBD);
+ break;
+ case PR_SPEC_DISABLE:
+ task_set_spec_ssb_disable(task);
+ update = !test_and_set_tsk_thread_flag(task, TIF_SSBD);
+ break;
+ case PR_SPEC_FORCE_DISABLE:
+ task_set_spec_ssb_disable(task);
+ task_set_spec_ssb_force_disable(task);
+ update = !test_and_set_tsk_thread_flag(task, TIF_SSBD);
+ break;
+ default:
+ return -ERANGE;
+ }
+
+ /*
+ * If being set on non-current task, delay setting the CPU
+ * mitigation until it is next scheduled.
+ */
+ if (task == current && update)
+ speculative_store_bypass_update();
+
+ return 0;
+}
+
+int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which,
+ unsigned long ctrl)
+{
+ switch (which) {
+ case PR_SPEC_STORE_BYPASS:
+ return ssb_prctl_set(task, ctrl);
+ default:
+ return -ENODEV;
+ }
+}
+
+#ifdef CONFIG_SECCOMP
+void arch_seccomp_spec_mitigate(struct task_struct *task)
+{
+ if (ssb_mode == SPEC_STORE_BYPASS_SECCOMP)
+ ssb_prctl_set(task, PR_SPEC_FORCE_DISABLE);
+}
+#endif
+
+static int ssb_prctl_get(struct task_struct *task)
+{
+ switch (ssb_mode) {
+ case SPEC_STORE_BYPASS_DISABLE:
+ return PR_SPEC_DISABLE;
+ case SPEC_STORE_BYPASS_SECCOMP:
+ case SPEC_STORE_BYPASS_PRCTL:
+ if (task_spec_ssb_force_disable(task))
+ return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE;
+ if (task_spec_ssb_disable(task))
+ return PR_SPEC_PRCTL | PR_SPEC_DISABLE;
+ return PR_SPEC_PRCTL | PR_SPEC_ENABLE;
+ default:
+ if (boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS))
+ return PR_SPEC_ENABLE;
+ return PR_SPEC_NOT_AFFECTED;
+ }
+}
+
+int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
+{
+ switch (which) {
+ case PR_SPEC_STORE_BYPASS:
+ return ssb_prctl_get(task);
+ default:
+ return -ENODEV;
+ }
+}
+
+void x86_spec_ctrl_setup_ap(void)
+{
+ if (boot_cpu_has(X86_FEATURE_IBRS))
+ x86_spec_ctrl_set(x86_spec_ctrl_base & ~x86_spec_ctrl_mask);
+
+ if (ssb_mode == SPEC_STORE_BYPASS_DISABLE)
+ x86_amd_ssb_disable();
+}
#ifdef CONFIG_SYSFS
-ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
+
+ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
+ char *buf, unsigned int bug)
{
- if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
+ if (!boot_cpu_has_bug(bug))
return sprintf(buf, "Not affected\n");
- if (boot_cpu_has(X86_FEATURE_PTI))
- return sprintf(buf, "Mitigation: PTI\n");
+
+ switch (bug) {
+ case X86_BUG_CPU_MELTDOWN:
+ if (boot_cpu_has(X86_FEATURE_PTI))
+ return sprintf(buf, "Mitigation: PTI\n");
+
+ break;
+
+ case X86_BUG_SPECTRE_V1:
+ return sprintf(buf, "Mitigation: __user pointer sanitization\n");
+
+ case X86_BUG_SPECTRE_V2:
+ return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
+ boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "",
+ boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
+ spectre_v2_module_string());
+
+ case X86_BUG_SPEC_STORE_BYPASS:
+ return sprintf(buf, "%s\n", ssb_strings[ssb_mode]);
+
+ default:
+ break;
+ }
+
return sprintf(buf, "Vulnerable\n");
}
+ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ return cpu_show_common(dev, attr, buf, X86_BUG_CPU_MELTDOWN);
+}
+
ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf)
{
- if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1))
- return sprintf(buf, "Not affected\n");
- return sprintf(buf, "Mitigation: __user pointer sanitization\n");
+ return cpu_show_common(dev, attr, buf, X86_BUG_SPECTRE_V1);
}
ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf)
{
- if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
- return sprintf(buf, "Not affected\n");
+ return cpu_show_common(dev, attr, buf, X86_BUG_SPECTRE_V2);
+}
- return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
- boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "",
- boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
- spectre_v2_module_string());
+ssize_t cpu_show_spec_store_bypass(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ return cpu_show_common(dev, attr, buf, X86_BUG_SPEC_STORE_BYPASS);
}
#endif
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 5b5fbdd691f0..1ff36cc97f58 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -894,21 +894,55 @@ static const __initconst struct x86_cpu_id cpu_no_meltdown[] = {
{}
};
-static bool __init cpu_vulnerable_to_meltdown(struct cpuinfo_x86 *c)
+static const __initconst struct x86_cpu_id cpu_no_spec_store_bypass[] = {
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PINEVIEW },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_LINCROFT },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PENWELL },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CLOVERVIEW },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CEDARVIEW },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT1 },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT2 },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_MERRIFIELD },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_CORE_YONAH },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNL },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNM },
+ { X86_VENDOR_CENTAUR, 5, },
+ { X86_VENDOR_INTEL, 5, },
+ { X86_VENDOR_NSC, 5, },
+ { X86_VENDOR_AMD, 0x12, },
+ { X86_VENDOR_AMD, 0x11, },
+ { X86_VENDOR_AMD, 0x10, },
+ { X86_VENDOR_AMD, 0xf, },
+ { X86_VENDOR_ANY, 4, },
+ {}
+};
+
+static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
{
u64 ia32_cap = 0;
- if (x86_match_cpu(cpu_no_meltdown))
- return false;
-
if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES))
rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
+ if (!x86_match_cpu(cpu_no_spec_store_bypass) &&
+ !(ia32_cap & ARCH_CAP_SSBD_NO))
+ setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS);
+
+ if (x86_match_cpu(cpu_no_speculation))
+ return;
+
+ setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
+ setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
+
+ if (x86_match_cpu(cpu_no_meltdown))
+ return;
+
/* Rogue Data Cache Load? No! */
if (ia32_cap & ARCH_CAP_RDCL_NO)
- return false;
+ return;
- return true;
+ setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
}
/*
@@ -958,12 +992,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
setup_force_cpu_cap(X86_FEATURE_ALWAYS);
- if (!x86_match_cpu(cpu_no_speculation)) {
- if (cpu_vulnerable_to_meltdown(c))
- setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
- setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
- setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
- }
+ cpu_set_bug_bits(c);
fpu__init_system(c);
}
@@ -1314,6 +1343,7 @@ void identify_secondary_cpu(struct cpuinfo_x86 *c)
#endif
mtrr_ap_init();
validate_apic_and_package_id(c);
+ x86_spec_ctrl_setup_ap();
}
static __init int setup_noclflush(char *arg)
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index 2584265d4745..ffe9faeace81 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -46,4 +46,6 @@ extern const struct cpu_dev *const __x86_cpu_dev_start[],
extern void get_cpu_cap(struct cpuinfo_x86 *c);
extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c);
+extern void x86_spec_ctrl_setup_ap(void);
+
#endif /* ARCH_X86_CPU_H */
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index f515441c3336..c921b647438c 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -188,6 +188,7 @@ static void early_init_intel(struct cpuinfo_x86 *c)
setup_clear_cpu_cap(X86_FEATURE_STIBP);
setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL);
setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP);
+ setup_clear_cpu_cap(X86_FEATURE_SSBD);
}
/*
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index c55cd4a30417..493f421c894e 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -38,6 +38,7 @@
#include <asm/switch_to.h>
#include <asm/desc.h>
#include <asm/prctl.h>
+#include <asm/spec-ctrl.h>
/*
* per-CPU TSS segments. Threads are completely 'soft' on Linux,
@@ -278,6 +279,24 @@ static inline void switch_to_bitmap(struct tss_struct *tss,
}
}
+static __always_inline void __speculative_store_bypass_update(unsigned long tifn)
+{
+ u64 msr;
+
+ if (static_cpu_has(X86_FEATURE_AMD_SSBD)) {
+ msr = x86_amd_ls_cfg_base | ssbd_tif_to_amd_ls_cfg(tifn);
+ wrmsrl(MSR_AMD64_LS_CFG, msr);
+ } else {
+ msr = x86_spec_ctrl_base | ssbd_tif_to_spec_ctrl(tifn);
+ wrmsrl(MSR_IA32_SPEC_CTRL, msr);
+ }
+}
+
+void speculative_store_bypass_update(void)
+{
+ __speculative_store_bypass_update(current_thread_info()->flags);
+}
+
void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
struct tss_struct *tss)
{
@@ -309,6 +328,9 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
if ((tifp ^ tifn) & _TIF_NOCPUID)
set_cpuid_faulting(!!(tifn & _TIF_NOCPUID));
+
+ if ((tifp ^ tifn) & _TIF_SSBD)
+ __speculative_store_bypass_update(tifn);
}
/*
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 21dc5af583b5..cb97f65550c6 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -394,7 +394,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
/* cpuid 7.0.edx*/
const u32 kvm_cpuid_7_0_edx_x86_features =
- F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) |
+ F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) | F(SSBD) |
F(ARCH_CAPABILITIES);
/* all calls to cpuid_count() should be made on the same cpu */
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 0f6cdc28d9ff..d455d728646f 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -50,7 +50,7 @@
#include <asm/kvm_para.h>
#include <asm/irq_remapping.h>
#include <asm/microcode.h>
-#include <asm/nospec-branch.h>
+#include <asm/spec-ctrl.h>
#include <asm/virtext.h>
#include "trace.h"
@@ -5355,8 +5355,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
* is no need to worry about the conditional branch over the wrmsr
* being speculatively taken.
*/
- if (svm->spec_ctrl)
- native_wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
+ x86_spec_ctrl_set_guest(svm->spec_ctrl);
asm volatile (
"push %%" _ASM_BP "; \n\t"
@@ -5468,8 +5467,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
- if (svm->spec_ctrl)
- native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+ x86_spec_ctrl_restore_host(svm->spec_ctrl);
/* Eliminate branch target predictions from guest mode */
vmexit_fill_RSB();
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 924064ba5690..936e8517d39a 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -52,7 +52,7 @@
#include <asm/irq_remapping.h>
#include <asm/mmu_context.h>
#include <asm/microcode.h>
-#include <asm/nospec-branch.h>
+#include <asm/spec-ctrl.h>
#include "trace.h"
#include "pmu.h"
@@ -3297,7 +3297,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_SPEC_CTRL:
if (!msr_info->host_initiated &&
!guest_cpuid_has(vcpu, X86_FEATURE_IBRS) &&
- !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
+ !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_SSBD))
return 1;
msr_info->data = to_vmx(vcpu)->spec_ctrl;
@@ -3418,11 +3419,12 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_SPEC_CTRL:
if (!msr_info->host_initiated &&
!guest_cpuid_has(vcpu, X86_FEATURE_IBRS) &&
- !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
+ !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_SSBD))
return 1;
/* The STIBP bit doesn't fault even if it's not advertised */
- if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP))
+ if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD))
return 1;
vmx->spec_ctrl = data;
@@ -9463,8 +9465,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
* is no need to worry about the conditional branch over the wrmsr
* being speculatively taken.
*/
- if (vmx->spec_ctrl)
- native_wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
+ x86_spec_ctrl_set_guest(vmx->spec_ctrl);
vmx->__launched = vmx->loaded_vmcs->launched;
asm(
@@ -9602,8 +9603,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
- if (vmx->spec_ctrl)
- native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+ x86_spec_ctrl_restore_host(vmx->spec_ctrl);
/* Eliminate branch target predictions from guest mode */
vmexit_fill_RSB();
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index 825964efda1d..433f14bcab15 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -521,14 +521,22 @@ ssize_t __weak cpu_show_spectre_v2(struct device *dev,
return sprintf(buf, "Not affected\n");
}
+ssize_t __weak cpu_show_spec_store_bypass(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "Not affected\n");
+}
+
static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
+static DEVICE_ATTR(spec_store_bypass, 0444, cpu_show_spec_store_bypass, NULL);
static struct attribute *cpu_root_vulnerabilities_attrs[] = {
&dev_attr_meltdown.attr,
&dev_attr_spectre_v1.attr,
&dev_attr_spectre_v2.attr,
+ &dev_attr_spec_store_bypass.attr,
NULL
};
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 9c09192c560d..7db34eb24816 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -84,6 +84,7 @@
#include <linux/delayacct.h>
#include <linux/seq_file.h>
#include <linux/pid_namespace.h>
+#include <linux/prctl.h>
#include <linux/ptrace.h>
#include <linux/tracehook.h>
#include <linux/string_helpers.h>
@@ -351,6 +352,30 @@ static inline void task_seccomp(struct seq_file *m, struct task_struct *p)
#ifdef CONFIG_SECCOMP
seq_put_decimal_ull(m, "\nSeccomp:\t", p->seccomp.mode);
#endif
+ seq_printf(m, "\nSpeculation_Store_Bypass:\t");
+ switch (arch_prctl_spec_ctrl_get(p, PR_SPEC_STORE_BYPASS)) {
+ case -EINVAL:
+ seq_printf(m, "unknown");
+ break;
+ case PR_SPEC_NOT_AFFECTED:
+ seq_printf(m, "not vulnerable");
+ break;
+ case PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE:
+ seq_printf(m, "thread force mitigated");
+ break;
+ case PR_SPEC_PRCTL | PR_SPEC_DISABLE:
+ seq_printf(m, "thread mitigated");
+ break;
+ case PR_SPEC_PRCTL | PR_SPEC_ENABLE:
+ seq_printf(m, "thread vulnerable");
+ break;
+ case PR_SPEC_DISABLE:
+ seq_printf(m, "globally mitigated");
+ break;
+ default:
+ seq_printf(m, "vulnerable");
+ break;
+ }
seq_putc(m, '\n');
}
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 3fe87e7e43a6..3423ace0c431 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -52,6 +52,8 @@ extern ssize_t cpu_show_spectre_v1(struct device *dev,
struct device_attribute *attr, char *buf);
extern ssize_t cpu_show_spectre_v2(struct device *dev,
struct device_attribute *attr, char *buf);
+extern ssize_t cpu_show_spec_store_bypass(struct device *dev,
+ struct device_attribute *attr, char *buf);
extern __printf(4, 5)
struct device *cpu_device_create(struct device *parent, void *drvdata,
diff --git a/include/linux/nospec.h b/include/linux/nospec.h
index fbc98e2c8228..6efc581d6fc7 100644
--- a/include/linux/nospec.h
+++ b/include/linux/nospec.h
@@ -6,6 +6,8 @@
#ifndef _LINUX_NOSPEC_H
#define _LINUX_NOSPEC_H
+struct task_struct;
+
/**
* array_index_mask_nospec() - generate a ~0 mask when index < size, 0 otherwise
* @index: array element index
@@ -75,4 +77,12 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
_i &= _mask; \
_i; \
})
+
+/* Speculation control prctl */
+int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which);
+int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which,
+ unsigned long ctrl);
+/* Speculation control for seccomp enforced mitigation */
+void arch_seccomp_spec_mitigate(struct task_struct *task);
+
#endif /* _LINUX_NOSPEC_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index f65dd77e44da..370f127ef39c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1294,7 +1294,8 @@ extern struct pid *cad_pid;
#define PFA_NO_NEW_PRIVS 0 /* May not gain new privileges. */
#define PFA_SPREAD_PAGE 1 /* Spread page cache over cpuset */
#define PFA_SPREAD_SLAB 2 /* Spread some slab caches over cpuset */
-
+#define PFA_SPEC_SSB_DISABLE 3 /* Speculative Store Bypass disabled */
+#define PFA_SPEC_SSB_FORCE_DISABLE 4 /* Speculative Store Bypass force disabled*/
#define TASK_PFA_TEST(name, func) \
static inline bool task_##func(struct task_struct *p) \
@@ -1319,6 +1320,13 @@ TASK_PFA_TEST(SPREAD_SLAB, spread_slab)
TASK_PFA_SET(SPREAD_SLAB, spread_slab)
TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab)
+TASK_PFA_TEST(SPEC_SSB_DISABLE, spec_ssb_disable)
+TASK_PFA_SET(SPEC_SSB_DISABLE, spec_ssb_disable)
+TASK_PFA_CLEAR(SPEC_SSB_DISABLE, spec_ssb_disable)
+
+TASK_PFA_TEST(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)
+TASK_PFA_SET(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)
+
static inline void
current_restore_flags(unsigned long orig_flags, unsigned long flags)
{
diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
index ecc296c137cd..50c460a956f1 100644
--- a/include/linux/seccomp.h
+++ b/include/linux/seccomp.h
@@ -3,7 +3,8 @@
#include <uapi/linux/seccomp.h>
-#define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC)
+#define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC | \
+ SECCOMP_FILTER_FLAG_SPEC_ALLOW)
#ifdef CONFIG_SECCOMP
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index a8d0759a9e40..64776b72e1eb 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -197,4 +197,16 @@ struct prctl_mm_map {
# define PR_CAP_AMBIENT_LOWER 3
# define PR_CAP_AMBIENT_CLEAR_ALL 4
+/* Per task speculation control */
+#define PR_GET_SPECULATION_CTRL 52
+#define PR_SET_SPECULATION_CTRL 53
+/* Speculation control variants */
+# define PR_SPEC_STORE_BYPASS 0
+/* Return and control values for PR_SET/GET_SPECULATION_CTRL */
+# define PR_SPEC_NOT_AFFECTED 0
+# define PR_SPEC_PRCTL (1UL << 0)
+# define PR_SPEC_ENABLE (1UL << 1)
+# define PR_SPEC_DISABLE (1UL << 2)
+# define PR_SPEC_FORCE_DISABLE (1UL << 3)
+
#endif /* _LINUX_PRCTL_H */
diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
index 0f238a43ff1e..8bdf2882d9aa 100644
--- a/include/uapi/linux/seccomp.h
+++ b/include/uapi/linux/seccomp.h
@@ -15,7 +15,8 @@
#define SECCOMP_SET_MODE_FILTER 1
/* Valid flags for SECCOMP_SET_MODE_FILTER */
-#define SECCOMP_FILTER_FLAG_TSYNC 1
+#define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0)
+#define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2)
/*
* All BPF programs must return a 32-bit value.
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index bd3877f998c7..7210a1c5c443 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -17,6 +17,8 @@
#include <linux/audit.h>
#include <linux/compat.h>
#include <linux/coredump.h>
+#include <linux/nospec.h>
+#include <linux/prctl.h>
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
#include <linux/seccomp.h>
@@ -215,8 +217,11 @@ static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode)
return true;
}
+void __weak arch_seccomp_spec_mitigate(struct task_struct *task) { }
+
static inline void seccomp_assign_mode(struct task_struct *task,
- unsigned long seccomp_mode)
+ unsigned long seccomp_mode,
+ unsigned long flags)
{
assert_spin_locked(&task->sighand->siglock);
@@ -226,6 +231,9 @@ static inline void seccomp_assign_mode(struct task_struct *task,
* filter) is set.
*/
smp_mb__before_atomic();
+ /* Assume default seccomp processes want spec flaw mitigation. */
+ if ((flags & SECCOMP_FILTER_FLAG_SPEC_ALLOW) == 0)
+ arch_seccomp_spec_mitigate(task);
set_tsk_thread_flag(task, TIF_SECCOMP);
}
@@ -293,7 +301,7 @@ static inline pid_t seccomp_can_sync_threads(void)
* without dropping the locks.
*
*/
-static inline void seccomp_sync_threads(void)
+static inline void seccomp_sync_threads(unsigned long flags)
{
struct task_struct *thread, *caller;
@@ -334,7 +342,8 @@ static inline void seccomp_sync_threads(void)
* allow one thread to transition the other.
*/
if (thread->seccomp.mode == SECCOMP_MODE_DISABLED)
- seccomp_assign_mode(thread, SECCOMP_MODE_FILTER);
+ seccomp_assign_mode(thread, SECCOMP_MODE_FILTER,
+ flags);
}
}
@@ -453,7 +462,7 @@ static long seccomp_attach_filter(unsigned int flags,
/* Now that the new filter is in place, synchronize to all threads. */
if (flags & SECCOMP_FILTER_FLAG_TSYNC)
- seccomp_sync_threads();
+ seccomp_sync_threads(flags);
return 0;
}
@@ -728,7 +737,7 @@ static long seccomp_set_mode_strict(void)
#ifdef TIF_NOTSC
disable_TSC();
#endif
- seccomp_assign_mode(current, seccomp_mode);
+ seccomp_assign_mode(current, seccomp_mode, 0);
ret = 0;
out:
@@ -786,7 +795,7 @@ static long seccomp_set_mode_filter(unsigned int flags,
/* Do not free the successfully attached filter. */
prepared = NULL;
- seccomp_assign_mode(current, seccomp_mode);
+ seccomp_assign_mode(current, seccomp_mode, flags);
out:
spin_unlock_irq(&current->sighand->siglock);
if (flags & SECCOMP_FILTER_FLAG_TSYNC)
diff --git a/kernel/sys.c b/kernel/sys.c
index e48f0636c7fd..2e62b625d105 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -60,6 +60,8 @@
#include <linux/uidgid.h>
#include <linux/cred.h>
+#include <linux/nospec.h>
+
#include <linux/kmsg_dump.h>
/* Move somewhere else to avoid recompiling? */
#include <generated/utsrelease.h>
@@ -2093,6 +2095,17 @@ static int propagate_has_child_subreaper(struct task_struct *p, void *data)
return 1;
}
+int __weak arch_prctl_spec_ctrl_get(struct task_struct *t, unsigned long which)
+{
+ return -EINVAL;
+}
+
+int __weak arch_prctl_spec_ctrl_set(struct task_struct *t, unsigned long which,
+ unsigned long ctrl)
+{
+ return -EINVAL;
+}
+
SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
unsigned long, arg4, unsigned long, arg5)
{
@@ -2295,6 +2308,16 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
case PR_GET_FP_MODE:
error = GET_FP_MODE(me);
break;
+ case PR_GET_SPECULATION_CTRL:
+ if (arg3 || arg4 || arg5)
+ return -EINVAL;
+ error = arch_prctl_spec_ctrl_get(me, arg2);
+ break;
+ case PR_SET_SPECULATION_CTRL:
+ if (arg4 || arg5)
+ return -EINVAL;
+ error = arch_prctl_spec_ctrl_set(me, arg2, arg3);
+ break;
default:
error = -EINVAL;
break;
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 03f1fa495d74..cdc074b922e1 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -1684,7 +1684,11 @@ TEST_F_SIGNAL(TRACE_syscall, kill_after_ptrace, SIGSYS)
#endif
#ifndef SECCOMP_FILTER_FLAG_TSYNC
-#define SECCOMP_FILTER_FLAG_TSYNC 1
+#define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0)
+#endif
+
+#ifndef SECCOMP_FILTER_FLAG_SPEC_ALLOW
+#define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2)
#endif
#ifndef seccomp