Home Home > GIT Browse > openSUSE-42.2
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKernel Build Daemon <kbuild@suse.de>2018-01-19 07:12:10 +0100
committerKernel Build Daemon <kbuild@suse.de>2018-01-19 07:12:10 +0100
commitc418a665194d12861219d3925346cb19f6b4dbce (patch)
treedd2a6eee8491bc587d8088803d7f8468b7b2f0c2
parentd4013116da1ac18f9d142fabb91783813c6b8ec4 (diff)
parentd9a9b09d1ac8a747134ef2d237ff4a9afda1d286 (diff)
Merge branch 'SLE12-SP2' into openSUSE-42.2openSUSE-42.2
-rw-r--r--Documentation/ABI/testing/sysfs-devices-system-cpu16
-rw-r--r--Documentation/kernel-parameters.txt49
-rw-r--r--Documentation/x86/pti.txt186
-rw-r--r--Makefile4
-rw-r--r--arch/arm/kvm/mmio.c6
-rw-r--r--arch/mips/kernel/process.c12
-rw-r--r--arch/mips/kernel/ptrace.c147
-rw-r--r--arch/x86/Kconfig14
-rw-r--r--arch/x86/Makefile8
-rw-r--r--arch/x86/crypto/aesni-intel_asm.S5
-rw-r--r--arch/x86/crypto/camellia-aesni-avx-asm_64.S3
-rw-r--r--arch/x86/crypto/camellia-aesni-avx2-asm_64.S3
-rw-r--r--arch/x86/crypto/crc32c-pcl-intel-asm_64.S3
-rw-r--r--arch/x86/entry/entry_32.S10
-rw-r--r--arch/x86/entry/entry_64.S14
-rw-r--r--arch/x86/include/asm/alternative.h8
-rw-r--r--arch/x86/include/asm/asm-prototypes.h41
-rw-r--r--arch/x86/include/asm/asm.h11
-rw-r--r--arch/x86/include/asm/cpufeature.h7
-rw-r--r--arch/x86/include/asm/kaiser.h10
-rw-r--r--arch/x86/include/asm/nospec-branch.h190
-rw-r--r--arch/x86/include/asm/processor.h4
-rw-r--r--arch/x86/include/asm/proto.h1
-rw-r--r--arch/x86/include/asm/spec_ctrl.h2
-rw-r--r--arch/x86/include/asm/thread_info.h11
-rw-r--r--arch/x86/include/asm/xen/hypercall.h5
-rw-r--r--arch/x86/kernel/acpi/boot.c1
-rw-r--r--arch/x86/kernel/alternative.c7
-rw-r--r--arch/x86/kernel/cpu/Makefile4
-rw-r--r--arch/x86/kernel/cpu/bugs.c213
-rw-r--r--arch/x86/kernel/cpu/bugs_64.c33
-rw-r--r--arch/x86/kernel/cpu/common.c33
-rw-r--r--arch/x86/kernel/cpu/microcode/intel.c14
-rw-r--r--arch/x86/kernel/cpu/spec_ctrl.c13
-rw-r--r--arch/x86/kernel/irq_32.c15
-rw-r--r--arch/x86/kernel/mcount_64.S7
-rw-r--r--arch/x86/kernel/traps.c2
-rw-r--r--arch/x86/kvm/svm.c9
-rw-r--r--arch/x86/kvm/vmx.c31
-rw-r--r--arch/x86/kvm/x86.c8
-rw-r--r--arch/x86/lib/Makefile1
-rw-r--r--arch/x86/lib/checksum_32.S7
-rw-r--r--arch/x86/lib/retpoline.S48
-rw-r--r--arch/x86/mm/kaiser.c2
-rw-r--r--arch/x86/realmode/init.c4
-rw-r--r--arch/x86/realmode/rm/trampoline_64.S3
-rw-r--r--crypto/algapi.c12
-rw-r--r--drivers/base/Kconfig3
-rw-r--r--drivers/base/cpu.c48
-rw-r--r--drivers/block/rbd.c2
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c2
-rw-r--r--drivers/hv/hv.c11
-rw-r--r--drivers/infiniband/ulp/srpt/ib_srpt.c3
-rw-r--r--drivers/iommu/arm-smmu-v3.c8
-rw-r--r--drivers/md/dm-bufio.c7
-rw-r--r--drivers/net/can/usb/gs_usb.c2
-rw-r--r--drivers/net/ethernet/renesas/sh_eth.c29
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_main.c6
-rw-r--r--drivers/staging/android/ashmem.c2
-rw-r--r--drivers/target/iscsi/iscsi_target.c20
-rw-r--r--drivers/target/target_core_tmr.c9
-rw-r--r--drivers/target/target_core_transport.c2
-rw-r--r--drivers/usb/host/xhci-mem.c3
-rw-r--r--drivers/usb/misc/usb3503.c2
-rw-r--r--drivers/usb/mon/mon_bin.c8
-rw-r--r--drivers/usb/serial/cp210x.c2
-rw-r--r--drivers/usb/storage/unusual_uas.h7
-rw-r--r--drivers/usb/usbip/usbip_common.c17
-rw-r--r--fs/btrfs/extent-tree.c2
-rw-r--r--fs/dax.c23
-rw-r--r--fs/ext4/file.c9
-rw-r--r--include/asm-generic/asm-prototypes.h13
-rw-r--r--include/asm-generic/export.h94
-rw-r--r--include/linux/cpu.h7
-rw-r--r--include/linux/dax.h1
-rw-r--r--include/linux/kconfig.h11
-rw-r--r--include/linux/phy.h11
-rw-r--r--include/linux/sh_eth.h1
-rw-r--r--include/target/target_core_base.h1
-rw-r--r--include/trace/events/kvm.h7
-rw-r--r--net/8021q/vlan.c7
-rw-r--r--net/core/sock_diag.c2
-rw-r--r--net/ipv6/ip6_output.c5
-rw-r--r--net/ipv6/ip6_tunnel.c9
-rw-r--r--net/mac80211/debugfs.c7
-rw-r--r--tools/testing/selftests/vm/Makefile4
-rw-r--r--tools/testing/selftests/x86/Makefile3
-rw-r--r--tools/testing/selftests/x86/test_vsyscall.c500
88 files changed, 1901 insertions, 246 deletions
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index b683e8ee69ec..ea6a043f5beb 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -271,3 +271,19 @@ Description: Parameters for the CPU cache attributes
- WriteBack: data is written only to the cache line and
the modified cache line is written to main
memory only when it is replaced
+
+What: /sys/devices/system/cpu/vulnerabilities
+ /sys/devices/system/cpu/vulnerabilities/meltdown
+ /sys/devices/system/cpu/vulnerabilities/spectre_v1
+ /sys/devices/system/cpu/vulnerabilities/spectre_v2
+Date: January 2018
+Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
+Description: Information about CPU vulnerabilities
+
+ The files are named after the code names of CPU
+ vulnerabilities. The output of those files reflects the
+ state of the CPUs in the system. Possible output values:
+
+ "Not affected" CPU is not affected by the vulnerability
+ "Vulnerable" CPU is affected and no mitigation in effect
+ "Mitigation: $M" CPU is affected and mitigation $M is in effect
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index cf0fa0d58cb0..c2d4d7787ccd 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2496,6 +2496,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
nohugeiomap [KNL,x86] Disable kernel huge I/O mappings.
+ nospectre_v2 [X86] Disable all mitigations for the Spectre variant 2
+ (indirect branch prediction) vulnerability. System may
+ allow data leaks with this option, which is equivalent
+ to spectre_v2=off.
+
noxsave [BUGS=X86] Disables x86 extended register state save
and restore using xsave. The kernel will fallback to
enabling legacy floating-point and sse state.
@@ -2567,8 +2572,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
nojitter [IA-64] Disables jitter checking for ITC timers.
- nopti [X86-64] Disable KAISER isolation of kernel from user.
-
no-kvmclock [X86,KVM] Disable paravirtualized KVM clock driver
no-kvmapf [X86,KVM] Disable paravirtualized asynchronous page
@@ -3124,11 +3127,20 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
pt. [PARIDE]
See Documentation/blockdev/paride.txt.
- pti= [X86_64]
- Control KAISER user/kernel address space isolation:
- on - enable
- off - disable
- auto - default setting
+ pti= [X86_64] Control Page Table Isolation of user and
+ kernel address spaces. Disabling this feature
+ removes hardening, but improves performance of
+ system calls and interrupts.
+
+ on - unconditionally enable
+ off - unconditionally disable
+ auto - kernel detects whether your CPU model is
+ vulnerable to issues that PTI mitigates
+
+ Not specifying this option is equivalent to pti=auto.
+
+ nopti [X86_64]
+ Equivalent to pti=off
pty.legacy_count=
[KNL] Number of legacy pty's. Overwrites compiled-in
@@ -3669,6 +3681,29 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
sonypi.*= [HW] Sony Programmable I/O Control Device driver
See Documentation/laptops/sonypi.txt
+ spectre_v2= [X86] Control mitigation of Spectre variant 2
+ (indirect branch speculation) vulnerability.
+
+ on - unconditionally enable
+ off - unconditionally disable
+ auto - kernel detects whether your CPU model is
+ vulnerable
+
+ Selecting 'on' will, and 'auto' may, choose a
+ mitigation method at run time according to the
+ CPU, the available microcode, the setting of the
+ CONFIG_RETPOLINE configuration option, and the
+ compiler with which the kernel was built.
+
+ Specific mitigations can also be selected manually:
+
+ retpoline - replace indirect branches
+ retpoline,generic - google's original retpoline
+ retpoline,amd - AMD-specific minimal thunk
+
+ Not specifying this option is equivalent to
+ spectre_v2=auto.
+
spia_io_base= [HW,MTD]
spia_fio_base=
spia_pedr=
diff --git a/Documentation/x86/pti.txt b/Documentation/x86/pti.txt
new file mode 100644
index 000000000000..d11eff61fc9a
--- /dev/null
+++ b/Documentation/x86/pti.txt
@@ -0,0 +1,186 @@
+Overview
+========
+
+Page Table Isolation (pti, previously known as KAISER[1]) is a
+countermeasure against attacks on the shared user/kernel address
+space such as the "Meltdown" approach[2].
+
+To mitigate this class of attacks, we create an independent set of
+page tables for use only when running userspace applications. When
+the kernel is entered via syscalls, interrupts or exceptions, the
+page tables are switched to the full "kernel" copy. When the system
+switches back to user mode, the user copy is used again.
+
+The userspace page tables contain only a minimal amount of kernel
+data: only what is needed to enter/exit the kernel such as the
+entry/exit functions themselves and the interrupt descriptor table
+(IDT). There are a few strictly unnecessary things that get mapped
+such as the first C function when entering an interrupt (see
+comments in pti.c).
+
+This approach helps to ensure that side-channel attacks leveraging
+the paging structures do not function when PTI is enabled. It can be
+enabled by setting CONFIG_PAGE_TABLE_ISOLATION=y at compile time.
+Once enabled at compile-time, it can be disabled at boot with the
+'nopti' or 'pti=' kernel parameters (see kernel-parameters.txt).
+
+Page Table Management
+=====================
+
+When PTI is enabled, the kernel manages two sets of page tables.
+The first set is very similar to the single set which is present in
+kernels without PTI. This includes a complete mapping of userspace
+that the kernel can use for things like copy_to_user().
+
+Although _complete_, the user portion of the kernel page tables is
+crippled by setting the NX bit in the top level. This ensures
+that any missed kernel->user CR3 switch will immediately crash
+userspace upon executing its first instruction.
+
+The userspace page tables map only the kernel data needed to enter
+and exit the kernel. This data is entirely contained in the 'struct
+cpu_entry_area' structure which is placed in the fixmap which gives
+each CPU's copy of the area a compile-time-fixed virtual address.
+
+For new userspace mappings, the kernel makes the entries in its
+page tables like normal. The only difference is when the kernel
+makes entries in the top (PGD) level. In addition to setting the
+entry in the main kernel PGD, a copy of the entry is made in the
+userspace page tables' PGD.
+
+This sharing at the PGD level also inherently shares all the lower
+layers of the page tables. This leaves a single, shared set of
+userspace page tables to manage. One PTE to lock, one set of
+accessed bits, dirty bits, etc...
+
+Overhead
+========
+
+Protection against side-channel attacks is important. But,
+this protection comes at a cost:
+
+1. Increased Memory Use
+ a. Each process now needs an order-1 PGD instead of order-0.
+ (Consumes an additional 4k per process).
+ b. The 'cpu_entry_area' structure must be 2MB in size and 2MB
+ aligned so that it can be mapped by setting a single PMD
+ entry. This consumes nearly 2MB of RAM once the kernel
+ is decompressed, but no space in the kernel image itself.
+
+2. Runtime Cost
+ a. CR3 manipulation to switch between the page table copies
+ must be done at interrupt, syscall, and exception entry
+ and exit (it can be skipped when the kernel is interrupted,
+ though.) Moves to CR3 are on the order of a hundred
+ cycles, and are required at every entry and exit.
+ b. A "trampoline" must be used for SYSCALL entry. This
+ trampoline depends on a smaller set of resources than the
+ non-PTI SYSCALL entry code, so requires mapping fewer
+ things into the userspace page tables. The downside is
+ that stacks must be switched at entry time.
+ d. Global pages are disabled for all kernel structures not
+ mapped into both kernel and userspace page tables. This
+ feature of the MMU allows different processes to share TLB
+ entries mapping the kernel. Losing the feature means more
+ TLB misses after a context switch. The actual loss of
+ performance is very small, however, never exceeding 1%.
+ d. Process Context IDentifiers (PCID) is a CPU feature that
+ allows us to skip flushing the entire TLB when switching page
+ tables by setting a special bit in CR3 when the page tables
+ are changed. This makes switching the page tables (at context
+ switch, or kernel entry/exit) cheaper. But, on systems with
+ PCID support, the context switch code must flush both the user
+ and kernel entries out of the TLB. The user PCID TLB flush is
+ deferred until the exit to userspace, minimizing the cost.
+ See intel.com/sdm for the gory PCID/INVPCID details.
+ e. The userspace page tables must be populated for each new
+ process. Even without PTI, the shared kernel mappings
+ are created by copying top-level (PGD) entries into each
+ new process. But, with PTI, there are now *two* kernel
+ mappings: one in the kernel page tables that maps everything
+ and one for the entry/exit structures. At fork(), we need to
+ copy both.
+ f. In addition to the fork()-time copying, there must also
+ be an update to the userspace PGD any time a set_pgd() is done
+ on a PGD used to map userspace. This ensures that the kernel
+ and userspace copies always map the same userspace
+ memory.
+ g. On systems without PCID support, each CR3 write flushes
+ the entire TLB. That means that each syscall, interrupt
+ or exception flushes the TLB.
+ h. INVPCID is a TLB-flushing instruction which allows flushing
+ of TLB entries for non-current PCIDs. Some systems support
+ PCIDs, but do not support INVPCID. On these systems, addresses
+ can only be flushed from the TLB for the current PCID. When
+ flushing a kernel address, we need to flush all PCIDs, so a
+ single kernel address flush will require a TLB-flushing CR3
+ write upon the next use of every PCID.
+
+Possible Future Work
+====================
+1. We can be more careful about not actually writing to CR3
+ unless its value is actually changed.
+2. Allow PTI to be enabled/disabled at runtime in addition to the
+ boot-time switching.
+
+Testing
+========
+
+To test stability of PTI, the following test procedure is recommended,
+ideally doing all of these in parallel:
+
+1. Set CONFIG_DEBUG_ENTRY=y
+2. Run several copies of all of the tools/testing/selftests/x86/ tests
+ (excluding MPX and protection_keys) in a loop on multiple CPUs for
+ several minutes. These tests frequently uncover corner cases in the
+ kernel entry code. In general, old kernels might cause these tests
+ themselves to crash, but they should never crash the kernel.
+3. Run the 'perf' tool in a mode (top or record) that generates many
+ frequent performance monitoring non-maskable interrupts (see "NMI"
+ in /proc/interrupts). This exercises the NMI entry/exit code which
+ is known to trigger bugs in code paths that did not expect to be
+ interrupted, including nested NMIs. Using "-c" boosts the rate of
+ NMIs, and using two -c with separate counters encourages nested NMIs
+ and less deterministic behavior.
+
+ while true; do perf record -c 10000 -e instructions,cycles -a sleep 10; done
+
+4. Launch a KVM virtual machine.
+5. Run 32-bit binaries on systems supporting the SYSCALL instruction.
+ This has been a lightly-tested code path and needs extra scrutiny.
+
+Debugging
+=========
+
+Bugs in PTI cause a few different signatures of crashes
+that are worth noting here.
+
+ * Failures of the selftests/x86 code. Usually a bug in one of the
+ more obscure corners of entry_64.S
+ * Crashes in early boot, especially around CPU bringup. Bugs
+ in the trampoline code or mappings cause these.
+ * Crashes at the first interrupt. Caused by bugs in entry_64.S,
+ like screwing up a page table switch. Also caused by
+ incorrectly mapping the IRQ handler entry code.
+ * Crashes at the first NMI. The NMI code is separate from main
+ interrupt handlers and can have bugs that do not affect
+ normal interrupts. Also caused by incorrectly mapping NMI
+ code. NMIs that interrupt the entry code must be very
+ careful and can be the cause of crashes that show up when
+ running perf.
+ * Kernel crashes at the first exit to userspace. entry_64.S
+ bugs, or failing to map some of the exit code.
+ * Crashes at first interrupt that interrupts userspace. The paths
+ in entry_64.S that return to userspace are sometimes separate
+ from the ones that return to the kernel.
+ * Double faults: overflowing the kernel stack because of page
+ faults upon page faults. Caused by touching non-pti-mapped
+ data in the entry code, or forgetting to switch to kernel
+ CR3 before calling into C functions which are not pti-mapped.
+ * Userspace segfaults early in boot, sometimes manifesting
+ as mount(8) failing to mount the rootfs. These have
+ tended to be TLB invalidation issues. Usually invalidating
+ the wrong PCID, or otherwise missing an invalidation.
+
+1. https://gruss.cc/files/kaiser.pdf
+2. https://meltdownattack.com/meltdown.pdf
diff --git a/Makefile b/Makefile
index aff2ca315f6e..aabb2a770024 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
VERSION = 4
PATCHLEVEL = 4
-SUBLEVEL = 111
-EXTRAVERSION =
+SUBLEVEL = 112
+EXTRAVERSION = -rc2
NAME = Blurry Fish Butt
# *DOCUMENTATION*
diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c
index 3a10c9f1d0a4..387ee2a11e36 100644
--- a/arch/arm/kvm/mmio.c
+++ b/arch/arm/kvm/mmio.c
@@ -113,7 +113,7 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
}
trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr,
- data);
+ &data);
data = vcpu_data_host_to_guest(vcpu, data, len);
vcpu_set_reg(vcpu, vcpu->arch.mmio_decode.rt, data);
}
@@ -189,14 +189,14 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
data = vcpu_data_guest_to_host(vcpu, vcpu_get_reg(vcpu, rt),
len);
- trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, data);
+ trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, &data);
mmio_write_buf(data_buf, len, data);
ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, fault_ipa, len,
data_buf);
} else {
trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, len,
- fault_ipa, 0);
+ fault_ipa, NULL);
ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, fault_ipa, len,
data_buf);
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index b6cebe8308ef..1db3c42ebd2a 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -660,6 +660,18 @@ int mips_set_process_fp_mode(struct task_struct *task, unsigned int value)
unsigned long switch_count;
struct task_struct *t;
+ /* If nothing to change, return right away, successfully. */
+ if (value == mips_get_process_fp_mode(task))
+ return 0;
+
+ /* Only accept a mode change if 64-bit FP enabled for o32. */
+ if (!IS_ENABLED(CONFIG_MIPS_O32_FP64_SUPPORT))
+ return -EOPNOTSUPP;
+
+ /* And only for o32 tasks. */
+ if (IS_ENABLED(CONFIG_64BIT) && !test_thread_flag(TIF_32BIT_REGS))
+ return -EOPNOTSUPP;
+
/* Check the value is valid */
if (value & ~known_bits)
return -EOPNOTSUPP;
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index a3f38e6b7ea1..c3d2d2c05fdb 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -439,63 +439,160 @@ static int gpr64_set(struct task_struct *target,
#endif /* CONFIG_64BIT */
+/*
+ * Copy the floating-point context to the supplied NT_PRFPREG buffer,
+ * !CONFIG_CPU_HAS_MSA variant. FP context's general register slots
+ * correspond 1:1 to buffer slots. Only general registers are copied.
+ */
+static int fpr_get_fpa(struct task_struct *target,
+ unsigned int *pos, unsigned int *count,
+ void **kbuf, void __user **ubuf)
+{
+ return user_regset_copyout(pos, count, kbuf, ubuf,
+ &target->thread.fpu,
+ 0, NUM_FPU_REGS * sizeof(elf_fpreg_t));
+}
+
+/*
+ * Copy the floating-point context to the supplied NT_PRFPREG buffer,
+ * CONFIG_CPU_HAS_MSA variant. Only lower 64 bits of FP context's
+ * general register slots are copied to buffer slots. Only general
+ * registers are copied.
+ */
+static int fpr_get_msa(struct task_struct *target,
+ unsigned int *pos, unsigned int *count,
+ void **kbuf, void __user **ubuf)
+{
+ unsigned int i;
+ u64 fpr_val;
+ int err;
+
+ BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t));
+ for (i = 0; i < NUM_FPU_REGS; i++) {
+ fpr_val = get_fpr64(&target->thread.fpu.fpr[i], 0);
+ err = user_regset_copyout(pos, count, kbuf, ubuf,
+ &fpr_val, i * sizeof(elf_fpreg_t),
+ (i + 1) * sizeof(elf_fpreg_t));
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+/*
+ * Copy the floating-point context to the supplied NT_PRFPREG buffer.
+ * Choose the appropriate helper for general registers, and then copy
+ * the FCSR register separately.
+ */
static int fpr_get(struct task_struct *target,
const struct user_regset *regset,
unsigned int pos, unsigned int count,
void *kbuf, void __user *ubuf)
{
- unsigned i;
+ const int fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t);
int err;
- u64 fpr_val;
- /* XXX fcr31 */
+ if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t))
+ err = fpr_get_fpa(target, &pos, &count, &kbuf, &ubuf);
+ else
+ err = fpr_get_msa(target, &pos, &count, &kbuf, &ubuf);
+ if (err)
+ return err;
- if (sizeof(target->thread.fpu.fpr[i]) == sizeof(elf_fpreg_t))
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.fpu,
- 0, sizeof(elf_fpregset_t));
+ err = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.fpu.fcr31,
+ fcr31_pos, fcr31_pos + sizeof(u32));
- for (i = 0; i < NUM_FPU_REGS; i++) {
- fpr_val = get_fpr64(&target->thread.fpu.fpr[i], 0);
- err = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &fpr_val, i * sizeof(elf_fpreg_t),
- (i + 1) * sizeof(elf_fpreg_t));
+ return err;
+}
+
+/*
+ * Copy the supplied NT_PRFPREG buffer to the floating-point context,
+ * !CONFIG_CPU_HAS_MSA variant. Buffer slots correspond 1:1 to FP
+ * context's general register slots. Only general registers are copied.
+ */
+static int fpr_set_fpa(struct task_struct *target,
+ unsigned int *pos, unsigned int *count,
+ const void **kbuf, const void __user **ubuf)
+{
+ return user_regset_copyin(pos, count, kbuf, ubuf,
+ &target->thread.fpu,
+ 0, NUM_FPU_REGS * sizeof(elf_fpreg_t));
+}
+
+/*
+ * Copy the supplied NT_PRFPREG buffer to the floating-point context,
+ * CONFIG_CPU_HAS_MSA variant. Buffer slots are copied to lower 64
+ * bits only of FP context's general register slots. Only general
+ * registers are copied.
+ */
+static int fpr_set_msa(struct task_struct *target,
+ unsigned int *pos, unsigned int *count,
+ const void **kbuf, const void __user **ubuf)
+{
+ unsigned int i;
+ u64 fpr_val;
+ int err;
+
+ BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t));
+ for (i = 0; i < NUM_FPU_REGS && *count > 0; i++) {
+ err = user_regset_copyin(pos, count, kbuf, ubuf,
+ &fpr_val, i * sizeof(elf_fpreg_t),
+ (i + 1) * sizeof(elf_fpreg_t));
if (err)
return err;
+ set_fpr64(&target->thread.fpu.fpr[i], 0, fpr_val);
}
return 0;
}
+/*
+ * Copy the supplied NT_PRFPREG buffer to the floating-point context.
+ * Choose the appropriate helper for general registers, and then copy
+ * the FCSR register separately.
+ *
+ * We optimize for the case where `count % sizeof(elf_fpreg_t) == 0',
+ * which is supposed to have been guaranteed by the kernel before
+ * calling us, e.g. in `ptrace_regset'. We enforce that requirement,
+ * so that we can safely avoid preinitializing temporaries for
+ * partial register writes.
+ */
static int fpr_set(struct task_struct *target,
const struct user_regset *regset,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
- unsigned i;
+ const int fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t);
+ u32 fcr31;
int err;
- u64 fpr_val;
- /* XXX fcr31 */
+ BUG_ON(count % sizeof(elf_fpreg_t));
+
+ if (pos + count > sizeof(elf_fpregset_t))
+ return -EIO;
init_fp_ctx(target);
- if (sizeof(target->thread.fpu.fpr[i]) == sizeof(elf_fpreg_t))
- return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.fpu,
- 0, sizeof(elf_fpregset_t));
+ if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t))
+ err = fpr_set_fpa(target, &pos, &count, &kbuf, &ubuf);
+ else
+ err = fpr_set_msa(target, &pos, &count, &kbuf, &ubuf);
+ if (err)
+ return err;
- BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t));
- for (i = 0; i < NUM_FPU_REGS && count >= sizeof(elf_fpreg_t); i++) {
+ if (count > 0) {
err = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &fpr_val, i * sizeof(elf_fpreg_t),
- (i + 1) * sizeof(elf_fpreg_t));
+ &fcr31,
+ fcr31_pos, fcr31_pos + sizeof(u32));
if (err)
return err;
- set_fpr64(&target->thread.fpu.fpr[i], 0, fpr_val);
+
+ ptrace_setfcr31(target, fcr31);
}
- return 0;
+ return err;
}
enum mips_regset {
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index c0a9e0d6bddd..9c723d40a1cf 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -62,6 +62,7 @@ config X86
select GENERIC_CLOCKEVENTS_MIN_ADJUST
select GENERIC_CMOS_UPDATE
select GENERIC_CPU_AUTOPROBE
+ select GENERIC_CPU_VULNERABILITIES
select GENERIC_EARLY_IOREMAP
select GENERIC_FIND_FIRST_BIT
select GENERIC_IOMAP
@@ -382,6 +383,19 @@ config GOLDFISH
def_bool y
depends on X86_GOLDFISH
+config RETPOLINE
+ bool "Avoid speculative indirect branches in kernel"
+ default y
+ ---help---
+ Compile kernel with the retpoline compiler options to guard against
+ kernel-to-user data leaks by avoiding speculative indirect
+ branches. Requires a compiler with -mindirect-branch=thunk-extern
+ support for full protection. The kernel may run slower.
+
+ Without compiler support, at least indirect branches in assembler
+ code are eliminated. Since this includes the syscall entry path,
+ it is not entirely pointless.
+
if X86_32
config X86_EXTENDED_PLATFORM
bool "Support for extended (non-PC) x86 platforms"
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 78310f1842fb..943580da1fbe 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -191,6 +191,14 @@ endif
KBUILD_CFLAGS += $(mflags-y)
KBUILD_AFLAGS += $(mflags-y)
+# Avoid indirect branches in kernel to deal with Spectre
+ifdef CONFIG_RETPOLINE
+ RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register)
+ ifneq ($(RETPOLINE_CFLAGS),)
+ KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE
+ endif
+endif
+
archscripts: scripts_basic
$(Q)$(MAKE) $(build)=arch/x86/tools relocs
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index 6bd2c6c95373..3f93dedb5a4d 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -31,6 +31,7 @@
#include <linux/linkage.h>
#include <asm/inst.h>
+#include <asm/nospec-branch.h>
/*
* The following macros are used to move an (un)aligned 16 byte value to/from
@@ -2714,7 +2715,7 @@ ENTRY(aesni_xts_crypt8)
pxor INC, STATE4
movdqu IV, 0x30(OUTP)
- call *%r11
+ CALL_NOSPEC %r11
movdqu 0x00(OUTP), INC
pxor INC, STATE1
@@ -2759,7 +2760,7 @@ ENTRY(aesni_xts_crypt8)
_aesni_gf128mul_x_ble()
movups IV, (IVP)
- call *%r11
+ CALL_NOSPEC %r11
movdqu 0x40(OUTP), INC
pxor INC, STATE1
diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
index ce71f9212409..5881756f78a2 100644
--- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
@@ -16,6 +16,7 @@
*/
#include <linux/linkage.h>
+#include <asm/nospec-branch.h>
#define CAMELLIA_TABLE_BYTE_LEN 272
@@ -1210,7 +1211,7 @@ camellia_xts_crypt_16way:
vpxor 14 * 16(%rax), %xmm15, %xmm14;
vpxor 15 * 16(%rax), %xmm15, %xmm15;
- call *%r9;
+ CALL_NOSPEC %r9;
addq $(16 * 16), %rsp;
diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
index 0e0b8863a34b..0d45b04b490a 100644
--- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
@@ -11,6 +11,7 @@
*/
#include <linux/linkage.h>
+#include <asm/nospec-branch.h>
#define CAMELLIA_TABLE_BYTE_LEN 272
@@ -1323,7 +1324,7 @@ camellia_xts_crypt_32way:
vpxor 14 * 32(%rax), %ymm15, %ymm14;
vpxor 15 * 32(%rax), %ymm15, %ymm15;
- call *%r9;
+ CALL_NOSPEC %r9;
addq $(16 * 32), %rsp;
diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
index 4fe27e074194..48767520cbe0 100644
--- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
+++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
@@ -45,6 +45,7 @@
#include <asm/inst.h>
#include <linux/linkage.h>
+#include <asm/nospec-branch.h>
## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction
@@ -172,7 +173,7 @@ continue_block:
movzxw (bufp, %rax, 2), len
offset=crc_array-jump_table
lea offset(bufp, len, 1), bufp
- jmp *bufp
+ JMP_NOSPEC bufp
################################################################
## 2a) PROCESS FULL BLOCKS:
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 46c9b152d953..7d1585642660 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -44,6 +44,7 @@
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/smap.h>
+#include <asm/nospec-branch.h>
.section .entry.text, "ax"
@@ -259,7 +260,7 @@ ENTRY(ret_from_kernel_thread)
pushl $0x0202 # Reset kernel eflags
popfl
movl PT_EBP(%esp), %eax
- call *PT_EBX(%esp)
+ CALL_NOSPEC PT_EBX(%esp)
movl $0, PT_EAX(%esp)
/*
@@ -894,7 +895,8 @@ trace:
movl 0x4(%ebp), %edx
subl $MCOUNT_INSN_SIZE, %eax
- call *ftrace_trace_function
+ movl ftrace_trace_function, %ecx
+ CALL_NOSPEC %ecx
popl %edx
popl %ecx
@@ -929,7 +931,7 @@ return_to_handler:
movl %eax, %ecx
popl %edx
popl %eax
- jmp *%ecx
+ JMP_NOSPEC %ecx
#endif
#ifdef CONFIG_TRACING
@@ -971,7 +973,7 @@ error_code:
movl %ecx, %es
TRACE_IRQS_OFF
movl %esp, %eax # pt_regs pointer
- call *%edi
+ CALL_NOSPEC %edi
jmp ret_from_exception
END(page_fault)
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 9bba7789333e..d5fbef331fd4 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -36,6 +36,7 @@
#include <asm/smap.h>
#include <asm/pgtable_types.h>
#include <asm/kaiser.h>
+#include <asm/nospec-branch.h>
#include <asm/spec_ctrl.h>
#include <linux/err.h>
@@ -188,7 +189,13 @@ entry_SYSCALL_64_fastpath:
#endif
ja 1f /* return -ENOSYS (already in pt_regs->ax) */
movq %r10, %rcx
+#ifdef CONFIG_RETPOLINE
+ movq sys_call_table(, %rax, 8), %rax
+ call __x86_indirect_thunk_rax
+#else
call *sys_call_table(, %rax, 8)
+#endif
+
movq %rax, RAX(%rsp)
1:
/*
@@ -283,7 +290,12 @@ tracesys_phase2:
#endif
ja 1f /* return -ENOSYS (already in pt_regs->ax) */
movq %r10, %rcx /* fixup for C */
+#ifdef CONFIG_RETPOLINE
+ movq sys_call_table(, %rax, 8), %rax
+ call __x86_indirect_thunk_rax
+#else
call *sys_call_table(, %rax, 8)
+#endif
movq %rax, RAX(%rsp)
1:
/* Use IRET because user could have changed pt_regs->foo */
@@ -502,7 +514,7 @@ ENTRY(ret_from_fork)
* nb: we depend on RESTORE_EXTRA_REGS above
*/
movq %rbp, %rdi
- call *%rbx
+ CALL_NOSPEC %rbx
movl $0, RAX(%rsp)
RESTORE_EXTRA_REGS
jmp int_ret_from_sys_call
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 09936e9c8154..215ea9214215 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -1,6 +1,8 @@
#ifndef _ASM_X86_ALTERNATIVE_H
#define _ASM_X86_ALTERNATIVE_H
+#ifndef __ASSEMBLY__
+
#include <linux/types.h>
#include <linux/stddef.h>
#include <linux/stringify.h>
@@ -138,7 +140,7 @@ static inline int alternatives_text_reserved(void *start, void *end)
".popsection\n" \
".pushsection .altinstr_replacement, \"ax\"\n" \
ALTINSTR_REPLACEMENT(newinstr, feature, 1) \
- ".popsection"
+ ".popsection\n"
#define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\
OLDINSTR_2(oldinstr, 1, 2) \
@@ -149,7 +151,7 @@ static inline int alternatives_text_reserved(void *start, void *end)
".pushsection .altinstr_replacement, \"ax\"\n" \
ALTINSTR_REPLACEMENT(newinstr1, feature1, 1) \
ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \
- ".popsection"
+ ".popsection\n"
/*
* This must be included *after* the definition of ALTERNATIVE due to
@@ -271,4 +273,6 @@ extern void *text_poke(void *addr, const void *opcode, size_t len);
extern int poke_int3_handler(struct pt_regs *regs);
extern void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler);
+#endif /* __ASSEMBLY__ */
+
#endif /* _ASM_X86_ALTERNATIVE_H */
diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
new file mode 100644
index 000000000000..b15aa4083dfd
--- /dev/null
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -0,0 +1,41 @@
+#include <asm/ftrace.h>
+#include <asm/uaccess.h>
+#include <asm/string.h>
+#include <asm/page.h>
+#include <asm/checksum.h>
+
+#include <asm-generic/asm-prototypes.h>
+
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/special_insns.h>
+#include <asm/preempt.h>
+#include <asm/asm.h>
+
+#ifndef CONFIG_X86_CMPXCHG64
+extern void cmpxchg8b_emu(void);
+#endif
+
+#ifdef CONFIG_RETPOLINE
+#ifdef CONFIG_X86_32
+#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_e ## reg(void);
+#else
+#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_r ## reg(void);
+INDIRECT_THUNK(8)
+INDIRECT_THUNK(9)
+INDIRECT_THUNK(10)
+INDIRECT_THUNK(11)
+INDIRECT_THUNK(12)
+INDIRECT_THUNK(13)
+INDIRECT_THUNK(14)
+INDIRECT_THUNK(15)
+#endif
+INDIRECT_THUNK(ax)
+INDIRECT_THUNK(bx)
+INDIRECT_THUNK(cx)
+INDIRECT_THUNK(dx)
+INDIRECT_THUNK(si)
+INDIRECT_THUNK(di)
+INDIRECT_THUNK(bp)
+INDIRECT_THUNK(sp)
+#endif /* CONFIG_RETPOLINE */
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index f5063b6659eb..f861c31a70ea 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -113,4 +113,15 @@
/* For C file, we already have NOKPROBE_SYMBOL macro */
#endif
+#ifndef __ASSEMBLY__
+/*
+ * This output constraint should be used for any inline asm which has a "call"
+ * instruction. Otherwise the asm may be inserted before the frame pointer
+ * gets set up by the containing function. If you forget to do this, objtool
+ * may print a "call without frame pointer save/setup" warning.
+ */
+register unsigned long current_stack_pointer asm(_ASM_SP);
+#define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer)
+#endif
+
#endif /* _ASM_X86_ASM_H */
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index b294c250287e..67b9be9699c1 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -203,6 +203,8 @@
#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
#define X86_FEATURE_SPEC_CTRL ( 7*32+19) /* Control Speculation Control */
+#define X86_FEATURE_RETPOLINE ( 7*32+29) /* Generic Retpoline mitigation for Spectre variant 2 */
+#define X86_FEATURE_RETPOLINE_AMD ( 7*32+30) /* AMD Retpoline mitigation for Spectre variant 2 */
/* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */
#define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */
@@ -290,6 +292,9 @@
#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
+#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */
+#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */
+#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */
#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
@@ -394,6 +399,8 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
set_bit(bit, (unsigned long *)cpu_caps_set); \
} while (0)
+#define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit)
+
#define cpu_has_fpu boot_cpu_has(X86_FEATURE_FPU)
#define cpu_has_de boot_cpu_has(X86_FEATURE_DE)
#define cpu_has_pse boot_cpu_has(X86_FEATURE_PSE)
diff --git a/arch/x86/include/asm/kaiser.h b/arch/x86/include/asm/kaiser.h
index 802bbbdfe143..48c791a411ab 100644
--- a/arch/x86/include/asm/kaiser.h
+++ b/arch/x86/include/asm/kaiser.h
@@ -19,6 +19,16 @@
#define KAISER_SHADOW_PGD_OFFSET 0x1000
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+/*
+ * A page table address must have this alignment to stay the same when
+ * KAISER_SHADOW_PGD_OFFSET mask is applied
+ */
+#define KAISER_KERNEL_PGD_ALIGNMENT (KAISER_SHADOW_PGD_OFFSET << 1)
+#else
+#define KAISER_KERNEL_PGD_ALIGNMENT PAGE_SIZE
+#endif
+
#ifdef __ASSEMBLY__
#ifdef CONFIG_PAGE_TABLE_ISOLATION
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
new file mode 100644
index 000000000000..1afd04eb5fb7
--- /dev/null
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -0,0 +1,190 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __NOSPEC_BRANCH_H__
+#define __NOSPEC_BRANCH_H__
+
+#include <asm/alternative.h>
+#include <asm/alternative-asm.h>
+#include <asm/cpufeature.h>
+
+/*
+ * Fill the CPU return stack buffer.
+ *
+ * Each entry in the RSB, if used for a speculative 'ret', contains an
+ * infinite 'pause; jmp' loop to capture speculative execution.
+ *
+ * This is required in various cases for retpoline and IBRS-based
+ * mitigations for the Spectre variant 2 vulnerability. Sometimes to
+ * eliminate potentially bogus entries from the RSB, and sometimes
+ * purely to ensure that it doesn't get empty, which on some CPUs would
+ * allow predictions from other (unwanted!) sources to be used.
+ *
+ * We define a CPP macro such that it can be used from both .S files and
+ * inline assembly. It's possible to do a .macro and then include that
+ * from C via asm(".include <asm/nospec-branch.h>") but let's not go there.
+ */
+
+#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */
+#define RSB_FILL_LOOPS 16 /* To avoid underflow */
+
+/*
+ * Google experimented with loop-unrolling and this turned out to be
+ * the optimal version — two calls, each with their own speculation
+ * trap should their return address end up getting used, in a loop.
+ */
+#define __FILL_RETURN_BUFFER(reg, nr, sp) \
+ mov $(nr/2), reg; \
+771: \
+ call 772f; \
+773: /* speculation trap */ \
+ pause; \
+ jmp 773b; \
+772: \
+ call 774f; \
+775: /* speculation trap */ \
+ pause; \
+ jmp 775b; \
+774: \
+ dec reg; \
+ jnz 771b; \
+ add $(BITS_PER_LONG/8) * nr, sp;
+
+#ifdef __ASSEMBLY__
+
+/*
+ * These are the bare retpoline primitives for indirect jmp and call.
+ * Do not use these directly; they only exist to make the ALTERNATIVE
+ * invocation below less ugly.
+ */
+.macro RETPOLINE_JMP reg:req
+ call .Ldo_rop_\@
+.Lspec_trap_\@:
+ pause
+ jmp .Lspec_trap_\@
+.Ldo_rop_\@:
+ mov \reg, (%_ASM_SP)
+ ret
+.endm
+
+/*
+ * This is a wrapper around RETPOLINE_JMP so the called function in reg
+ * returns to the instruction after the macro.
+ */
+.macro RETPOLINE_CALL reg:req
+ jmp .Ldo_call_\@
+.Ldo_retpoline_jmp_\@:
+ RETPOLINE_JMP \reg
+.Ldo_call_\@:
+ call .Ldo_retpoline_jmp_\@
+.endm
+
+/*
+ * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple
+ * indirect jmp/call which may be susceptible to the Spectre variant 2
+ * attack.
+ */
+.macro JMP_NOSPEC reg:req
+#ifdef CONFIG_RETPOLINE
+ ALTERNATIVE_2 __stringify(jmp *\reg), \
+ __stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE, \
+ __stringify(lfence; jmp *\reg), X86_FEATURE_RETPOLINE_AMD
+#else
+ jmp *\reg
+#endif
+.endm
+
+.macro CALL_NOSPEC reg:req
+#ifdef CONFIG_RETPOLINE
+ ALTERNATIVE_2 __stringify(call *\reg), \
+ __stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\
+ __stringify(lfence; call *\reg), X86_FEATURE_RETPOLINE_AMD
+#else
+ call *\reg
+#endif
+.endm
+
+ /*
+ * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
+ * monstrosity above, manually.
+ */
+.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
+#ifdef CONFIG_RETPOLINE
+ ALTERNATIVE "jmp .Lskip_rsb_\@", \
+ __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \
+ \ftr
+.Lskip_rsb_\@:
+#endif
+.endm
+
+#else /* __ASSEMBLY__ */
+
+#if defined(CONFIG_X86_64) && defined(RETPOLINE)
+
+/*
+ * Since the inline asm uses the %V modifier which is only in newer GCC,
+ * the 64-bit one is dependent on RETPOLINE not CONFIG_RETPOLINE.
+ */
+# define CALL_NOSPEC \
+ ALTERNATIVE( \
+ "call *%[thunk_target]\n", \
+ "call __x86_indirect_thunk_%V[thunk_target]\n", \
+ X86_FEATURE_RETPOLINE)
+# define THUNK_TARGET(addr) [thunk_target] "r" (addr)
+
+#elif defined(CONFIG_X86_32) && defined(CONFIG_RETPOLINE)
+/*
+ * For i386 we use the original ret-equivalent retpoline, because
+ * otherwise we'll run out of registers. We don't care about CET
+ * here, anyway.
+ */
+# define CALL_NOSPEC ALTERNATIVE("call *%[thunk_target]\n", \
+ " jmp 904f;\n" \
+ " .align 16\n" \
+ "901: call 903f;\n" \
+ "902: pause;\n" \
+ " jmp 902b;\n" \
+ " .align 16\n" \
+ "903: addl $4, %%esp;\n" \
+ " pushl %[thunk_target];\n" \
+ " ret;\n" \
+ " .align 16\n" \
+ "904: call 901b;\n", \
+ X86_FEATURE_RETPOLINE)
+
+# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
+#else /* No retpoline for C / inline asm */
+# define CALL_NOSPEC "call *%[thunk_target]\n"
+# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
+#endif
+
+/* The Spectre V2 mitigation variants */
+enum spectre_v2_mitigation {
+ SPECTRE_V2_NONE,
+ SPECTRE_V2_RETPOLINE_MINIMAL,
+ SPECTRE_V2_RETPOLINE_MINIMAL_AMD,
+ SPECTRE_V2_RETPOLINE_GENERIC,
+ SPECTRE_V2_RETPOLINE_AMD,
+ SPECTRE_V2_IBRS,
+};
+
+/*
+ * On VMEXIT we must ensure that no RSB predictions learned in the guest
+ * can be followed in the host, by overwriting the RSB completely. Both
+ * retpoline and IBRS mitigations for Spectre v2 need this; only on future
+ * CPUs with IBRS_ATT *might* it be avoided.
+ */
+static inline void vmexit_fill_RSB(void)
+{
+#ifdef CONFIG_RETPOLINE
+ unsigned long loops = RSB_CLEAR_LOOPS / 2;
+
+ asm volatile (ALTERNATIVE("jmp 910f",
+ __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)),
+ X86_FEATURE_RETPOLINE)
+ "910:"
+ : "=&r" (loops), ASM_CALL_CONSTRAINT
+ : "r" (loops) : "memory" );
+#endif
+}
+#endif /* __ASSEMBLY__ */
+#endif /* __NOSPEC_BRANCH_H__ */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index fdbbcd0b3383..c52ddc44bb45 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -156,8 +156,8 @@ extern struct cpuinfo_x86 boot_cpu_data;
extern struct cpuinfo_x86 new_cpu_data;
extern struct tss_struct doublefault_tss;
-extern __u32 cpu_caps_cleared[NCAPINTS];
-extern __u32 cpu_caps_set[NCAPINTS];
+extern __u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
+extern __u32 cpu_caps_set[NCAPINTS + NBUGINTS];
#ifdef CONFIG_SMP
DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
index 6cd9c49ba651..a4a77286cb1d 100644
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h
@@ -16,7 +16,6 @@ void entry_SYSENTER_compat(void);
void x86_configure_nx(void);
void x86_report_nx(void);
-void stuff_rsb(void);
extern int reboot_force;
diff --git a/arch/x86/include/asm/spec_ctrl.h b/arch/x86/include/asm/spec_ctrl.h
index 96f61a82396d..2a754955cc79 100644
--- a/arch/x86/include/asm/spec_ctrl.h
+++ b/arch/x86/include/asm/spec_ctrl.h
@@ -93,10 +93,10 @@
#else /* __ASSEMBLY__ */
void x86_enable_ibrs(void);
void x86_disable_ibrs(void);
-void stuff_RSB(void);
unsigned int x86_ibrs_enabled(void);
unsigned int x86_ibpb_enabled(void);
void x86_spec_check(void);
+int nospec(char *str);
static inline void x86_ibp_barrier(void)
{
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index b2173074a9ed..892c3bd95cbb 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -168,17 +168,6 @@ static inline struct thread_info *current_thread_info(void)
return (struct thread_info *)(current_top_of_stack() - THREAD_SIZE);
}
-static inline unsigned long current_stack_pointer(void)
-{
- unsigned long sp;
-#ifdef CONFIG_X86_64
- asm("mov %%rsp,%0" : "=g" (sp));
-#else
- asm("mov %%esp,%0" : "=g" (sp));
-#endif
- return sp;
-}
-
#else /* !__ASSEMBLY__ */
#ifdef CONFIG_X86_64
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index 85133b2b8e99..0977e7607046 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -44,6 +44,7 @@
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/smap.h>
+#include <asm/nospec-branch.h>
#include <xen/interface/xen.h>
#include <xen/interface/sched.h>
@@ -215,9 +216,9 @@ privcmd_call(unsigned call,
__HYPERCALL_5ARG(a1, a2, a3, a4, a5);
stac();
- asm volatile("call *%[call]"
+ asm volatile(CALL_NOSPEC
: __HYPERCALL_5PARAM
- : [call] "a" (&hypercall_page[call])
+ : [thunk_target] "a" (&hypercall_page[call])
: __HYPERCALL_CLOBBER5);
clac();
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index c32c9757af6d..b765edbd6bbc 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -467,6 +467,7 @@ static void __init acpi_sci_ioapic_setup(u8 bus_irq, u16 polarity, u16 trigger,
mp_override_legacy_irq(bus_irq, polarity, trigger, gsi);
else
mp_register_ioapic_irq(bus_irq, polarity, trigger, gsi);
+
acpi_penalize_sci_irq(bus_irq, trigger, polarity);
/*
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 25f909362b7a..d6f375f1b928 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -339,9 +339,12 @@ done:
static void __init_or_module optimize_nops(struct alt_instr *a, u8 *instr)
{
unsigned long flags;
+ int i;
- if (instr[0] != 0x90)
- return;
+ for (i = 0; i < a->padlen; i++) {
+ if (instr[i] != 0x90)
+ return;
+ }
local_irq_save(flags);
add_nops(instr + (a->instrlen - a->padlen), a->padlen);
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index c8205ff2a0f3..899a03389d45 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -16,14 +16,12 @@ obj-y := intel_cacheinfo.o scattered.o topology.o
obj-y += common.o
obj-y += rdrand.o
obj-y += match.o
+obj-y += bugs.o
obj-y += spec_ctrl.o
obj-$(CONFIG_PROC_FS) += proc.o
obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o
-obj-$(CONFIG_X86_32) += bugs.o
-obj-$(CONFIG_X86_64) += bugs_64.o
-
obj-$(CONFIG_CPU_SUP_INTEL) += intel.o
obj-$(CONFIG_CPU_SUP_AMD) += amd.o
obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index bd17db15a2c1..7e64fafb6787 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -9,6 +9,11 @@
*/
#include <linux/init.h>
#include <linux/utsname.h>
+#include <linux/cpu.h>
+
+#include <asm/nospec-branch.h>
+#include <asm/spec_ctrl.h>
+#include <asm/cmdline.h>
#include <asm/bugs.h>
#include <asm/processor.h>
#include <asm/processor-flags.h>
@@ -16,15 +21,24 @@
#include <asm/msr.h>
#include <asm/paravirt.h>
#include <asm/alternative.h>
+#include <asm/pgtable.h>
+#include <asm/cacheflush.h>
+
+static void __init spectre_v2_select_mitigation(void);
void __init check_bugs(void)
{
identify_boot_cpu();
-#ifndef CONFIG_SMP
- pr_info("CPU: ");
- print_cpu_info(&boot_cpu_data);
-#endif
+ if (!IS_ENABLED(CONFIG_SMP)) {
+ pr_info("CPU: ");
+ print_cpu_info(&boot_cpu_data);
+ }
+
+ /* Select the proper spectre mitigation before patching alternatives */
+ spectre_v2_select_mitigation();
+
+#ifdef CONFIG_X86_32
/*
* Check whether we are able to run this kernel safely on SMP.
*
@@ -40,4 +54,195 @@ void __init check_bugs(void)
alternative_instructions();
fpu__init_check_bugs();
+#else /* CONFIG_X86_64 */
+ alternative_instructions();
+
+ /*
+ * Make sure the first 2MB area is not mapped by huge pages
+ * There are typically fixed size MTRRs in there and overlapping
+ * MTRRs into large pages causes slow downs.
+ *
+ * Right now we don't do that with gbpages because there seems
+ * very little benefit for that case.
+ */
+ if (!direct_gbpages)
+ set_memory_4k((unsigned long)__va(0), 1);
+#endif
+}
+
+/* The kernel command line selection */
+enum spectre_v2_mitigation_cmd {
+ SPECTRE_V2_CMD_NONE,
+ SPECTRE_V2_CMD_AUTO,
+ SPECTRE_V2_CMD_FORCE,
+ SPECTRE_V2_CMD_RETPOLINE,
+ SPECTRE_V2_CMD_RETPOLINE_GENERIC,
+ SPECTRE_V2_CMD_RETPOLINE_AMD,
+};
+
+static const char *spectre_v2_strings[] = {
+ [SPECTRE_V2_NONE] = "Vulnerable",
+ [SPECTRE_V2_RETPOLINE_MINIMAL] = "Vulnerable: Minimal generic ASM retpoline",
+ [SPECTRE_V2_RETPOLINE_MINIMAL_AMD] = "Vulnerable: Minimal AMD ASM retpoline",
+ [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline",
+ [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline",
+};
+
+#undef pr_fmt
+#define pr_fmt(fmt) "Spectre V2 mitigation: " fmt
+
+static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE;
+
+static void __init spec2_print_if_insecure(const char *reason)
+{
+ if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+ pr_info("%s\n", reason);
+}
+
+static void __init spec2_print_if_secure(const char *reason)
+{
+ if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+ pr_info("%s\n", reason);
+}
+
+static inline bool retp_compiler(void)
+{
+ return __is_defined(RETPOLINE);
+}
+
+static inline bool match_option(const char *arg, int arglen, const char *opt)
+{
+ int len = strlen(opt);
+
+ return len == arglen && !strncmp(arg, opt, len);
}
+
+static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
+{
+ char arg[20];
+ int ret;
+
+ ret = cmdline_find_option(boot_command_line, "spectre_v2", arg,
+ sizeof(arg));
+ if (ret > 0) {
+ if (match_option(arg, ret, "off")) {
+ goto disable;
+ } else if (match_option(arg, ret, "on")) {
+ spec2_print_if_secure("force enabled on command line.");
+ return SPECTRE_V2_CMD_FORCE;
+ } else if (match_option(arg, ret, "retpoline")) {
+ spec2_print_if_insecure("retpoline selected on command line.");
+ return SPECTRE_V2_CMD_RETPOLINE;
+ } else if (match_option(arg, ret, "retpoline,amd")) {
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
+ pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n");
+ return SPECTRE_V2_CMD_AUTO;
+ }
+ spec2_print_if_insecure("AMD retpoline selected on command line.");
+ return SPECTRE_V2_CMD_RETPOLINE_AMD;
+ } else if (match_option(arg, ret, "retpoline,generic")) {
+ spec2_print_if_insecure("generic retpoline selected on command line.");
+ return SPECTRE_V2_CMD_RETPOLINE_GENERIC;
+ } else if (match_option(arg, ret, "auto")) {
+ return SPECTRE_V2_CMD_AUTO;
+ }
+ }
+
+ if (!cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
+ return SPECTRE_V2_CMD_AUTO;
+disable:
+ nospec("");
+ spec2_print_if_insecure("disabled on command line.");
+ return SPECTRE_V2_CMD_NONE;
+}
+
+static void __init spectre_v2_select_mitigation(void)
+{
+ enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
+ enum spectre_v2_mitigation mode = SPECTRE_V2_NONE;
+
+ /*
+ * If the CPU is not affected and the command line mode is NONE or AUTO
+ * then nothing to do.
+ */
+ if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2) &&
+ (cmd == SPECTRE_V2_CMD_NONE || cmd == SPECTRE_V2_CMD_AUTO))
+ return;
+
+ switch (cmd) {
+ case SPECTRE_V2_CMD_NONE:
+ return;
+
+ case SPECTRE_V2_CMD_FORCE:
+ /* FALLTRHU */
+ case SPECTRE_V2_CMD_AUTO:
+ goto retpoline_auto;
+
+ case SPECTRE_V2_CMD_RETPOLINE_AMD:
+ if (IS_ENABLED(CONFIG_RETPOLINE))
+ goto retpoline_amd;
+ break;
+ case SPECTRE_V2_CMD_RETPOLINE_GENERIC:
+ if (IS_ENABLED(CONFIG_RETPOLINE))
+ goto retpoline_generic;
+ break;
+ case SPECTRE_V2_CMD_RETPOLINE:
+ if (IS_ENABLED(CONFIG_RETPOLINE))
+ goto retpoline_auto;
+ break;
+ }
+ pr_err("kernel not compiled with retpoline; no mitigation available!");
+ return;
+
+retpoline_auto:
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
+ retpoline_amd:
+ if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) {
+ pr_err("LFENCE not serializing. Switching to generic retpoline\n");
+ goto retpoline_generic;
+ }
+ mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD :
+ SPECTRE_V2_RETPOLINE_MINIMAL_AMD;
+ setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD);
+ setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
+ } else {
+ retpoline_generic:
+ mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_GENERIC :
+ SPECTRE_V2_RETPOLINE_MINIMAL;
+ setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
+ }
+
+ spectre_v2_enabled = mode;
+ pr_info("%s\n", spectre_v2_strings[mode]);
+}
+
+#undef pr_fmt
+
+#ifdef CONFIG_SYSFS
+ssize_t cpu_show_meltdown(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
+ return sprintf(buf, "Not affected\n");
+ if (boot_cpu_has(X86_FEATURE_KAISER))
+ return sprintf(buf, "Mitigation: PTI\n");
+ return sprintf(buf, "Vulnerable\n");
+}
+
+ssize_t cpu_show_spectre_v1(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1))
+ return sprintf(buf, "Not affected\n");
+ return sprintf(buf, "Vulnerable\n");
+}
+
+ssize_t cpu_show_spectre_v2(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+ return sprintf(buf, "Not affected\n");
+
+ return sprintf(buf, "%s\n", spectre_v2_strings[spectre_v2_enabled]);
+}
+#endif
diff --git a/arch/x86/kernel/cpu/bugs_64.c b/arch/x86/kernel/cpu/bugs_64.c
deleted file mode 100644
index 04f0fe5af83e..000000000000
--- a/arch/x86/kernel/cpu/bugs_64.c
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (C) 1994 Linus Torvalds
- * Copyright (C) 2000 SuSE
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <asm/alternative.h>
-#include <asm/bugs.h>
-#include <asm/processor.h>
-#include <asm/mtrr.h>
-#include <asm/cacheflush.h>
-
-void __init check_bugs(void)
-{
- identify_boot_cpu();
-#if !defined(CONFIG_SMP)
- printk(KERN_INFO "CPU: ");
- print_cpu_info(&boot_cpu_data);
-#endif
- alternative_instructions();
-
- /*
- * Make sure the first 2MB area is not mapped by huge pages
- * There are typically fixed size MTRRs in there and overlapping
- * MTRRs into large pages causes slow downs.
- *
- * Right now we don't do that with gbpages because there seems
- * very little benefit for that case.
- */
- if (!direct_gbpages)
- set_memory_4k((unsigned long)__va(0), 1);
-}
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 5bddf99e9784..b540e630dcbb 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -435,8 +435,8 @@ static const char *table_lookup_model(struct cpuinfo_x86 *c)
return NULL; /* Not found */
}
-__u32 cpu_caps_cleared[NCAPINTS];
-__u32 cpu_caps_set[NCAPINTS];
+__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
+__u32 cpu_caps_set[NCAPINTS + NBUGINTS];
void load_percpu_segment(int cpu)
{
@@ -667,6 +667,16 @@ void cpu_detect(struct cpuinfo_x86 *c)
}
}
+static void apply_forced_caps(struct cpuinfo_x86 *c)
+{
+ int i;
+
+ for (i = 0; i < NCAPINTS + NBUGINTS; i++) {
+ c->x86_capability[i] &= ~cpu_caps_cleared[i];
+ c->x86_capability[i] |= cpu_caps_set[i];
+ }
+}
+
void get_cpu_cap(struct cpuinfo_x86 *c)
{
u32 eax, ebx, ecx, edx;
@@ -824,6 +834,13 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
}
setup_force_cpu_cap(X86_FEATURE_ALWAYS);
+
+ /* Assume for now that ALL x86 CPUs are insecure */
+ setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
+
+ setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
+ setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
+
fpu__init_system(c);
#ifdef CONFIG_X86_32
@@ -967,11 +984,8 @@ static void identify_cpu(struct cpuinfo_x86 *c)
if (this_cpu->c_identify)
this_cpu->c_identify(c);
- /* Clear/Set all flags overriden by options, after probe */
- for (i = 0; i < NCAPINTS; i++) {
- c->x86_capability[i] &= ~cpu_caps_cleared[i];
- c->x86_capability[i] |= cpu_caps_set[i];
- }
+ /* Clear/Set all flags overridden by options, after probe */
+ apply_forced_caps(c);
#ifdef CONFIG_X86_64
c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
@@ -1032,10 +1046,7 @@ static void identify_cpu(struct cpuinfo_x86 *c)
* Clear/Set all flags overriden by options, need do it
* before following smp all cpus cap AND.
*/
- for (i = 0; i < NCAPINTS; i++) {
- c->x86_capability[i] &= ~cpu_caps_cleared[i];
- c->x86_capability[i] |= cpu_caps_set[i];
- }
+ apply_forced_caps(c);
/*
* On SMP, boot_cpu_data holds the common feature set between
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index abf581ade8d2..b428a8174be1 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -994,9 +994,17 @@ static bool is_blacklisted(unsigned int cpu)
{
struct cpuinfo_x86 *c = &cpu_data(cpu);
- if (c->x86 == 6 && c->x86_model == 79) {
- pr_err_once("late loading on model 79 is disabled.\n");
- return true;
+ /*
+ * Late loading on model 79 with microcode revision less than 0x0b000021
+ * may result in a system hang. This behavior is documented in item
+ * BDF90, #334165 (Intel Xeon Processor E7-8800/4800 v4 Product Family).
+ */
+ if (c->x86 == 6 &&
+ c->x86_model == 79 &&
+ c->x86_mask == 0x01 &&
+ c->microcode < 0x0b000021) {
+ pr_err_once("Erratum BDF90: late loading with revision < 0x0b000021 (0x%x) disabled.\n", c->microcode);
+ pr_err_once("Please consider either early loading through initrd/built-in or a potential BIOS update.\n");
}
return false;
diff --git a/arch/x86/kernel/cpu/spec_ctrl.c b/arch/x86/kernel/cpu/spec_ctrl.c
index 096705e33348..321d6b204e6c 100644
--- a/arch/x86/kernel/cpu/spec_ctrl.c
+++ b/arch/x86/kernel/cpu/spec_ctrl.c
@@ -4,7 +4,6 @@
*/
#include <asm/msr.h>
-#include <asm/proto.h>
#include <asm/processor.h>
#include <asm/spec_ctrl.h>
@@ -47,16 +46,6 @@ void x86_enable_ibrs(void)
EXPORT_SYMBOL_GPL(x86_enable_ibrs);
/*
- * Do this indirection as otherwise we'd need to backport the
- * EXPORT_SYMBOL_GPL() for asm stuff.
- */
-void stuff_RSB(void)
-{
- stuff_rsb();
-}
-EXPORT_SYMBOL_GPL(stuff_RSB);
-
-/*
* Called after upgrading microcode, check CPUID directly.
*/
void x86_spec_check(void)
@@ -96,7 +85,7 @@ void x86_spec_check(void)
}
EXPORT_SYMBOL_GPL(x86_spec_check);
-static int __init nospec(char *str)
+int __init nospec(char *str)
{
setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL);
ibrs_state = 0;
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 38da8f29a9c8..528b7aa1780d 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -20,6 +20,7 @@
#include <linux/mm.h>
#include <asm/apic.h>
+#include <asm/nospec-branch.h>
#ifdef CONFIG_DEBUG_STACKOVERFLOW
@@ -55,17 +56,17 @@ DEFINE_PER_CPU(struct irq_stack *, softirq_stack);
static void call_on_stack(void *func, void *stack)
{
asm volatile("xchgl %%ebx,%%esp \n"
- "call *%%edi \n"
+ CALL_NOSPEC
"movl %%ebx,%%esp \n"
: "=b" (stack)
: "0" (stack),
- "D"(func)
+ [thunk_target] "D"(func)
: "memory", "cc", "edx", "ecx", "eax");
}
static inline void *current_stack(void)
{
- return (void *)(current_stack_pointer() & ~(THREAD_SIZE - 1));
+ return (void *)(current_stack_pointer & ~(THREAD_SIZE - 1));
}
static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc)
@@ -89,17 +90,17 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc)
/* Save the next esp at the bottom of the stack */
prev_esp = (u32 *)irqstk;
- *prev_esp = current_stack_pointer();
+ *prev_esp = current_stack_pointer;
if (unlikely(overflow))
call_on_stack(print_stack_overflow, isp);
asm volatile("xchgl %%ebx,%%esp \n"
- "call *%%edi \n"
+ CALL_NOSPEC
"movl %%ebx,%%esp \n"
: "=a" (arg1), "=b" (isp)
: "0" (desc), "1" (isp),
- "D" (desc->handle_irq)
+ [thunk_target] "D" (desc->handle_irq)
: "memory", "cc", "ecx");
return 1;
}
@@ -142,7 +143,7 @@ void do_softirq_own_stack(void)
/* Push the previous esp onto the stack */
prev_esp = (u32 *)irqstk;
- *prev_esp = current_stack_pointer();
+ *prev_esp = current_stack_pointer;
call_on_stack(__do_softirq, isp);
}
diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S
index 5d9afbcb6074..09284cfab86f 100644
--- a/arch/x86/kernel/mcount_64.S
+++ b/arch/x86/kernel/mcount_64.S
@@ -7,7 +7,7 @@
#include <linux/linkage.h>
#include <asm/ptrace.h>
#include <asm/ftrace.h>
-
+#include <asm/nospec-branch.h>
.code64
.section .entry.text, "ax"
@@ -285,8 +285,9 @@ trace:
* ip and parent ip are used and the list function is called when
* function tracing is enabled.
*/
- call *ftrace_trace_function
+ movq ftrace_trace_function, %r8
+ CALL_NOSPEC %r8
restore_mcount_regs
jmp fgraph_trace
@@ -329,5 +330,5 @@ GLOBAL(return_to_handler)
movq 8(%rsp), %rdx
movq (%rsp), %rax
addq $24, %rsp
- jmp *%rdi
+ JMP_NOSPEC %rdi
#endif
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 5621f882645e..b1a408e2c800 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -166,7 +166,7 @@ void ist_begin_non_atomic(struct pt_regs *regs)
* from double_fault.
*/
BUG_ON((unsigned long)(current_top_of_stack() -
- current_stack_pointer()) >= THREAD_SIZE);
+ current_stack_pointer) >= THREAD_SIZE);
preempt_enable_no_resched();
}
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index a43536e3964c..0e00c4e70ed9 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -37,6 +37,7 @@
#include <asm/desc.h>
#include <asm/debugreg.h>
#include <asm/kvm_para.h>
+#include <asm/nospec-branch.h>
#include <asm/virtext.h>
#include "trace.h"
@@ -3907,7 +3908,10 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
"mov %%r14, %c[r14](%[svm]) \n\t"
"mov %%r15, %c[r15](%[svm]) \n\t"
#endif
- /* Clear host registers (marked as clobbered so it's safe) */
+ /*
+ * Clear host registers marked as clobbered to prevent
+ * speculative use.
+ */
"xor %%" _ASM_BX ", %%" _ASM_BX " \n\t"
"xor %%" _ASM_CX ", %%" _ASM_CX " \n\t"
"xor %%" _ASM_DX ", %%" _ASM_DX " \n\t"
@@ -3958,7 +3962,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
wrmsrl(MSR_IA32_SPEC_CTRL, FEATURE_ENABLE_IBRS);
}
- stuff_RSB();
+ /* Eliminate branch target predictions from guest mode */
+ vmexit_fill_RSB();
#ifdef CONFIG_X86_64
wrmsrl(MSR_GS_BASE, svm->host.gs_base);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 4ef273ecd355..3a4523c7c3e0 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -47,8 +47,8 @@
#include <asm/kexec.h>
#include <asm/apic.h>
#include <asm/irq_remapping.h>
+#include <asm/nospec-branch.h>
#include <asm/spec_ctrl.h>
-#include <asm/proto.h>
#include "trace.h"
#include "pmu.h"
@@ -834,8 +834,16 @@ static inline short vmcs_field_to_offset(unsigned long field)
{
BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX);
- if (field >= ARRAY_SIZE(vmcs_field_to_offset_table) ||
- vmcs_field_to_offset_table[field] == 0)
+ if (field >= ARRAY_SIZE(vmcs_field_to_offset_table))
+ return -ENOENT;
+
+ /*
+ * FIXME: Mitigation for CVE-2017-5753. To be replaced with a
+ * generic mechanism.
+ */
+ asm("lfence");
+
+ if (vmcs_field_to_offset_table[field] == 0)
return -ENOENT;
return vmcs_field_to_offset_table[field];
@@ -8656,6 +8664,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
/* Save guest registers, load host registers, keep flags */
"mov %0, %c[wordsize](%%" _ASM_SP ") \n\t"
"pop %0 \n\t"
+ "setbe %c[fail](%0)\n\t"
"mov %%" _ASM_AX ", %c[rax](%0) \n\t"
"mov %%" _ASM_BX ", %c[rbx](%0) \n\t"
__ASM_SIZE(pop) " %c[rcx](%0) \n\t"
@@ -8672,12 +8681,23 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
"mov %%r13, %c[r13](%0) \n\t"
"mov %%r14, %c[r14](%0) \n\t"
"mov %%r15, %c[r15](%0) \n\t"
+ "xor %%r8d, %%r8d \n\t"
+ "xor %%r9d, %%r9d \n\t"
+ "xor %%r10d, %%r10d \n\t"
+ "xor %%r11d, %%r11d \n\t"
+ "xor %%r12d, %%r12d \n\t"
+ "xor %%r13d, %%r13d \n\t"
+ "xor %%r14d, %%r14d \n\t"
+ "xor %%r15d, %%r15d \n\t"
#endif
"mov %%cr2, %%" _ASM_AX " \n\t"
"mov %%" _ASM_AX ", %c[cr2](%0) \n\t"
+ "xor %%eax, %%eax \n\t"
+ "xor %%ebx, %%ebx \n\t"
+ "xor %%esi, %%esi \n\t"
+ "xor %%edi, %%edi \n\t"
"pop %%" _ASM_BP "; pop %%" _ASM_DX " \n\t"
- "setbe %c[fail](%0) \n\t"
".pushsection .rodata \n\t"
".global vmx_return \n\t"
"vmx_return: " _ASM_PTR " 2b \n\t"
@@ -8714,7 +8734,8 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
#endif
);
- stuff_RSB();
+ /* Eliminate branch target predictions from guest mode */
+ vmexit_fill_RSB();
/* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
if (debugctlmsr)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 53d3b96f523b..8113eae22a40 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4118,7 +4118,7 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
addr, n, v))
&& kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v))
break;
- trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v);
+ trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, v);
handled += n;
addr += n;
len -= n;
@@ -4366,7 +4366,7 @@ static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes)
{
if (vcpu->mmio_read_completed) {
trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
- vcpu->mmio_fragments[0].gpa, *(u64 *)val);
+ vcpu->mmio_fragments[0].gpa, val);
vcpu->mmio_read_completed = 0;
return 1;
}
@@ -4388,14 +4388,14 @@ static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val)
{
- trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val);
+ trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, val);
return vcpu_mmio_write(vcpu, gpa, bytes, val);
}
static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
void *val, int bytes)
{
- trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0);
+ trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, NULL);
return X86EMUL_IO_NEEDED;
}
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index f2587888d987..12a34d15b648 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -21,6 +21,7 @@ lib-y += usercopy_$(BITS).o usercopy.o getuser.o putuser.o
lib-y += memcpy_$(BITS).o
lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o
+lib-$(CONFIG_RETPOLINE) += retpoline.o
obj-y += msr.o msr-reg.o msr-reg-export.o
diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S
index c1e623209853..90353a26ed95 100644
--- a/arch/x86/lib/checksum_32.S
+++ b/arch/x86/lib/checksum_32.S
@@ -28,7 +28,8 @@
#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/asm.h>
-
+#include <asm/nospec-branch.h>
+
/*
* computes a partial checksum, e.g. for TCP/UDP fragments
*/
@@ -155,7 +156,7 @@ ENTRY(csum_partial)
negl %ebx
lea 45f(%ebx,%ebx,2), %ebx
testl %esi, %esi
- jmp *%ebx
+ JMP_NOSPEC %ebx
# Handle 2-byte-aligned regions
20: addw (%esi), %ax
@@ -437,7 +438,7 @@ ENTRY(csum_partial_copy_generic)
andl $-32,%edx
lea 3f(%ebx,%ebx), %ebx
testl %esi, %esi
- jmp *%ebx
+ JMP_NOSPEC %ebx
1: addl $64,%esi
addl $64,%edi
SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl)
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
new file mode 100644
index 000000000000..019a03599bb0
--- /dev/null
+++ b/arch/x86/lib/retpoline.S
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/stringify.h>
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+#include <asm/cpufeature.h>
+#include <asm/alternative-asm.h>
+#include <asm-generic/export.h>
+#include <asm/nospec-branch.h>
+
+.macro THUNK reg
+ .section .text.__x86.indirect_thunk.\reg
+
+ENTRY(__x86_indirect_thunk_\reg)
+ CFI_STARTPROC
+ JMP_NOSPEC %\reg
+ CFI_ENDPROC
+ENDPROC(__x86_indirect_thunk_\reg)
+.endm
+
+/*
+ * Despite being an assembler file we can't just use .irp here
+ * because __KSYM_DEPS__ only uses the C preprocessor and would
+ * only see one instance of "__x86_indirect_thunk_\reg" rather
+ * than one per register with the correct names. So we do it
+ * the simple and nasty way...
+ */
+#define EXPORT_THUNK(reg) EXPORT_SYMBOL(__x86_indirect_thunk_ ## reg)
+#define GENERATE_THUNK(reg) THUNK reg ; EXPORT_THUNK(reg)
+
+GENERATE_THUNK(_ASM_AX)
+GENERATE_THUNK(_ASM_BX)
+GENERATE_THUNK(_ASM_CX)
+GENERATE_THUNK(_ASM_DX)
+GENERATE_THUNK(_ASM_SI)
+GENERATE_THUNK(_ASM_DI)
+GENERATE_THUNK(_ASM_BP)
+GENERATE_THUNK(_ASM_SP)
+#ifdef CONFIG_64BIT
+GENERATE_THUNK(r8)
+GENERATE_THUNK(r9)
+GENERATE_THUNK(r10)
+GENERATE_THUNK(r11)
+GENERATE_THUNK(r12)
+GENERATE_THUNK(r13)
+GENERATE_THUNK(r14)
+GENERATE_THUNK(r15)
+#endif
diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c
index f58e00755eeb..b15c90b60ec7 100644
--- a/arch/x86/mm/kaiser.c
+++ b/arch/x86/mm/kaiser.c
@@ -194,6 +194,8 @@ static int kaiser_add_user_map(const void *__start_addr, unsigned long size,
* requires that not to be #defined to 0): so mask it off here.
*/
flags &= ~_PAGE_GLOBAL;
+ if (!(__supported_pte_mask & _PAGE_NX))
+ flags &= ~_PAGE_NX;
for (; address < end_addr; address += PAGE_SIZE) {
target_address = get_pa_from_mapping(address);
diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c
index 0b7a63d98440..805a3271a137 100644
--- a/arch/x86/realmode/init.c
+++ b/arch/x86/realmode/init.c
@@ -4,6 +4,7 @@
#include <asm/cacheflush.h>
#include <asm/pgtable.h>
#include <asm/realmode.h>
+#include <asm/kaiser.h>
struct real_mode_header *real_mode_header;
u32 *trampoline_cr4_features;
@@ -15,7 +16,8 @@ void __init reserve_real_mode(void)
size_t size = PAGE_ALIGN(real_mode_blob_end - real_mode_blob);
/* Has to be under 1M so we can execute real-mode AP code. */
- mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE);
+ mem = memblock_find_in_range(0, 1 << 20, size,
+ KAISER_KERNEL_PGD_ALIGNMENT);
if (!mem)
panic("Cannot allocate trampoline\n");
diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S
index dac7b20d2f9d..781cca63f795 100644
--- a/arch/x86/realmode/rm/trampoline_64.S
+++ b/arch/x86/realmode/rm/trampoline_64.S
@@ -30,6 +30,7 @@
#include <asm/msr.h>
#include <asm/segment.h>
#include <asm/processor-flags.h>
+#include <asm/kaiser.h>
#include "realmode.h"
.text
@@ -139,7 +140,7 @@ tr_gdt:
tr_gdt_end:
.bss
- .balign PAGE_SIZE
+ .balign KAISER_KERNEL_PGD_ALIGNMENT
GLOBAL(trampoline_pgd) .space PAGE_SIZE
.balign 8
diff --git a/crypto/algapi.c b/crypto/algapi.c
index 43f5bdb6b570..eb58b73ca925 100644
--- a/crypto/algapi.c
+++ b/crypto/algapi.c
@@ -168,6 +168,18 @@ void crypto_remove_spawns(struct crypto_alg *alg, struct list_head *list,
spawn->alg = NULL;
spawns = &inst->alg.cra_users;
+
+ /*
+ * We may encounter an unregistered instance here, since
+ * an instance's spawns are set up prior to the instance
+ * being registered. An unregistered instance will have
+ * NULL ->cra_users.next, since ->cra_users isn't
+ * properly initialized until registration. But an
+ * unregistered instance cannot have any users, so treat
+ * it the same as ->cra_users being empty.
+ */
+ if (spawns->next == NULL)
+ break;
}
} while ((spawns = crypto_more_spawns(alg, &stack, &top,
&secondary_spawns)));
diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
index 98504ec99c7d..59992788966c 100644
--- a/drivers/base/Kconfig
+++ b/drivers/base/Kconfig
@@ -223,6 +223,9 @@ config GENERIC_CPU_DEVICES
config GENERIC_CPU_AUTOPROBE
bool
+config GENERIC_CPU_VULNERABILITIES
+ bool
+
config SOC_BUS
bool
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index 91bbb1959d8d..3db71afbba93 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -498,10 +498,58 @@ static void __init cpu_dev_register_generic(void)
#endif
}
+#ifdef CONFIG_GENERIC_CPU_VULNERABILITIES
+
+ssize_t __weak cpu_show_meltdown(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "Not affected\n");
+}
+
+ssize_t __weak cpu_show_spectre_v1(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "Not affected\n");
+}
+
+ssize_t __weak cpu_show_spectre_v2(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "Not affected\n");
+}
+
+static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
+static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
+static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
+
+static struct attribute *cpu_root_vulnerabilities_attrs[] = {
+ &dev_attr_meltdown.attr,
+ &dev_attr_spectre_v1.attr,
+ &dev_attr_spectre_v2.attr,
+ NULL
+};
+
+static const struct attribute_group cpu_root_vulnerabilities_group = {
+ .name = "vulnerabilities",
+ .attrs = cpu_root_vulnerabilities_attrs,
+};
+
+static void __init cpu_register_vulnerabilities(void)
+{
+ if (sysfs_create_group(&cpu_subsys.dev_root->kobj,
+ &cpu_root_vulnerabilities_group))
+ pr_err("Unable to register CPU vulnerabilities\n");
+}
+
+#else
+static inline void cpu_register_vulnerabilities(void) { }
+#endif
+
void __init cpu_dev_init(void)
{
if (subsys_system_register(&cpu_subsys, cpu_root_attr_groups))
panic("Failed to register CPU subsystem");
cpu_dev_register_generic();
+ cpu_register_vulnerabilities();
}
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 1455b50ddcd8..752c02b6c56d 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -4085,7 +4085,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
segment_size = rbd_obj_bytes(&rbd_dev->header);
blk_queue_max_hw_sectors(q, segment_size / SECTOR_SIZE);
q->limits.max_sectors = queue_max_hw_sectors(q);
- blk_queue_max_segments(q, segment_size / SECTOR_SIZE);
+ blk_queue_max_segments(q, USHRT_MAX);
blk_queue_max_segment_size(q, segment_size);
blk_queue_io_min(q, segment_size);
blk_queue_io_opt(q, segment_size);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index 04fd0f2b6af0..fda8e85dd5a2 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -2678,6 +2678,8 @@ static int vmw_cmd_dx_view_define(struct vmw_private *dev_priv,
}
view_type = vmw_view_cmd_to_type(header->id);
+ if (view_type == vmw_view_max)
+ return -EINVAL;
cmd = container_of(header, typeof(*cmd), header);
ret = vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface,
user_surface_converter,
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index bc6f2c7e6f05..9a8e820b13e3 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -31,6 +31,7 @@
#include <linux/clockchips.h>
#include <asm/hyperv.h>
#include <asm/mshyperv.h>
+#include <asm/nospec-branch.h>
#include "hyperv_vmbus.h"
/* The one and only */
@@ -103,9 +104,10 @@ u64 hv_do_hypercall(u64 control, void *input, void *output)
return (u64)ULLONG_MAX;
__asm__ __volatile__("mov %0, %%r8" : : "r" (output_address) : "r8");
- __asm__ __volatile__("call *%3" : "=a" (hv_status) :
+ __asm__ __volatile__(CALL_NOSPEC :
+ "=a" (hv_status) :
"c" (control), "d" (input_address),
- "m" (hypercall_page));
+ THUNK_TARGET(hypercall_page));
return hv_status;
@@ -123,11 +125,12 @@ u64 hv_do_hypercall(u64 control, void *input, void *output)
if (!hypercall_page)
return (u64)ULLONG_MAX;
- __asm__ __volatile__ ("call *%8" : "=d"(hv_status_hi),
+ __asm__ __volatile__ (CALL_NOSPEC : "=d"(hv_status_hi),
"=a"(hv_status_lo) : "d" (control_hi),
"a" (control_lo), "b" (input_address_hi),
"c" (input_address_lo), "D"(output_address_hi),
- "S"(output_address_lo), "m" (hypercall_page));
+ "S"(output_address_lo),
+ THUNK_TARGET(hypercall_page));
return hv_status_lo | ((u64)hv_status_hi << 32);
#endif /* !x86_64 */
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index 545ce9f3d715..052f6c853337 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -957,8 +957,7 @@ static int srpt_init_ch_qp(struct srpt_rdma_ch *ch, struct ib_qp *qp)
return -ENOMEM;
attr->qp_state = IB_QPS_INIT;
- attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ |
- IB_ACCESS_REMOTE_WRITE;
+ attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE;
attr->port_num = ch->sport->port;
attr->pkey_index = 0;
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index ede00e05fdfd..74ba2a851e72 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -1544,13 +1544,15 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain)
return -ENOMEM;
arm_smmu_ops.pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
- smmu_domain->pgtbl_ops = pgtbl_ops;
ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg);
- if (IS_ERR_VALUE(ret))
+ if (IS_ERR_VALUE(ret)) {
free_io_pgtable_ops(pgtbl_ops);
+ return ret;
+ }
- return ret;
+ smmu_domain->pgtbl_ops = pgtbl_ops;
+ return 0;
}
static struct arm_smmu_group *arm_smmu_group_get(struct device *dev)
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index ad410e0c5b21..aa9e2e64a3f7 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -1564,7 +1564,8 @@ static unsigned long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan,
int l;
struct dm_buffer *b, *tmp;
unsigned long freed = 0;
- unsigned long count = nr_to_scan;
+ unsigned long count = c->n_buffers[LIST_CLEAN] +
+ c->n_buffers[LIST_DIRTY];
unsigned long retain_target = get_retain_buffers(c);
for (l = 0; l < LIST_SIZE; l++) {
@@ -1601,6 +1602,7 @@ dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
{
struct dm_bufio_client *c;
unsigned long count;
+ unsigned long retain_target;
c = container_of(shrink, struct dm_bufio_client, shrinker);
if (sc->gfp_mask & __GFP_FS)
@@ -1609,8 +1611,9 @@ dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
return 0;
count = c->n_buffers[LIST_CLEAN] + c->n_buffers[LIST_DIRTY];
+ retain_target = get_retain_buffers(c);
dm_bufio_unlock(c);
- return count;
+ return (count < retain_target) ? 0 : (count - retain_target);
}
/*
diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c
index 27e2352fcc42..b227f81e4a7e 100644
--- a/drivers/net/can/usb/gs_usb.c
+++ b/drivers/net/can/usb/gs_usb.c
@@ -430,7 +430,7 @@ static int gs_usb_set_bittiming(struct net_device *netdev)
dev_err(netdev->dev.parent, "Couldn't set bittimings (err=%d)",
rc);
- return rc;
+ return (rc > 0) ? 0 : rc;
}
static void gs_usb_xmit_callback(struct urb *urb)
diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index 479af106aaeb..424d1dee55c9 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -3176,18 +3176,37 @@ static int sh_eth_drv_probe(struct platform_device *pdev)
/* ioremap the TSU registers */
if (mdp->cd->tsu) {
struct resource *rtsu;
+
rtsu = platform_get_resource(pdev, IORESOURCE_MEM, 1);
- mdp->tsu_addr = devm_ioremap_resource(&pdev->dev, rtsu);
- if (IS_ERR(mdp->tsu_addr)) {
- ret = PTR_ERR(mdp->tsu_addr);
+ if (!rtsu) {
+ dev_err(&pdev->dev, "no TSU resource\n");
+ ret = -ENODEV;
+ goto out_release;
+ }
+ /* We can only request the TSU region for the first port
+ * of the two sharing this TSU for the probe to succeed...
+ */
+ if (devno % 2 == 0 &&
+ !devm_request_mem_region(&pdev->dev, rtsu->start,
+ resource_size(rtsu),
+ dev_name(&pdev->dev))) {
+ dev_err(&pdev->dev, "can't request TSU resource.\n");
+ ret = -EBUSY;
+ goto out_release;
+ }
+ mdp->tsu_addr = devm_ioremap(&pdev->dev, rtsu->start,
+ resource_size(rtsu));
+ if (!mdp->tsu_addr) {
+ dev_err(&pdev->dev, "TSU region ioremap() failed.\n");
+ ret = -ENOMEM;
goto out_release;
}
mdp->port = devno % 2;
ndev->features = NETIF_F_HW_VLAN_CTAG_FILTER;
}
- /* initialize first or needed device */
- if (!devno || pd->needs_init) {
+ /* Need to init only the first port of the two sharing a TSU */
+ if (devno % 2 == 0) {
if (mdp->cd->chip_reset)
mdp->cd->chip_reset(ndev);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 4b100ef4af9f..5adaf537513b 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -272,8 +272,14 @@ bool stmmac_eee_init(struct stmmac_priv *priv)
{
char *phy_bus_name = priv->plat->phy_bus_name;
unsigned long flags;
+ int interface = priv->plat->interface;
bool ret = false;
+ if ((interface != PHY_INTERFACE_MODE_MII) &&
+ (interface != PHY_INTERFACE_MODE_GMII) &&
+ !phy_interface_mode_is_rgmii(interface))
+ goto out;
+
/* Using PCS we cannot dial with the phy registers at this stage
* so we do not support extra feature like EEE.
*/
diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c
index 9c6357c03905..b64327722660 100644
--- a/drivers/staging/android/ashmem.c
+++ b/drivers/staging/android/ashmem.c
@@ -759,10 +759,12 @@ static long ashmem_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
break;
case ASHMEM_SET_SIZE:
ret = -EINVAL;
+ mutex_lock(&ashmem_mutex);
if (!asma->file) {
ret = 0;
asma->size = (size_t)arg;
}
+ mutex_unlock(&ashmem_mutex);
break;
case ASHMEM_GET_SIZE:
ret = asma->size;
diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c
index 1e82105342cc..47b9172182f7 100644
--- a/drivers/target/iscsi/iscsi_target.c
+++ b/drivers/target/iscsi/iscsi_target.c
@@ -1929,7 +1929,6 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
struct iscsi_tmr_req *tmr_req;
struct iscsi_tm *hdr;
int out_of_order_cmdsn = 0, ret;
- bool sess_ref = false;
u8 function, tcm_function = TMR_UNKNOWN;
hdr = (struct iscsi_tm *) buf;
@@ -1971,18 +1970,17 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
buf);
}
+ transport_init_se_cmd(&cmd->se_cmd, &iscsi_ops,
+ conn->sess->se_sess, 0, DMA_NONE,
+ TCM_SIMPLE_TAG, cmd->sense_buffer + 2);
+
+ target_get_sess_cmd(&cmd->se_cmd, true);
+
/*
* TASK_REASSIGN for ERL=2 / connection stays inside of
* LIO-Target $FABRIC_MOD
*/
if (function != ISCSI_TM_FUNC_TASK_REASSIGN) {
- transport_init_se_cmd(&cmd->se_cmd, &iscsi_ops,
- conn->sess->se_sess, 0, DMA_NONE,
- TCM_SIMPLE_TAG, cmd->sense_buffer + 2);
-
- target_get_sess_cmd(&cmd->se_cmd, true);
- sess_ref = true;
-
switch (function) {
case ISCSI_TM_FUNC_ABORT_TASK:
tcm_function = TMR_ABORT_TASK;
@@ -2121,12 +2119,8 @@ attach:
* For connection recovery, this is also the default action for
* TMR TASK_REASSIGN.
*/
- if (sess_ref) {
- pr_debug("Handle TMR, using sess_ref=true check\n");
- target_put_sess_cmd(&cmd->se_cmd);
- }
-
iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state);
+ target_put_sess_cmd(&cmd->se_cmd);
return 0;
}
EXPORT_SYMBOL(iscsit_handle_task_mgt_cmd);
diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c
index 8c1a50839038..77a25341cbdc 100644
--- a/drivers/target/target_core_tmr.c
+++ b/drivers/target/target_core_tmr.c
@@ -133,6 +133,15 @@ static bool __target_check_io_state(struct se_cmd *se_cmd,
spin_unlock(&se_cmd->t_state_lock);
return false;
}
+ if (se_cmd->transport_state & CMD_T_PRE_EXECUTE) {
+ if (se_cmd->scsi_status) {
+ pr_debug("Attempted to abort io tag: %llu early failure"
+ " status: 0x%02x\n", se_cmd->tag,
+ se_cmd->scsi_status);
+ spin_unlock(&se_cmd->t_state_lock);
+ return false;
+ }
+ }
if (sess->sess_tearing_down || se_cmd->cmd_wait_set) {
pr_debug("Attempted to abort io tag: %llu already shutdown,"
" skipping\n", se_cmd->tag);
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 27e8b6d10529..d58e500c3cb6 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -1980,6 +1980,7 @@ void target_execute_cmd(struct se_cmd *cmd)
}
cmd->t_state = TRANSPORT_PROCESSING;
+ cmd->transport_state &= ~CMD_T_PRE_EXECUTE;
cmd->transport_state |= CMD_T_ACTIVE|CMD_T_BUSY|CMD_T_SENT;
spin_unlock_irq(&cmd->t_state_lock);
@@ -2627,6 +2628,7 @@ int target_get_sess_cmd(struct se_cmd *se_cmd, bool ack_kref)
ret = -ESHUTDOWN;
goto out;
}
+ se_cmd->transport_state |= CMD_T_PRE_EXECUTE;
list_add_tail(&se_cmd->se_cmd_list, &se_sess->sess_cmd_list);
out:
spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags);
diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
index f70da2828f3c..3da9bd07f95c 100644
--- a/drivers/usb/host/xhci-mem.c
+++ b/drivers/usb/host/xhci-mem.c
@@ -1069,7 +1069,8 @@ int xhci_alloc_virt_device(struct xhci_hcd *xhci, int slot_id,
return 1;
fail:
-
+ if (dev->eps[0].ring)
+ xhci_ring_free(xhci, dev->eps[0].ring);
if (dev->in_ctx)
xhci_free_container_ctx(xhci, dev->in_ctx);
if (dev->out_ctx)
diff --git a/drivers/usb/misc/usb3503.c b/drivers/usb/misc/usb3503.c
index b45cb77c0744..9e8789877763 100644
--- a/drivers/usb/misc/usb3503.c
+++ b/drivers/usb/misc/usb3503.c
@@ -292,6 +292,8 @@ static int usb3503_probe(struct usb3503 *hub)
if (gpio_is_valid(hub->gpio_reset)) {
err = devm_gpio_request_one(dev, hub->gpio_reset,
GPIOF_OUT_INIT_LOW, "usb3503 reset");
+ /* Datasheet defines a hardware reset to be at least 100us */
+ usleep_range(100, 10000);
if (err) {
dev_err(dev,
"unable to request GPIO %d as reset pin (%d)\n",
diff --git a/drivers/usb/mon/mon_bin.c b/drivers/usb/mon/mon_bin.c
index 3598f1a62673..251d123d9046 100644
--- a/drivers/usb/mon/mon_bin.c
+++ b/drivers/usb/mon/mon_bin.c
@@ -1001,7 +1001,9 @@ static long mon_bin_ioctl(struct file *file, unsigned int cmd, unsigned long arg
break;
case MON_IOCQ_RING_SIZE:
+ mutex_lock(&rp->fetch_lock);
ret = rp->b_size;
+ mutex_unlock(&rp->fetch_lock);
break;
case MON_IOCT_RING_SIZE:
@@ -1228,12 +1230,16 @@ static int mon_bin_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
unsigned long offset, chunk_idx;
struct page *pageptr;
+ mutex_lock(&rp->fetch_lock);
offset = vmf->pgoff << PAGE_SHIFT;
- if (offset >= rp->b_size)
+ if (offset >= rp->b_size) {
+ mutex_unlock(&rp->fetch_lock);
return VM_FAULT_SIGBUS;
+ }
chunk_idx = offset / CHUNK_SIZE;
pageptr = rp->b_vec[chunk_idx].pg;
get_page(pageptr);
+ mutex_unlock(&rp->fetch_lock);
vmf->page = pageptr;
return 0;
}
diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
index 1f5ecf905b7d..a4ab4fdf5ba3 100644
--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c
@@ -120,6 +120,7 @@ static const struct usb_device_id id_table[] = {
{ USB_DEVICE(0x10C4, 0x8470) }, /* Juniper Networks BX Series System Console */
{ USB_DEVICE(0x10C4, 0x8477) }, /* Balluff RFID */
{ USB_DEVICE(0x10C4, 0x84B6) }, /* Starizona Hyperion */
+ { USB_DEVICE(0x10C4, 0x85A7) }, /* LifeScan OneTouch Verio IQ */
{ USB_DEVICE(0x10C4, 0x85EA) }, /* AC-Services IBUS-IF */
{ USB_DEVICE(0x10C4, 0x85EB) }, /* AC-Services CIS-IBUS */
{ USB_DEVICE(0x10C4, 0x85F8) }, /* Virtenio Preon32 */
@@ -170,6 +171,7 @@ static const struct usb_device_id id_table[] = {
{ USB_DEVICE(0x1843, 0x0200) }, /* Vaisala USB Instrument Cable */
{ USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */
{ USB_DEVICE(0x18EF, 0xE025) }, /* ELV Marble Sound Board 1 */
+ { USB_DEVICE(0x18EF, 0xE030) }, /* ELV ALC 8xxx Battery Charger */
{ USB_DEVICE(0x18EF, 0xE032) }, /* ELV TFD500 Data Logger */
{ USB_DEVICE(0x1901, 0x0190) }, /* GE B850 CP2105 Recorder interface */
{ USB_DEVICE(0x1901, 0x0193) }, /* GE B650 CP2104 PMC interface */
diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h
index 2f80163ffb94..8ed80f28416f 100644
--- a/drivers/usb/storage/unusual_uas.h
+++ b/drivers/usb/storage/unusual_uas.h
@@ -155,6 +155,13 @@ UNUSUAL_DEV(0x2109, 0x0711, 0x0000, 0x9999,
USB_SC_DEVICE, USB_PR_DEVICE, NULL,
US_FL_NO_ATA_1X),
+/* Reported-by: Icenowy Zheng <icenowy@aosc.io> */
+UNUSUAL_DEV(0x2537, 0x1068, 0x0000, 0x9999,
+ "Norelsys",
+ "NS1068X",
+ USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+ US_FL_IGNORE_UAS),
+
/* Reported-by: Takeo Nakayama <javhera@gmx.com> */
UNUSUAL_DEV(0x357d, 0x7788, 0x0000, 0x9999,
"JMicron",
diff --git a/drivers/usb/usbip/usbip_common.c b/drivers/usb/usbip/usbip_common.c
index e40da7759a0e..9752b93f754e 100644
--- a/drivers/usb/usbip/usbip_common.c
+++ b/drivers/usb/usbip/usbip_common.c
@@ -103,7 +103,7 @@ static void usbip_dump_usb_device(struct usb_device *udev)
dev_dbg(dev, " devnum(%d) devpath(%s) usb speed(%s)",
udev->devnum, udev->devpath, usb_speed_string(udev->speed));
- pr_debug("tt %p, ttport %d\n", udev->tt, udev->ttport);
+ pr_debug("tt hub ttport %d\n", udev->ttport);
dev_dbg(dev, " ");
for (i = 0; i < 16; i++)
@@ -136,12 +136,8 @@ static void usbip_dump_usb_device(struct usb_device *udev)
}
pr_debug("\n");
- dev_dbg(dev, "parent %p, bus %p\n", udev->parent, udev->bus);
-
- dev_dbg(dev,
- "descriptor %p, config %p, actconfig %p, rawdescriptors %p\n",
- &udev->descriptor, udev->config,
- udev->actconfig, udev->rawdescriptors);
+ dev_dbg(dev, "parent %s, bus %s\n", dev_name(&udev->parent->dev),
+ udev->bus->bus_name);
dev_dbg(dev, "have_langid %d, string_langid %d\n",
udev->have_langid, udev->string_langid);
@@ -249,9 +245,6 @@ void usbip_dump_urb(struct urb *urb)
dev = &urb->dev->dev;
- dev_dbg(dev, " urb :%p\n", urb);
- dev_dbg(dev, " dev :%p\n", urb->dev);
-
usbip_dump_usb_device(urb->dev);
dev_dbg(dev, " pipe :%08x ", urb->pipe);
@@ -260,11 +253,9 @@ void usbip_dump_urb(struct urb *urb)
dev_dbg(dev, " status :%d\n", urb->status);
dev_dbg(dev, " transfer_flags :%08X\n", urb->transfer_flags);
- dev_dbg(dev, " transfer_buffer :%p\n", urb->transfer_buffer);
dev_dbg(dev, " transfer_buffer_length:%d\n",
urb->transfer_buffer_length);
dev_dbg(dev, " actual_length :%d\n", urb->actual_length);
- dev_dbg(dev, " setup_packet :%p\n", urb->setup_packet);
if (urb->setup_packet && usb_pipetype(urb->pipe) == PIPE_CONTROL)
usbip_dump_usb_ctrlrequest(
@@ -274,8 +265,6 @@ void usbip_dump_urb(struct urb *urb)
dev_dbg(dev, " number_of_packets :%d\n", urb->number_of_packets);
dev_dbg(dev, " interval :%d\n", urb->interval);
dev_dbg(dev, " error_count :%d\n", urb->error_count);
- dev_dbg(dev, " context :%p\n", urb->context);
- dev_dbg(dev, " complete :%p\n", urb->complete);
}
EXPORT_SYMBOL_GPL(usbip_dump_urb);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 7c3a9dd79edd..2b0f908b5e84 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4352,7 +4352,7 @@ static int should_alloc_chunk(struct btrfs_root *root,
{
struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly;
- u64 num_allocated = sinfo->bytes_used + sinfo->bytes_reserved;
+ u64 num_allocated = sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned;
u64 thresh;
if (force == CHUNK_ALLOC_FORCE)
diff --git a/fs/dax.c b/fs/dax.c
index 78415c78a07e..6543089be8b5 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -849,7 +849,7 @@ static int dax_insert_mapping(struct address_space *mapping,
}
/**
- * __dax_fault - handle a page fault on a DAX file
+ * __dax_fault2 - handle a page fault on a DAX file
* @vma: The virtual memory area where the fault occurred
* @vmf: The description of the fault
* @get_block: The filesystem method used to translate file offsets to blocks
@@ -858,8 +858,8 @@ static int dax_insert_mapping(struct address_space *mapping,
* fault handler for DAX files. __dax_fault() assumes the caller has done all
* the necessary locking for the page fault to proceed successfully.
*/
-int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
- get_block_t get_block)
+int __dax_fault2(struct vm_area_struct *vma, struct vm_fault *vmf,
+ int *map_errp, get_block_t get_block)
{
struct file *file = vma->vm_file;
struct address_space *mapping = file->f_mapping;
@@ -895,8 +895,11 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
error = get_block(inode, block, &bh, 0);
if (!error && (bh.b_size < PAGE_SIZE))
error = -EIO; /* fs corruption? */
- if (error)
+ if (error) {
+ if (map_errp)
+ *map_errp = error;
goto unlock_entry;
+ }
if (vmf->cow_page) {
struct page *new_page = vmf->cow_page;
@@ -922,8 +925,11 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
major = VM_FAULT_MAJOR;
if (!error && (bh.b_size < PAGE_SIZE))
error = -EIO;
- if (error)
+ if (error) {
+ if (map_errp)
+ *map_errp = error;
goto unlock_entry;
+ }
} else {
return dax_load_hole(mapping, entry, vmf);
}
@@ -942,6 +948,13 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
return VM_FAULT_SIGBUS | major;
return VM_FAULT_NOPAGE | major;
}
+EXPORT_SYMBOL(__dax_fault2);
+
+int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
+ get_block_t get_block)
+{
+ return __dax_fault2(vma, vmf, NULL, get_block);
+}
EXPORT_SYMBOL(__dax_fault);
/**
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index f2aaaa2ac554..328365c3cef0 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -195,7 +195,8 @@ out:
#ifdef CONFIG_FS_DAX
static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
- int result;
+ int result, error = 0;
+ int retries = 0;
handle_t *handle = NULL;
struct inode *inode = file_inode(vma->vm_file);
struct super_block *sb = inode->i_sb;
@@ -205,6 +206,7 @@ static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
sb_start_pagefault(sb);
file_update_time(vma->vm_file);
down_read(&EXT4_I(inode)->i_mmap_sem);
+retry:
handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
EXT4_DATA_TRANS_BLOCKS(sb));
} else
@@ -213,11 +215,14 @@ static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
if (IS_ERR(handle))
result = VM_FAULT_SIGBUS;
else
- result = __dax_fault(vma, vmf, ext4_dax_get_block);
+ result = __dax_fault2(vma, vmf, &error, ext4_dax_get_block);
if (write) {
if (!IS_ERR(handle))
ext4_journal_stop(handle);
+ if ((result & VM_FAULT_ERROR) && error == -ENOSPC &&
+ ext4_should_retry_alloc(sb, &retries))
+ goto retry;
up_read(&EXT4_I(inode)->i_mmap_sem);
sb_end_pagefault(sb);
} else
diff --git a/include/asm-generic/asm-prototypes.h b/include/asm-generic/asm-prototypes.h
new file mode 100644
index 000000000000..939869c772b1
--- /dev/null
+++ b/include/asm-generic/asm-prototypes.h
@@ -0,0 +1,13 @@
+#include <linux/bitops.h>
+#undef __memset
+extern void *__memset(void *, int, __kernel_size_t);
+#undef __memcpy
+extern void *__memcpy(void *, const void *, __kernel_size_t);
+#undef __memmove
+extern void *__memmove(void *, const void *, __kernel_size_t);
+#undef memset
+extern void *memset(void *, int, __kernel_size_t);
+#undef memcpy
+extern void *memcpy(void *, const void *, __kernel_size_t);
+#undef memmove
+extern void *memmove(void *, const void *, __kernel_size_t);
diff --git a/include/asm-generic/export.h b/include/asm-generic/export.h
new file mode 100644
index 000000000000..43199a049da5
--- /dev/null
+++ b/include/asm-generic/export.h
@@ -0,0 +1,94 @@
+#ifndef __ASM_GENERIC_EXPORT_H
+#define __ASM_GENERIC_EXPORT_H
+
+#ifndef KSYM_FUNC
+#define KSYM_FUNC(x) x
+#endif
+#ifdef CONFIG_64BIT
+#define __put .quad
+#ifndef KSYM_ALIGN
+#define KSYM_ALIGN 8
+#endif
+#ifndef KCRC_ALIGN
+#define KCRC_ALIGN 8
+#endif
+#else
+#define __put .long
+#ifndef KSYM_ALIGN
+#define KSYM_ALIGN 4
+#endif
+#ifndef KCRC_ALIGN
+#define KCRC_ALIGN 4
+#endif
+#endif
+
+#ifdef CONFIG_HAVE_UNDERSCORE_SYMBOL_PREFIX
+#define KSYM(name) _##name
+#else
+#define KSYM(name) name
+#endif
+
+/*
+ * note on .section use: @progbits vs %progbits nastiness doesn't matter,
+ * since we immediately emit into those sections anyway.
+ */
+.macro ___EXPORT_SYMBOL name,val,sec
+#ifdef CONFIG_MODULES
+ .globl KSYM(__ksymtab_\name)
+ .section ___ksymtab\sec+\name,"a"
+ .balign KSYM_ALIGN
+KSYM(__ksymtab_\name):
+ __put \val, KSYM(__kstrtab_\name)
+ .previous
+ .section __ksymtab_strings,"a"
+KSYM(__kstrtab_\name):
+#ifdef CONFIG_HAVE_UNDERSCORE_SYMBOL_PREFIX
+ .asciz "_\name"
+#else
+ .asciz "\name"
+#endif
+ .previous
+#ifdef CONFIG_MODVERSIONS
+ .section ___kcrctab\sec+\name,"a"
+ .balign KCRC_ALIGN
+KSYM(__kcrctab_\name):
+ __put KSYM(__crc_\name)
+ .weak KSYM(__crc_\name)
+ .previous
+#endif
+#endif
+.endm
+#undef __put
+
+#if defined(__KSYM_DEPS__)
+
+#define __EXPORT_SYMBOL(sym, val, sec) === __KSYM_##sym ===
+
+#elif defined(CONFIG_TRIM_UNUSED_KSYMS)
+
+#include <linux/kconfig.h>
+#include <generated/autoksyms.h>
+
+#define __EXPORT_SYMBOL(sym, val, sec) \
+ __cond_export_sym(sym, val, sec, config_enabled(__KSYM_##sym))
+#define __cond_export_sym(sym, val, sec, conf) \
+ ___cond_export_sym(sym, val, sec, conf)
+#define ___cond_export_sym(sym, val, sec, enabled) \
+ __cond_export_sym_##enabled(sym, val, sec)
+#define __cond_export_sym_1(sym, val, sec) ___EXPORT_SYMBOL sym, val, sec
+#define __cond_export_sym_0(sym, val, sec) /* nothing */
+
+#else
+#define __EXPORT_SYMBOL(sym, val, sec) ___EXPORT_SYMBOL sym, val, sec
+#endif
+
+#define EXPORT_SYMBOL(name) \
+ __EXPORT_SYMBOL(name, KSYM_FUNC(KSYM(name)),)
+#define EXPORT_SYMBOL_GPL(name) \
+ __EXPORT_SYMBOL(name, KSYM_FUNC(KSYM(name)), _gpl)
+#define EXPORT_DATA_SYMBOL(name) \
+ __EXPORT_SYMBOL(name, KSYM(name),)
+#define EXPORT_DATA_SYMBOL_GPL(name) \
+ __EXPORT_SYMBOL(name, KSYM(name),_gpl)
+
+#endif
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 3ea9aae2387d..7e04bcd9af8e 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -40,6 +40,13 @@ extern void cpu_remove_dev_attr(struct device_attribute *attr);
extern int cpu_add_dev_attr_group(struct attribute_group *attrs);
extern void cpu_remove_dev_attr_group(struct attribute_group *attrs);
+extern ssize_t cpu_show_meltdown(struct device *dev,
+ struct device_attribute *attr, char *buf);
+extern ssize_t cpu_show_spectre_v1(struct device *dev,
+ struct device_attribute *attr, char *buf);
+extern ssize_t cpu_show_spectre_v2(struct device *dev,
+ struct device_attribute *attr, char *buf);
+
extern __printf(4, 5)
struct device *cpu_device_create(struct device *parent, void *drvdata,
const struct attribute_group **groups,
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 4ba71bdc0669..a88039c6e5ba 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -16,6 +16,7 @@ int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t);
int dax_truncate_page(struct inode *, loff_t from, get_block_t);
int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t);
int __dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t);
+int __dax_fault2(struct vm_area_struct *, struct vm_fault *, int *, get_block_t);
int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
void dax_wake_mapping_entry_waiter(struct address_space *mapping,
pgoff_t index, bool wake_all);
diff --git a/include/linux/kconfig.h b/include/linux/kconfig.h
index b33c7797eb57..a94b5bf57f51 100644
--- a/include/linux/kconfig.h
+++ b/include/linux/kconfig.h
@@ -17,10 +17,11 @@
* the last step cherry picks the 2nd arg, we get a zero.
*/
#define __ARG_PLACEHOLDER_1 0,
-#define config_enabled(cfg) _config_enabled(cfg)
-#define _config_enabled(value) __config_enabled(__ARG_PLACEHOLDER_##value)
-#define __config_enabled(arg1_or_junk) ___config_enabled(arg1_or_junk 1, 0)
-#define ___config_enabled(__ignored, val, ...) val
+#define config_enabled(cfg) ___is_defined(cfg)
+#define __is_defined(x) ___is_defined(x)
+#define ___is_defined(val) ____is_defined(__ARG_PLACEHOLDER_##val)
+#define ____is_defined(arg1_or_junk) __take_second_arg(arg1_or_junk 1, 0)
+#define __take_second_arg(__ignored, val, ...) val
/*
* IS_BUILTIN(CONFIG_FOO) evaluates to 1 if CONFIG_FOO is set to 'y', 0
@@ -42,7 +43,7 @@
* built-in code when CONFIG_FOO is set to 'm'.
*/
#define IS_REACHABLE(option) (config_enabled(option) || \
- (config_enabled(option##_MODULE) && config_enabled(MODULE)))
+ (config_enabled(option##_MODULE) && __is_defined(MODULE)))
/*
* IS_ENABLED(CONFIG_FOO) evaluates to 1 if CONFIG_FOO is set to 'y' or 'm',
diff --git a/include/linux/phy.h b/include/linux/phy.h
index b64825d6ad26..d7ebdb35d356 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -687,6 +687,17 @@ static inline bool phy_is_internal(struct phy_device *phydev)
}
/**
+ * phy_interface_mode_is_rgmii - Convenience function for testing if a
+ * PHY interface mode is RGMII (all variants)
+ * @mode: the phy_interface_t enum
+ */
+static inline bool phy_interface_mode_is_rgmii(phy_interface_t mode)
+{
+ return mode >= PHY_INTERFACE_MODE_RGMII &&
+ mode <= PHY_INTERFACE_MODE_RGMII_TXID;
+};
+
+/**
* phy_interface_is_rgmii - Convenience function for testing if a PHY interface
* is RGMII (all variants)
* @phydev: the phy_device struct
diff --git a/include/linux/sh_eth.h b/include/linux/sh_eth.h
index 8c9131db2b25..b050ef51e27e 100644
--- a/include/linux/sh_eth.h
+++ b/include/linux/sh_eth.h
@@ -16,7 +16,6 @@ struct sh_eth_plat_data {
unsigned char mac_addr[ETH_ALEN];
unsigned no_ether_link:1;
unsigned ether_link_active_low:1;
- unsigned needs_init:1;
};
#endif
diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index 6969cdd96b74..eca3b0419541 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -508,6 +508,7 @@ struct se_cmd {
#define CMD_T_BUSY (1 << 9)
#define CMD_T_TAS (1 << 10)
#define CMD_T_FABRIC_STOP (1 << 11)
+#define CMD_T_PRE_EXECUTE (1 << 12)
spinlock_t t_state_lock;
struct kref cmd_kref;
struct completion t_transport_stop_comp;
diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
index d6f83222a6a1..67ff6555967f 100644
--- a/include/trace/events/kvm.h
+++ b/include/trace/events/kvm.h
@@ -204,7 +204,7 @@ TRACE_EVENT(kvm_ack_irq,
{ KVM_TRACE_MMIO_WRITE, "write" }
TRACE_EVENT(kvm_mmio,
- TP_PROTO(int type, int len, u64 gpa, u64 val),
+ TP_PROTO(int type, int len, u64 gpa, void *val),
TP_ARGS(type, len, gpa, val),
TP_STRUCT__entry(
@@ -218,7 +218,10 @@ TRACE_EVENT(kvm_mmio,
__entry->type = type;
__entry->len = len;
__entry->gpa = gpa;
- __entry->val = val;
+ __entry->val = 0;
+ if (val)
+ memcpy(&__entry->val, val,
+ min_t(u32, sizeof(__entry->val), len));
),
TP_printk("mmio %s len %u gpa 0x%llx val 0x%llx",
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 01abb6431fd9..e2713b0794ae 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -111,12 +111,7 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
vlan_gvrp_uninit_applicant(real_dev);
}
- /* Take it out of our own structures, but be sure to interlock with
- * HW accelerating devices or SW vlan input packet processing if
- * VLAN is not 0 (leave it there for 802.1p).
- */
- if (vlan_id)
- vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id);
+ vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id);
/* Get rid of the vlan's reference to real_dev */
dev_put(real_dev);
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index 0c1d58d43f67..a47f693f9f14 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -289,7 +289,7 @@ static int sock_diag_bind(struct net *net, int group)
case SKNLGRP_INET6_UDP_DESTROY:
if (!sock_diag_handlers[AF_INET6])
request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
- NETLINK_SOCK_DIAG, AF_INET);
+ NETLINK_SOCK_DIAG, AF_INET6);
break;
}
return 0;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 4b3b1f794600..c08b960c7f5c 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1783,10 +1783,13 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
cork.base.flags = 0;
cork.base.addr = 0;
cork.base.opt = NULL;
+ cork.base.dst = NULL;
v6_cork.opt = NULL;
err = ip6_setup_cork(sk, &cork, &v6_cork, hlimit, tclass, opt, rt, fl6);
- if (err)
+ if (err) {
+ ip6_cork_release(&cork, &v6_cork);
return ERR_PTR(err);
+ }
if (dontfrag < 0)
dontfrag = inet6_sk(sk)->dontfrag;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 97cb02dc5f02..a7170a23ab0b 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1083,10 +1083,11 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
neigh_release(neigh);
}
- } else if (!(t->parms.flags &
- (IP6_TNL_F_USE_ORIG_TCLASS | IP6_TNL_F_USE_ORIG_FWMARK))) {
- /* enable the cache only only if the routing decision does
- * not depend on the current inner header value
+ } else if (t->parms.proto != 0 && !(t->parms.flags &
+ (IP6_TNL_F_USE_ORIG_TCLASS |
+ IP6_TNL_F_USE_ORIG_FWMARK))) {
+ /* enable the cache only if neither the outer protocol nor the
+ * routing decision depends on the current inner header value
*/
use_cache = true;
}
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index 4d2aaebd4f97..e546a987a9d3 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -91,7 +91,7 @@ static const struct file_operations reset_ops = {
};
#endif
-static const char *hw_flag_names[NUM_IEEE80211_HW_FLAGS + 1] = {
+static const char *hw_flag_names[] = {
#define FLAG(F) [IEEE80211_HW_##F] = #F
FLAG(HAS_RATE_CONTROL),
FLAG(RX_INCLUDES_FCS),
@@ -125,9 +125,6 @@ static const char *hw_flag_names[NUM_IEEE80211_HW_FLAGS + 1] = {
FLAG(TDLS_WIDER_BW),
FLAG(SUPPORTS_AMSDU_IN_AMPDU),
FLAG(BEACON_TX_STATUS),
-
- /* keep last for the build bug below */
- (void *)0x1
#undef FLAG
};
@@ -147,7 +144,7 @@ static ssize_t hwflags_read(struct file *file, char __user *user_buf,
/* fail compilation if somebody adds or removes
* a flag without updating the name array above
*/
- BUILD_BUG_ON(hw_flag_names[NUM_IEEE80211_HW_FLAGS] != (void *)0x1);
+ BUILD_BUG_ON(ARRAY_SIZE(hw_flag_names) != NUM_IEEE80211_HW_FLAGS);
for (i = 0; i < NUM_IEEE80211_HW_FLAGS; i++) {
if (test_bit(i, local->hw.flags))
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index b5f08e8cab33..e4bb1de1d526 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -1,9 +1,5 @@
# Makefile for vm selftests
-ifndef OUTPUT
- OUTPUT := $(shell pwd)
-endif
-
CFLAGS = -Wall -I ../../../../usr/include $(EXTRA_CFLAGS)
BINARIES = compaction_test
BINARIES += hugepage-mmap
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index eabcff411984..92d7eff2827a 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -4,7 +4,8 @@ include ../lib.mk
.PHONY: all all_32 all_64 warn_32bit_failure clean
-TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs ldt_gdt syscall_nt ptrace_syscall
+TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs ldt_gdt syscall_nt ptrace_syscall \
+ test_vsyscall
TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault sigreturn test_syscall_vdso unwind_vdso \
test_FCMOV test_FCOMI test_FISTTP
diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c
new file mode 100644
index 000000000000..6e0bd52ad53d
--- /dev/null
+++ b/tools/testing/selftests/x86/test_vsyscall.c
@@ -0,0 +1,500 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <sys/time.h>
+#include <time.h>
+#include <stdlib.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <dlfcn.h>
+#include <string.h>
+#include <inttypes.h>
+#include <signal.h>
+#include <sys/ucontext.h>
+#include <errno.h>
+#include <err.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <setjmp.h>
+
+#ifdef __x86_64__
+# define VSYS(x) (x)
+#else
+# define VSYS(x) 0
+#endif
+
+#ifndef SYS_getcpu
+# ifdef __x86_64__
+# define SYS_getcpu 309
+# else
+# define SYS_getcpu 318
+# endif
+#endif
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+ int flags)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+/* vsyscalls and vDSO */
+bool should_read_vsyscall = false;
+
+typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz);
+gtod_t vgtod = (gtod_t)VSYS(0xffffffffff600000);
+gtod_t vdso_gtod;
+
+typedef int (*vgettime_t)(clockid_t, struct timespec *);
+vgettime_t vdso_gettime;
+
+typedef long (*time_func_t)(time_t *t);
+time_func_t vtime = (time_func_t)VSYS(0xffffffffff600400);
+time_func_t vdso_time;
+
+typedef long (*getcpu_t)(unsigned *, unsigned *, void *);
+getcpu_t vgetcpu = (getcpu_t)VSYS(0xffffffffff600800);
+getcpu_t vdso_getcpu;
+
+static void init_vdso(void)
+{
+ void *vdso = dlopen("linux-vdso.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
+ if (!vdso)
+ vdso = dlopen("linux-gate.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
+ if (!vdso) {
+ printf("[WARN]\tfailed to find vDSO\n");
+ return;
+ }
+
+ vdso_gtod = (gtod_t)dlsym(vdso, "__vdso_gettimeofday");
+ if (!vdso_gtod)
+ printf("[WARN]\tfailed to find gettimeofday in vDSO\n");
+
+ vdso_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime");
+ if (!vdso_gettime)
+ printf("[WARN]\tfailed to find clock_gettime in vDSO\n");
+
+ vdso_time = (time_func_t)dlsym(vdso, "__vdso_time");
+ if (!vdso_time)
+ printf("[WARN]\tfailed to find time in vDSO\n");
+
+ vdso_getcpu = (getcpu_t)dlsym(vdso, "__vdso_getcpu");
+ if (!vdso_getcpu) {
+ /* getcpu() was never wired up in the 32-bit vDSO. */
+ printf("[%s]\tfailed to find getcpu in vDSO\n",
+ sizeof(long) == 8 ? "WARN" : "NOTE");
+ }
+}
+
+static int init_vsys(void)
+{
+#ifdef __x86_64__
+ int nerrs = 0;
+ FILE *maps;
+ char line[128];
+ bool found = false;
+
+ maps = fopen("/proc/self/maps", "r");
+ if (!maps) {
+ printf("[WARN]\tCould not open /proc/self/maps -- assuming vsyscall is r-x\n");
+ should_read_vsyscall = true;
+ return 0;
+ }
+
+ while (fgets(line, sizeof(line), maps)) {
+ char r, x;
+ void *start, *end;
+ char name[128];
+ if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s",
+ &start, &end, &r, &x, name) != 5)
+ continue;
+
+ if (strcmp(name, "[vsyscall]"))
+ continue;
+
+ printf("\tvsyscall map: %s", line);
+
+ if (start != (void *)0xffffffffff600000 ||
+ end != (void *)0xffffffffff601000) {
+ printf("[FAIL]\taddress range is nonsense\n");
+ nerrs++;
+ }
+
+ printf("\tvsyscall permissions are %c-%c\n", r, x);
+ should_read_vsyscall = (r == 'r');
+ if (x != 'x') {
+ vgtod = NULL;
+ vtime = NULL;
+ vgetcpu = NULL;
+ }
+
+ found = true;
+ break;
+ }
+
+ fclose(maps);
+
+ if (!found) {
+ printf("\tno vsyscall map in /proc/self/maps\n");
+ should_read_vsyscall = false;
+ vgtod = NULL;
+ vtime = NULL;
+ vgetcpu = NULL;
+ }
+
+ return nerrs;
+#else
+ return 0;
+#endif
+}
+
+/* syscalls */
+static inline long sys_gtod(struct timeval *tv, struct timezone *tz)
+{
+ return syscall(SYS_gettimeofday, tv, tz);
+}
+
+static inline int sys_clock_gettime(clockid_t id, struct timespec *ts)
+{
+ return syscall(SYS_clock_gettime, id, ts);
+}
+
+static inline long sys_time(time_t *t)
+{
+ return syscall(SYS_time, t);
+}
+
+static inline long sys_getcpu(unsigned * cpu, unsigned * node,
+ void* cache)
+{
+ return syscall(SYS_getcpu, cpu, node, cache);
+}
+
+static jmp_buf jmpbuf;
+
+static void sigsegv(int sig, siginfo_t *info, void *ctx_void)
+{
+ siglongjmp(jmpbuf, 1);
+}
+
+static double tv_diff(const struct timeval *a, const struct timeval *b)
+{
+ return (double)(a->tv_sec - b->tv_sec) +
+ (double)((int)a->tv_usec - (int)b->tv_usec) * 1e-6;
+}
+
+static int check_gtod(const struct timeval *tv_sys1,
+ const struct timeval *tv_sys2,
+ const struct timezone *tz_sys,
+ const char *which,
+ const struct timeval *tv_other,
+ const struct timezone *tz_other)
+{
+ int nerrs = 0;
+ double d1, d2;
+
+ if (tz_other && (tz_sys->tz_minuteswest != tz_other->tz_minuteswest || tz_sys->tz_dsttime != tz_other->tz_dsttime)) {
+ printf("[FAIL] %s tz mismatch\n", which);
+ nerrs++;
+ }
+
+ d1 = tv_diff(tv_other, tv_sys1);
+ d2 = tv_diff(tv_sys2, tv_other);
+ printf("\t%s time offsets: %lf %lf\n", which, d1, d2);
+
+ if (d1 < 0 || d2 < 0) {
+ printf("[FAIL]\t%s time was inconsistent with the syscall\n", which);
+ nerrs++;
+ } else {
+ printf("[OK]\t%s gettimeofday()'s timeval was okay\n", which);
+ }
+
+ return nerrs;
+}
+
+static int test_gtod(void)
+{
+ struct timeval tv_sys1, tv_sys2, tv_vdso, tv_vsys;
+ struct timezone tz_sys, tz_vdso, tz_vsys;
+ long ret_vdso = -1;
+ long ret_vsys = -1;
+ int nerrs = 0;
+
+ printf("[RUN]\ttest gettimeofday()\n");
+
+ if (sys_gtod(&tv_sys1, &tz_sys) != 0)
+ err(1, "syscall gettimeofday");
+ if (vdso_gtod)
+ ret_vdso = vdso_gtod(&tv_vdso, &tz_vdso);
+ if (vgtod)
+ ret_vsys = vgtod(&tv_vsys, &tz_vsys);
+ if (sys_gtod(&tv_sys2, &tz_sys) != 0)
+ err(1, "syscall gettimeofday");
+
+ if (vdso_gtod) {
+ if (ret_vdso == 0) {
+ nerrs += check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vDSO", &tv_vdso, &tz_vdso);
+ } else {
+ printf("[FAIL]\tvDSO gettimeofday() failed: %ld\n", ret_vdso);
+ nerrs++;
+ }
+ }
+
+ if (vgtod) {
+ if (ret_vsys == 0) {
+ nerrs += check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vsyscall", &tv_vsys, &tz_vsys);
+ } else {
+ printf("[FAIL]\tvsys gettimeofday() failed: %ld\n", ret_vsys);
+ nerrs++;
+ }
+ }
+
+ return nerrs;
+}
+
+static int test_time(void) {
+ int nerrs = 0;
+
+ printf("[RUN]\ttest time()\n");
+ long t_sys1, t_sys2, t_vdso = 0, t_vsys = 0;
+ long t2_sys1 = -1, t2_sys2 = -1, t2_vdso = -1, t2_vsys = -1;
+ t_sys1 = sys_time(&t2_sys1);
+ if (vdso_time)
+ t_vdso = vdso_time(&t2_vdso);
+ if (vtime)
+ t_vsys = vtime(&t2_vsys);
+ t_sys2 = sys_time(&t2_sys2);
+ if (t_sys1 < 0 || t_sys1 != t2_sys1 || t_sys2 < 0 || t_sys2 != t2_sys2) {
+ printf("[FAIL]\tsyscall failed (ret1:%ld output1:%ld ret2:%ld output2:%ld)\n", t_sys1, t2_sys1, t_sys2, t2_sys2);
+ nerrs++;
+ return nerrs;
+ }
+
+ if (vdso_time) {
+ if (t_vdso < 0 || t_vdso != t2_vdso) {
+ printf("[FAIL]\tvDSO failed (ret:%ld output:%ld)\n", t_vdso, t2_vdso);
+ nerrs++;
+ } else if (t_vdso < t_sys1 || t_vdso > t_sys2) {
+ printf("[FAIL]\tvDSO returned the wrong time (%ld %ld %ld)\n", t_sys1, t_vdso, t_sys2);
+ nerrs++;
+ } else {
+ printf("[OK]\tvDSO time() is okay\n");
+ }
+ }
+
+ if (vtime) {
+ if (t_vsys < 0 || t_vsys != t2_vsys) {
+ printf("[FAIL]\tvsyscall failed (ret:%ld output:%ld)\n", t_vsys, t2_vsys);
+ nerrs++;
+ } else if (t_vsys < t_sys1 || t_vsys > t_sys2) {
+ printf("[FAIL]\tvsyscall returned the wrong time (%ld %ld %ld)\n", t_sys1, t_vsys, t_sys2);
+ nerrs++;
+ } else {
+ printf("[OK]\tvsyscall time() is okay\n");
+ }
+ }
+
+ return nerrs;
+}
+
+static int test_getcpu(int cpu)
+{
+ int nerrs = 0;
+ long ret_sys, ret_vdso = -1, ret_vsys = -1;
+
+ printf("[RUN]\tgetcpu() on CPU %d\n", cpu);
+
+ cpu_set_t cpuset;
+ CPU_ZERO(&cpuset);
+ CPU_SET(cpu, &cpuset);
+ if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) {
+ printf("[SKIP]\tfailed to force CPU %d\n", cpu);
+ return nerrs;
+ }
+
+ unsigned cpu_sys, cpu_vdso, cpu_vsys, node_sys, node_vdso, node_vsys;
+ unsigned node = 0;
+ bool have_node = false;
+ ret_sys = sys_getcpu(&cpu_sys, &node_sys, 0);
+ if (vdso_getcpu)
+ ret_vdso = vdso_getcpu(&cpu_vdso, &node_vdso, 0);
+ if (vgetcpu)
+ ret_vsys = vgetcpu(&cpu_vsys, &node_vsys, 0);
+
+ if (ret_sys == 0) {
+ if (cpu_sys != cpu) {
+ printf("[FAIL]\tsyscall reported CPU %hu but should be %d\n", cpu_sys, cpu);
+ nerrs++;
+ }
+
+ have_node = true;
+ node = node_sys;
+ }
+
+ if (vdso_getcpu) {
+ if (ret_vdso) {
+ printf("[FAIL]\tvDSO getcpu() failed\n");
+ nerrs++;
+ } else {
+ if (!have_node) {
+ have_node = true;
+ node = node_vdso;
+ }
+
+ if (cpu_vdso != cpu) {
+ printf("[FAIL]\tvDSO reported CPU %hu but should be %d\n", cpu_vdso, cpu);
+ nerrs++;
+ } else {
+ printf("[OK]\tvDSO reported correct CPU\n");
+ }
+
+ if (node_vdso != node) {
+ printf("[FAIL]\tvDSO reported node %hu but should be %hu\n", node_vdso, node);
+ nerrs++;
+ } else {
+ printf("[OK]\tvDSO reported correct node\n");
+ }
+ }
+ }
+
+ if (vgetcpu) {
+ if (ret_vsys) {
+ printf("[FAIL]\tvsyscall getcpu() failed\n");
+ nerrs++;
+ } else {
+ if (!have_node) {
+ have_node = true;
+ node = node_vsys;
+ }
+
+ if (cpu_vsys != cpu) {
+ printf("[FAIL]\tvsyscall reported CPU %hu but should be %d\n", cpu_vsys, cpu);
+ nerrs++;
+ } else {
+ printf("[OK]\tvsyscall reported correct CPU\n");
+ }
+
+ if (node_vsys != node) {
+ printf("[FAIL]\tvsyscall reported node %hu but should be %hu\n", node_vsys, node);
+ nerrs++;
+ } else {
+ printf("[OK]\tvsyscall reported correct node\n");
+ }
+ }
+ }
+
+ return nerrs;
+}
+
+static int test_vsys_r(void)
+{
+#ifdef __x86_64__
+ printf("[RUN]\tChecking read access to the vsyscall page\n");
+ bool can_read;
+ if (sigsetjmp(jmpbuf, 1) == 0) {
+ *(volatile int *)0xffffffffff600000;
+ can_read = true;
+ } else {
+ can_read = false;
+ }
+
+ if (can_read && !should_read_vsyscall) {
+ printf("[FAIL]\tWe have read access, but we shouldn't\n");
+ return 1;
+ } else if (!can_read && should_read_vsyscall) {
+ printf("[FAIL]\tWe don't have read access, but we should\n");
+ return 1;
+ } else {
+ printf("[OK]\tgot expected result\n");
+ }
+#endif
+
+ return 0;
+}
+
+
+#ifdef __x86_64__
+#define X86_EFLAGS_TF (1UL << 8)
+static volatile sig_atomic_t num_vsyscall_traps;
+
+static unsigned long get_eflags(void)
+{
+ unsigned long eflags;
+ asm volatile ("pushfq\n\tpopq %0" : "=rm" (eflags));
+ return eflags;
+}
+
+static void set_eflags(unsigned long eflags)
+{
+ asm volatile ("pushq %0\n\tpopfq" : : "rm" (eflags) : "flags");
+}
+
+static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
+{
+ ucontext_t *ctx = (ucontext_t *)ctx_void;
+ unsigned long ip = ctx->uc_mcontext.gregs[REG_RIP];
+
+ if (((ip ^ 0xffffffffff600000UL) & ~0xfffUL) == 0)
+ num_vsyscall_traps++;
+}
+
+static int test_native_vsyscall(void)
+{
+ time_t tmp;
+ bool is_native;
+
+ if (!vtime)
+ return 0;
+
+ printf("[RUN]\tchecking for native vsyscall\n");
+ sethandler(SIGTRAP, sigtrap, 0);
+ set_eflags(get_eflags() | X86_EFLAGS_TF);
+ vtime(&tmp);
+ set_eflags(get_eflags() & ~X86_EFLAGS_TF);
+
+ /*
+ * If vsyscalls are emulated, we expect a single trap in the
+ * vsyscall page -- the call instruction will trap with RIP
+ * pointing to the entry point before emulation takes over.
+ * In native mode, we expect two traps, since whatever code
+ * the vsyscall page contains will be more than just a ret
+ * instruction.
+ */
+ is_native = (num_vsyscall_traps > 1);
+
+ printf("\tvsyscalls are %s (%d instructions in vsyscall page)\n",
+ (is_native ? "native" : "emulated"),
+ (int)num_vsyscall_traps);
+
+ return 0;
+}
+#endif
+
+int main(int argc, char **argv)
+{
+ int nerrs = 0;
+
+ init_vdso();
+ nerrs += init_vsys();
+
+ nerrs += test_gtod();
+ nerrs += test_time();
+ nerrs += test_getcpu(0);
+ nerrs += test_getcpu(1);
+
+ sethandler(SIGSEGV, sigsegv, 0);
+ nerrs += test_vsys_r();
+
+#ifdef __x86_64__
+ nerrs += test_native_vsyscall();
+#endif
+
+ return nerrs ? 1 : 0;
+}