Home Home > GIT Browse > openSUSE-15.1
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKernel Build Daemon <kbuild@suse.de>2019-10-19 07:29:34 +0200
committerKernel Build Daemon <kbuild@suse.de>2019-10-19 07:29:34 +0200
commit5ae5de59dff9461d50bd8a4b69b8815411a1e5e3 (patch)
tree32b58e9bb3a65f6a0f66fadb1bffa65a22e6f5ba
parente243c502db515984e26d5fe730f2d8daf753aaac (diff)
parent8eba0303ac86490d3d027c83ad0ee705d02573e9 (diff)
Merge branch 'SLE15-SP1' into openSUSE-15.1openSUSE-15.1
-rw-r--r--Documentation/virtual/kvm/locking.txt4
-rw-r--r--arch/powerpc/include/asm/book3s/64/tlbflush-radix.h3
-rw-r--r--arch/powerpc/include/asm/kvm_ppc.h100
-rw-r--r--arch/powerpc/include/asm/ppc-opcode.h11
-rw-r--r--arch/powerpc/kernel/dbell.c6
-rw-r--r--arch/powerpc/kernel/idle_book3s.S2
-rw-r--r--arch/powerpc/kernel/mce_power.c3
-rw-r--r--arch/powerpc/kernel/rtas.c2
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_xics.c2
-rw-r--r--arch/powerpc/mm/hash_native_64.c4
-rw-r--r--arch/powerpc/mm/pgtable-book3s64.c51
-rw-r--r--arch/powerpc/mm/pgtable-radix.c14
-rw-r--r--arch/powerpc/mm/pgtable_64.c56
-rw-r--r--arch/powerpc/mm/tlb-radix.c158
-rw-r--r--arch/powerpc/platforms/powernv/smp.c2
-rw-r--r--arch/powerpc/platforms/pseries/lpar.c3
-rw-r--r--arch/powerpc/platforms/pseries/mobility.c9
-rw-r--r--arch/powerpc/sysdev/xics/icp-native.c6
-rw-r--r--arch/powerpc/sysdev/xics/icp-opal.c6
-rw-r--r--arch/s390/kvm/kvm-s390.c4
-rw-r--r--arch/x86/entry/entry_64.S1
-rw-r--r--arch/x86/include/asm/pgtable_64.h20
-rw-r--r--arch/x86/include/asm/traps.h2
-rw-r--r--arch/x86/kvm/mmu.c110
-rw-r--r--arch/x86/kvm/mmutrace.h59
-rw-r--r--arch/x86/kvm/paging_tmpl.h52
-rw-r--r--arch/x86/kvm/x86.c14
-rw-r--r--arch/x86/mm/pgtable.c8
-rw-r--r--arch/x86/xen/enlighten_pv.c2
-rw-r--r--arch/x86/xen/xen-asm_64.S1
-rw-r--r--block/blk-sysfs.c57
-rw-r--r--block/blk-wbt.c118
-rw-r--r--drivers/char/hw_random/core.c2
-rw-r--r--drivers/char/mem.c21
-rw-r--r--drivers/crypto/talitos.c1
-rw-r--r--drivers/firmware/dmi_scan.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si.c6
-rw-r--r--drivers/infiniband/core/cache.c1
-rw-r--r--drivers/infiniband/core/multicast.c1
-rw-r--r--drivers/infiniband/hw/bnxt_re/hw_counters.c2
-rw-r--r--drivers/infiniband/hw/bnxt_re/ib_verbs.c2
-rw-r--r--drivers/infiniband/hw/mlx5/main.c8
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h14
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c7
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_verbs.c3
-rw-r--r--drivers/media/platform/atmel/atmel-isc.c7
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c7
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h2
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c17
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c19
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c2
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/smt.c4
-rw-r--r--drivers/net/ethernet/ibm/ibmvnic.c8
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_main.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_rx.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c3
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_iwarp.c49
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c6
-rw-r--r--drivers/net/usb/cdc_ether.c11
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192cu/trx.c2
-rw-r--r--drivers/pci/host/pci-hyperv.c2
-rw-r--r--drivers/scsi/lpfc/lpfc_init.c2
-rw-r--r--drivers/scsi/lpfc/lpfc_scsi.c2
-rw-r--r--drivers/scsi/storvsc_drv.c3
-rw-r--r--drivers/usb/image/microtek.c4
-rw-r--r--drivers/usb/misc/adutux.c21
-rw-r--r--drivers/usb/misc/legousbtower.c55
-rw-r--r--drivers/usb/misc/usblcd.c33
-rw-r--r--fs/btrfs/file.c35
-rw-r--r--fs/btrfs/inode.c10
-rw-r--r--fs/ceph/inode.c48
-rw-r--r--fs/ceph/mds_client.c4
-rw-r--r--include/linux/kvm_host.h8
-rw-r--r--include/linux/net.h1
-rw-r--r--include/net/psample.h3
-rw-r--r--include/net/sock.h1
-rw-r--r--include/rdma/ib_verbs.h4
-rw-r--r--kernel/fork.c4
-rw-r--r--kernel/trace/trace.c1
-rw-r--r--net/bridge/br_mdb.c2
-rw-r--r--net/core/dev.c2
-rw-r--r--net/core/netpoll.c6
-rw-r--r--net/core/skbuff.c19
-rw-r--r--net/core/sock.c22
-rw-r--r--net/core/sock_diag.c12
-rw-r--r--net/ipv4/inet_diag.c3
-rw-r--r--net/ipv4/tcp.c29
-rw-r--r--net/ipv4/tcp_input.c2
-rw-r--r--net/ipv4/tcp_output.c3
-rw-r--r--net/ipv6/mcast.c5
-rw-r--r--net/ipv6/ping.c2
-rw-r--r--net/nfc/llcp_sock.c7
-rw-r--r--net/psample/psample.c2
-rw-r--r--net/sched/act_sample.c6
-rw-r--r--net/sched/sch_hhf.c2
-rw-r--r--net/sctp/protocol.c2
-rw-r--r--net/sctp/sm_sideeffect.c2
-rw-r--r--net/socket.c5
-rw-r--r--net/tipc/name_distr.c3
-rw-r--r--net/wireless/wext-sme.c8
-rw-r--r--tools/objtool/Makefile2
-rw-r--r--virt/kvm/kvm_main.c40
102 files changed, 983 insertions, 552 deletions
diff --git a/Documentation/virtual/kvm/locking.txt b/Documentation/virtual/kvm/locking.txt
index 1bb8bcaf8497..635cd6eaf714 100644
--- a/Documentation/virtual/kvm/locking.txt
+++ b/Documentation/virtual/kvm/locking.txt
@@ -15,8 +15,6 @@ The acquisition orders for mutexes are as follows:
On x86, vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock.
-For spinlocks, kvm_lock is taken outside kvm->mmu_lock.
-
Everything else is a leaf: no other lock is taken inside the critical
sections.
@@ -169,7 +167,7 @@ which time it will be set using the Dirty tracking mechanism described above.
------------
Name: kvm_lock
-Type: spinlock_t
+Type: mutex
Arch: any
Protects: - vm_list
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
index f6262d28dad9..fcfc82e25675 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
@@ -49,6 +49,7 @@ extern void radix__flush_tlb_lpid_page(unsigned int lpid,
unsigned long page_size);
extern void radix__flush_pwc_lpid(unsigned int lpid);
extern void radix__local_flush_tlb_lpid(unsigned int lpid);
-extern void radix__local_flush_tlb_lpid_guest(unsigned int lpid);
+extern void radix__flush_all_lpid(unsigned int lpid);
+extern void radix__flush_all_lpid_guest(unsigned int lpid);
#endif
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 2bba1618bd5f..a9178bb83d52 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -452,9 +452,100 @@ static inline u32 kvmppc_get_xics_latch(void)
return xirr;
}
-static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi)
+/*
+ * To avoid the need to unnecessarily exit fully to the host kernel, an IPI to
+ * a CPU thread that's running/napping inside of a guest is by default regarded
+ * as a request to wake the CPU (if needed) and continue execution within the
+ * guest, potentially to process new state like externally-generated
+ * interrupts or IPIs sent from within the guest itself (e.g. H_PROD/H_IPI).
+ *
+ * To force an exit to the host kernel, kvmppc_set_host_ipi() must be called
+ * prior to issuing the IPI to set the corresponding 'host_ipi' flag in the
+ * target CPU's PACA. To avoid unnecessary exits to the host, this flag should
+ * be immediately cleared via kvmppc_clear_host_ipi() by the IPI handler on
+ * the receiving side prior to processing the IPI work.
+ *
+ * NOTE:
+ *
+ * We currently issue an smp_mb() at the beginning of kvmppc_set_host_ipi().
+ * This is to guard against sequences such as the following:
+ *
+ * CPU
+ * X: smp_muxed_ipi_set_message():
+ * X: smp_mb()
+ * X: message[RESCHEDULE] = 1
+ * X: doorbell_global_ipi(42):
+ * X: kvmppc_set_host_ipi(42)
+ * X: ppc_msgsnd_sync()/smp_mb()
+ * X: ppc_msgsnd() -> 42
+ * 42: doorbell_exception(): // from CPU X
+ * 42: ppc_msgsync()
+ * 105: smp_muxed_ipi_set_message():
+ * 105: smb_mb()
+ * // STORE DEFERRED DUE TO RE-ORDERING
+ * --105: message[CALL_FUNCTION] = 1
+ * | 105: doorbell_global_ipi(42):
+ * | 105: kvmppc_set_host_ipi(42)
+ * | 42: kvmppc_clear_host_ipi(42)
+ * | 42: smp_ipi_demux_relaxed()
+ * | 42: // returns to executing guest
+ * | // RE-ORDERED STORE COMPLETES
+ * ->105: message[CALL_FUNCTION] = 1
+ * 105: ppc_msgsnd_sync()/smp_mb()
+ * 105: ppc_msgsnd() -> 42
+ * 42: local_paca->kvm_hstate.host_ipi == 0 // IPI ignored
+ * 105: // hangs waiting on 42 to process messages/call_single_queue
+ *
+ * We also issue an smp_mb() at the end of kvmppc_clear_host_ipi(). This is
+ * to guard against sequences such as the following (as well as to create
+ * a read-side pairing with the barrier in kvmppc_set_host_ipi()):
+ *
+ * CPU
+ * X: smp_muxed_ipi_set_message():
+ * X: smp_mb()
+ * X: message[RESCHEDULE] = 1
+ * X: doorbell_global_ipi(42):
+ * X: kvmppc_set_host_ipi(42)
+ * X: ppc_msgsnd_sync()/smp_mb()
+ * X: ppc_msgsnd() -> 42
+ * 42: doorbell_exception(): // from CPU X
+ * 42: ppc_msgsync()
+ * // STORE DEFERRED DUE TO RE-ORDERING
+ * -- 42: kvmppc_clear_host_ipi(42)
+ * | 42: smp_ipi_demux_relaxed()
+ * | 105: smp_muxed_ipi_set_message():
+ * | 105: smb_mb()
+ * | 105: message[CALL_FUNCTION] = 1
+ * | 105: doorbell_global_ipi(42):
+ * | 105: kvmppc_set_host_ipi(42)
+ * | // RE-ORDERED STORE COMPLETES
+ * -> 42: kvmppc_clear_host_ipi(42)
+ * 42: // returns to executing guest
+ * 105: ppc_msgsnd_sync()/smp_mb()
+ * 105: ppc_msgsnd() -> 42
+ * 42: local_paca->kvm_hstate.host_ipi == 0 // IPI ignored
+ * 105: // hangs waiting on 42 to process messages/call_single_queue
+ */
+static inline void kvmppc_set_host_ipi(int cpu)
{
- paca[cpu].kvm_hstate.host_ipi = host_ipi;
+ /*
+ * order stores of IPI messages vs. setting of host_ipi flag
+ *
+ * pairs with the barrier in kvmppc_clear_host_ipi()
+ */
+ smp_mb();
+ paca[cpu].kvm_hstate.host_ipi = 1;
+}
+
+static inline void kvmppc_clear_host_ipi(int cpu)
+{
+ paca[cpu].kvm_hstate.host_ipi = 0;
+ /*
+ * order clearing of host_ipi flag vs. processing of IPI messages
+ *
+ * pairs with the barrier in kvmppc_set_host_ipi()
+ */
+ smp_mb();
}
static inline void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)
@@ -483,7 +574,10 @@ static inline u32 kvmppc_get_xics_latch(void)
return 0;
}
-static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi)
+static inline void kvmppc_set_host_ipi(int cpu)
+{}
+
+static inline void kvmppc_clear_host_ipi(int cpu)
{}
static inline void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 4251b217f246..a4ac7183372e 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -552,6 +552,15 @@
#define PPC_SLBIA(IH) stringify_in_c(.long PPC_INST_SLBIA | \
((IH & 0x7) << 21))
-#define PPC_INVALIDATE_ERAT PPC_SLBIA(7)
+
+/*
+ * These may only be used on ISA v3.0 or later (aka. CPU_FTR_ARCH_300, radix
+ * implies CPU_FTR_ARCH_300). USER/GUEST invalidates may only be used by radix
+ * mode (on HPT these would also invalidate various SLBEs which may not be
+ * desired).
+ */
+#define PPC_ISA_3_0_INVALIDATE_ERAT PPC_SLBIA(7)
+#define PPC_RADIX_INVALIDATE_ERAT_USER PPC_SLBIA(3)
+#define PPC_RADIX_INVALIDATE_ERAT_GUEST PPC_SLBIA(6)
#endif /* _ASM_POWERPC_PPC_OPCODE_H */
diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c
index b6fe883b1016..5828144555af 100644
--- a/arch/powerpc/kernel/dbell.c
+++ b/arch/powerpc/kernel/dbell.c
@@ -36,7 +36,7 @@ void doorbell_global_ipi(int cpu)
{
u32 tag = get_hard_smp_processor_id(cpu);
- kvmppc_set_host_ipi(cpu, 1);
+ kvmppc_set_host_ipi(cpu);
/* Order previous accesses vs. msgsnd, which is treated as a store */
ppc_msgsnd_sync();
ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag);
@@ -51,7 +51,7 @@ void doorbell_core_ipi(int cpu)
{
u32 tag = cpu_thread_in_core(cpu);
- kvmppc_set_host_ipi(cpu, 1);
+ kvmppc_set_host_ipi(cpu);
/* Order previous accesses vs. msgsnd, which is treated as a store */
ppc_msgsnd_sync();
ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag);
@@ -86,7 +86,7 @@ void doorbell_exception(struct pt_regs *regs)
may_hard_irq_enable();
- kvmppc_set_host_ipi(smp_processor_id(), 0);
+ kvmppc_clear_host_ipi(smp_processor_id());
__this_cpu_inc(irq_stat.doorbell_irqs);
smp_ipi_demux_relaxed(); /* already performed the barrier */
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index c6091ff46ccf..aed078cf66c0 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -544,7 +544,7 @@ pnv_restore_hyp_resource_arch300:
*/
blt cr3,1f
BEGIN_FTR_SECTION
- PPC_INVALIDATE_ERAT
+ PPC_ISA_3_0_INVALIDATE_ERAT
ld r1,PACAR1(r13)
ld r4,_MMCR0(r1)
mtspr SPRN_MMCR0,r4
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index a5e4b34c4fa6..ffa2f40a33e7 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -65,8 +65,7 @@ static void flush_erat(void)
return;
}
#endif
- /* PPC_INVALIDATE_ERAT can only be used on ISA v3 and newer */
- asm volatile(PPC_INVALIDATE_ERAT : : :"memory");
+ asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT : : :"memory");
}
#define MCE_FLUSH_SLB 1
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index e5635fc50e9c..4bd5df2e63b6 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -20,6 +20,7 @@
#include <linux/capability.h>
#include <linux/delay.h>
#include <linux/cpu.h>
+#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/completion.h>
#include <linux/cpumask.h>
@@ -901,6 +902,7 @@ static int rtas_cpu_state_change_mask(enum rtas_cpu_state state,
cpumask_clear_cpu(cpu, cpus);
}
}
+ cond_resched();
}
return ret;
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index 758d1d23215e..aaafb9f080d5 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -61,7 +61,7 @@ static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu)
hcpu = hcore << threads_shift;
kvmppc_host_rm_ops_hv->rm_core[hcore].rm_data = vcpu;
smp_muxed_ipi_set_message(hcpu, PPC_MSG_RM_HOST_ACTION);
- kvmppc_set_host_ipi(hcpu, 1);
+ kvmppc_set_host_ipi(hcpu);
smp_mb();
kvmhv_rm_send_ipi(hcpu);
}
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index c433851cfd32..530a65d1a442 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -60,7 +60,7 @@ static inline void tlbiel_hash_set_isa206(unsigned int set, unsigned int is)
* tlbiel instruction for hash, set invalidation
* i.e., r=1 and is=01 or is=10 or is=11
*/
-static inline void tlbiel_hash_set_isa300(unsigned int set, unsigned int is,
+static __always_inline void tlbiel_hash_set_isa300(unsigned int set, unsigned int is,
unsigned int pid,
unsigned int ric, unsigned int prs)
{
@@ -116,7 +116,7 @@ static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
asm volatile("ptesync": : :"memory");
- asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory");
+ asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory");
}
void hash__tlbiel_all(unsigned int action)
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
index 2b15e238cb6d..d4acc86ac290 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -9,9 +9,12 @@
#include <linux/sched.h>
#include <linux/mm_types.h>
+#include <linux/memblock.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
+#include <asm/trace.h>
+#include <asm/powernv.h>
#include "mmu_decl.h"
#include <trace/events/thp.h>
@@ -149,3 +152,51 @@ int remove_section_mapping(unsigned long start, unsigned long end)
return hash__remove_section_mapping(start, end);
}
#endif /* CONFIG_MEMORY_HOTPLUG */
+
+void __init mmu_partition_table_init(void)
+{
+ unsigned long patb_size = 1UL << PATB_SIZE_SHIFT;
+ unsigned long ptcr;
+
+ BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 36), "Partition table size too large.");
+ partition_tb = __va(memblock_alloc_base(patb_size, patb_size,
+ MEMBLOCK_ALLOC_ANYWHERE));
+
+ /* Initialize the Partition Table with no entries */
+ memset((void *)partition_tb, 0, patb_size);
+
+ /*
+ * update partition table control register,
+ * 64 K size.
+ */
+ ptcr = __pa(partition_tb) | (PATB_SIZE_SHIFT - 12);
+ mtspr(SPRN_PTCR, ptcr);
+ powernv_set_nmmu_ptcr(ptcr);
+}
+
+void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
+ unsigned long dw1)
+{
+ unsigned long old = be64_to_cpu(partition_tb[lpid].patb0);
+
+ partition_tb[lpid].patb0 = cpu_to_be64(dw0);
+ partition_tb[lpid].patb1 = cpu_to_be64(dw1);
+
+ /*
+ * Global flush of TLBs and partition table caches for this lpid.
+ * The type of flush (hash or radix) depends on what the previous
+ * use of this partition ID was, not the new use.
+ */
+ if (old & PATB_HR) {
+ radix__flush_all_lpid(lpid);
+ radix__flush_all_lpid_guest(lpid);
+ } else {
+ asm volatile("ptesync" : : : "memory");
+ asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : :
+ "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
+ /* do we need fixup here ?*/
+ asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+ trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 0);
+ }
+}
+EXPORT_SYMBOL_GPL(mmu_partition_table_set_entry);
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index ecd6d082c9e8..2cade743731e 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -34,9 +34,13 @@ unsigned int mmu_base_pid;
static int native_register_process_table(unsigned long base, unsigned long pg_sz,
unsigned long table_size)
{
- unsigned long patb1 = base | table_size | PATB_GR;
+ unsigned long patb0, patb1;
+
+ patb0 = be64_to_cpu(partition_tb[0].patb0);
+ patb1 = base | table_size | PATB_GR;
+
+ mmu_partition_table_set_entry(0, patb0, patb1);
- partition_tb->patb1 = cpu_to_be64(patb1);
return 0;
}
@@ -532,8 +536,7 @@ void __init radix__early_init_mmu(void)
radix_init_pgtable();
/* Switch to the guard PID before turning on MMU */
radix__switch_mmu_context(NULL, &init_mm);
- if (cpu_has_feature(CPU_FTR_HVMODE))
- tlbiel_all();
+ tlbiel_all();
}
void radix__early_init_mmu_secondary(void)
@@ -553,8 +556,7 @@ void radix__early_init_mmu_secondary(void)
radix_init_iamr();
radix__switch_mmu_context(NULL, &init_mm);
- if (cpu_has_feature(CPU_FTR_HVMODE))
- tlbiel_all();
+ tlbiel_all();
}
void radix__mmu_cleanup_all(void)
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 8a2fca97e122..677a6c616963 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -33,7 +33,6 @@
#include <linux/swap.h>
#include <linux/stddef.h>
#include <linux/vmalloc.h>
-#include <linux/memblock.h>
#include <linux/slab.h>
#include <linux/hugetlb.h>
@@ -47,13 +46,11 @@
#include <asm/smp.h>
#include <asm/machdep.h>
#include <asm/tlb.h>
-#include <asm/trace.h>
#include <asm/processor.h>
#include <asm/cputable.h>
#include <asm/sections.h>
#include <asm/firmware.h>
#include <asm/dma.h>
-#include <asm/powernv.h>
#include "mmu_decl.h"
@@ -437,59 +434,6 @@ void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
}
#endif
-#ifdef CONFIG_PPC_BOOK3S_64
-void __init mmu_partition_table_init(void)
-{
- unsigned long patb_size = 1UL << PATB_SIZE_SHIFT;
- unsigned long ptcr;
-
- BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 36), "Partition table size too large.");
- partition_tb = __va(memblock_alloc_base(patb_size, patb_size,
- MEMBLOCK_ALLOC_ANYWHERE));
-
- /* Initialize the Partition Table with no entries */
- memset((void *)partition_tb, 0, patb_size);
-
- /*
- * update partition table control register,
- * 64 K size.
- */
- ptcr = __pa(partition_tb) | (PATB_SIZE_SHIFT - 12);
- mtspr(SPRN_PTCR, ptcr);
- powernv_set_nmmu_ptcr(ptcr);
-}
-
-void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
- unsigned long dw1)
-{
- unsigned long old = be64_to_cpu(partition_tb[lpid].patb0);
-
- partition_tb[lpid].patb0 = cpu_to_be64(dw0);
- partition_tb[lpid].patb1 = cpu_to_be64(dw1);
-
- /*
- * Global flush of TLBs and partition table caches for this lpid.
- * The type of flush (hash or radix) depends on what the previous
- * use of this partition ID was, not the new use.
- */
- asm volatile("ptesync" : : : "memory");
- if (old & PATB_HR) {
- asm volatile(PPC_TLBIE_5(%0,%1,2,0,1) : :
- "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
- asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : :
- "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
- trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 1);
- } else {
- asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : :
- "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
- trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 0);
- }
- /* do we need fixup here ?*/
- asm volatile("eieio; tlbsync; ptesync" : : : "memory");
-}
-EXPORT_SYMBOL_GPL(mmu_partition_table_set_entry);
-#endif /* CONFIG_PPC_BOOK3S_64 */
-
#ifdef CONFIG_STRICT_KERNEL_RWX
void mark_rodata_ro(void)
{
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
index 9004bc5bee5a..774bac5cefd5 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -27,7 +27,7 @@
* tlbiel instruction for radix, set invalidation
* i.e., r=1 and is=01 or is=10 or is=11
*/
-static inline void tlbiel_radix_set_isa300(unsigned int set, unsigned int is,
+static __always_inline void tlbiel_radix_set_isa300(unsigned int set, unsigned int is,
unsigned int pid,
unsigned int ric, unsigned int prs)
{
@@ -53,11 +53,15 @@ static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
* and partition table entries. Then flush the remaining sets of the
* TLB.
*/
- tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 0);
- for (set = 1; set < num_sets; set++)
- tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 0);
- /* Do the same for process scoped entries. */
+ if (early_cpu_has_feature(CPU_FTR_HVMODE)) {
+ /* MSR[HV] should flush partition scope translations first. */
+ tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 0);
+ for (set = 1; set < num_sets; set++)
+ tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 0);
+ }
+
+ /* Flush process scoped entries. */
tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 1);
for (set = 1; set < num_sets; set++)
tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 1);
@@ -85,10 +89,10 @@ void radix__tlbiel_all(unsigned int action)
else
WARN(1, "%s called on pre-POWER9 CPU\n", __func__);
- asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory");
+ asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory");
}
-static inline void __tlbiel_pid(unsigned long pid, int set,
+static __always_inline void __tlbiel_pid(unsigned long pid, int set,
unsigned long ric)
{
unsigned long rb,rs,prs,r;
@@ -104,7 +108,7 @@ static inline void __tlbiel_pid(unsigned long pid, int set,
trace_tlbie(0, 1, rb, rs, ric, prs, r);
}
-static inline void __tlbie_pid(unsigned long pid, unsigned long ric)
+static __always_inline void __tlbie_pid(unsigned long pid, unsigned long ric)
{
unsigned long rb,rs,prs,r;
@@ -118,23 +122,7 @@ static inline void __tlbie_pid(unsigned long pid, unsigned long ric)
trace_tlbie(0, 0, rb, rs, ric, prs, r);
}
-static inline void __tlbiel_lpid(unsigned long lpid, int set,
- unsigned long ric)
-{
- unsigned long rb,rs,prs,r;
-
- rb = PPC_BIT(52); /* IS = 2 */
- rb |= set << PPC_BITLSHIFT(51);
- rs = 0; /* LPID comes from LPIDR */
- prs = 0; /* partition scoped */
- r = 1; /* radix format */
-
- asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
- : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
- trace_tlbie(lpid, 1, rb, rs, ric, prs, r);
-}
-
-static inline void __tlbie_lpid(unsigned long lpid, unsigned long ric)
+static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric)
{
unsigned long rb,rs,prs,r;
@@ -148,25 +136,22 @@ static inline void __tlbie_lpid(unsigned long lpid, unsigned long ric)
trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}
-static inline void __tlbiel_lpid_guest(unsigned long lpid, int set,
- unsigned long ric)
+static __always_inline void __tlbie_lpid_guest(unsigned long lpid, unsigned long ric)
{
unsigned long rb,rs,prs,r;
rb = PPC_BIT(52); /* IS = 2 */
- rb |= set << PPC_BITLSHIFT(51);
- rs = 0; /* LPID comes from LPIDR */
+ rs = lpid;
prs = 1; /* process scoped */
r = 1; /* radix format */
- asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
+ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
- trace_tlbie(lpid, 1, rb, rs, ric, prs, r);
+ trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}
-
-static inline void __tlbiel_va(unsigned long va, unsigned long pid,
- unsigned long ap, unsigned long ric)
+static __always_inline void __tlbiel_va(unsigned long va, unsigned long pid,
+ unsigned long ap, unsigned long ric)
{
unsigned long rb,rs,prs,r;
@@ -181,8 +166,8 @@ static inline void __tlbiel_va(unsigned long va, unsigned long pid,
trace_tlbie(0, 1, rb, rs, ric, prs, r);
}
-static inline void __tlbie_va(unsigned long va, unsigned long pid,
- unsigned long ap, unsigned long ric)
+static __always_inline void __tlbie_va(unsigned long va, unsigned long pid,
+ unsigned long ap, unsigned long ric)
{
unsigned long rb,rs,prs,r;
@@ -197,8 +182,8 @@ static inline void __tlbie_va(unsigned long va, unsigned long pid,
trace_tlbie(0, 0, rb, rs, ric, prs, r);
}
-static inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid,
- unsigned long ap, unsigned long ric)
+static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid,
+ unsigned long ap, unsigned long ric)
{
unsigned long rb,rs,prs,r;
@@ -298,7 +283,7 @@ static inline void fixup_tlbie_lpid(unsigned long lpid)
/*
* We use 128 set in radix mode and 256 set in hpt mode.
*/
-static inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
+static __always_inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
{
int set;
@@ -321,7 +306,7 @@ static inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
__tlbiel_pid(pid, set, RIC_FLUSH_TLB);
asm volatile("ptesync": : :"memory");
- asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory");
+ asm volatile(PPC_RADIX_INVALIDATE_ERAT_USER "; isync" : : :"memory");
}
static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
@@ -349,34 +334,6 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
-static inline void _tlbiel_lpid(unsigned long lpid, unsigned long ric)
-{
- int set;
-
- VM_BUG_ON(mfspr(SPRN_LPID) != lpid);
-
- asm volatile("ptesync": : :"memory");
-
- /*
- * Flush the first set of the TLB, and if we're doing a RIC_FLUSH_ALL,
- * also flush the entire Page Walk Cache.
- */
- __tlbiel_lpid(lpid, 0, ric);
-
- /* For PWC, only one flush is needed */
- if (ric == RIC_FLUSH_PWC) {
- asm volatile("ptesync": : :"memory");
- return;
- }
-
- /* For the remaining sets, just flush the TLB */
- for (set = 1; set < POWER9_TLB_SETS_RADIX ; set++)
- __tlbiel_lpid(lpid, set, RIC_FLUSH_TLB);
-
- asm volatile("ptesync": : :"memory");
- asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory");
-}
-
static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric)
{
asm volatile("ptesync": : :"memory");
@@ -402,34 +359,28 @@ static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric)
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
-static inline void _tlbiel_lpid_guest(unsigned long lpid, unsigned long ric)
+static __always_inline void _tlbie_lpid_guest(unsigned long lpid, unsigned long ric)
{
- int set;
-
- VM_BUG_ON(mfspr(SPRN_LPID) != lpid);
-
- asm volatile("ptesync": : :"memory");
-
/*
- * Flush the first set of the TLB, and if we're doing a RIC_FLUSH_ALL,
- * also flush the entire Page Walk Cache.
+ * Workaround the fact that the "ric" argument to __tlbie_pid
+ * must be a compile-time contraint to match the "i" constraint
+ * in the asm statement.
*/
- __tlbiel_lpid_guest(lpid, 0, ric);
-
- /* For PWC, only one flush is needed */
- if (ric == RIC_FLUSH_PWC) {
- asm volatile("ptesync": : :"memory");
- return;
+ switch (ric) {
+ case RIC_FLUSH_TLB:
+ __tlbie_lpid_guest(lpid, RIC_FLUSH_TLB);
+ break;
+ case RIC_FLUSH_PWC:
+ __tlbie_lpid_guest(lpid, RIC_FLUSH_PWC);
+ break;
+ case RIC_FLUSH_ALL:
+ default:
+ __tlbie_lpid_guest(lpid, RIC_FLUSH_ALL);
}
-
- /* For the remaining sets, just flush the TLB */
- for (set = 1; set < POWER9_TLB_SETS_RADIX ; set++)
- __tlbiel_lpid_guest(lpid, set, RIC_FLUSH_TLB);
-
- asm volatile("ptesync": : :"memory");
+ fixup_tlbie_lpid(lpid);
+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
-
static inline void __tlbiel_va_range(unsigned long start, unsigned long end,
unsigned long pid, unsigned long page_size,
unsigned long psize)
@@ -441,8 +392,8 @@ static inline void __tlbiel_va_range(unsigned long start, unsigned long end,
__tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
}
-static inline void _tlbiel_va(unsigned long va, unsigned long pid,
- unsigned long psize, unsigned long ric)
+static __always_inline void _tlbiel_va(unsigned long va, unsigned long pid,
+ unsigned long psize, unsigned long ric)
{
unsigned long ap = mmu_get_ap(psize);
@@ -475,8 +426,8 @@ static inline void __tlbie_va_range(unsigned long start, unsigned long end,
fixup_tlbie_va_range(addr - page_size, pid, ap);
}
-static inline void _tlbie_va(unsigned long va, unsigned long pid,
- unsigned long psize, unsigned long ric)
+static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
+ unsigned long psize, unsigned long ric)
{
unsigned long ap = mmu_get_ap(psize);
@@ -486,7 +437,7 @@ static inline void _tlbie_va(unsigned long va, unsigned long pid,
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
-static inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid,
+static __always_inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid,
unsigned long psize, unsigned long ric)
{
unsigned long ap = mmu_get_ap(psize);
@@ -809,23 +760,18 @@ EXPORT_SYMBOL_GPL(radix__flush_pwc_lpid);
/*
* Flush partition scoped translations from LPID (=LPIDR)
*/
-void radix__local_flush_tlb_lpid(unsigned int lpid)
+void radix__flush_all_lpid(unsigned int lpid)
{
- _tlbiel_lpid(lpid, RIC_FLUSH_ALL);
+ _tlbie_lpid(lpid, RIC_FLUSH_ALL);
}
-EXPORT_SYMBOL_GPL(radix__local_flush_tlb_lpid);
/*
- * Flush process scoped translations from LPID (=LPIDR).
- * Important difference, the guest normally manages its own translations,
- * but some cases e.g., vCPU CPU migration require KVM to flush.
+ * Flush process scoped translations from LPID (=LPIDR)
*/
-void radix__local_flush_tlb_lpid_guest(unsigned int lpid)
+void radix__flush_all_lpid_guest(unsigned int lpid)
{
- _tlbiel_lpid_guest(lpid, RIC_FLUSH_ALL);
+ _tlbie_lpid_guest(lpid, RIC_FLUSH_ALL);
}
-EXPORT_SYMBOL_GPL(radix__local_flush_tlb_lpid_guest);
-
static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
unsigned long end, int psize);
@@ -894,7 +840,7 @@ void radix__tlb_flush(struct mmu_gather *tlb)
tlb->need_flush_all = 0;
}
-static inline void __radix__flush_tlb_range_psize(struct mm_struct *mm,
+static __always_inline void __radix__flush_tlb_range_psize(struct mm_struct *mm,
unsigned long start, unsigned long end,
int psize, bool also_pwc)
{
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index 509ab17e0897..cab8fdf01b7c 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -193,7 +193,7 @@ static void pnv_smp_cpu_kill_self(void)
* for coming online, which are handled via
* generic_check_cpu_restart() calls.
*/
- kvmppc_set_host_ipi(cpu, 0);
+ kvmppc_clear_host_ipi(cpu);
srr1 = pnv_cpu_offline(cpu);
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 59cd7757a141..1c5ac9aa0573 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -861,6 +861,9 @@ void __init pseries_lpar_read_hblkrm_characteristics(void)
unsigned char local_buffer[SPLPAR_TLB_BIC_MAXLENGTH];
int call_status, len, idx, bpsize;
+ if (!firmware_has_feature(FW_FEATURE_BLOCK_REMOVE))
+ return;
+
spin_lock(&rtas_data_buf_lock);
memset(rtas_data_buf, 0, RTAS_DATA_BUF_SIZE);
call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1,
diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
index 3b5eba88776f..96f4afd9ae98 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -12,6 +12,7 @@
#include <linux/cpu.h>
#include <linux/kernel.h>
#include <linux/kobject.h>
+#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/stat.h>
#include <linux/completion.h>
@@ -209,7 +210,11 @@ static int update_dt_node(__be32 phandle, s32 scope)
prop_data += vd;
}
+
+ cond_resched();
}
+
+ cond_resched();
} while (rtas_rc == 1);
of_node_put(dn);
@@ -312,8 +317,12 @@ int pseries_devicetree_update(s32 scope)
add_dt_node(phandle, drc_index);
break;
}
+
+ cond_resched();
}
}
+
+ cond_resched();
} while (rc == 1);
kfree(rtas_buf);
diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c
index 2bfb9968d562..1821c446c6a3 100644
--- a/arch/powerpc/sysdev/xics/icp-native.c
+++ b/arch/powerpc/sysdev/xics/icp-native.c
@@ -145,7 +145,7 @@ static unsigned int icp_native_get_irq(void)
static void icp_native_cause_ipi(int cpu)
{
- kvmppc_set_host_ipi(cpu, 1);
+ kvmppc_set_host_ipi(cpu);
icp_native_set_qirr(cpu, IPI_PRIORITY);
}
@@ -184,7 +184,7 @@ void icp_native_flush_interrupt(void)
if (vec == XICS_IPI) {
/* Clear pending IPI */
int cpu = smp_processor_id();
- kvmppc_set_host_ipi(cpu, 0);
+ kvmppc_clear_host_ipi(cpu);
icp_native_set_qirr(cpu, 0xff);
} else {
pr_err("XICS: hw interrupt 0x%x to offline cpu, disabling\n",
@@ -205,7 +205,7 @@ static irqreturn_t icp_native_ipi_action(int irq, void *dev_id)
{
int cpu = smp_processor_id();
- kvmppc_set_host_ipi(cpu, 0);
+ kvmppc_clear_host_ipi(cpu);
icp_native_set_qirr(cpu, 0xff);
return smp_ipi_demux();
diff --git a/arch/powerpc/sysdev/xics/icp-opal.c b/arch/powerpc/sysdev/xics/icp-opal.c
index c71d2ea42627..e3e52cf035a9 100644
--- a/arch/powerpc/sysdev/xics/icp-opal.c
+++ b/arch/powerpc/sysdev/xics/icp-opal.c
@@ -130,7 +130,7 @@ static void icp_opal_cause_ipi(int cpu)
{
int hw_cpu = get_hard_smp_processor_id(cpu);
- kvmppc_set_host_ipi(cpu, 1);
+ kvmppc_set_host_ipi(cpu);
opal_int_set_mfrr(hw_cpu, IPI_PRIORITY);
}
@@ -138,7 +138,7 @@ static irqreturn_t icp_opal_ipi_action(int irq, void *dev_id)
{
int cpu = smp_processor_id();
- kvmppc_set_host_ipi(cpu, 0);
+ kvmppc_clear_host_ipi(cpu);
opal_int_set_mfrr(get_hard_smp_processor_id(cpu), 0xff);
return smp_ipi_demux();
@@ -161,7 +161,7 @@ void icp_opal_flush_interrupt(void)
if (vec == XICS_IPI) {
/* Clear pending IPI */
int cpu = smp_processor_id();
- kvmppc_set_host_ipi(cpu, 0);
+ kvmppc_clear_host_ipi(cpu);
opal_int_set_mfrr(get_hard_smp_processor_id(cpu), 0xff);
} else {
pr_err("XICS: hw interrupt 0x%x to offline cpu, "
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index e563f71c2e32..f7801798dd92 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -2282,13 +2282,13 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
if (!kvm->arch.sca)
goto out_err;
- spin_lock(&kvm_lock);
+ mutex_lock(&kvm_lock);
sca_offset += 16;
if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
sca_offset = 0;
kvm->arch.sca = (struct bsca_block *)
((char *) kvm->arch.sca + sca_offset);
- spin_unlock(&kvm_lock);
+ mutex_unlock(&kvm_lock);
sprintf(debug_name, "kvm-%u", current->pid);
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index d5284a6e2104..541b8738235b 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1158,7 +1158,6 @@ idtentry stack_segment do_stack_segment has_error_code=1
#ifdef CONFIG_XEN
idtentry xennmi do_nmi has_error_code=0
idtentry xendebug do_debug has_error_code=0
-idtentry xenint3 do_int3 has_error_code=0
#endif
idtentry general_protection do_general_protection has_error_code=1
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 53423346b11d..7b56fb1371e2 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -54,15 +54,15 @@ struct mm_struct;
void set_pte_vaddr_p4d(p4d_t *p4d_page, unsigned long vaddr, pte_t new_pte);
void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte);
-static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep)
+static inline void native_set_pte(pte_t *ptep, pte_t pte)
{
- *ptep = native_make_pte(0);
+ WRITE_ONCE(*ptep, pte);
}
-static inline void native_set_pte(pte_t *ptep, pte_t pte)
+static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep)
{
- *ptep = pte;
+ native_set_pte(ptep, native_make_pte(0));
}
static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte)
@@ -72,7 +72,7 @@ static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte)
static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd)
{
- *pmdp = pmd;
+ WRITE_ONCE(*pmdp, pmd);
}
static inline void native_pmd_clear(pmd_t *pmd)
@@ -108,7 +108,7 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp)
static inline void native_set_pud(pud_t *pudp, pud_t pud)
{
- *pudp = pud;
+ WRITE_ONCE(*pudp, pud);
}
static inline void native_pud_clear(pud_t *pud)
@@ -220,7 +220,7 @@ static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d)
#if defined(CONFIG_PAGE_TABLE_ISOLATION) && !defined(CONFIG_X86_5LEVEL)
p4dp->pgd = pti_set_user_pgd(&p4dp->pgd, p4d.pgd);
#else
- *p4dp = p4d;
+ WRITE_ONCE(*p4dp, p4d);
#endif
}
@@ -236,9 +236,9 @@ static inline void native_p4d_clear(p4d_t *p4d)
static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
{
#ifdef CONFIG_PAGE_TABLE_ISOLATION
- *pgdp = pti_set_user_pgd(pgdp, pgd);
+ WRITE_ONCE(*pgdp, pti_set_user_pgd(pgdp, pgd));
#else
- *pgdp = pgd;
+ WRITE_ONCE(*pgdp, pgd);
#endif
}
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 6f342cb9fecd..62f0bf741955 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -57,7 +57,7 @@ asmlinkage void trace_page_fault(void);
asmlinkage void xen_divide_error(void);
asmlinkage void xen_xennmi(void);
asmlinkage void xen_xendebug(void);
-asmlinkage void xen_xenint3(void);
+asmlinkage void xen_int3(void);
asmlinkage void xen_overflow(void);
asmlinkage void xen_bounds(void);
asmlinkage void xen_invalid_op(void);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 42cca05f05dc..cb8a1045393b 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -140,9 +140,6 @@ module_param(dbg, bool, 0644);
#include <trace/events/kvm.h>
-#define CREATE_TRACE_POINTS
-#include "mmutrace.h"
-
#define SPTE_HOST_WRITEABLE (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)
#define SPTE_MMU_WRITEABLE (1ULL << (PT_FIRST_AVAIL_BITS_SHIFT + 1))
@@ -244,8 +241,13 @@ static u64 __read_mostly shadow_nonpresent_or_rsvd_lower_gfn_mask;
static void mmu_spte_set(u64 *sptep, u64 spte);
+static bool is_executable_pte(u64 spte);
static void mmu_free_roots(struct kvm_vcpu *vcpu);
+#define CREATE_TRACE_POINTS
+#include "mmutrace.h"
+
+
void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value)
{
BUG_ON((mmio_mask & mmio_value) != mmio_value);
@@ -1009,10 +1011,16 @@ static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index)
static void kvm_mmu_page_set_gfn(struct kvm_mmu_page *sp, int index, gfn_t gfn)
{
- if (sp->role.direct)
- BUG_ON(gfn != kvm_mmu_page_get_gfn(sp, index));
- else
+ if (!sp->role.direct) {
sp->gfns[index] = gfn;
+ return;
+ }
+
+ if (WARN_ON(gfn != kvm_mmu_page_get_gfn(sp, index)))
+ pr_err_ratelimited("gfn mismatch under direct page %llx "
+ "(expected %llx, got %llx)\n",
+ sp->gfn,
+ kvm_mmu_page_get_gfn(sp, index), gfn);
}
/*
@@ -2915,10 +2923,7 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access,
ret = RET_PF_EMULATE;
pgprintk("%s: setting spte %llx\n", __func__, *sptep);
- pgprintk("instantiating %s PTE (%s) at %llx (%llx) addr %p\n",
- is_large_pte(*sptep)? "2MB" : "4kB",
- *sptep & PT_WRITABLE_MASK ? "RW" : "R", gfn,
- *sptep, sptep);
+ trace_kvm_mmu_set_spte(level, gfn, sptep);
if (!was_rmapped && is_large_pte(*sptep))
++vcpu->kvm->stat.lpages;
@@ -2930,8 +2935,6 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access,
}
}
- kvm_release_pfn_clean(pfn);
-
return ret;
}
@@ -2966,9 +2969,11 @@ static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu,
if (ret <= 0)
return -1;
- for (i = 0; i < ret; i++, gfn++, start++)
+ for (i = 0; i < ret; i++, gfn++, start++) {
mmu_set_spte(vcpu, start, access, 0, sp->role.level, gfn,
page_to_pfn(pages[i]), true, true);
+ put_page(pages[i]);
+ }
return 0;
}
@@ -3016,40 +3021,40 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep)
__direct_pte_prefetch(vcpu, sp, sptep);
}
-static int __direct_map(struct kvm_vcpu *vcpu, int write, int map_writable,
- int level, gfn_t gfn, kvm_pfn_t pfn, bool prefault)
+static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write,
+ int map_writable, int level, kvm_pfn_t pfn,
+ bool prefault)
{
- struct kvm_shadow_walk_iterator iterator;
+ struct kvm_shadow_walk_iterator it;
struct kvm_mmu_page *sp;
- int emulate = 0;
- gfn_t pseudo_gfn;
+ int ret;
+ gfn_t gfn = gpa >> PAGE_SHIFT;
+ gfn_t base_gfn = gfn;
if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
- return 0;
+ return RET_PF_RETRY;
- for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) {
- if (iterator.level == level) {
- emulate = mmu_set_spte(vcpu, iterator.sptep, ACC_ALL,
- write, level, gfn, pfn, prefault,
- map_writable);
- direct_pte_prefetch(vcpu, iterator.sptep);
- ++vcpu->stat.pf_fixed;
+ trace_kvm_mmu_spte_requested(gpa, level, pfn);
+ for_each_shadow_entry(vcpu, gpa, it) {
+ base_gfn = gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
+ if (it.level == level)
break;
- }
-
- drop_large_spte(vcpu, iterator.sptep);
- if (!is_shadow_present_pte(*iterator.sptep)) {
- u64 base_addr = iterator.addr;
- base_addr &= PT64_LVL_ADDR_MASK(iterator.level);
- pseudo_gfn = base_addr >> PAGE_SHIFT;
- sp = kvm_mmu_get_page(vcpu, pseudo_gfn, iterator.addr,
- iterator.level - 1, 1, ACC_ALL);
+ drop_large_spte(vcpu, it.sptep);
+ if (!is_shadow_present_pte(*it.sptep)) {
+ sp = kvm_mmu_get_page(vcpu, base_gfn, it.addr,
+ it.level - 1, true, ACC_ALL);
- link_shadow_page(vcpu, iterator.sptep, sp);
+ link_shadow_page(vcpu, it.sptep, sp);
}
}
- return emulate;
+
+ ret = mmu_set_spte(vcpu, it.sptep, ACC_ALL,
+ write, level, base_gfn, pfn, prefault,
+ map_writable);
+ direct_pte_prefetch(vcpu, it.sptep);
+ ++vcpu->stat.pf_fixed;
+ return ret;
}
static void kvm_send_hwpoison_signal(unsigned long address, struct task_struct *tsk)
@@ -3084,11 +3089,10 @@ static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn)
}
static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
- gfn_t *gfnp, kvm_pfn_t *pfnp,
+ gfn_t gfn, kvm_pfn_t *pfnp,
int *levelp)
{
kvm_pfn_t pfn = *pfnp;
- gfn_t gfn = *gfnp;
int level = *levelp;
/*
@@ -3115,8 +3119,6 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
mask = KVM_PAGES_PER_HPAGE(level) - 1;
VM_BUG_ON((gfn & mask) != (pfn & mask));
if (pfn & mask) {
- gfn &= ~mask;
- *gfnp = gfn;
kvm_release_pfn_clean(pfn);
pfn &= ~mask;
kvm_get_pfn(pfn);
@@ -3373,22 +3375,19 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
if (handle_abnormal_pfn(vcpu, v, gfn, pfn, ACC_ALL, &r))
return r;
+ r = RET_PF_RETRY;
spin_lock(&vcpu->kvm->mmu_lock);
if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
goto out_unlock;
if (make_mmu_pages_available(vcpu) < 0)
goto out_unlock;
if (likely(!force_pt_level))
- transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
- r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault);
- spin_unlock(&vcpu->kvm->mmu_lock);
-
- return r;
-
+ transparent_hugepage_adjust(vcpu, gfn, &pfn, &level);
+ r = __direct_map(vcpu, v, write, map_writable, level, pfn, prefault);
out_unlock:
spin_unlock(&vcpu->kvm->mmu_lock);
kvm_release_pfn_clean(pfn);
- return RET_PF_RETRY;
+ return r;
}
@@ -3965,22 +3964,19 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
if (handle_abnormal_pfn(vcpu, 0, gfn, pfn, ACC_ALL, &r))
return r;
+ r = RET_PF_RETRY;
spin_lock(&vcpu->kvm->mmu_lock);
if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
goto out_unlock;
if (make_mmu_pages_available(vcpu) < 0)
goto out_unlock;
if (likely(!force_pt_level))
- transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
- r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault);
- spin_unlock(&vcpu->kvm->mmu_lock);
-
- return r;
-
+ transparent_hugepage_adjust(vcpu, gfn, &pfn, &level);
+ r = __direct_map(vcpu, gpa, write, map_writable, level, pfn, prefault);
out_unlock:
spin_unlock(&vcpu->kvm->mmu_lock);
kvm_release_pfn_clean(pfn);
- return RET_PF_RETRY;
+ return r;
}
static void nonpaging_init_context(struct kvm_vcpu *vcpu,
@@ -5485,7 +5481,7 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
int nr_to_scan = sc->nr_to_scan;
unsigned long freed = 0;
- spin_lock(&kvm_lock);
+ mutex_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list) {
int idx;
@@ -5535,7 +5531,7 @@ unlock:
break;
}
- spin_unlock(&kvm_lock);
+ mutex_unlock(&kvm_lock);
return freed;
}
diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h
index 8b97a6cba8d1..9c298454c278 100644
--- a/arch/x86/kvm/mmutrace.h
+++ b/arch/x86/kvm/mmutrace.h
@@ -324,6 +324,65 @@ TRACE_EVENT(
__entry->kvm_gen == __entry->spte_gen
)
);
+
+TRACE_EVENT(
+ kvm_mmu_set_spte,
+ TP_PROTO(int level, gfn_t gfn, u64 *sptep),
+ TP_ARGS(level, gfn, sptep),
+
+ TP_STRUCT__entry(
+ __field(u64, gfn)
+ __field(u64, spte)
+ __field(u64, sptep)
+ __field(u8, level)
+ /* These depend on page entry type, so compute them now. */
+ __field(bool, r)
+ __field(bool, x)
+ __field(u8, u)
+ ),
+
+ TP_fast_assign(
+ __entry->gfn = gfn;
+ __entry->spte = *sptep;
+ __entry->sptep = virt_to_phys(sptep);
+ __entry->level = level;
+ __entry->r = shadow_present_mask || (__entry->spte & PT_PRESENT_MASK);
+ __entry->x = is_executable_pte(__entry->spte);
+ __entry->u = shadow_user_mask ? !!(__entry->spte & shadow_user_mask) : -1;
+ ),
+
+ TP_printk("gfn %llx spte %llx (%s%s%s%s) level %d at %llx",
+ __entry->gfn, __entry->spte,
+ __entry->r ? "r" : "-",
+ __entry->spte & PT_WRITABLE_MASK ? "w" : "-",
+ __entry->x ? "x" : "-",
+ __entry->u == -1 ? "" : (__entry->u ? "u" : "-"),
+ __entry->level, __entry->sptep
+ )
+);
+
+TRACE_EVENT(
+ kvm_mmu_spte_requested,
+ TP_PROTO(gpa_t addr, int level, kvm_pfn_t pfn),
+ TP_ARGS(addr, level, pfn),
+
+ TP_STRUCT__entry(
+ __field(u64, gfn)
+ __field(u64, pfn)
+ __field(u8, level)
+ ),
+
+ TP_fast_assign(
+ __entry->gfn = addr >> PAGE_SHIFT;
+ __entry->pfn = pfn | (__entry->gfn & (KVM_PAGES_PER_HPAGE(level) - 1));
+ __entry->level = level;
+ ),
+
+ TP_printk("gfn %llx pfn %llx level %d",
+ __entry->gfn, __entry->pfn, __entry->level
+ )
+);
+
#endif /* _TRACE_KVMMMU_H */
#undef TRACE_INCLUDE_PATH
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 5abae72266b7..6a6625dd62de 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -181,7 +181,7 @@ no_present:
* set bit 0 if execute only is supported. Here, we repurpose ACC_USER_MASK
* to signify readability since it isn't used in the EPT case
*/
-static inline unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, u64 gpte)
+static inline unsigned FNAME(gpte_access)(u64 gpte)
{
unsigned access;
#if PTTYPE == PTTYPE_EPT
@@ -394,8 +394,8 @@ retry_walk:
accessed_dirty = have_ad ? pte_access & PT_GUEST_ACCESSED_MASK : 0;
/* Convert to ACC_*_MASK flags for struct guest_walker. */
- walker->pt_access = FNAME(gpte_access)(vcpu, pt_access ^ walk_nx_mask);
- walker->pte_access = FNAME(gpte_access)(vcpu, pte_access ^ walk_nx_mask);
+ walker->pt_access = FNAME(gpte_access)(pt_access ^ walk_nx_mask);
+ walker->pte_access = FNAME(gpte_access)(pte_access ^ walk_nx_mask);
errcode = permission_fault(vcpu, mmu, walker->pte_access, pte_pkey, access);
if (unlikely(errcode))
goto error;
@@ -501,7 +501,7 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);
gfn = gpte_to_gfn(gpte);
- pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
+ pte_access = sp->role.access & FNAME(gpte_access)(gpte);
FNAME(protect_clean_gpte)(&vcpu->arch.mmu, &pte_access, gpte);
pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn,
no_dirty_log && (pte_access & ACC_WRITE_MASK));
@@ -515,6 +515,7 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
mmu_set_spte(vcpu, spte, pte_access, 0, PT_PAGE_TABLE_LEVEL, gfn, pfn,
true, true);
+ kvm_release_pfn_clean(pfn);
return true;
}
@@ -594,6 +595,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
struct kvm_shadow_walk_iterator it;
unsigned direct_access, access = gw->pt_access;
int top_level, ret;
+ gfn_t base_gfn;
direct_access = gw->pte_access;
@@ -638,35 +640,34 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
link_shadow_page(vcpu, it.sptep, sp);
}
- for (;
- shadow_walk_okay(&it) && it.level > hlevel;
- shadow_walk_next(&it)) {
- gfn_t direct_gfn;
+ base_gfn = gw->gfn;
+
+ trace_kvm_mmu_spte_requested(addr, gw->level, pfn);
+ for (; shadow_walk_okay(&it); shadow_walk_next(&it)) {
clear_sp_write_flooding_count(it.sptep);
+ base_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
+ if (it.level == hlevel)
+ break;
+
validate_direct_spte(vcpu, it.sptep, direct_access);
drop_large_spte(vcpu, it.sptep);
- if (is_shadow_present_pte(*it.sptep))
- continue;
-
- direct_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
-
- sp = kvm_mmu_get_page(vcpu, direct_gfn, addr, it.level-1,
- true, direct_access);
- link_shadow_page(vcpu, it.sptep, sp);
+ if (!is_shadow_present_pte(*it.sptep)) {
+ sp = kvm_mmu_get_page(vcpu, base_gfn, addr,
+ it.level - 1, true, direct_access);
+ link_shadow_page(vcpu, it.sptep, sp);
+ }
}
- clear_sp_write_flooding_count(it.sptep);
ret = mmu_set_spte(vcpu, it.sptep, gw->pte_access, write_fault,
- it.level, gw->gfn, pfn, prefault, map_writable);
+ it.level, base_gfn, pfn, prefault, map_writable);
FNAME(pte_prefetch)(vcpu, gw, it.sptep);
-
+ ++vcpu->stat.pf_fixed;
return ret;
out_gpte_changed:
- kvm_release_pfn_clean(pfn);
return RET_PF_RETRY;
}
@@ -814,6 +815,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
walker.pte_access &= ~ACC_EXEC_MASK;
}
+ r = RET_PF_RETRY;
spin_lock(&vcpu->kvm->mmu_lock);
if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
goto out_unlock;
@@ -822,19 +824,15 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
if (make_mmu_pages_available(vcpu) < 0)
goto out_unlock;
if (!force_pt_level)
- transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level);
+ transparent_hugepage_adjust(vcpu, walker.gfn, &pfn, &level);
r = FNAME(fetch)(vcpu, addr, &walker, write_fault,
level, pfn, map_writable, prefault);
- ++vcpu->stat.pf_fixed;
kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT);
- spin_unlock(&vcpu->kvm->mmu_lock);
-
- return r;
out_unlock:
spin_unlock(&vcpu->kvm->mmu_lock);
kvm_release_pfn_clean(pfn);
- return RET_PF_RETRY;
+ return r;
}
static gpa_t FNAME(get_level1_sp_gpa)(struct kvm_mmu_page *sp)
@@ -995,7 +993,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
gfn = gpte_to_gfn(gpte);
pte_access = sp->role.access;
- pte_access &= FNAME(gpte_access)(vcpu, gpte);
+ pte_access &= FNAME(gpte_access)(gpte);
FNAME(protect_clean_gpte)(&vcpu->arch.mmu, &pte_access, gpte);
if (sync_mmio_spte(vcpu, &sp->spt[i], gfn, pte_access,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7ad889ff07d4..20fe7485472f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -90,8 +90,8 @@ u64 __read_mostly efer_reserved_bits = ~((u64)(EFER_SCE | EFER_LME | EFER_LMA));
static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE);
#endif
-#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
-#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
+#define VM_STAT(x, ...) offsetof(struct kvm, stat.x), KVM_STAT_VM, ## __VA_ARGS__
+#define VCPU_STAT(x, ...) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU, ## __VA_ARGS__
#define KVM_X2APIC_API_VALID_FLAGS (KVM_X2APIC_API_USE_32BIT_IDS | \
KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK)
@@ -194,7 +194,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "mmu_cache_miss", VM_STAT(mmu_cache_miss) },
{ "mmu_unsync", VM_STAT(mmu_unsync) },
{ "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
- { "largepages", VM_STAT(lpages) },
+ { "largepages", VM_STAT(lpages, .mode = 0444) },
{ "max_mmu_page_hash_collisions",
VM_STAT(max_mmu_page_hash_collisions) },
{ NULL }
@@ -6175,7 +6175,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
- spin_lock(&kvm_lock);
+ mutex_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list) {
kvm_for_each_vcpu(i, vcpu, kvm) {
if (vcpu->cpu != freq->cpu)
@@ -6185,7 +6185,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
send_ipi = 1;
}
}
- spin_unlock(&kvm_lock);
+ mutex_unlock(&kvm_lock);
if (freq->old < freq->new && send_ipi) {
/*
@@ -6322,12 +6322,12 @@ static void pvclock_gtod_update_fn(struct work_struct *work)
struct kvm_vcpu *vcpu;
int i;
- spin_lock(&kvm_lock);
+ mutex_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list)
kvm_for_each_vcpu(i, vcpu, kvm)
kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
atomic_set(&kvm_guest_has_master_clock, 0);
- spin_unlock(&kvm_lock);
+ mutex_unlock(&kvm_lock);
}
static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn);
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 814237ab518d..85c10b6e5d23 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -259,7 +259,7 @@ static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
if (pgd_val(pgd) != 0) {
pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd);
- pgdp[i] = native_make_pgd(0);
+ pgd_clear(&pgdp[i]);
paravirt_release_pmd(pgd_val(pgd) >> PAGE_SHIFT);
pmd_free(mm, pmd);
@@ -429,7 +429,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
int changed = !pte_same(*ptep, entry);
if (changed && dirty) {
- *ptep = entry;
+ set_pte(ptep, entry);
pte_update(vma->vm_mm, address, ptep);
}
@@ -446,7 +446,7 @@ int pmdp_set_access_flags(struct vm_area_struct *vma,
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
if (changed && dirty) {
- *pmdp = entry;
+ set_pmd(pmdp, entry);
/*
* We had a write-protection fault here and changed the pmd
* to to more permissive. No need to flush the TLB for that,
@@ -466,7 +466,7 @@ int pudp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
VM_BUG_ON(address & ~HPAGE_PUD_MASK);
if (changed && dirty) {
- *pudp = entry;
+ set_pud(pudp, entry);
/*
* We had a write-protection fault here and changed the pud
* to to more permissive. No need to flush the TLB for that,
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index a420631ff5d3..b91621b312b9 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -597,12 +597,12 @@ struct trap_array_entry {
static struct trap_array_entry trap_array[] = {
{ debug, xen_xendebug, true },
- { int3, xen_xenint3, true },
{ double_fault, xen_double_fault, true },
#ifdef CONFIG_X86_MCE
{ machine_check, xen_machine_check, true },
#endif
{ nmi, xen_xennmi, true },
+ { int3, xen_int3, false },
{ overflow, xen_overflow, false },
#ifdef CONFIG_IA32_EMULATION
{ entry_INT80_compat, xen_entry_INT80_compat, false },
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index e1a90f98790c..9df66126e5e2 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -34,7 +34,6 @@ xen_pv_trap divide_error
xen_pv_trap debug
xen_pv_trap xendebug
xen_pv_trap int3
-xen_pv_trap xenint3
xen_pv_trap xennmi
xen_pv_trap overflow
xen_pv_trap bounds
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 7f4283291a61..4c3c3a3454e4 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -424,6 +424,26 @@ static ssize_t queue_poll_store(struct request_queue *q, const char *page,
return ret;
}
+static ssize_t queue_io_timeout_show(struct request_queue *q, char *page)
+{
+ return sprintf(page, "%u\n", jiffies_to_msecs(q->rq_timeout));
+}
+
+static ssize_t queue_io_timeout_store(struct request_queue *q, const char *page,
+ size_t count)
+{
+ unsigned int val;
+ int err;
+
+ err = kstrtou32(page, 10, &val);
+ if (err || val == 0)
+ return -EINVAL;
+
+ blk_queue_rq_timeout(q, msecs_to_jiffies(val));
+
+ return count;
+}
+
static ssize_t queue_wb_lat_show(struct request_queue *q, char *page)
{
if (!q->rq_wb)
@@ -683,6 +703,12 @@ static struct queue_sysfs_entry queue_dax_entry = {
.show = queue_dax_show,
};
+static struct queue_sysfs_entry queue_io_timeout_entry = {
+ .attr = {.name = "io_timeout", .mode = 0644 },
+ .show = queue_io_timeout_show,
+ .store = queue_io_timeout_store,
+};
+
static struct queue_sysfs_entry queue_wb_lat_entry = {
.attr = {.name = "wbt_lat_usec", .mode = 0644 },
.show = queue_wb_lat_show,
@@ -697,7 +723,7 @@ static struct queue_sysfs_entry throtl_sample_time_entry = {
};
#endif
-static struct attribute *default_attrs[] = {
+static struct attribute *queue_attrs[] = {
&queue_requests_entry.attr,
&queue_ra_entry.attr,
&queue_max_hw_sectors_entry.attr,
@@ -732,12 +758,32 @@ static struct attribute *default_attrs[] = {
&queue_dax_entry.attr,
&queue_wb_lat_entry.attr,
&queue_poll_delay_entry.attr,
+ &queue_io_timeout_entry.attr,
#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
&throtl_sample_time_entry.attr,
#endif
NULL,
};
+static umode_t queue_attr_visible(struct kobject *kobj, struct attribute *attr,
+ int n)
+{
+ struct request_queue *q =
+ container_of(kobj, struct request_queue, kobj);
+
+ if (attr == &queue_io_timeout_entry.attr &&
+ (!q->mq_ops || !q->mq_ops->timeout))
+ return 0;
+
+ return attr->mode;
+}
+
+static struct attribute_group queue_attr_group = {
+ .attrs = queue_attrs,
+ .is_visible = queue_attr_visible,
+};
+
+
#define to_queue(atr) container_of((atr), struct queue_sysfs_entry, attr)
static ssize_t
@@ -871,7 +917,6 @@ static const struct sysfs_ops queue_sysfs_ops = {
struct kobj_type blk_queue_ktype = {
.sysfs_ops = &queue_sysfs_ops,
- .default_attrs = default_attrs,
.release = blk_release_queue,
};
@@ -921,6 +966,14 @@ int blk_register_queue(struct gendisk *disk)
goto unlock;
}
+ ret = sysfs_create_group(&q->kobj, &queue_attr_group);
+ if (ret) {
+ blk_trace_remove_sysfs(dev);
+ kobject_del(&q->kobj);
+ kobject_put(&dev->kobj);
+ goto unlock;
+ }
+
if (q->mq_ops) {
__blk_mq_register_dev(dev, q);
blk_mq_debugfs_register(q);
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index b7a0e9856134..38cafb2c4a26 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -139,20 +139,16 @@ static void rwb_wake_all(struct rq_wb *rwb)
for (i = 0; i < WBT_NUM_RWQ; i++) {
struct rq_wait *rqw = &rwb->rq_wait[i];
- if (waitqueue_active(&rqw->wait))
+ if (wq_has_sleeper(&rqw->wait))
wake_up_all(&rqw->wait);
}
}
-void __wbt_done(struct rq_wb *rwb, enum wbt_flags wb_acct)
+static void wbt_rqw_done(struct rq_wb *rwb, struct rq_wait *rqw,
+ enum wbt_flags wb_acct)
{
- struct rq_wait *rqw;
int inflight, limit;
- if (!(wb_acct & WBT_TRACKED))
- return;
-
- rqw = get_rq_wait(rwb, wb_acct);
inflight = atomic_dec_return(&rqw->inflight);
/*
@@ -182,14 +178,25 @@ void __wbt_done(struct rq_wb *rwb, enum wbt_flags wb_acct)
if (inflight && inflight >= limit)
return;
- if (waitqueue_active(&rqw->wait)) {
+ if (wq_has_sleeper(&rqw->wait)) {
int diff = limit - inflight;
if (!inflight || diff >= rwb->wb_background / 2)
- wake_up(&rqw->wait);
+ wake_up_all(&rqw->wait);
}
}
+void __wbt_done(struct rq_wb *rwb, enum wbt_flags wb_acct)
+{
+ struct rq_wait *rqw;
+
+ if (!(wb_acct & WBT_TRACKED))
+ return;
+
+ rqw = get_rq_wait(rwb, wb_acct);
+ wbt_rqw_done(rwb, rqw, wb_acct);
+}
+
/*
* Called on completion of a request. Note that it's also called when
* a request is merged, when the request gets freed.
@@ -508,6 +515,13 @@ static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw)
{
unsigned int limit;
+ /*
+ * If we got disabled, just return UINT_MAX. This ensures that
+ * we'll properly inc a new IO, and dec+wakeup at the end.
+ */
+ if (!rwb_enabled(rwb))
+ return UINT_MAX;
+
if ((rw & REQ_OP_MASK) == REQ_OP_DISCARD)
return rwb->wb_background;
@@ -533,6 +547,35 @@ static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw)
return limit;
}
+struct wbt_wait_data {
+ struct wait_queue_entry wq;
+ struct task_struct *task;
+ struct rq_wb *rwb;
+ struct rq_wait *rqw;
+ unsigned long rw;
+ bool got_token;
+};
+
+static int wbt_wake_function(struct wait_queue_entry *curr, unsigned int mode,
+ int wake_flags, void *key)
+{
+ struct wbt_wait_data *data = container_of(curr, struct wbt_wait_data,
+ wq);
+
+ /*
+ * If we fail to get a budget, return -1 to interrupt the wake up
+ * loop in __wake_up_common.
+ */
+ if (!atomic_inc_below(&data->rqw->inflight,
+ get_limit(data->rwb, data->rw)))
+ return -1;
+
+ data->got_token = true;
+ list_del_init(&curr->entry);
+ wake_up_process(data->task);
+ return 1;
+}
+
/*
* Block if we will exceed our limit, or if we are currently waiting for
* the timer to kick off queuing again.
@@ -543,34 +586,42 @@ static void __wbt_wait(struct rq_wb *rwb, enum wbt_flags wb_acct,
__acquires(lock)
{
struct rq_wait *rqw = get_rq_wait(rwb, wb_acct);
- DECLARE_WAITQUEUE(wait, current);
-
- /*
- * inc it here even if disabled, since we'll dec it at completion.
- * this only happens if the task was sleeping in __wbt_wait(),
- * and someone turned it off at the same time.
- */
- if (!rwb_enabled(rwb)) {
- atomic_inc(&rqw->inflight);
- return;
- }
-
- if (!waitqueue_active(&rqw->wait)
- && atomic_inc_below(&rqw->inflight, get_limit(rwb, rw)))
+ struct wbt_wait_data data = {
+ .wq = {
+ .func = wbt_wake_function,
+ .entry = LIST_HEAD_INIT(data.wq.entry),
+ },
+ .task = current,
+ .rwb = rwb,
+ .rqw = rqw,
+ .rw = rw,
+ };
+ bool has_sleeper;
+
+ has_sleeper = wq_has_sleeper(&rqw->wait);
+ if (!has_sleeper &&
+ atomic_inc_below(&rqw->inflight, get_limit(rwb, rw)))
return;
- add_wait_queue_exclusive(&rqw->wait, &wait);
+ prepare_to_wait_exclusive(&rqw->wait, &data.wq, TASK_UNINTERRUPTIBLE);
do {
set_current_state(TASK_UNINTERRUPTIBLE);
+ if (data.got_token)
+ break;
- if (!rwb_enabled(rwb)) {
- atomic_inc(&rqw->inflight);
- break;
- }
-
-
- if (atomic_inc_below(&rqw->inflight, get_limit(rwb, rw)))
+ if (!has_sleeper &&
+ atomic_inc_below(&rqw->inflight, get_limit(rwb, rw))) {
+ finish_wait(&rqw->wait, &data.wq);
+
+ /*
+ * We raced with wbt_wake_function() getting a token,
+ * which means we now have two. Put our local token
+ * and wake anyone else potentially waiting for one.
+ */
+ if (data.got_token)
+ wbt_rqw_done(rwb, rqw, wb_acct);
break;
+ }
if (lock) {
spin_unlock_irq(lock);
@@ -578,10 +629,11 @@ static void __wbt_wait(struct rq_wb *rwb, enum wbt_flags wb_acct,
spin_lock_irq(lock);
} else
io_schedule();
+
+ has_sleeper = false;
} while (1);
- __set_current_state(TASK_RUNNING);
- remove_wait_queue(&rqw->wait, &wait);
+ finish_wait(&rqw->wait, &data.wq);
}
static inline bool wbt_should_throttle(struct rq_wb *rwb, struct bio *bio)
diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index 48ecc7cb1eb0..4c84dea08c55 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -64,7 +64,7 @@ static void add_early_randomness(struct hwrng *rng)
size_t size = min_t(size_t, 16, rng_buffer_size());
mutex_lock(&reading_mutex);
- bytes_read = rng_get_data(rng, rng_buffer, size, 1);
+ bytes_read = rng_get_data(rng, rng_buffer, size, 0);
mutex_unlock(&reading_mutex);
if (bytes_read > 0)
add_device_randomness(rng_buffer, bytes_read);
diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index 9ae30c60bebe..ded042287c4c 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -96,6 +96,13 @@ void __weak unxlate_dev_mem_ptr(phys_addr_t phys, void *addr)
}
#endif
+static inline bool should_stop_iteration(void)
+{
+ if (need_resched())
+ cond_resched();
+ return fatal_signal_pending(current);
+}
+
/*
* This funcion reads the *physical* memory. The f_pos points directly to the
* memory location.
@@ -174,6 +181,8 @@ static ssize_t read_mem(struct file *file, char __user *buf,
p += sz;
count -= sz;
read += sz;
+ if (should_stop_iteration())
+ break;
}
kfree(bounce);
@@ -250,6 +259,8 @@ static ssize_t write_mem(struct file *file, const char __user *buf,
p += sz;
count -= sz;
written += sz;
+ if (should_stop_iteration())
+ break;
}
*ppos += written;
@@ -463,6 +474,10 @@ static ssize_t read_kmem(struct file *file, char __user *buf,
read += sz;
low_count -= sz;
count -= sz;
+ if (should_stop_iteration()) {
+ count = 0;
+ break;
+ }
}
}
@@ -487,6 +502,8 @@ static ssize_t read_kmem(struct file *file, char __user *buf,
buf += sz;
read += sz;
p += sz;
+ if (should_stop_iteration())
+ break;
}
free_page((unsigned long)kbuf);
}
@@ -539,6 +556,8 @@ static ssize_t do_write_kmem(unsigned long p, const char __user *buf,
p += sz;
count -= sz;
written += sz;
+ if (should_stop_iteration())
+ break;
}
*ppos += written;
@@ -590,6 +609,8 @@ static ssize_t write_kmem(struct file *file, const char __user *buf,
buf += sz;
virtr += sz;
p += sz;
+ if (should_stop_iteration())
+ break;
}
free_page((unsigned long)kbuf);
}
diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index c5d18610b738..dcbb5cae5814 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -3038,6 +3038,7 @@ static int talitos_remove(struct platform_device *ofdev)
break;
case CRYPTO_ALG_TYPE_AEAD:
crypto_unregister_aead(&t_alg->algt.alg.aead);
+ break;
case CRYPTO_ALG_TYPE_AHASH:
crypto_unregister_ahash(&t_alg->algt.alg.hash);
break;
diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c
index d569d26872b9..7573323dc95f 100644
--- a/drivers/firmware/dmi_scan.c
+++ b/drivers/firmware/dmi_scan.c
@@ -412,7 +412,7 @@ static void __init save_mem_devices(const struct dmi_header *dm, void *v)
bytes = ~0ull;
else if (size & 0x8000)
bytes = (u64)(size & 0x7fff) << 10;
- else if (size != 0x7fff)
+ else if (size != 0x7fff || dm->length < 0x20)
bytes = (u64)size << 20;
else
bytes = (u64)get_unaligned((u32 *)&d[0x1C]) << 20;
diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c
index c364ef94cc36..77c9f4d8668a 100644
--- a/drivers/gpu/drm/amd/amdgpu/si.c
+++ b/drivers/gpu/drm/amd/amdgpu/si.c
@@ -1813,7 +1813,7 @@ static void si_program_aspm(struct amdgpu_device *adev)
if (orig != data)
si_pif_phy1_wreg(adev,PB1_PIF_PWRDOWN_1, data);
- if ((adev->family != CHIP_OLAND) && (adev->family != CHIP_HAINAN)) {
+ if ((adev->asic_type != CHIP_OLAND) && (adev->asic_type != CHIP_HAINAN)) {
orig = data = si_pif_phy0_rreg(adev,PB0_PIF_PWRDOWN_0);
data &= ~PLL_RAMP_UP_TIME_0_MASK;
if (orig != data)
@@ -1862,14 +1862,14 @@ static void si_program_aspm(struct amdgpu_device *adev)
orig = data = si_pif_phy0_rreg(adev,PB0_PIF_CNTL);
data &= ~LS2_EXIT_TIME_MASK;
- if ((adev->family == CHIP_OLAND) || (adev->family == CHIP_HAINAN))
+ if ((adev->asic_type == CHIP_OLAND) || (adev->asic_type == CHIP_HAINAN))
data |= LS2_EXIT_TIME(5);
if (orig != data)
si_pif_phy0_wreg(adev,PB0_PIF_CNTL, data);
orig = data = si_pif_phy1_rreg(adev,PB1_PIF_CNTL);
data &= ~LS2_EXIT_TIME_MASK;
- if ((adev->family == CHIP_OLAND) || (adev->family == CHIP_HAINAN))
+ if ((adev->asic_type == CHIP_OLAND) || (adev->asic_type == CHIP_HAINAN))
data |= LS2_EXIT_TIME(5);
if (orig != data)
si_pif_phy1_wreg(adev,PB1_PIF_CNTL, data);
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index f0a56f50f334..bcbfe1bde4de 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -1406,7 +1406,6 @@ static void ib_cache_event(struct ib_event_handler *handler,
event->event == IB_EVENT_PORT_ACTIVE ||
event->event == IB_EVENT_LID_CHANGE ||
event->event == IB_EVENT_PKEY_CHANGE ||
- event->event == IB_EVENT_SM_CHANGE ||
event->event == IB_EVENT_CLIENT_REREGISTER ||
event->event == IB_EVENT_GID_CHANGE) {
work = kmalloc(sizeof *work, GFP_ATOMIC);
diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c
index d71c63107151..860225fe2789 100644
--- a/drivers/infiniband/core/multicast.c
+++ b/drivers/infiniband/core/multicast.c
@@ -804,7 +804,6 @@ static void mcast_event_handler(struct ib_event_handler *handler,
switch (event->event) {
case IB_EVENT_PORT_ERR:
case IB_EVENT_LID_CHANGE:
- case IB_EVENT_SM_CHANGE:
case IB_EVENT_CLIENT_REREGISTER:
mcast_groups_event(&dev->port[index], MCAST_GROUP_ERROR);
break;
diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.c b/drivers/infiniband/hw/bnxt_re/hw_counters.c
index 604b71875f5f..3421a0b15983 100644
--- a/drivers/infiniband/hw/bnxt_re/hw_counters.c
+++ b/drivers/infiniband/hw/bnxt_re/hw_counters.c
@@ -74,7 +74,7 @@ static const char * const bnxt_re_stat_name[] = {
[BNXT_RE_SEQ_ERR_NAKS_RCVD] = "seq_err_naks_rcvd",
[BNXT_RE_MAX_RETRY_EXCEEDED] = "max_retry_exceeded",
[BNXT_RE_RNR_NAKS_RCVD] = "rnr_naks_rcvd",
- [BNXT_RE_MISSING_RESP] = "missin_resp",
+ [BNXT_RE_MISSING_RESP] = "missing_resp",
[BNXT_RE_UNRECOVERABLE_ERR] = "unrecoverable_err",
[BNXT_RE_BAD_RESP_ERR] = "bad_resp_err",
[BNXT_RE_LOCAL_QP_OP_ERR] = "local_qp_op_err",
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 346e121e5821..864ad2ea3bd2 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -1458,7 +1458,7 @@ struct ib_srq *bnxt_re_create_srq(struct ib_pd *ib_pd,
dev_err(rdev_to_dev(rdev), "SRQ copy to udata failed!");
bnxt_qplib_destroy_srq(&rdev->qplib_res,
&srq->qplib_srq);
- goto exit;
+ goto fail;
}
}
if (nq)
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index af63b6332e80..ac49adc884b3 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -939,15 +939,19 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
}
if (MLX5_CAP_GEN(mdev, tag_matching)) {
- props->tm_caps.max_rndv_hdr_size = MLX5_TM_MAX_RNDV_MSG_SIZE;
props->tm_caps.max_num_tags =
(1 << MLX5_CAP_GEN(mdev, log_tag_matching_list_sz)) - 1;
- props->tm_caps.flags = IB_TM_CAP_RC;
props->tm_caps.max_ops =
1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
props->tm_caps.max_sge = MLX5_TM_MAX_SGE;
}
+ if (MLX5_CAP_GEN(mdev, tag_matching) &&
+ MLX5_CAP_GEN(mdev, rndv_offload_rc)) {
+ props->tm_caps.flags = IB_TM_CAP_RNDV_RC;
+ props->tm_caps.max_rndv_hdr_size = MLX5_TM_MAX_RNDV_MSG_SIZE;
+ }
+
if (MLX5_CAP_GEN(dev->mdev, cq_moderation)) {
props->cq_caps.max_cq_moderation_count =
MLX5_MAX_CQ_COUNT;
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index e05bb3ed9ac3..3779821e667e 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -1388,4 +1388,18 @@ void mlx5_ib_put_xlt_emergency_page(void);
int bfregn_to_uar_index(struct mlx5_ib_dev *dev,
struct mlx5_bfreg_info *bfregi, u32 bfregn,
bool dyn_bfreg);
+
+static inline bool mlx5_ib_can_use_umr(struct mlx5_ib_dev *dev,
+ bool do_modify_atomic)
+{
+ if (MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled))
+ return false;
+
+ if (do_modify_atomic &&
+ MLX5_CAP_GEN(dev->mdev, atomic) &&
+ MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled))
+ return false;
+
+ return true;
+}
#endif /* MLX5_IB_H */
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 96592b706e39..ebe5856cd65d 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1325,9 +1325,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
if (err < 0)
return ERR_PTR(err);
- use_umr = !MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled) &&
- (!MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled) ||
- !MLX5_CAP_GEN(dev->mdev, atomic));
+ use_umr = mlx5_ib_can_use_umr(dev, true);
if (order <= mr_cache_max_order(dev) && use_umr) {
mr = alloc_mr_from_cache(pd, umem, virt_addr, length, ncont,
@@ -1477,7 +1475,8 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
goto err;
}
- if (flags & IB_MR_REREG_TRANS && !use_umr_mtt_update(mr, addr, len)) {
+ if (!mlx5_ib_can_use_umr(dev, true) ||
+ (flags & IB_MR_REREG_TRANS && !use_umr_mtt_update(mr, addr, len))) {
/*
* UMR can't be used - MKey needs to be replaced.
*/
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index 1e88213459f2..ba09068f6200 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -279,8 +279,7 @@ void ipoib_event(struct ib_event_handler *handler,
ipoib_dbg(priv, "Event %d on device %s port %d\n", record->event,
dev_name(&record->device->dev), record->element.port_num);
- if (record->event == IB_EVENT_SM_CHANGE ||
- record->event == IB_EVENT_CLIENT_REREGISTER) {
+ if (record->event == IB_EVENT_CLIENT_REREGISTER) {
queue_work(ipoib_workqueue, &priv->flush_light);
} else if (record->event == IB_EVENT_PORT_ERR ||
record->event == IB_EVENT_PORT_ACTIVE ||
diff --git a/drivers/media/platform/atmel/atmel-isc.c b/drivers/media/platform/atmel/atmel-isc.c
index 09955f07554d..2f8781e039ed 100644
--- a/drivers/media/platform/atmel/atmel-isc.c
+++ b/drivers/media/platform/atmel/atmel-isc.c
@@ -1719,8 +1719,11 @@ static int isc_parse_dt(struct device *dev, struct isc_device *isc)
break;
}
- subdev_entity->asd = devm_kzalloc(dev,
- sizeof(*subdev_entity->asd), GFP_KERNEL);
+ /* asd will be freed by the subsystem once it's added to the
+ * notifier list
+ */
+ subdev_entity->asd = kzalloc(sizeof(*subdev_entity->asd),
+ GFP_KERNEL);
if (subdev_entity->asd == NULL) {
of_node_put(rem);
ret = -ENOMEM;
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index 622bcfb3793e..c343713cedd0 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -3058,12 +3058,13 @@ int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode, bool keep_link)
/* if VF indicate to PF this function is going down (PF will delete sp
* elements and clear initializations
*/
- if (IS_VF(bp))
+ if (IS_VF(bp)) {
+ bnx2x_clear_vlan_info(bp);
bnx2x_vfpf_close_vf(bp);
- else if (unload_mode != UNLOAD_RECOVERY)
+ } else if (unload_mode != UNLOAD_RECOVERY) {
/* if this is a normal/close unload need to clean up chip*/
bnx2x_chip_cleanup(bp, unload_mode, keep_link);
- else {
+ } else {
/* Send the UNLOAD_REQUEST to the MCP */
bnx2x_send_unload_req(bp, unload_mode);
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
index 142bc11b9fbb..69f3ffbac12f 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
@@ -425,6 +425,8 @@ void bnx2x_set_reset_global(struct bnx2x *bp);
void bnx2x_disable_close_the_gate(struct bnx2x *bp);
int bnx2x_init_hw_func_cnic(struct bnx2x *bp);
+void bnx2x_clear_vlan_info(struct bnx2x *bp);
+
/**
* bnx2x_sp_event - handle ramrods completion.
*
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index f8cadb2e02e6..db042099dc13 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -8501,11 +8501,21 @@ int bnx2x_set_vlan_one(struct bnx2x *bp, u16 vlan,
return rc;
}
+void bnx2x_clear_vlan_info(struct bnx2x *bp)
+{
+ struct bnx2x_vlan_entry *vlan;
+
+ /* Mark that hw forgot all entries */
+ list_for_each_entry(vlan, &bp->vlan_reg, link)
+ vlan->hw = false;
+
+ bp->vlan_cnt = 0;
+}
+
static int bnx2x_del_all_vlans(struct bnx2x *bp)
{
struct bnx2x_vlan_mac_obj *vlan_obj = &bp->sp_objs[0].vlan_obj;
unsigned long ramrod_flags = 0, vlan_flags = 0;
- struct bnx2x_vlan_entry *vlan;
int rc;
__set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
@@ -8514,10 +8524,7 @@ static int bnx2x_del_all_vlans(struct bnx2x *bp)
if (rc)
return rc;
- /* Mark that hw forgot all entries */
- list_for_each_entry(vlan, &bp->vlan_reg, link)
- vlan->hw = false;
- bp->vlan_cnt = 0;
+ bnx2x_clear_vlan_info(bp);
return 0;
}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
index 7c5bfc931128..f46202288837 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
@@ -1066,14 +1066,12 @@ static void cudbg_t4_fwcache(struct cudbg_init *pdbg_init,
}
}
-static int cudbg_collect_mem_region(struct cudbg_init *pdbg_init,
- struct cudbg_buffer *dbg_buff,
- struct cudbg_error *cudbg_err,
- u8 mem_type)
+static unsigned long cudbg_mem_region_size(struct cudbg_init *pdbg_init,
+ struct cudbg_error *cudbg_err,
+ u8 mem_type)
{
struct adapter *padap = pdbg_init->adap;
struct cudbg_meminfo mem_info;
- unsigned long size;
u8 mc_idx;
int rc;
@@ -1087,7 +1085,16 @@ static int cudbg_collect_mem_region(struct cudbg_init *pdbg_init,
if (rc)
return rc;
- size = mem_info.avail[mc_idx].limit - mem_info.avail[mc_idx].base;
+ return mem_info.avail[mc_idx].limit - mem_info.avail[mc_idx].base;
+}
+
+static int cudbg_collect_mem_region(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err,
+ u8 mem_type)
+{
+ unsigned long size = cudbg_mem_region_size(pdbg_init, cudbg_err, mem_type);
+
return cudbg_read_fw_mem(pdbg_init, dbg_buff, mem_type, size,
cudbg_err);
}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index de169b73a871..326dbfe868f1 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -5626,7 +5626,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
whoami = t4_read_reg(adapter, PL_WHOAMI_A);
pci_read_config_word(pdev, PCI_DEVICE_ID, &device_id);
chip = t4_get_chip_type(adapter, CHELSIO_PCI_ID_VER(device_id));
- if (chip < 0) {
+ if ((int)chip < 0) {
dev_err(&pdev->dev, "Device %d is not supported\n", device_id);
err = chip;
goto out_free_adapter;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/smt.c b/drivers/net/ethernet/chelsio/cxgb4/smt.c
index 7b2207a2a130..9b3f4205cb4d 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/smt.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/smt.c
@@ -98,11 +98,9 @@ found_reuse:
static void t4_smte_free(struct smt_entry *e)
{
- spin_lock_bh(&e->lock);
if (atomic_read(&e->refcnt) == 0) { /* hasn't been recycled */
e->state = SMT_STATE_UNUSED;
}
- spin_unlock_bh(&e->lock);
}
/**
@@ -112,8 +110,10 @@ static void t4_smte_free(struct smt_entry *e)
*/
void cxgb4_smt_release(struct smt_entry *e)
{
+ spin_lock_bh(&e->lock);
if (atomic_dec_and_test(&e->refcnt))
t4_smte_free(e);
+ spin_unlock_bh(&e->lock);
}
EXPORT_SYMBOL(cxgb4_smt_release);
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 557bf4ccdc77..3e4679ab368a 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -2889,12 +2889,10 @@ static int enable_scrq_irq(struct ibmvnic_adapter *adapter,
if (test_bit(0, &adapter->resetting) &&
adapter->reset_reason == VNIC_RESET_MOBILITY) {
- u64 val = (0xff000000) | scrq->hw_irq;
+ struct irq_desc *desc = irq_to_desc(scrq->irq);
+ struct irq_chip *chip = irq_desc_get_chip(desc);
- rc = plpar_hcall_norets(H_EOI, val);
- if (rc)
- dev_err(dev, "H_EOI FAILED irq 0x%llx. rc=%ld\n",
- val, rc);
+ chip->irq_eoi(&desc->irq_data);
}
rc = plpar_hcall_norets(H_VIOCTL, adapter->vdev->unit_address,
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 6038ec6118ce..b48e6b5f9f97 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -61,6 +61,7 @@
#include <net/vxlan.h>
#include <net/mpls.h>
#include <net/xdp_sock.h>
+#include <net/xfrm.h>
#include "ixgbe.h"
#include "ixgbe_common.h"
@@ -2646,7 +2647,7 @@ adjust_by_size:
/* 16K ints/sec to 9.2K ints/sec */
avg_wire_size *= 15;
avg_wire_size += 11452;
- } else if (avg_wire_size <= 1980) {
+ } else if (avg_wire_size < 1968) {
/* 9.2K ints/sec to 8K ints/sec */
avg_wire_size *= 5;
avg_wire_size += 22420;
@@ -2679,6 +2680,8 @@ adjust_by_size:
case IXGBE_LINK_SPEED_2_5GB_FULL:
case IXGBE_LINK_SPEED_1GB_FULL:
case IXGBE_LINK_SPEED_10_FULL:
+ if (avg_wire_size > 8064)
+ avg_wire_size = 8064;
itr += DIV_ROUND_UP(avg_wire_size,
IXGBE_ITR_ADAPTIVE_MIN_INC * 64) *
IXGBE_ITR_ADAPTIVE_MIN_INC;
@@ -8721,7 +8724,8 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb,
#endif /* IXGBE_FCOE */
#ifdef CONFIG_IXGBE_IPSEC
- if (skb->sp && !ixgbe_ipsec_tx(tx_ring, first, &ipsec_tx))
+ if (xfrm_offload(skb) &&
+ !ixgbe_ipsec_tx(tx_ring, first, &ipsec_tx))
goto out_drop;
#endif
tso = ixgbe_tso(tx_ring, first, &hdr_len, &ipsec_tx);
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index d47d4f86ac11..454eb42ce06a 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -1190,7 +1190,7 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv)
err = mlx4_qp_alloc(mdev->dev, priv->base_qpn, rss_map->indir_qp);
if (err) {
en_err(priv, "Failed to allocate RSS indirection QP\n");
- goto rss_err;
+ goto qp_alloc_err;
}
rss_map->indir_qp->event = mlx4_en_sqp_event;
@@ -1244,6 +1244,7 @@ indir_err:
MLX4_QP_STATE_RST, NULL, 0, 0, rss_map->indir_qp);
mlx4_qp_remove(mdev->dev, rss_map->indir_qp);
mlx4_qp_free(mdev->dev, rss_map->indir_qp);
+qp_alloc_err:
kfree(rss_map->indir_qp);
rss_map->indir_qp = NULL;
rss_err:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 1e5015c6677f..e2780ad26ec3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1205,7 +1205,7 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
err = mlx5_core_set_hca_defaults(dev);
if (err) {
dev_err(&pdev->dev, "Failed to set hca defaults\n");
- goto err_fs;
+ goto err_sriov;
}
err = mlx5_sriov_attach(dev);
@@ -1653,6 +1653,7 @@ static const struct pci_device_id mlx5_core_pci_table[] = {
{ PCI_VDEVICE(MELLANOX, 0x101e), MLX5_PCI_DEV_IS_VF}, /* ConnectX Family mlx5Gen Virtual Function */
{ PCI_VDEVICE(MELLANOX, 0xa2d2) }, /* BlueField integrated ConnectX-5 network controller */
{ PCI_VDEVICE(MELLANOX, 0xa2d3), MLX5_PCI_DEV_IS_VF}, /* BlueField integrated ConnectX-5 network controller VF */
+ { PCI_VDEVICE(MELLANOX, 0xa2d6) }, /* BlueField-2 integrated ConnectX-6 Dx network controller */
{ 0, }
};
diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
index 7c71ea15251f..f380fae8799d 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
@@ -63,7 +63,12 @@ struct mpa_v2_hdr {
#define MPA_REV2(_mpa_rev) ((_mpa_rev) == MPA_NEGOTIATION_TYPE_ENHANCED)
#define QED_IWARP_INVALID_TCP_CID 0xffffffff
-#define QED_IWARP_RCV_WND_SIZE_DEF (256 * 1024)
+
+#define QED_IWARP_RCV_WND_SIZE_DEF_BB_2P (200 * 1024)
+#define QED_IWARP_RCV_WND_SIZE_DEF_BB_4P (100 * 1024)
+#define QED_IWARP_RCV_WND_SIZE_DEF_AH_2P (150 * 1024)
+#define QED_IWARP_RCV_WND_SIZE_DEF_AH_4P (90 * 1024)
+
#define QED_IWARP_RCV_WND_SIZE_MIN (0xffff)
#define TIMESTAMP_HEADER_SIZE (12)
#define QED_IWARP_MAX_FIN_RT_DEFAULT (2)
@@ -532,7 +537,8 @@ int qed_iwarp_destroy_qp(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp)
/* Make sure ep is closed before returning and freeing memory. */
if (ep) {
- while (ep->state != QED_IWARP_EP_CLOSED && wait_count++ < 200)
+ while (READ_ONCE(ep->state) != QED_IWARP_EP_CLOSED &&
+ wait_count++ < 200)
msleep(100);
if (ep->state != QED_IWARP_EP_CLOSED)
@@ -1022,8 +1028,6 @@ qed_iwarp_mpa_complete(struct qed_hwfn *p_hwfn,
params.ep_context = ep;
- ep->state = QED_IWARP_EP_CLOSED;
-
switch (fw_return_code) {
case RDMA_RETURN_OK:
ep->qp->max_rd_atomic_req = ep->cm_info.ord;
@@ -1083,6 +1087,10 @@ qed_iwarp_mpa_complete(struct qed_hwfn *p_hwfn,
break;
}
+ if (fw_return_code != RDMA_RETURN_OK)
+ /* paired with READ_ONCE in destroy_qp */
+ smp_store_release(&ep->state, QED_IWARP_EP_CLOSED);
+
ep->event_cb(ep->cb_context, &params);
/* on passive side, if there is no associated QP (REJECT) we need to
@@ -2609,7 +2617,8 @@ qed_iwarp_ll2_alloc_buffers(struct qed_hwfn *p_hwfn,
static int
qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn,
- struct qed_rdma_start_in_params *params)
+ struct qed_rdma_start_in_params *params,
+ u32 rcv_wnd_size)
{
struct qed_iwarp_info *iwarp_info;
struct qed_ll2_acquire_data data;
@@ -2637,6 +2646,7 @@ qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn,
cbs.rx_release_cb = qed_iwarp_ll2_rel_rx_pkt;
cbs.tx_comp_cb = qed_iwarp_ll2_comp_tx_pkt;
cbs.tx_release_cb = qed_iwarp_ll2_rel_tx_pkt;
+ cbs.slowpath_cb = NULL;
cbs.cookie = p_hwfn;
memset(&data, 0, sizeof(data));
@@ -2675,7 +2685,7 @@ qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn,
data.input.conn_type = QED_LL2_TYPE_OOO;
data.input.mtu = params->max_mtu;
- n_ooo_bufs = (QED_IWARP_MAX_OOO * QED_IWARP_RCV_WND_SIZE_DEF) /
+ n_ooo_bufs = (QED_IWARP_MAX_OOO * rcv_wnd_size) /
iwarp_info->max_mtu;
n_ooo_bufs = min_t(u32, n_ooo_bufs, QED_IWARP_LL2_OOO_MAX_RX_SIZE);
@@ -2708,6 +2718,8 @@ qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn,
data.input.rx_num_desc = n_ooo_bufs * 2;
data.input.tx_num_desc = data.input.rx_num_desc;
data.input.tx_max_bds_per_packet = QED_IWARP_MAX_BDS_PER_FPDU;
+ data.input.tx_tc = PKT_LB_TC;
+ data.input.tx_dest = QED_LL2_TX_DEST_LB;
data.p_connection_handle = &iwarp_info->ll2_mpa_handle;
data.input.secondary_queue = true;
data.cbs = &cbs;
@@ -2762,16 +2774,30 @@ err:
return rc;
}
+static struct {
+ u32 two_ports;
+ u32 four_ports;
+} qed_iwarp_rcv_wnd_size[MAX_CHIP_IDS] = {
+ {QED_IWARP_RCV_WND_SIZE_DEF_BB_2P, QED_IWARP_RCV_WND_SIZE_DEF_BB_4P},
+ {QED_IWARP_RCV_WND_SIZE_DEF_AH_2P, QED_IWARP_RCV_WND_SIZE_DEF_AH_4P}
+};
+
int qed_iwarp_setup(struct qed_hwfn *p_hwfn,
struct qed_rdma_start_in_params *params)
{
+ struct qed_dev *cdev = p_hwfn->cdev;
struct qed_iwarp_info *iwarp_info;
+ enum chip_ids chip_id;
u32 rcv_wnd_size;
iwarp_info = &p_hwfn->p_rdma_info->iwarp;
iwarp_info->tcp_flags = QED_IWARP_TS_EN;
- rcv_wnd_size = QED_IWARP_RCV_WND_SIZE_DEF;
+
+ chip_id = QED_IS_BB(cdev) ? CHIP_BB : CHIP_K2;
+ rcv_wnd_size = (qed_device_num_ports(cdev) == 4) ?
+ qed_iwarp_rcv_wnd_size[chip_id].four_ports :
+ qed_iwarp_rcv_wnd_size[chip_id].two_ports;
/* value 0 is used for ilog2(QED_IWARP_RCV_WND_SIZE_MIN) */
iwarp_info->rcv_wnd_scale = ilog2(rcv_wnd_size) -
@@ -2794,7 +2820,7 @@ int qed_iwarp_setup(struct qed_hwfn *p_hwfn,
qed_iwarp_async_event);
qed_ooo_setup(p_hwfn);
- return qed_iwarp_ll2_start(p_hwfn, params);
+ return qed_iwarp_ll2_start(p_hwfn, params, rcv_wnd_size);
}
int qed_iwarp_stop(struct qed_hwfn *p_hwfn)
@@ -2825,7 +2851,9 @@ static void qed_iwarp_qp_in_error(struct qed_hwfn *p_hwfn,
params.status = (fw_return_code == IWARP_QP_IN_ERROR_GOOD_CLOSE) ?
0 : -ECONNRESET;
- ep->state = QED_IWARP_EP_CLOSED;
+ /* paired with READ_ONCE in destroy_qp */
+ smp_store_release(&ep->state, QED_IWARP_EP_CLOSED);
+
spin_lock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
list_del(&ep->list_entry);
spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
@@ -2914,7 +2942,8 @@ qed_iwarp_tcp_connect_unsuccessful(struct qed_hwfn *p_hwfn,
params.event = QED_IWARP_EVENT_ACTIVE_COMPLETE;
params.ep_context = ep;
params.cm_info = &ep->cm_info;
- ep->state = QED_IWARP_EP_CLOSED;
+ /* paired with READ_ONCE in destroy_qp */
+ smp_store_release(&ep->state, QED_IWARP_EP_CLOSED);
switch (fw_return_code) {
case IWARP_CONN_ERROR_TCP_CONNECT_INVALID_PACKET:
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
index f0df5193f047..9a1cea8ec957 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
@@ -886,10 +886,8 @@ static int phy_power_on(struct rk_priv_data *bsp_priv, bool enable)
int ret;
struct device *dev = &bsp_priv->pdev->dev;
- if (!ldo) {
- dev_err(dev, "no regulator found\n");
- return -1;
- }
+ if (!ldo)
+ return 0;
if (enable) {
ret = regulator_enable(ldo);
diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c
index b179d720d7ae..6d709332d7be 100644
--- a/drivers/net/usb/cdc_ether.c
+++ b/drivers/net/usb/cdc_ether.c
@@ -212,8 +212,15 @@ int usbnet_generic_cdc_bind(struct usbnet *dev, struct usb_interface *intf)
goto bad_desc;
}
skip:
- if ( rndis &&
- header.usb_cdc_acm_descriptor &&
+ /* Communcation class functions with bmCapabilities are not
+ * RNDIS. But some Wireless class RNDIS functions use
+ * bmCapabilities for their own purpose. The failsafe is
+ * therefore applied only to Communication class RNDIS
+ * functions. The rndis test is redundant, but a cheap
+ * optimization.
+ */
+ if (rndis && is_rndis(&intf->cur_altsetting->desc) &&
+ header.usb_cdc_acm_descriptor &&
header.usb_cdc_acm_descriptor->bmCapabilities) {
dev_dbg(&intf->dev,
"ACM capabilities %02x, not really RNDIS?\n",
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/trx.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/trx.c
index 9ab56827124e..aaa6c7c511d3 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/trx.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/trx.c
@@ -626,7 +626,7 @@ void rtl92cu_fill_fake_txdesc(struct ieee80211_hw *hw, u8 * pDesc,
SET_TX_DESC_NAV_USE_HDR(pDesc, 1);
} else {
SET_TX_DESC_HWSEQ_EN(pDesc, 1); /* Hw set sequence number */
- SET_TX_DESC_PKT_ID(pDesc, 0x100); /* set bit3 to 1. */
+ SET_TX_DESC_PKT_ID(pDesc, BIT(3)); /* set bit3 to 1. */
}
SET_TX_DESC_USE_RATE(pDesc, 1); /* use data rate which is set by Sw */
SET_TX_DESC_OWN(pDesc, 1);
diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c
index 7e0ce1162c37..af53adcd7ea7 100644
--- a/drivers/pci/host/pci-hyperv.c
+++ b/drivers/pci/host/pci-hyperv.c
@@ -2603,7 +2603,7 @@ static int hv_pci_probe(struct hv_device *hdev,
* (2) There will be no overlap between domains (after fixing possible
* collisions) in the same VM.
*/
- dom_req = hdev->dev_instance.b[8] << 8 | hdev->dev_instance.b[9];
+ dom_req = hdev->dev_instance.b[5] << 8 | hdev->dev_instance.b[4];
dom = hv_get_dom_num(dom_req);
if (dom == HVPCI_DOM_INVALID) {
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 7e7fc63946a6..a355addeb1c8 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -9053,7 +9053,6 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
}
}
-#if defined(BUILD_NVME)
/* Clear NVME stats */
if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
@@ -9061,7 +9060,6 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
sizeof(phba->sli4_hba.hdwq[idx].nvme_cstat));
}
}
-#endif
/* Clear SCSI stats */
if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) {
diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c
index 474a3c8d77d5..4de542f8e1a2 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.c
+++ b/drivers/scsi/lpfc/lpfc_scsi.c
@@ -528,7 +528,6 @@ lpfc_sli4_io_xri_aborted(struct lpfc_hba *phba,
list_del_init(&psb->list);
psb->exch_busy = 0;
psb->status = IOSTAT_SUCCESS;
-#ifdef BUILD_NVME
if (psb->cur_iocbq.iocb_flag == LPFC_IO_NVME) {
qp->abts_nvme_io_bufs--;
spin_unlock(&qp->abts_io_buf_list_lock);
@@ -536,7 +535,6 @@ lpfc_sli4_io_xri_aborted(struct lpfc_hba *phba,
lpfc_sli4_nvme_xri_aborted(phba, axri, psb);
return;
}
-#endif
qp->abts_scsi_io_bufs--;
spin_unlock(&qp->abts_io_buf_list_lock);
diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index 1bf9f8ac85e2..b34dc6f1b52b 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -1849,8 +1849,7 @@ static int storvsc_probe(struct hv_device *device,
/*
* Set the number of HW queues we are supporting.
*/
- if (stor_device->num_sc != 0)
- host->nr_hw_queues = stor_device->num_sc + 1;
+ host->nr_hw_queues = num_present_cpus();
/*
* Set the error handler work queue.
diff --git a/drivers/usb/image/microtek.c b/drivers/usb/image/microtek.c
index a4dbb0cd80da..0fecc002fa9f 100644
--- a/drivers/usb/image/microtek.c
+++ b/drivers/usb/image/microtek.c
@@ -724,6 +724,10 @@ static int mts_usb_probe(struct usb_interface *intf,
}
+ if (ep_in_current != &ep_in_set[2]) {
+ MTS_WARNING("couldn't find two input bulk endpoints. Bailing out.\n");
+ return -ENODEV;
+ }
if ( ep_out == -1 ) {
MTS_WARNING( "couldn't find an output bulk endpoint. Bailing out.\n" );
diff --git a/drivers/usb/misc/adutux.c b/drivers/usb/misc/adutux.c
index dfd54ea4808f..2274f44c6c06 100644
--- a/drivers/usb/misc/adutux.c
+++ b/drivers/usb/misc/adutux.c
@@ -81,6 +81,7 @@ struct adu_device {
char serial_number[8];
int open_count; /* number of times this port has been opened */
+ unsigned long disconnected:1;
char *read_buffer_primary;
int read_buffer_length;
@@ -122,7 +123,7 @@ static void adu_abort_transfers(struct adu_device *dev)
{
unsigned long flags;
- if (dev->udev == NULL)
+ if (dev->disconnected)
return;
/* shutdown transfer */
@@ -245,7 +246,7 @@ static int adu_open(struct inode *inode, struct file *file)
}
dev = usb_get_intfdata(interface);
- if (!dev || !dev->udev) {
+ if (!dev) {
retval = -ENODEV;
goto exit_no_device;
}
@@ -328,7 +329,7 @@ static int adu_release(struct inode *inode, struct file *file)
}
adu_release_internal(dev);
- if (dev->udev == NULL) {
+ if (dev->disconnected) {
/* the device was unplugged before the file was released */
if (!dev->open_count) /* ... and we're the last user */
adu_delete(dev);
@@ -357,7 +358,7 @@ static ssize_t adu_read(struct file *file, __user char *buffer, size_t count,
return -ERESTARTSYS;
/* verify that the device wasn't unplugged */
- if (dev->udev == NULL) {
+ if (dev->disconnected) {
retval = -ENODEV;
pr_err("No device or device unplugged %d\n", retval);
goto exit;
@@ -522,7 +523,7 @@ static ssize_t adu_write(struct file *file, const __user char *buffer,
goto exit_nolock;
/* verify that the device wasn't unplugged */
- if (dev->udev == NULL) {
+ if (dev->disconnected) {
retval = -ENODEV;
pr_err("No device or device unplugged %d\n", retval);
goto exit;
@@ -767,15 +768,19 @@ static void adu_disconnect(struct usb_interface *interface)
dev = usb_get_intfdata(interface);
- mutex_lock(&dev->mtx); /* not interruptible */
- dev->udev = NULL; /* poison */
minor = dev->minor;
usb_deregister_dev(interface, &adu_class);
- mutex_unlock(&dev->mtx);
+
+ usb_poison_urb(dev->interrupt_in_urb);
+ usb_poison_urb(dev->interrupt_out_urb);
mutex_lock(&adutux_mutex);
usb_set_intfdata(interface, NULL);
+ mutex_lock(&dev->mtx); /* not interruptible */
+ dev->disconnected = 1;
+ mutex_unlock(&dev->mtx);
+
/* if the device is not opened, then we clean up right now */
if (!dev->open_count)
adu_delete(dev);
diff --git a/drivers/usb/misc/legousbtower.c b/drivers/usb/misc/legousbtower.c
index 0782ac6f5edf..35a46f4a4d96 100644
--- a/drivers/usb/misc/legousbtower.c
+++ b/drivers/usb/misc/legousbtower.c
@@ -185,7 +185,6 @@ static const struct usb_device_id tower_table[] = {
};
MODULE_DEVICE_TABLE (usb, tower_table);
-static DEFINE_MUTEX(open_disc_mutex);
#define LEGO_USB_TOWER_MINOR_BASE 160
@@ -197,6 +196,7 @@ struct lego_usb_tower {
unsigned char minor; /* the starting minor number for this device */
int open_count; /* number of times this port has been opened */
+ unsigned long disconnected:1;
char* read_buffer;
size_t read_buffer_length; /* this much came in */
@@ -296,8 +296,6 @@ static inline void lego_usb_tower_debug_data(struct device *dev,
*/
static inline void tower_delete (struct lego_usb_tower *dev)
{
- tower_abort_transfers (dev);
-
/* free data structures */
usb_free_urb(dev->interrupt_in_urb);
usb_free_urb(dev->interrupt_out_urb);
@@ -338,18 +336,14 @@ static int tower_open (struct inode *inode, struct file *file)
goto exit;
}
- mutex_lock(&open_disc_mutex);
dev = usb_get_intfdata(interface);
-
if (!dev) {
- mutex_unlock(&open_disc_mutex);
retval = -ENODEV;
goto exit;
}
/* lock this device */
if (mutex_lock_interruptible(&dev->lock)) {
- mutex_unlock(&open_disc_mutex);
retval = -ERESTARTSYS;
goto exit;
}
@@ -357,12 +351,9 @@ static int tower_open (struct inode *inode, struct file *file)
/* allow opening only once */
if (dev->open_count) {
- mutex_unlock(&open_disc_mutex);
retval = -EBUSY;
goto unlock_exit;
}
- dev->open_count = 1;
- mutex_unlock(&open_disc_mutex);
/* reset the tower */
result = usb_control_msg (dev->udev,
@@ -402,13 +393,14 @@ static int tower_open (struct inode *inode, struct file *file)
dev_err(&dev->udev->dev,
"Couldn't submit interrupt_in_urb %d\n", retval);
dev->interrupt_in_running = 0;
- dev->open_count = 0;
goto unlock_exit;
}
/* save device in the file's private structure */
file->private_data = dev;
+ dev->open_count = 1;
+
unlock_exit:
mutex_unlock(&dev->lock);
@@ -429,10 +421,9 @@ static int tower_release (struct inode *inode, struct file *file)
if (dev == NULL) {
retval = -ENODEV;
- goto exit_nolock;
+ goto exit;
}
- mutex_lock(&open_disc_mutex);
if (mutex_lock_interruptible(&dev->lock)) {
retval = -ERESTARTSYS;
goto exit;
@@ -444,7 +435,8 @@ static int tower_release (struct inode *inode, struct file *file)
retval = -ENODEV;
goto unlock_exit;
}
- if (dev->udev == NULL) {
+
+ if (dev->disconnected) {
/* the device was unplugged before the file was released */
/* unlock here as tower_delete frees dev */
@@ -462,10 +454,7 @@ static int tower_release (struct inode *inode, struct file *file)
unlock_exit:
mutex_unlock(&dev->lock);
-
exit:
- mutex_unlock(&open_disc_mutex);
-exit_nolock:
return retval;
}
@@ -483,10 +472,9 @@ static void tower_abort_transfers (struct lego_usb_tower *dev)
if (dev->interrupt_in_running) {
dev->interrupt_in_running = 0;
mb();
- if (dev->udev)
- usb_kill_urb (dev->interrupt_in_urb);
+ usb_kill_urb(dev->interrupt_in_urb);
}
- if (dev->interrupt_out_busy && dev->udev)
+ if (dev->interrupt_out_busy)
usb_kill_urb(dev->interrupt_out_urb);
}
@@ -522,7 +510,7 @@ static unsigned int tower_poll (struct file *file, poll_table *wait)
dev = file->private_data;
- if (!dev->udev)
+ if (dev->disconnected)
return POLLERR | POLLHUP;
poll_wait(file, &dev->read_wait, wait);
@@ -569,7 +557,7 @@ static ssize_t tower_read (struct file *file, char __user *buffer, size_t count,
}
/* verify that the device wasn't unplugged */
- if (dev->udev == NULL) {
+ if (dev->disconnected) {
retval = -ENODEV;
pr_err("No device or device unplugged %d\n", retval);
goto unlock_exit;
@@ -655,7 +643,7 @@ static ssize_t tower_write (struct file *file, const char __user *buffer, size_t
}
/* verify that the device wasn't unplugged */
- if (dev->udev == NULL) {
+ if (dev->disconnected) {
retval = -ENODEV;
pr_err("No device or device unplugged %d\n", retval);
goto unlock_exit;
@@ -764,7 +752,7 @@ static void tower_interrupt_in_callback (struct urb *urb)
resubmit:
/* resubmit if we're still running */
- if (dev->interrupt_in_running && dev->udev) {
+ if (dev->interrupt_in_running) {
retval = usb_submit_urb (dev->interrupt_in_urb, GFP_ATOMIC);
if (retval)
dev_err(&dev->udev->dev,
@@ -829,6 +817,7 @@ static int tower_probe (struct usb_interface *interface, const struct usb_device
dev->udev = udev;
dev->open_count = 0;
+ dev->disconnected = 0;
dev->read_buffer = NULL;
dev->read_buffer_length = 0;
@@ -896,8 +885,10 @@ static int tower_probe (struct usb_interface *interface, const struct usb_device
get_version_reply,
sizeof(*get_version_reply),
1000);
- if (result < 0) {
- dev_err(idev, "LEGO USB Tower get version control request failed\n");
+ if (result < sizeof(*get_version_reply)) {
+ if (result >= 0)
+ result = -EIO;
+ dev_err(idev, "get version request failed: %d\n", result);
retval = result;
goto error;
}
@@ -915,7 +906,6 @@ static int tower_probe (struct usb_interface *interface, const struct usb_device
if (retval) {
/* something prevented us from registering this driver */
dev_err(idev, "Not able to get a minor for this device.\n");
- usb_set_intfdata (interface, NULL);
goto error;
}
dev->minor = interface->minor;
@@ -947,23 +937,24 @@ static void tower_disconnect (struct usb_interface *interface)
int minor;
dev = usb_get_intfdata (interface);
- mutex_lock(&open_disc_mutex);
- usb_set_intfdata (interface, NULL);
minor = dev->minor;
- /* give back our minor */
+ /* give back our minor and prevent further open() */
usb_deregister_dev (interface, &tower_class);
+ /* stop I/O */
+ usb_poison_urb(dev->interrupt_in_urb);
+ usb_poison_urb(dev->interrupt_out_urb);
+
mutex_lock(&dev->lock);
- mutex_unlock(&open_disc_mutex);
/* if the device is not opened, then we clean up right now */
if (!dev->open_count) {
mutex_unlock(&dev->lock);
tower_delete (dev);
} else {
- dev->udev = NULL;
+ dev->disconnected = 1;
/* wake up pollers */
wake_up_interruptible_all(&dev->read_wait);
wake_up_interruptible_all(&dev->write_wait);
diff --git a/drivers/usb/misc/usblcd.c b/drivers/usb/misc/usblcd.c
index 0f5ad896c7e3..6a50e22e0a72 100644
--- a/drivers/usb/misc/usblcd.c
+++ b/drivers/usb/misc/usblcd.c
@@ -17,6 +17,7 @@
#include <linux/slab.h>
#include <linux/errno.h>
#include <linux/mutex.h>
+#include <linux/rwsem.h>
#include <linux/uaccess.h>
#include <linux/usb.h>
@@ -56,6 +57,8 @@ struct usb_lcd {
using up all RAM */
struct usb_anchor submitted; /* URBs to wait for
before suspend */
+ struct rw_semaphore io_rwsem;
+ unsigned long disconnected:1;
};
#define to_lcd_dev(d) container_of(d, struct usb_lcd, kref)
@@ -141,6 +144,13 @@ static ssize_t lcd_read(struct file *file, char __user * buffer,
dev = file->private_data;
+ down_read(&dev->io_rwsem);
+
+ if (dev->disconnected) {
+ retval = -ENODEV;
+ goto out_up_io;
+ }
+
/* do a blocking bulk read to get data from the device */
retval = usb_bulk_msg(dev->udev,
usb_rcvbulkpipe(dev->udev,
@@ -157,6 +167,9 @@ static ssize_t lcd_read(struct file *file, char __user * buffer,
retval = bytes_read;
}
+out_up_io:
+ up_read(&dev->io_rwsem);
+
return retval;
}
@@ -236,11 +249,18 @@ static ssize_t lcd_write(struct file *file, const char __user * user_buffer,
if (r < 0)
return -EINTR;
+ down_read(&dev->io_rwsem);
+
+ if (dev->disconnected) {
+ retval = -ENODEV;
+ goto err_up_io;
+ }
+
/* create a urb, and a buffer for it, and copy the data to the urb */
urb = usb_alloc_urb(0, GFP_KERNEL);
if (!urb) {
retval = -ENOMEM;
- goto err_no_buf;
+ goto err_up_io;
}
buf = usb_alloc_coherent(dev->udev, count, GFP_KERNEL,
@@ -277,6 +297,7 @@ static ssize_t lcd_write(struct file *file, const char __user * user_buffer,
the USB core will eventually free it entirely */
usb_free_urb(urb);
+ up_read(&dev->io_rwsem);
exit:
return count;
error_unanchor:
@@ -284,7 +305,8 @@ error_unanchor:
error:
usb_free_coherent(dev->udev, count, buf, urb->transfer_dma);
usb_free_urb(urb);
-err_no_buf:
+err_up_io:
+ up_read(&dev->io_rwsem);
up(&dev->limit_sem);
return retval;
}
@@ -324,6 +346,7 @@ static int lcd_probe(struct usb_interface *interface,
kref_init(&dev->kref);
sema_init(&dev->limit_sem, USB_LCD_CONCURRENT_WRITES);
+ init_rwsem(&dev->io_rwsem);
init_usb_anchor(&dev->submitted);
dev->udev = usb_get_dev(interface_to_usbdev(interface));
@@ -421,6 +444,12 @@ static void lcd_disconnect(struct usb_interface *interface)
/* give back our minor */
usb_deregister_dev(interface, &lcd_class);
+ down_write(&dev->io_rwsem);
+ dev->disconnected = 1;
+ up_write(&dev->io_rwsem);
+
+ usb_kill_anchored_urbs(&dev->submitted);
+
/* decrement our usage count */
kref_put(&dev->kref, lcd_delete);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 320b01580f2e..901aeb7ee81b 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2059,23 +2059,6 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
bool full_sync = 0;
u64 len;
- /*
- * If the inode needs a full sync, make sure we use a full range to
- * avoid log tree corruption, due to hole detection racing with ordered
- * extent completion for adjacent ranges, and assertion failures during
- * hole detection.
- */
- if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
- &BTRFS_I(inode)->runtime_flags)) {
- start = 0;
- end = LLONG_MAX;
- }
-
- /*
- * The range length can be represented by u64, we have to do the typecasts
- * to avoid signed overflow if it's [0, LLONG_MAX] eg. from fsync()
- */
- len = (u64)end - (u64)start + 1;
trace_btrfs_sync_file(file, datasync);
/*
@@ -2093,6 +2076,24 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
full_sync = test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
&BTRFS_I(inode)->runtime_flags);
/*
+ * If the inode needs a full sync, make sure we use a full range to
+ * avoid log tree corruption, due to hole detection racing with ordered
+ * extent completion for adjacent ranges, and assertion failures during
+ * hole detection. Do this while holding the inode lock, to avoid races
+ * with other tasks.
+ */
+ if (full_sync) {
+ start = 0;
+ end = LLONG_MAX;
+ }
+
+ /*
+ * The range length can be represented by u64, we have to do the typecasts
+ * to avoid signed overflow if it's [0, LLONG_MAX] eg. from fsync()
+ */
+ len = (u64)end - (u64)start + 1;
+
+ /*
* We might have have had more pages made dirty after calling
* start_ordered_ops and before acquiring the inode's i_mutex.
*/
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 73dae3757a61..72e45d19016f 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -5241,6 +5241,14 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
goto out_err;
btrfs_dir_item_key_to_cpu(path->nodes[0], di, location);
+ if (location->type != BTRFS_INODE_ITEM_KEY &&
+ location->type != BTRFS_ROOT_ITEM_KEY) {
+ btrfs_warn(root->fs_info,
+"%s gets something invalid in DIR_ITEM (name %s, directory ino %llu, location(%llu %u %llu))",
+ __func__, name, btrfs_ino(BTRFS_I(dir)),
+ location->objectid, location->type, location->offset);
+ goto out_err;
+ }
out:
btrfs_free_path(path);
return ret;
@@ -5564,8 +5572,6 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
return inode;
}
- BUG_ON(location.type != BTRFS_ROOT_ITEM_KEY);
-
index = srcu_read_lock(&fs_info->subvol_srcu);
ret = fixup_tree_root_location(fs_info, dir, dentry,
&location, &sub_root);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 3b2dc4967ac7..0c4faa4764de 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -813,7 +813,12 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
/* update inode */
inode->i_rdev = le32_to_cpu(info->rdev);
- inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1;
+ /* directories have fl_stripe_unit set to zero */
+ if (le32_to_cpu(info->layout.fl_stripe_unit))
+ inode->i_blkbits =
+ fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1;
+ else
+ inode->i_blkbits = CEPH_BLOCK_SHIFT;
__ceph_update_quota(ci, iinfo->max_bytes, iinfo->max_files);
@@ -1950,7 +1955,7 @@ static const struct inode_operations ceph_symlink_iops = {
int __ceph_setattr(struct inode *inode, struct iattr *attr)
{
struct ceph_inode_info *ci = ceph_inode(inode);
- const unsigned int ia_valid = attr->ia_valid;
+ unsigned int ia_valid = attr->ia_valid;
struct ceph_mds_request *req;
struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
struct ceph_cap_flush *prealloc_cf;
@@ -2055,6 +2060,26 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR;
}
}
+ if (ia_valid & ATTR_SIZE) {
+ dout("setattr %p size %lld -> %lld\n", inode,
+ inode->i_size, attr->ia_size);
+ if ((issued & CEPH_CAP_FILE_EXCL) &&
+ attr->ia_size > inode->i_size) {
+ i_size_write(inode, attr->ia_size);
+ inode->i_blocks = calc_inode_blocks(attr->ia_size);
+ ci->i_reported_size = attr->ia_size;
+ dirtied |= CEPH_CAP_FILE_EXCL;
+ ia_valid |= ATTR_MTIME;
+ } else if ((issued & CEPH_CAP_FILE_SHARED) == 0 ||
+ attr->ia_size != inode->i_size) {
+ req->r_args.setattr.size = cpu_to_le64(attr->ia_size);
+ req->r_args.setattr.old_size =
+ cpu_to_le64(inode->i_size);
+ mask |= CEPH_SETATTR_SIZE;
+ release |= CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_EXCL |
+ CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR;
+ }
+ }
if (ia_valid & ATTR_MTIME) {
dout("setattr %p mtime %ld.%ld -> %ld.%ld\n", inode,
inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
@@ -2077,25 +2102,6 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR;
}
}
- if (ia_valid & ATTR_SIZE) {
- dout("setattr %p size %lld -> %lld\n", inode,
- inode->i_size, attr->ia_size);
- if ((issued & CEPH_CAP_FILE_EXCL) &&
- attr->ia_size > inode->i_size) {
- i_size_write(inode, attr->ia_size);
- inode->i_blocks = calc_inode_blocks(attr->ia_size);
- ci->i_reported_size = attr->ia_size;
- dirtied |= CEPH_CAP_FILE_EXCL;
- } else if ((issued & CEPH_CAP_FILE_SHARED) == 0 ||
- attr->ia_size != inode->i_size) {
- req->r_args.setattr.size = cpu_to_le64(attr->ia_size);
- req->r_args.setattr.old_size =
- cpu_to_le64(inode->i_size);
- mask |= CEPH_SETATTR_SIZE;
- release |= CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_EXCL |
- CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR;
- }
- }
/* these do nothing */
if (ia_valid & ATTR_CTIME) {
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index b261af3672cf..54897e034829 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -3824,7 +3824,9 @@ static void delayed_work(struct work_struct *work)
pr_info("mds%d hung\n", s->s_mds);
}
}
- if (s->s_state < CEPH_MDS_SESSION_OPEN) {
+ if (s->s_state == CEPH_MDS_SESSION_NEW ||
+ s->s_state == CEPH_MDS_SESSION_RESTARTING ||
+ s->s_state == CEPH_MDS_SESSION_REJECTED) {
/* this mds is failed or recovering, just wait */
ceph_put_mds_session(s);
continue;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index e8dbeb9e6192..3aeb1ce4de38 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -140,7 +140,7 @@ static inline bool is_error_page(struct page *page)
extern struct kmem_cache *kvm_vcpu_cache;
-extern spinlock_t kvm_lock;
+extern struct mutex kvm_lock;
extern struct list_head vm_list;
struct kvm_io_range {
@@ -1020,12 +1020,18 @@ enum kvm_stat_kind {
struct kvm_stat_data {
int offset;
struct kvm *kvm;
+#ifndef __GENKSYMS__
+ int mode;
+#endif
};
struct kvm_stats_debugfs_item {
const char *name;
int offset;
enum kvm_stat_kind kind;
+#ifndef __GENKSYMS__
+ int mode;
+#endif
};
extern struct kvm_stats_debugfs_item debugfs_entries[];
extern struct dentry *kvm_debugfs_dir;
diff --git a/include/linux/net.h b/include/linux/net.h
index e22c368b32f4..428470518bff 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -225,6 +225,7 @@ enum {
int sock_wake_async(struct socket_wq *sk_wq, int how, int band);
int sock_register(const struct net_proto_family *fam);
void sock_unregister(int family);
+bool sock_is_registered(int family);
int __sock_create(struct net *net, int family, int type, int proto,
struct socket **res, int kern);
int sock_create(int family, int type, int proto, struct socket **res);
diff --git a/include/net/psample.h b/include/net/psample.h
index 8888b0e1a82e..b2054cfc5ac8 100644
--- a/include/net/psample.h
+++ b/include/net/psample.h
@@ -11,6 +11,9 @@ struct psample_group {
u32 group_num;
u32 refcount;
u32 seq;
+#ifndef __GENKSYMS__
+ struct rcu_head rcu;
+#endif
};
struct psample_group *psample_group_get(struct net *net, u32 group_num);
diff --git a/include/net/sock.h b/include/net/sock.h
index 26cf751d797f..124a94046c0c 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1144,6 +1144,7 @@ struct proto {
int proto_register(struct proto *prot, int alloc_slab);
void proto_unregister(struct proto *prot);
+int sock_load_diag_module(int family, int protocol);
#ifdef SOCK_REFCNT_DEBUG
static inline void sk_refcnt_debug_inc(struct sock *sk)
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 48bd2bee750e..20008ab46bd4 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -293,8 +293,8 @@ struct ib_rss_caps {
};
enum ib_tm_cap_flags {
- /* Support tag matching on RC transport */
- IB_TM_CAP_RC = 1 << 0,
+ /* Support tag matching with rendezvous offload for RC transport */
+ IB_TM_CAP_RNDV_RC = 1 << 0,
};
struct ib_tm_caps {
diff --git a/kernel/fork.c b/kernel/fork.c
index 11a880b869a9..867b46d6fd0a 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2521,7 +2521,7 @@ int sysctl_max_threads(struct ctl_table *table, int write,
struct ctl_table t;
int ret;
int threads = max_threads;
- int min = MIN_THREADS;
+ int min = 1;
int max = MAX_THREADS;
t = *table;
@@ -2533,7 +2533,7 @@ int sysctl_max_threads(struct ctl_table *table, int write,
if (ret || !write)
return ret;
- set_max_threads(threads);
+ max_threads = threads;
return 0;
}
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 38e8b578cfcd..cdd565cc332f 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -5548,6 +5548,7 @@ waitagain:
sizeof(struct trace_iterator) -
offsetof(struct trace_iterator, seq));
cpumask_clear(iter->started);
+ trace_seq_init(&iter->seq);
iter->pos = -1;
trace_event_read_lock();
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 43caeefb62fd..bbdf3e3ecb99 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -415,7 +415,7 @@ static int nlmsg_populate_rtr_fill(struct sk_buff *skb,
struct nlmsghdr *nlh;
struct nlattr *nest;
- nlh = nlmsg_put(skb, pid, seq, type, sizeof(*bpm), NLM_F_MULTI);
+ nlh = nlmsg_put(skb, pid, seq, type, sizeof(*bpm), 0);
if (!nlh)
return -EMSGSIZE;
diff --git a/net/core/dev.c b/net/core/dev.c
index b0e333c0d8ac..f2d552fa74d5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -8453,6 +8453,8 @@ int register_netdevice(struct net_device *dev)
ret = notifier_to_errno(ret);
if (ret) {
rollback_registered(dev);
+ rcu_barrier();
+
dev->reg_state = NETREG_UNREGISTERED;
}
/*
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index da8d1d364fd5..e7d7ad0848ce 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -122,7 +122,7 @@ static void queue_process(struct work_struct *work)
txq = netdev_get_tx_queue(dev, q_index);
HARD_TX_LOCK(dev, txq, smp_processor_id());
if (netif_xmit_frozen_or_stopped(txq) ||
- netpoll_start_xmit(skb, dev, txq) != NETDEV_TX_OK) {
+ !dev_xmit_complete(netpoll_start_xmit(skb, dev, txq))) {
skb_queue_head(&npinfo->txq, skb);
HARD_TX_UNLOCK(dev, txq);
local_irq_restore(flags);
@@ -335,7 +335,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
HARD_TX_UNLOCK(dev, txq);
- if (status == NETDEV_TX_OK)
+ if (dev_xmit_complete(status))
break;
}
@@ -352,7 +352,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
}
- if (status != NETDEV_TX_OK) {
+ if (!dev_xmit_complete(status)) {
skb_queue_tail(&npinfo->txq, skb);
schedule_delayed_work(&npinfo->tx_work,0);
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 11050e7d48c6..0fdb37c4a7ca 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3221,6 +3221,25 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
int pos;
int dummy;
+ if (list_skb && !list_skb->head_frag && skb_headlen(list_skb) &&
+ (skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY)) {
+ /* gso_size is untrusted, and we have a frag_list with a linear
+ * non head_frag head.
+ *
+ * (we assume checking the first list_skb member suffices;
+ * i.e if either of the list_skb members have non head_frag
+ * head, then the first one has too).
+ *
+ * If head_skb's headlen does not fit requested gso_size, it
+ * means that the frag_list members do NOT terminate on exact
+ * gso_size boundaries. Hence we cannot perform skb_frag_t page
+ * sharing. Therefore we must fallback to copying the frag_list
+ * skbs; we do so by disabling SG.
+ */
+ if (mss != GSO_BY_FRAGS && mss != skb_headlen(head_skb))
+ features &= ~NETIF_F_SG;
+ }
+
__skb_push(head_skb, doffset);
proto = skb_network_protocol(head_skb, &dummy);
if (unlikely(!proto))
diff --git a/net/core/sock.c b/net/core/sock.c
index 813c8cb70b92..477928919f9b 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -3105,6 +3105,28 @@ void proto_unregister(struct proto *prot)
}
EXPORT_SYMBOL(proto_unregister);
+int sock_load_diag_module(int family, int protocol)
+{
+ if (!protocol) {
+ if (!sock_is_registered(family))
+ return -ENOENT;
+
+ return request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
+ NETLINK_SOCK_DIAG, family);
+ }
+
+#ifdef CONFIG_INET
+ if (family == AF_INET &&
+ protocol != IPPROTO_RAW &&
+ !rcu_access_pointer(inet_protos[protocol]))
+ return -ENOENT;
+#endif
+
+ return request_module("net-pf-%d-proto-%d-type-%d-%d", PF_NETLINK,
+ NETLINK_SOCK_DIAG, family, protocol);
+}
+EXPORT_SYMBOL(sock_load_diag_module);
+
#ifdef CONFIG_PROC_FS
static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(proto_list_mutex)
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index 146b50e30659..c37b5be7c5e4 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -220,8 +220,7 @@ static int __sock_diag_cmd(struct sk_buff *skb, struct nlmsghdr *nlh)
return -EINVAL;
if (sock_diag_handlers[req->sdiag_family] == NULL)
- request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
- NETLINK_SOCK_DIAG, req->sdiag_family);
+ sock_load_diag_module(req->sdiag_family, 0);
mutex_lock(&sock_diag_table_mutex);
hndl = sock_diag_handlers[req->sdiag_family];
@@ -247,8 +246,7 @@ static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
case TCPDIAG_GETSOCK:
case DCCPDIAG_GETSOCK:
if (inet_rcv_compat == NULL)
- request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
- NETLINK_SOCK_DIAG, AF_INET);
+ sock_load_diag_module(AF_INET, 0);
mutex_lock(&sock_diag_table_mutex);
if (inet_rcv_compat != NULL)
@@ -281,14 +279,12 @@ static int sock_diag_bind(struct net *net, int group)
case SKNLGRP_INET_TCP_DESTROY:
case SKNLGRP_INET_UDP_DESTROY:
if (!sock_diag_handlers[AF_INET])
- request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
- NETLINK_SOCK_DIAG, AF_INET);
+ sock_load_diag_module(AF_INET, 0);
break;
case SKNLGRP_INET6_TCP_DESTROY:
case SKNLGRP_INET6_UDP_DESTROY:
if (!sock_diag_handlers[AF_INET6])
- request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
- NETLINK_SOCK_DIAG, AF_INET6);
+ sock_load_diag_module(AF_INET6, 0);
break;
}
return 0;
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index d5ecc230c0e9..88c043c3c80b 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -53,8 +53,7 @@ static DEFINE_MUTEX(inet_diag_table_mutex);
static const struct inet_diag_handler *inet_diag_lock_handler(int proto)
{
if (!inet_diag_table[proto])
- request_module("net-pf-%d-proto-%d-type-%d-%d", PF_NETLINK,
- NETLINK_SOCK_DIAG, AF_INET, proto);
+ sock_load_diag_module(AF_INET, proto);
mutex_lock(&inet_diag_table_mutex);
if (!inet_diag_table[proto])
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 4818b92c7782..c21944f72956 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -907,6 +907,22 @@ static int tcp_send_mss(struct sock *sk, int *size_goal, int flags)
return mss_now;
}
+/* In some cases, both sendpage() and sendmsg() could have added
+ * an skb to the write queue, but failed adding payload on it.
+ * We need to remove it to consume less memory, but more
+ * importantly be able to generate EPOLLOUT for Edge Trigger epoll()
+ * users.
+ */
+static void tcp_remove_empty_skb(struct sock *sk, struct sk_buff *skb)
+{
+ if (skb && !skb->len &&
+ TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq) {
+ tcp_unlink_write_queue(skb, sk);
+ tcp_check_send_head(sk, skb);
+ sk_wmem_free_skb(sk, skb);
+ }
+}
+
ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
size_t size, int flags)
{
@@ -1029,6 +1045,7 @@ out:
return copied;
do_error:
+ tcp_remove_empty_skb(sk, tcp_write_queue_tail(sk));
if (copied)
goto out;
out_err:
@@ -1379,17 +1396,11 @@ out:
out_nopush:
return copied + copied_syn;
+do_error:
+ skb = tcp_write_queue_tail(sk);
do_fault:
- if (!skb->len) {
- tcp_unlink_write_queue(skb, sk);
- /* It is the one place in all of TCP, except connection
- * reset, where we can be unlinking the send_head.
- */
- tcp_check_send_head(sk, skb);
- sk_wmem_free_skb(sk, skb);
- }
+ tcp_remove_empty_skb(sk, skb);
-do_error:
if (copied + copied_syn)
goto out;
out_err:
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 4cf9d5d3ee38..dc52fc1bbcbd 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -270,7 +270,7 @@ static void tcp_ecn_accept_cwr(struct tcp_sock *tp, const struct sk_buff *skb)
static void tcp_ecn_withdraw_cwr(struct tcp_sock *tp)
{
- tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
+ tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR;
}
static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 51024b1dda3d..8a558fed76f1 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2083,7 +2083,7 @@ static bool tcp_can_coalesce_send_queue_head(struct sock *sk, int len)
if (len <= skb->len)
break;
- if (unlikely(TCP_SKB_CB(skb)->eor))
+ if (unlikely(TCP_SKB_CB(skb)->eor) || tcp_has_tx_tstamp(skb))
return false;
len -= skb->len;
@@ -2206,6 +2206,7 @@ static int tcp_mtu_probe(struct sock *sk)
* we need to propagate it to the new skb.
*/
TCP_SKB_CB(nskb)->eor = TCP_SKB_CB(skb)->eor;
+ tcp_skb_collapse_tstamp(nskb, skb);
tcp_unlink_write_queue(skb, sk);
sk_wmem_free_skb(sk, skb);
} else {
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 3c6479b32b97..73fe11cf691a 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -773,12 +773,13 @@ static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im)
im->mca_crcount = idev->mc_qrv;
im->mca_sfmode = pmc->mca_sfmode;
if (pmc->mca_sfmode == MCAST_INCLUDE) {
- im->mca_tomb = pmc->mca_tomb;
- im->mca_sources = pmc->mca_sources;
+ swap(im->mca_tomb, pmc->mca_tomb);
+ swap(im->mca_sources, pmc->mca_sources);
for (psf = im->mca_sources; psf; psf = psf->sf_next)
psf->sf_crcount = im->mca_crcount;
}
in6_dev_put(pmc->idev);
+ ip6_mc_clear_src(pmc);
kfree(pmc);
}
spin_unlock_bh(&im->mca_lock);
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index ac826dd338ff..d5cdba8213a4 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -233,7 +233,7 @@ static int __net_init ping_v6_proc_init_net(struct net *net)
return ping_proc_register(net, &ping_v6_seq_afinfo);
}
-static void __net_init ping_v6_proc_exit_net(struct net *net)
+static void __net_exit ping_v6_proc_exit_net(struct net *net)
{
return ping_proc_unregister(net, &ping_v6_seq_afinfo);
}
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index fb7afcaa3004..33ad7e25a89d 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -1012,10 +1012,13 @@ static int llcp_sock_create(struct net *net, struct socket *sock,
sock->type != SOCK_RAW)
return -ESOCKTNOSUPPORT;
- if (sock->type == SOCK_RAW)
+ if (sock->type == SOCK_RAW) {
+ if (!capable(CAP_NET_RAW))
+ return -EPERM;
sock->ops = &llcp_rawsock_ops;
- else
+ } else {
sock->ops = &llcp_sock_ops;
+ }
sk = nfc_llcp_sock_alloc(sock, sock->type, GFP_ATOMIC, kern);
if (sk == NULL)
diff --git a/net/psample/psample.c b/net/psample/psample.c
index 3a6ad0f438dc..3da7063549d9 100644
--- a/net/psample/psample.c
+++ b/net/psample/psample.c
@@ -156,7 +156,7 @@ static void psample_group_destroy(struct psample_group *group)
{
psample_group_notify(group, PSAMPLE_CMD_DEL_GROUP);
list_del(&group->list);
- kfree(group);
+ kfree_rcu(group, rcu);
}
static struct psample_group *
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index d8a60011567f..f374a88a47b5 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -99,7 +99,8 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
s->tcf_action = parm->action;
s->rate = rate;
s->psample_group_num = psample_group_num;
- RCU_INIT_POINTER(s->psample_group, psample_group);
+ rcu_swap_protected(s->psample_group, psample_group,
+ lockdep_is_held(&s->tcf_lock));
if (tb[TCA_SAMPLE_TRUNC_SIZE]) {
s->truncate = true;
@@ -107,6 +108,9 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
}
spin_unlock_bh(&s->tcf_lock);
+ if (psample_group)
+ psample_group_put(psample_group);
+
if (ret == ACT_P_CREATED)
tcf_idr_insert(tn, *a);
return ret;
diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c
index ee775380ac11..186d1bf72110 100644
--- a/net/sched/sch_hhf.c
+++ b/net/sched/sch_hhf.c
@@ -529,7 +529,7 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt,
new_hhf_non_hh_weight = nla_get_u32(tb[TCA_HHF_NON_HH_WEIGHT]);
non_hh_quantum = (u64)new_quantum * new_hhf_non_hh_weight;
- if (non_hh_quantum > INT_MAX)
+ if (non_hh_quantum == 0 || non_hh_quantum > INT_MAX)
return -EINVAL;
sch_tree_lock(sch);
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index b58adcf1a971..1c5bacb69ede 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1346,7 +1346,7 @@ static int __net_init sctp_ctrlsock_init(struct net *net)
return status;
}
-static void __net_init sctp_ctrlsock_exit(struct net *net)
+static void __net_exit sctp_ctrlsock_exit(struct net *net)
{
/* Free the control endpoint. */
inet_ctl_sock_destroy(net->sctp.ctl_sock);
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index fdaf8ed9e391..e3e0ec467030 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -541,7 +541,7 @@ static void sctp_do_8_2_transport_strike(sctp_cmd_seq_t *commands,
if (net->sctp.pf_enable &&
(transport->state == SCTP_ACTIVE) &&
(transport->error_count < transport->pathmaxrxt) &&
- (transport->error_count > asoc->pf_retrans)) {
+ (transport->error_count > transport->pf_retrans)) {
sctp_assoc_control_transport(asoc, transport,
SCTP_TRANSPORT_PF,
diff --git a/net/socket.c b/net/socket.c
index 47be9d784660..1df3055a3c4f 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2605,6 +2605,11 @@ void sock_unregister(int family)
}
EXPORT_SYMBOL(sock_unregister);
+bool sock_is_registered(int family)
+{
+ return family < NPROTO && rcu_access_pointer(net_families[family]);
+}
+
static int __init sock_init(void)
{
int err;
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index 23f8899e0f8c..7ebcaff8c1c4 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -224,7 +224,8 @@ static void tipc_publ_purge(struct net *net, struct publication *publ, u32 addr)
publ->key);
}
- kfree_rcu(p, rcu);
+ if (p)
+ kfree_rcu(p, rcu);
}
/**
diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c
index c434f193f39a..5a2fdbd5244c 100644
--- a/net/wireless/wext-sme.c
+++ b/net/wireless/wext-sme.c
@@ -201,6 +201,7 @@ int cfg80211_mgd_wext_giwessid(struct net_device *dev,
struct iw_point *data, char *ssid)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
+ int ret = 0;
/* call only for station! */
if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION))
@@ -218,7 +219,10 @@ int cfg80211_mgd_wext_giwessid(struct net_device *dev,
if (ie) {
data->flags = 1;
data->length = ie[1];
- memcpy(ssid, ie + 2, data->length);
+ if (data->length > IW_ESSID_MAX_SIZE)
+ ret = -EINVAL;
+ else
+ memcpy(ssid, ie + 2, data->length);
}
rcu_read_unlock();
} else if (wdev->wext.connect.ssid && wdev->wext.connect.ssid_len) {
@@ -228,7 +232,7 @@ int cfg80211_mgd_wext_giwessid(struct net_device *dev,
}
wdev_unlock(wdev);
- return 0;
+ return ret;
}
int cfg80211_mgd_wext_siwap(struct net_device *dev,
diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile
index a601e56741e0..4a1b8ea720d7 100644
--- a/tools/objtool/Makefile
+++ b/tools/objtool/Makefile
@@ -29,7 +29,7 @@ all: $(OBJTOOL)
INCLUDES := -I$(srctree)/tools/include \
-I$(srctree)/tools/arch/$(HOSTARCH)/include/uapi \
-I$(srctree)/tools/objtool/arch/$(ARCH)/include
-CFLAGS += -Wall -Werror $(EXTRA_WARNINGS) -Wno-switch-default -Wno-switch-enum -fomit-frame-pointer -O2 -g $(INCLUDES)
+CFLAGS := -Wall -Werror $(EXTRA_WARNINGS) -Wno-switch-default -Wno-switch-enum -fomit-frame-pointer -O2 -g $(INCLUDES)
LDFLAGS += -lelf $(LIBSUBCMD)
# Allow old libelf to be used:
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 2417978cd268..ab2ca4758055 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -92,7 +92,7 @@ EXPORT_SYMBOL_GPL(halt_poll_ns_shrink);
* kvm->lock --> kvm->slots_lock --> kvm->irq_lock
*/
-DEFINE_SPINLOCK(kvm_lock);
+DEFINE_MUTEX(kvm_lock);
static DEFINE_RAW_SPINLOCK(kvm_count_lock);
LIST_HEAD(vm_list);
@@ -658,8 +658,9 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
stat_data->kvm = kvm;
stat_data->offset = p->offset;
+ stat_data->mode = p->mode ? p->mode : 0644;
kvm->debugfs_stat_data[p - debugfs_entries] = stat_data;
- if (!debugfs_create_file(p->name, 0644,
+ if (!debugfs_create_file(p->name, stat_data->mode,
kvm->debugfs_dentry,
stat_data,
stat_fops_per_vm[p->kind]))
@@ -729,9 +730,9 @@ static struct kvm *kvm_create_vm(unsigned long type)
if (r)
goto out_err;
- spin_lock(&kvm_lock);
+ mutex_lock(&kvm_lock);
list_add(&kvm->vm_list, &vm_list);
- spin_unlock(&kvm_lock);
+ mutex_unlock(&kvm_lock);
preempt_notifier_inc();
@@ -777,9 +778,9 @@ static void kvm_destroy_vm(struct kvm *kvm)
kvm_uevent_notify_change(KVM_EVENT_DESTROY_VM, kvm);
kvm_destroy_vm_debugfs(kvm);
kvm_arch_sync_events(kvm);
- spin_lock(&kvm_lock);
+ mutex_lock(&kvm_lock);
list_del(&kvm->vm_list);
- spin_unlock(&kvm_lock);
+ mutex_unlock(&kvm_lock);
kvm_free_irq_routing(kvm);
for (i = 0; i < KVM_NR_BUSES; i++) {
struct kvm_io_bus *bus = kvm_get_bus(kvm, i);
@@ -3767,7 +3768,9 @@ static int kvm_debugfs_open(struct inode *inode, struct file *file,
if (!refcount_inc_not_zero(&stat_data->kvm->users_count))
return -ENOENT;
- if (simple_attr_open(inode, file, get, set, fmt)) {
+ if (simple_attr_open(inode, file, get,
+ stat_data->mode & S_IWUGO ? set : NULL,
+ fmt)) {
kvm_put_kvm(stat_data->kvm);
return -ENOMEM;
}
@@ -3881,13 +3884,13 @@ static int vm_stat_get(void *_offset, u64 *val)
u64 tmp_val;
*val = 0;
- spin_lock(&kvm_lock);
+ mutex_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list) {
stat_tmp.kvm = kvm;
vm_stat_get_per_vm((void *)&stat_tmp, &tmp_val);
*val += tmp_val;
}
- spin_unlock(&kvm_lock);
+ mutex_unlock(&kvm_lock);
return 0;
}
@@ -3900,12 +3903,12 @@ static int vm_stat_clear(void *_offset, u64 val)
if (val)
return -EINVAL;
- spin_lock(&kvm_lock);
+ mutex_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list) {
stat_tmp.kvm = kvm;
vm_stat_clear_per_vm((void *)&stat_tmp, 0);
}
- spin_unlock(&kvm_lock);
+ mutex_unlock(&kvm_lock);
return 0;
}
@@ -3920,13 +3923,13 @@ static int vcpu_stat_get(void *_offset, u64 *val)
u64 tmp_val;
*val = 0;
- spin_lock(&kvm_lock);
+ mutex_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list) {
stat_tmp.kvm = kvm;
vcpu_stat_get_per_vm((void *)&stat_tmp, &tmp_val);
*val += tmp_val;
}
- spin_unlock(&kvm_lock);
+ mutex_unlock(&kvm_lock);
return 0;
}
@@ -3939,12 +3942,12 @@ static int vcpu_stat_clear(void *_offset, u64 val)
if (val)
return -EINVAL;
- spin_lock(&kvm_lock);
+ mutex_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list) {
stat_tmp.kvm = kvm;
vcpu_stat_clear_per_vm((void *)&stat_tmp, 0);
}
- spin_unlock(&kvm_lock);
+ mutex_unlock(&kvm_lock);
return 0;
}
@@ -3965,7 +3968,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm)
if (!kvm_dev.this_device || !kvm)
return;
- spin_lock(&kvm_lock);
+ mutex_lock(&kvm_lock);
if (type == KVM_EVENT_CREATE_VM) {
kvm_createvm_count++;
kvm_active_vms++;
@@ -3974,7 +3977,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm)
}
created = kvm_createvm_count;
active = kvm_active_vms;
- spin_unlock(&kvm_lock);
+ mutex_unlock(&kvm_lock);
env = kzalloc(sizeof(*env), GFP_KERNEL);
if (!env)
@@ -4018,7 +4021,8 @@ static int kvm_init_debug(void)
kvm_debugfs_num_entries = 0;
for (p = debugfs_entries; p->name; ++p, kvm_debugfs_num_entries++) {
- if (!debugfs_create_file(p->name, 0644, kvm_debugfs_dir,
+ int mode = p->mode ? p->mode : 0644;
+ if (!debugfs_create_file(p->name, mode, kvm_debugfs_dir,
(void *)(long)p->offset,
stat_fops[p->kind]))
goto out_dir;