Home Home > GIT Browse > linux-next
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorOlaf Hering <ohering@suse.de>2019-10-07 12:53:34 +0200
committerOlaf Hering <ohering@suse.de>2019-10-07 12:53:34 +0200
commit46c227c830cfb927be15835bac9e470a363d04f8 (patch)
tree647f6903ce8f15c9878a6a580d2d58209eaac800
parente201004f90e6bfdc8ffb2183a8f76a9b13f14542 (diff)
parentb4f8fefc84be00ae3d92bdf4c6083184379d0301 (diff)
Merge remote-tracking branch 'kerncvs/SLE12-SP4' into SLE12-SP4-AZURErpm-4.12.14-6.26--sle12-sp4-updatesrpm-4.12.14-6.26
-rw-r--r--blacklist.conf6
-rw-r--r--patches.kabi/0001-NFS-Ensure-we-commit-after-writeback-is-complete.kabi10
-rw-r--r--patches.kabi/NFSv4-Fix-OPEN-CLOSE-race.patch77
-rw-r--r--patches.suse/0001-btrfs-qgroup-Fix-the-wrong-target-io_tree-when-freei.patch84
-rw-r--r--patches.suse/0001-btrfs-relocation-fix-use-after-free-on-dead-relocati.patch212
-rw-r--r--patches.suse/0002-btrfs-qgroup-Fix-reserved-data-space-leak-if-we-have.patch90
-rw-r--r--patches.suse/0043-Restrict-dev-mem-and-dev-kmem-when-the-kernel-is-loc.patch57
-rw-r--r--patches.suse/0051-x86-Lock-down-IO-port-access-when-the-kernel-is-lock.patch14
-rw-r--r--patches.suse/KVM-PPC-Book3S-HV-Fix-lockdep-warning-when-entering-.patch96
-rw-r--r--patches.suse/NFS-Don-t-interrupt-file-writeout-due-to-fatal-error.patch31
-rw-r--r--patches.suse/NFS-Don-t-open-code-clearing-of-delegation-state.patch75
-rw-r--r--patches.suse/NFS-Ensure-O_DIRECT-reports-an-error-if-the-bytes-re.patch84
-rw-r--r--patches.suse/NFS-Fix-regression-whereby-fscache-errors-are-appear.patch78
-rw-r--r--patches.suse/NFS-Forbid-setting-AF_INET6-to-struct-sockaddr_in-si.patch42
-rw-r--r--patches.suse/NFS-Refactor-nfs_lookup_revalidate.patch293
-rw-r--r--patches.suse/NFS-Remove-redundant-semicolon.patch29
-rw-r--r--patches.suse/NFS4-Fix-v4.0-client-state-corruption-when-mount.patch45
-rw-r--r--patches.suse/NFSv4-Check-the-return-value-of-update_open_stateid.patch49
-rw-r--r--patches.suse/NFSv4-Fix-OPEN-CLOSE-race.patch335
-rw-r--r--patches.suse/NFSv4-Fix-a-potential-sleep-while-atomic-in-nfs4_do_.patch135
-rw-r--r--patches.suse/NFSv4-Fix-an-Oops-in-nfs4_do_setattr.patch32
-rw-r--r--patches.suse/NFSv4-Fix-delegation-state-recovery.patch110
-rw-r--r--patches.suse/NFSv4-Fix-lookup-revalidate-of-regular-files.patch148
-rw-r--r--patches.suse/NFSv4-Handle-the-special-Linux-file-open-access-mode.patch47
-rw-r--r--patches.suse/NFSv4-Only-pass-the-delegation-to-setattr-if-we-re-s.patch57
-rw-r--r--patches.suse/NFSv4-pnfs-Fix-a-page-lock-leak-in-nfs_pageio_resend.patch52
-rw-r--r--patches.suse/NFSv4.1-Again-fix-a-race-where-CB_NOTIFY_LOCK-fails-.patch89
-rw-r--r--patches.suse/NFSv4.1-Fix-open-stateid-recovery.patch168
-rw-r--r--patches.suse/NFSv4.1-Only-reap-expired-delegations.patch64
-rw-r--r--patches.suse/PNFS-fallback-to-MDS-if-no-deviceid-found.patch32
-rw-r--r--patches.suse/SUNRPC-Handle-connection-breakages-correctly-in-call.patch29
-rw-r--r--patches.suse/SUNRPC-fix-regression-in-umount-of-a-secure-mount.patch36
-rw-r--r--patches.suse/SUNRPC-nfs-Fix-return-value-for-nfs4_callback_compou.patch103
-rw-r--r--patches.suse/ceph-use-ceph_evict_inode-to-cleanup-inode-s-resource.patch90
-rw-r--r--patches.suse/eeprom-at24-make-spd-world-readable-again.patch37
-rw-r--r--patches.suse/ftrace-x86-remove-possible-deadlock-between-register_kprobe-and-ftrace_run_update_code.patch182
-rw-r--r--patches.suse/module-fix-livepatch-ftrace-module-text-permissions-race.patch170
-rw-r--r--patches.suse/net-ibmvnic-prevent-more-than-one-thread-from-runnin.patch180
-rw-r--r--patches.suse/net-ibmvnic-unlock-rtnl_lock-in-reset-so-linkwatch_e.patch411
-rw-r--r--patches.suse/nfsd-Don-t-release-the-callback-slot-unless-it-was-a.patch81
-rw-r--r--patches.suse/nvmem-use-the-same-permissions-for-eeprom-as-for-nvmem.patch50
-rw-r--r--patches.suse/pNFS-flexfiles-Turn-off-soft-RPC-calls.patch42
-rw-r--r--patches.suse/pnfs-flexfiles-Fix-PTR_ERR-dereferences-in-ff_layout.patch31
-rw-r--r--patches.suse/powerpc-64s-Remove-POWER9-DD1-support.patch7
-rw-r--r--patches.suse/powerpc-64s-radix-Fix-MADV_-FREE-DONTNEED-TLB-flush-.patch201
-rw-r--r--patches.suse/powerpc-64s-radix-Fix-preempt-imbalance-in-TLB-flush.patch39
-rw-r--r--patches.suse/powerpc-64s-radix-Implement-_tlbie-l-_va_range-flush.patch189
-rw-r--r--patches.suse/powerpc-64s-radix-Improve-TLB-flushing-for-page-tabl.patch217
-rw-r--r--patches.suse/powerpc-64s-radix-Improve-preempt-handling-in-TLB-co.patch164
-rw-r--r--patches.suse/powerpc-64s-radix-Introduce-local-single-page-ceilin.patch99
-rw-r--r--patches.suse/powerpc-64s-radix-Optimize-TLB-range-flush-barriers.patch54
-rw-r--r--patches.suse/powerpc-64s-radix-Optimize-flush_tlb_range.patch275
-rw-r--r--patches.suse/powerpc-Drop-page_is_ram-and-walk_system_ram_range.patch142
-rw-r--r--patches.suse/powerpc-book3s64-mm-Don-t-do-tlbie-fixup-for-some-ha.patch80
-rw-r--r--patches.suse/powerpc-book3s64-radix-Rename-CPU_FTR_P9_TLBIE_BUG-f.patch121
-rw-r--r--patches.suse/powerpc-irq-Don-t-WARN-continuously-in-arch_local_ir.patch38
-rw-r--r--patches.suse/powerpc-irq-drop-arch_early_irq_init.patch40
-rw-r--r--patches.suse/powerpc-mm-Fix-typo-in-comments.patch18
-rw-r--r--patches.suse/powerpc-mm-Fixup-tlbie-vs-mtpidr-mtlpidr-ordering-is.patch347
-rw-r--r--patches.suse/powerpc-mm-Fixup-tlbie-vs-store-ordering-issue-on-PO.patch67
-rw-r--r--patches.suse/powerpc-mm-Simplify-page_is_ram-by-using-memblock_is.patch43
-rw-r--r--patches.suse/powerpc-mm-Use-memblock-API-for-PPC32-page_is_ram.patch42
-rw-r--r--patches.suse/powerpc-mm-Workaround-Nest-MMU-bug-with-TLB-invalida.patch77
-rw-r--r--patches.suse/powerpc-mm-radix-Drop-unneeded-NULL-check.patch112
-rw-r--r--patches.suse/powerpc-mm-radix-Move-the-functions-that-does-the-ac.patch30
-rw-r--r--patches.suse/powerpc-mm-radix-implement-LPID-based-TLB-flushes-to.patch296
-rw-r--r--patches.suse/powerpc-module64-Fix-comment-in-R_PPC64_ENTRY-handli.patch35
-rw-r--r--patches.suse/powerpc-powernv-Fix-compile-without-CONFIG_TRACEPOIN.patch35
-rw-r--r--patches.suse/powerpc-powernv-move-OPAL-call-wrapper-tracing-and-i.patch719
-rw-r--r--patches.suse/powerpc-powernv-npu-Remove-obsolete-comment-about-TC.patch34
-rw-r--r--patches.suse/powerpc-pseries-Call-H_BLOCK_REMOVE-when-supported.patch137
-rw-r--r--patches.suse/powerpc-pseries-Read-TLB-Block-Invalidate-Characteri.patch207
-rw-r--r--patches.suse/powerpc-pseries-memory-hotplug-Fix-return-value-type.patch186
-rw-r--r--patches.suse/powerpc-xive-Fix-bogus-error-code-returned-by-OPAL.patch92
-rw-r--r--patches.suse/ppp-Fix-memory-leak-in-ppp_write.patch59
-rw-r--r--patches.suse/scsi-scsi_dh_rdac-zero-cdb-in-send_mode_select.patch44
-rw-r--r--series.conf67
77 files changed, 7824 insertions, 572 deletions
diff --git a/blacklist.conf b/blacklist.conf
index 5f12b1523d..93f81e00d7 100644
--- a/blacklist.conf
+++ b/blacklist.conf
@@ -628,6 +628,7 @@ d125f3f866df88da5a85df00291f88f0baa89f7c # ftrace: see above 8114865ff82e200b383
b1b35f2e218a5b57d03bbc3b0667d5064570dc60 # ftrace: see above 8114865ff82e200b383
3cec638b3d793b7cacdec5b8072364b41caeb0e1 # ftrace: memory leak when set_trigger_filter() fails; should not normally happen; not worth it
2840f84f74035e5a535959d5f17269c69fa6edc5 # ftrace: memory leak in pretty advanced scenario
+9f255b632bf12c4dd7fc31caee89aa991ef75176 # livepatch/ftrace: fixes a potential deadlock when loading livepatch and another module in parallel; rather theoretical; also opens can of worms with other possible deadlocks; more than 6 locks involved; see bsc#1152325
c4ff91dd40e2253ab6dd028011469c2c694e1e19 # drm/amd/pp: initialize result to before or'ing in data
9c60583c0b0fd6f3a5b61fda3eb604ce218b9d25 # breaks KABI
@@ -1380,3 +1381,8 @@ c03cd7738a83b13739f00546166969342c8ff014 # not applicable, bsc#1146664
b636fd38dc40113f853337a7d2a6885ad23b8811 # non-functional prereq for c03cd7738a83b13739f00546166969342c8ff014
1b7aebf0487613033aff26420e32fa2076d52846 # doesn't trigger here
3751e008da0df4384031bd66a516c0292f915605 # depends on patch that breaks kABI
+26e53d5ebe2e2a5ff7343e820f0ffd69dd503f8e # obsoleted by dffe8449c5dd63ff18b47709de75553586582cd8
+4f916593be9da38c5cf0d3a5c386b57beb70f422 # duplicate of ddfd151f3def9258397fcde7a372205a2d661903
+813af51f5d30a2da6a2523c08465f9726e51772e # clang not supported
+aea447141c7e7824b81b49acd1bc785506fba46e # clang not supported
+a521c44c3ded9fe184c5de3eed3a442af2d26f00 # book3e not supported
diff --git a/patches.kabi/0001-NFS-Ensure-we-commit-after-writeback-is-complete.kabi b/patches.kabi/0001-NFS-Ensure-we-commit-after-writeback-is-complete.kabi
index 4b69456df8..562a2080ec 100644
--- a/patches.kabi/0001-NFS-Ensure-we-commit-after-writeback-is-complete.kabi
+++ b/patches.kabi/0001-NFS-Ensure-we-commit-after-writeback-is-complete.kabi
@@ -105,18 +105,18 @@ Signed-off-by: NeilBrown <neilb@suse.com>
}
static bool nfs_match_lock_context(const struct nfs_lock_context *l1,
-@@ -1239,7 +1249,7 @@ int nfs_pageio_resend(struct nfs_pageio_
+@@ -1254,7 +1264,7 @@ int nfs_pageio_resend(struct nfs_pageio_
{
- LIST_HEAD(failed);
+ LIST_HEAD(pages);
- desc->pg_io_completion = hdr->io_completion;
+ *pg_io_completion(desc) = hdr->io_completion;
desc->pg_dreq = hdr->dreq;
- while (!list_empty(&hdr->pages)) {
- struct nfs_page *req = nfs_list_entry(hdr->pages.next);
+ list_splice_init(&hdr->pages, &pages);
+ while (!list_empty(&pages)) {
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
-@@ -740,7 +740,7 @@ int nfs_writepages(struct address_space
+@@ -744,7 +744,7 @@ int nfs_writepages(struct address_space
nfs_pageio_init_write(&pgio, inode, wb_priority(wbc), false,
&nfs_async_write_completion_ops);
diff --git a/patches.kabi/NFSv4-Fix-OPEN-CLOSE-race.patch b/patches.kabi/NFSv4-Fix-OPEN-CLOSE-race.patch
new file mode 100644
index 0000000000..6126a53991
--- /dev/null
+++ b/patches.kabi/NFSv4-Fix-OPEN-CLOSE-race.patch
@@ -0,0 +1,77 @@
+From: NeilBrown <neilb@suse.de>
+Subject: Fix kabi for: NFSv4: Fix OPEN / CLOSE race
+References: git-fixes
+Patch-mainline: Never, kabi
+
+Adding a waitq to the nfs4_state breaks the kabi.
+So instead use wait_on_bit which provides its own
+waitq.
+
+Signed-off-by: NeilBrown <neilb@suse.de>
+---
+ fs/nfs/nfs4_fs.h | 2 --
+ fs/nfs/nfs4proc.c | 10 ++++++----
+ fs/nfs/nfs4state.c | 1 -
+ 3 files changed, 6 insertions(+), 7 deletions(-)
+
+--- a/fs/nfs/nfs4_fs.h
++++ b/fs/nfs/nfs4_fs.h
+@@ -185,8 +185,6 @@ struct nfs4_state {
+ unsigned int n_rdwr; /* Number of read/write references */
+ fmode_t state; /* State on the server (R,W, or RW) */
+ atomic_t count;
+-
+- wait_queue_head_t waitq;
+ };
+
+
+--- a/fs/nfs/nfs4proc.c
++++ b/fs/nfs/nfs4proc.c
+@@ -1399,7 +1399,7 @@ static bool nfs_open_stateid_recover_ope
+ static void nfs_state_log_update_open_stateid(struct nfs4_state *state)
+ {
+ if (test_and_clear_bit(NFS_STATE_CHANGE_WAIT, &state->flags))
+- wake_up_all(&state->waitq);
++ wake_up_bit(&state->flags, NFS_STATE_CHANGE_WAIT);
+ }
+
+ static void nfs_state_log_out_of_order_open_stateid(struct nfs4_state *state,
+@@ -1519,7 +1519,9 @@ static void nfs_clear_open_stateid(struc
+ static void nfs_set_open_stateid_locked(struct nfs4_state *state,
+ const nfs4_stateid *stateid, nfs4_stateid *freeme)
+ {
+- DEFINE_WAIT(wait);
++ struct wait_queue_head *wq_head = bit_waitqueue(&state->flags,
++ NFS_STATE_CHANGE_WAIT);
++ DEFINE_WAIT_BIT(wbq_entry, &state->flags, NFS_STATE_CHANGE_WAIT);
+ int status = 0;
+ for (;;) {
+
+@@ -1533,7 +1535,7 @@ static void nfs_set_open_stateid_locked(
+ if (!nfs4_has_session(NFS_SERVER(state->inode)->nfs_client))
+ break;
+
+- prepare_to_wait(&state->waitq, &wait, TASK_KILLABLE);
++ prepare_to_wait(wq_head, &wbq_entry.wq_entry, TASK_KILLABLE);
+ /*
+ * Ensure we process the state changes in the same order
+ * in which the server processed them by delaying the
+@@ -1549,7 +1551,7 @@ static void nfs_set_open_stateid_locked(
+ status = 0;
+ } else
+ status = -EINTR;
+- finish_wait(&state->waitq, &wait);
++ finish_wait(wq_head, &wbq_entry.wq_entry);
+ rcu_read_lock();
+ spin_lock(&state->owner->so_lock);
+ write_seqlock(&state->seqlock);
+--- a/fs/nfs/nfs4state.c
++++ b/fs/nfs/nfs4state.c
+@@ -658,7 +658,6 @@ nfs4_alloc_open_state(void)
+ INIT_LIST_HEAD(&state->lock_states);
+ spin_lock_init(&state->state_lock);
+ seqlock_init(&state->seqlock);
+- init_waitqueue_head(&state->waitq);
+ return state;
+ }
+
diff --git a/patches.suse/0001-btrfs-qgroup-Fix-the-wrong-target-io_tree-when-freei.patch b/patches.suse/0001-btrfs-qgroup-Fix-the-wrong-target-io_tree-when-freei.patch
new file mode 100644
index 0000000000..f0973886fb
--- /dev/null
+++ b/patches.suse/0001-btrfs-qgroup-Fix-the-wrong-target-io_tree-when-freei.patch
@@ -0,0 +1,84 @@
+From bab32fc069ce8829c416e8737c119f62a57970f9 Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Mon, 16 Sep 2019 20:02:38 +0800
+Patch-mainline: v5.4-rc1
+Git-commit: bab32fc069ce8829c416e8737c119f62a57970f9
+References: bsc#1152974
+Subject: [PATCH 1/2] btrfs: qgroup: Fix the wrong target io_tree when freeing
+ reserved data space
+
+[BUG]
+Under the following case with qgroup enabled, if some error happened
+after we have reserved delalloc space, then in error handling path, we
+could cause qgroup data space leakage:
+
+From btrfs_truncate_block() in inode.c:
+
+ ret = btrfs_delalloc_reserve_space(inode, &data_reserved,
+ block_start, blocksize);
+ if (ret)
+ goto out;
+
+ again:
+ page = find_or_create_page(mapping, index, mask);
+ if (!page) {
+ btrfs_delalloc_release_space(inode, data_reserved,
+ block_start, blocksize, true);
+ btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize, true);
+ ret = -ENOMEM;
+ goto out;
+ }
+
+[CAUSE]
+In the above case, btrfs_delalloc_reserve_space() will call
+btrfs_qgroup_reserve_data() and mark the io_tree range with
+EXTENT_QGROUP_RESERVED flag.
+
+In the error handling path, we have the following call stack:
+btrfs_delalloc_release_space()
+|- btrfs_free_reserved_data_space()
+ |- btrfs_qgroup_free_data()
+ |- __btrfs_qgroup_release_data(reserved=@reserved, free=1)
+ |- qgroup_free_reserved_data(reserved=@reserved)
+ |- clear_record_extent_bits();
+ |- freed += changeset.bytes_changed;
+
+However due to a completion bug, qgroup_free_reserved_data() will clear
+EXTENT_QGROUP_RESERVED flag in BTRFS_I(inode)->io_failure_tree, rather
+than the correct BTRFS_I(inode)->io_tree.
+Since io_failure_tree is never marked with that flag,
+btrfs_qgroup_free_data() will not free any data reserved space at all,
+causing a leakage.
+
+This type of error handling can only be triggered by errors outside of
+qgroup code. So EDQUOT error from qgroup can't trigger it.
+
+[FIX]
+Fix the wrong target io_tree.
+
+Reported-by: Josef Bacik <josef@toxicpanda.com>
+Fixes: bc42bda22345 ("btrfs: qgroup: Fix qgroup reserved space underflow by only freeing reserved ranges")
+CC: stable@vger.kernel.org # 4.14+
+Reviewed-by: Nikolay Borisov <nborisov@suse.com>
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+---
+ fs/btrfs/qgroup.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
+index 52701c1be109..4ab85555a947 100644
+--- a/fs/btrfs/qgroup.c
++++ b/fs/btrfs/qgroup.c
+@@ -3486,7 +3486,7 @@ static int qgroup_free_reserved_data(struct inode *inode,
+ * EXTENT_QGROUP_RESERVED, we won't double free.
+ * So not need to rush.
+ */
+- ret = clear_record_extent_bits(&BTRFS_I(inode)->io_failure_tree,
++ ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree,
+ free_start, free_start + free_len - 1,
+ EXTENT_QGROUP_RESERVED, &changeset);
+ if (ret < 0)
+--
+2.23.0
+
diff --git a/patches.suse/0001-btrfs-relocation-fix-use-after-free-on-dead-relocati.patch b/patches.suse/0001-btrfs-relocation-fix-use-after-free-on-dead-relocati.patch
new file mode 100644
index 0000000000..24474dfcdb
--- /dev/null
+++ b/patches.suse/0001-btrfs-relocation-fix-use-after-free-on-dead-relocati.patch
@@ -0,0 +1,212 @@
+From 1fac4a54374f7ef385938f3c6cf7649c0fe4f6cd Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Mon, 23 Sep 2019 14:56:14 +0800
+Patch-mainline: v5.4-rc1
+Git-commit: 1fac4a54374f7ef385938f3c6cf7649c0fe4f6cd
+References: bsc#1152972
+Subject: [PATCH] btrfs: relocation: fix use-after-free on dead relocation
+ roots
+
+[BUG]
+One user reported a reproducible KASAN report about use-after-free:
+
+ BTRFS info (device sdi1): balance: start -dvrange=1256811659264..1256811659265
+ BTRFS info (device sdi1): relocating block group 1256811659264 flags data|raid0
+ ==================================================================
+ BUG: KASAN: use-after-free in btrfs_init_reloc_root+0x2cd/0x340 [btrfs]
+ Write of size 8 at addr ffff88856f671710 by task kworker/u24:10/261579
+
+ CPU: 2 PID: 261579 Comm: kworker/u24:10 Tainted: P OE 5.2.11-arch1-1-kasan #4
+ Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./X99 Extreme4, BIOS P3.80 04/06/2018
+ Workqueue: btrfs-endio-write btrfs_endio_write_helper [btrfs]
+ Call Trace:
+ dump_stack+0x7b/0xba
+ print_address_description+0x6c/0x22e
+ ? btrfs_init_reloc_root+0x2cd/0x340 [btrfs]
+ __kasan_report.cold+0x1b/0x3b
+ ? btrfs_init_reloc_root+0x2cd/0x340 [btrfs]
+ kasan_report+0x12/0x17
+ __asan_report_store8_noabort+0x17/0x20
+ btrfs_init_reloc_root+0x2cd/0x340 [btrfs]
+ record_root_in_trans+0x2a0/0x370 [btrfs]
+ btrfs_record_root_in_trans+0xf4/0x140 [btrfs]
+ start_transaction+0x1ab/0xe90 [btrfs]
+ btrfs_join_transaction+0x1d/0x20 [btrfs]
+ btrfs_finish_ordered_io+0x7bf/0x18a0 [btrfs]
+ ? lock_repin_lock+0x400/0x400
+ ? __kmem_cache_shutdown.cold+0x140/0x1ad
+ ? btrfs_unlink_subvol+0x9b0/0x9b0 [btrfs]
+ finish_ordered_fn+0x15/0x20 [btrfs]
+ normal_work_helper+0x1bd/0xca0 [btrfs]
+ ? process_one_work+0x819/0x1720
+ ? kasan_check_read+0x11/0x20
+ btrfs_endio_write_helper+0x12/0x20 [btrfs]
+ process_one_work+0x8c9/0x1720
+ ? pwq_dec_nr_in_flight+0x2f0/0x2f0
+ ? worker_thread+0x1d9/0x1030
+ worker_thread+0x98/0x1030
+ kthread+0x2bb/0x3b0
+ ? process_one_work+0x1720/0x1720
+ ? kthread_park+0x120/0x120
+ ret_from_fork+0x35/0x40
+
+ Allocated by task 369692:
+ __kasan_kmalloc.part.0+0x44/0xc0
+ __kasan_kmalloc.constprop.0+0xba/0xc0
+ kasan_kmalloc+0x9/0x10
+ kmem_cache_alloc_trace+0x138/0x260
+ btrfs_read_tree_root+0x92/0x360 [btrfs]
+ btrfs_read_fs_root+0x10/0xb0 [btrfs]
+ create_reloc_root+0x47d/0xa10 [btrfs]
+ btrfs_init_reloc_root+0x1e2/0x340 [btrfs]
+ record_root_in_trans+0x2a0/0x370 [btrfs]
+ btrfs_record_root_in_trans+0xf4/0x140 [btrfs]
+ start_transaction+0x1ab/0xe90 [btrfs]
+ btrfs_start_transaction+0x1e/0x20 [btrfs]
+ __btrfs_prealloc_file_range+0x1c2/0xa00 [btrfs]
+ btrfs_prealloc_file_range+0x13/0x20 [btrfs]
+ prealloc_file_extent_cluster+0x29f/0x570 [btrfs]
+ relocate_file_extent_cluster+0x193/0xc30 [btrfs]
+ relocate_data_extent+0x1f8/0x490 [btrfs]
+ relocate_block_group+0x600/0x1060 [btrfs]
+ btrfs_relocate_block_group+0x3a0/0xa00 [btrfs]
+ btrfs_relocate_chunk+0x9e/0x180 [btrfs]
+ btrfs_balance+0x14e4/0x2fc0 [btrfs]
+ btrfs_ioctl_balance+0x47f/0x640 [btrfs]
+ btrfs_ioctl+0x119d/0x8380 [btrfs]
+ do_vfs_ioctl+0x9f5/0x1060
+ ksys_ioctl+0x67/0x90
+ __x64_sys_ioctl+0x73/0xb0
+ do_syscall_64+0xa5/0x370
+ entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+ Freed by task 369692:
+ __kasan_slab_free+0x14f/0x210
+ kasan_slab_free+0xe/0x10
+ kfree+0xd8/0x270
+ btrfs_drop_snapshot+0x154c/0x1eb0 [btrfs]
+ clean_dirty_subvols+0x227/0x340 [btrfs]
+ relocate_block_group+0x972/0x1060 [btrfs]
+ btrfs_relocate_block_group+0x3a0/0xa00 [btrfs]
+ btrfs_relocate_chunk+0x9e/0x180 [btrfs]
+ btrfs_balance+0x14e4/0x2fc0 [btrfs]
+ btrfs_ioctl_balance+0x47f/0x640 [btrfs]
+ btrfs_ioctl+0x119d/0x8380 [btrfs]
+ do_vfs_ioctl+0x9f5/0x1060
+ ksys_ioctl+0x67/0x90
+ __x64_sys_ioctl+0x73/0xb0
+ do_syscall_64+0xa5/0x370
+ entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+ The buggy address belongs to the object at ffff88856f671100
+ which belongs to the cache kmalloc-4k of size 4096
+ The buggy address is located 1552 bytes inside of
+ 4096-byte region [ffff88856f671100, ffff88856f672100)
+ The buggy address belongs to the page:
+ page:ffffea0015bd9c00 refcount:1 mapcount:0 mapping:ffff88864400e600 index:0x0 compound_mapcount: 0
+ flags: 0x2ffff0000010200(slab|head)
+ raw: 02ffff0000010200 dead000000000100 dead000000000200 ffff88864400e600
+ raw: 0000000000000000 0000000000070007 00000001ffffffff 0000000000000000
+ page dumped because: kasan: bad access detected
+
+ Memory state around the buggy address:
+ ffff88856f671600: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+ ffff88856f671680: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+ >ffff88856f671700: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+ ^
+ ffff88856f671780: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+ ffff88856f671800: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+ ==================================================================
+ BTRFS info (device sdi1): 1 enospc errors during balance
+ BTRFS info (device sdi1): balance: ended with status: -28
+
+[CAUSE]
+The problem happens when finish_ordered_io() get called with balance
+still running, while the reloc root of that subvolume is already dead.
+(The tree swap is already done, but the tree is not yet deleted, being
+kept for possible qgroup usage.)
+
+That means root->reloc_root still exists, but that reloc_root can be
+under btrfs_drop_snapshot(), thus we shouldn't access it.
+
+The following race could cause the use-after-free problem:
+
+ CPU1 | CPU2
+--------------------------------------------------------------------------
+ | relocate_block_group()
+ | |- unset_reloc_control(rc)
+ | |- btrfs_commit_transaction()
+btrfs_finish_ordered_io() | |- clean_dirty_subvols()
+|- btrfs_join_transaction() | |
+ |- record_root_in_trans() | |
+ |- btrfs_init_reloc_root() | |
+ |- if (root->reloc_root) | |
+ | | |- root->reloc_root = NULL
+ | | |- btrfs_drop_snapshot(reloc_root);
+ |- reloc_root->last_trans|
+ = trans->transid |
+ ^^^^^^^^^^^^^^^^^^^^^^
+ Use after free
+
+[FIX]
+Fix it by the following modifications:
+
+- Test if the root has dead reloc tree before accessing root->reloc_root
+ If the root has BTRFS_ROOT_DEAD_RELOC_TREE, then we don't need to
+ create or update root->reloc_tree
+
+- Clear the BTRFS_ROOT_DEAD_RELOC_TREE flag until we have fully dropped
+ reloc tree
+ To co-operate with above modification, so as long as
+ BTRFS_ROOT_DEAD_RELOC_TREE is still set, we won't try to re-create
+ reloc tree at record_root_in_trans().
+
+Reported-by: Cebtenzzre <cebtenzzre@gmail.com>
+Fixes: d2311e698578 ("btrfs: relocation: Delay reloc tree deletion after merge_reloc_roots")
+CC: stable@vger.kernel.org # 5.1+
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+---
+ fs/btrfs/relocation.c | 9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
+index 2f0e25afa486..00504657b602 100644
+--- a/fs/btrfs/relocation.c
++++ b/fs/btrfs/relocation.c
+@@ -1435,6 +1435,13 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
+ int clear_rsv = 0;
+ int ret;
+
++ /*
++ * The subvolume has reloc tree but the swap is finished, no need to
++ * create/update the dead reloc tree
++ */
++ if (test_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state))
++ return 0;
++
+ if (root->reloc_root) {
+ reloc_root = root->reloc_root;
+ reloc_root->last_trans = trans->transid;
+@@ -2187,7 +2194,6 @@ static int clean_dirty_subvols(struct reloc_control *rc)
+ /* Merged subvolume, cleanup its reloc root */
+ struct btrfs_root *reloc_root = root->reloc_root;
+
+- clear_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state);
+ list_del_init(&root->reloc_dirty_list);
+ root->reloc_root = NULL;
+ if (reloc_root) {
+@@ -2196,6 +2202,7 @@ static int clean_dirty_subvols(struct reloc_control *rc)
+ if (ret2 < 0 && !ret)
+ ret = ret2;
+ }
++ clear_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state);
+ btrfs_put_fs_root(root);
+ } else {
+ /* Orphan reloc tree, just clean it up */
+--
+2.23.0
+
diff --git a/patches.suse/0002-btrfs-qgroup-Fix-reserved-data-space-leak-if-we-have.patch b/patches.suse/0002-btrfs-qgroup-Fix-reserved-data-space-leak-if-we-have.patch
new file mode 100644
index 0000000000..cba19a4188
--- /dev/null
+++ b/patches.suse/0002-btrfs-qgroup-Fix-reserved-data-space-leak-if-we-have.patch
@@ -0,0 +1,90 @@
+From d4e204948fe3e0dc8e1fbf3f8f3290c9c2823be3 Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Mon, 16 Sep 2019 20:02:39 +0800
+Patch-mainline: v5.4-rc1
+Git-commit: d4e204948fe3e0dc8e1fbf3f8f3290c9c2823be3
+References: bsc#1152975
+Subject: [PATCH 2/2] btrfs: qgroup: Fix reserved data space leak if we have
+ multiple reserve calls
+
+[BUG]
+The following script can cause btrfs qgroup data space leak:
+
+ mkfs.btrfs -f $dev
+ mount $dev -o nospace_cache $mnt
+
+ btrfs subv create $mnt/subv
+ btrfs quota en $mnt
+ btrfs quota rescan -w $mnt
+ btrfs qgroup limit 128m $mnt/subv
+
+ for (( i = 0; i < 3; i++)); do
+ # Create 3 64M holes for latter fallocate to fail
+ truncate -s 192m $mnt/subv/file
+ xfs_io -c "pwrite 64m 4k" $mnt/subv/file > /dev/null
+ xfs_io -c "pwrite 128m 4k" $mnt/subv/file > /dev/null
+ sync
+
+ # it's supposed to fail, and each failure will leak at least 64M
+ # data space
+ xfs_io -f -c "falloc 0 192m" $mnt/subv/file &> /dev/null
+ rm $mnt/subv/file
+ sync
+ done
+
+ # Shouldn't fail after we removed the file
+ xfs_io -f -c "falloc 0 64m" $mnt/subv/file
+
+[CAUSE]
+Btrfs qgroup data reserve code allow multiple reservations to happen on
+a single extent_changeset:
+E.g:
+ btrfs_qgroup_reserve_data(inode, &data_reserved, 0, SZ_1M);
+ btrfs_qgroup_reserve_data(inode, &data_reserved, SZ_1M, SZ_2M);
+ btrfs_qgroup_reserve_data(inode, &data_reserved, 0, SZ_4M);
+
+Btrfs qgroup code has its internal tracking to make sure we don't
+double-reserve in above example.
+
+The only pattern utilizing this feature is in the main while loop of
+btrfs_fallocate() function.
+
+However btrfs_qgroup_reserve_data()'s error handling has a bug in that
+on error it clears all ranges in the io_tree with EXTENT_QGROUP_RESERVED
+flag but doesn't free previously reserved bytes.
+
+This bug has a two fold effect:
+- Clearing EXTENT_QGROUP_RESERVED ranges
+ This is the correct behavior, but it prevents
+ btrfs_qgroup_check_reserved_leak() to catch the leakage as the
+ detector is purely EXTENT_QGROUP_RESERVED flag based.
+
+- Leak the previously reserved data bytes.
+
+The bug manifests when N calls to btrfs_qgroup_reserve_data are made and
+the last one fails, leaking space reserved in the previous ones.
+
+[FIX]
+Also free previously reserved data bytes when btrfs_qgroup_reserve_data
+fails.
+
+Fixes: 524725537023 ("btrfs: qgroup: Introduce btrfs_qgroup_reserve_data function")
+CC: stable@vger.kernel.org # 4.4+
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+---
+ fs/btrfs/qgroup.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/fs/btrfs/qgroup.c
++++ b/fs/btrfs/qgroup.c
+@@ -3376,6 +3376,9 @@ cleanup:
+ clear_extent_bit(&BTRFS_I(inode)->io_tree, unode->val,
+ unode->aux, EXTENT_QGROUP_RESERVED, 0, 0, NULL,
+ GFP_NOFS);
++ /* Also free data bytes of already reserved one */
++ btrfs_qgroup_free_refroot(root->fs_info, root->root_key.objectid,
++ orig_reserved, BTRFS_QGROUP_RSV_DATA);
+ extent_changeset_release(reserved);
+ return ret;
+ }
diff --git a/patches.suse/0043-Restrict-dev-mem-and-dev-kmem-when-the-kernel-is-loc.patch b/patches.suse/0043-Restrict-dev-mem-and-dev-kmem-when-the-kernel-is-loc.patch
index b444df79d3..79045c625b 100644
--- a/patches.suse/0043-Restrict-dev-mem-and-dev-kmem-when-the-kernel-is-loc.patch
+++ b/patches.suse/0043-Restrict-dev-mem-and-dev-kmem-when-the-kernel-is-loc.patch
@@ -1,46 +1,39 @@
-From 104cff827b18e35874153bd8df14eba59e5b411a Mon Sep 17 00:00:00 2001
-From: Matthew Garrett <matthew.garrett@nebula.com>
-Date: Wed, 5 Apr 2017 17:40:30 +0100
-Subject: [PATCH 43/62] Restrict /dev/mem and /dev/kmem when the kernel is
+From: Matthew Garrett <mjg59@srcf.ucam.org>
+Date: Mon, 19 Aug 2019 17:17:41 -0700
+Subject: lockdown: Restrict /dev/{mem,kmem,port} when the kernel is
locked down
-Patch-mainline: No, submitted https://patchwork.kernel.org/patch/9665599/
+Patch-mainline: No, submitted https://lkml.org/lkml/2019/8/19/1195
References: fate#314486
-Allowing users to write to address space makes it possible for the kernel to
-be subverted, avoiding module loading restrictions. Prevent this when the
-kernel has been locked down.
+Allowing users to read and write to core kernel memory makes it possible
+for the kernel to be subverted, avoiding module loading restrictions, and
+also to steal cryptographic information.
+
+Disallow /dev/mem and /dev/kmem from being opened when the kernel has
+been locked down to prevent this.
+
+Also disallow /dev/port from being opened to prevent raw ioport access and
+thus DMA from being used to accomplish the same thing.
-Signed-off-by: Matthew Garrett <matthew.garrett@nebula.com>
Signed-off-by: David Howells <dhowells@redhat.com>
+Signed-off-by: Matthew Garrett <mjg59@google.com>
+Reviewed-by: Kees Cook <keescook@chromium.org>
+Cc: x86@kernel.org
+Signed-off-by: James Morris <jmorris@namei.org>
Acked-by: Lee, Chun-Yi <jlee@suse.com>
+Acked-by: Jean Delvare <jdelvare@suse.de>
---
- drivers/char/mem.c | 6 ++++++
- 1 file changed, 6 insertions(+)
+ drivers/char/mem.c | 2 ++
+ 1 file changed, 2 insertions(+)
-diff --git a/drivers/char/mem.c b/drivers/char/mem.c
-index 6d9cc2d..f814404 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
-@@ -163,6 +163,9 @@ static ssize_t write_mem(struct file *file, const char __user *buf,
- if (p != *ppos)
- return -EFBIG;
+@@ -779,6 +779,8 @@ static loff_t memory_lseek(struct file *
+ static int open_port(struct inode *inode, struct file *filp)
+ {
+ if (kernel_is_locked_down())
+ return -EPERM;
-+
- if (!valid_phys_addr_range(p, count))
- return -EFAULT;
-
-@@ -513,6 +516,9 @@ static ssize_t write_kmem(struct file *file, const char __user *buf,
- char *kbuf; /* k-addr because vwrite() takes vmlist_lock rwlock */
- int err = 0;
+ return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
+ }
-+ if (kernel_is_locked_down())
-+ return -EPERM;
-+
- if (p < (unsigned long) high_memory) {
- unsigned long to_write = min_t(unsigned long, count,
- (unsigned long)high_memory - p);
---
-2.10.2
-
diff --git a/patches.suse/0051-x86-Lock-down-IO-port-access-when-the-kernel-is-lock.patch b/patches.suse/0051-x86-Lock-down-IO-port-access-when-the-kernel-is-lock.patch
index fd28f8af05..48aec179dc 100644
--- a/patches.suse/0051-x86-Lock-down-IO-port-access-when-the-kernel-is-lock.patch
+++ b/patches.suse/0051-x86-Lock-down-IO-port-access-when-the-kernel-is-lock.patch
@@ -20,8 +20,7 @@ Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Lee, Chun-Yi <jlee@suse.com>
---
arch/x86/kernel/ioport.c | 4 ++--
- drivers/char/mem.c | 2 ++
- 2 files changed, 4 insertions(+), 2 deletions(-)
+ 1 file changed, 2 insertions(+), 2 deletions(-)
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -43,14 +42,3 @@ Acked-by: Lee, Chun-Yi <jlee@suse.com>
return -EPERM;
}
regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) |
---- a/drivers/char/mem.c
-+++ b/drivers/char/mem.c
-@@ -768,6 +768,8 @@ static loff_t memory_lseek(struct file *
-
- static int open_port(struct inode *inode, struct file *filp)
- {
-+ if (kernel_is_locked_down())
-+ return -EPERM;
- return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
- }
-
diff --git a/patches.suse/KVM-PPC-Book3S-HV-Fix-lockdep-warning-when-entering-.patch b/patches.suse/KVM-PPC-Book3S-HV-Fix-lockdep-warning-when-entering-.patch
new file mode 100644
index 0000000000..017729fd6b
--- /dev/null
+++ b/patches.suse/KVM-PPC-Book3S-HV-Fix-lockdep-warning-when-entering-.patch
@@ -0,0 +1,96 @@
+From 3309bec85e60d60d6394802cb8e183a4f4a72def Mon Sep 17 00:00:00 2001
+From: Alexey Kardashevskiy <aik@ozlabs.ru>
+Date: Fri, 29 Mar 2019 16:40:13 +1100
+Subject: [PATCH] KVM: PPC: Book3S HV: Fix lockdep warning when entering the
+ guest
+
+References: bsc#1061840
+Patch-mainline: v5.2-rc1
+Git-commit: 3309bec85e60d60d6394802cb8e183a4f4a72def
+
+The trace_hardirqs_on() sets current->hardirqs_enabled and from here
+the lockdep assumes interrupts are enabled although they are remain
+disabled until the context switches to the guest. Consequent
+srcu_read_lock() checks the flags in rcu_lock_acquire(), observes
+disabled interrupts and prints a warning (see below).
+
+This moves trace_hardirqs_on/off closer to __kvmppc_vcore_entry to
+prevent lockdep from being confused.
+
+DEBUG_LOCKS_WARN_ON(current->hardirqs_enabled)
+WARNING: CPU: 16 PID: 8038 at kernel/locking/lockdep.c:4128 check_flags.part.25+0x224/0x280
+[...]
+NIP [c000000000185b84] check_flags.part.25+0x224/0x280
+LR [c000000000185b80] check_flags.part.25+0x220/0x280
+Call Trace:
+[c000003fec253710] [c000000000185b80] check_flags.part.25+0x220/0x280 (unreliable)
+[c000003fec253780] [c000000000187ea4] lock_acquire+0x94/0x260
+[c000003fec253840] [c00800001a1e9768] kvmppc_run_core+0xa60/0x1ab0 [kvm_hv]
+[c000003fec253a10] [c00800001a1ed944] kvmppc_vcpu_run_hv+0x73c/0xec0 [kvm_hv]
+[c000003fec253ae0] [c00800001a1095dc] kvmppc_vcpu_run+0x34/0x48 [kvm]
+[c000003fec253b00] [c00800001a1056bc] kvm_arch_vcpu_ioctl_run+0x2f4/0x400 [kvm]
+[c000003fec253b90] [c00800001a0f3618] kvm_vcpu_ioctl+0x460/0x850 [kvm]
+[c000003fec253d00] [c00000000041c4f4] do_vfs_ioctl+0xe4/0x930
+[c000003fec253db0] [c00000000041ce04] ksys_ioctl+0xc4/0x110
+[c000003fec253e00] [c00000000041ce78] sys_ioctl+0x28/0x80
+[c000003fec253e20] [c00000000000b5a4] system_call+0x5c/0x70
+Instruction dump:
+419e0034 3d220004 39291730 81290000 2f890000 409e0020 3c82ffc6 3c62ffc5
+3884be70 386329c0 4bf6ea71 60000000 <0fe00000> 3c62ffc6 3863be90 4801273d
+irq event stamp: 1025
+hardirqs last enabled at (1025): [<c00800001a1e9728>] kvmppc_run_core+0xa20/0x1ab0 [kvm_hv]
+hardirqs last disabled at (1024): [<c00800001a1e9358>] kvmppc_run_core+0x650/0x1ab0 [kvm_hv]
+softirqs last enabled at (0): [<c0000000000f1210>] copy_process.isra.4.part.5+0x5f0/0x1d00
+softirqs last disabled at (0): [<0000000000000000>] (null)
+---[ end trace 31180adcc848993e ]---
+possible reason: unannotated irqs-off.
+irq event stamp: 1025
+hardirqs last enabled at (1025): [<c00800001a1e9728>] kvmppc_run_core+0xa20/0x1ab0 [kvm_hv]
+hardirqs last disabled at (1024): [<c00800001a1e9358>] kvmppc_run_core+0x650/0x1ab0 [kvm_hv]
+softirqs last enabled at (0): [<c0000000000f1210>] copy_process.isra.4.part.5+0x5f0/0x1d00
+softirqs last disabled at (0): [<0000000000000000>] (null)
+
+Fixes: 8b24e69fc47e ("KVM: PPC: Book3S HV: Close race with testing for signals on guest entry", 2017-06-26)
+Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/kvm/book3s_hv.c | 15 ++++++++-------
+ 1 file changed, 8 insertions(+), 7 deletions(-)
+
+--- a/arch/powerpc/kvm/book3s_hv.c
++++ b/arch/powerpc/kvm/book3s_hv.c
+@@ -2962,25 +2962,26 @@ static noinline void kvmppc_run_core(str
+ for (sub = 0; sub < core_info.n_subcores; ++sub)
+ spin_unlock(&core_info.vc[sub]->lock);
+
+- /*
+- * Interrupts will be enabled once we get into the guest,
+- * so tell lockdep that we're about to enable interrupts.
+- */
+- trace_hardirqs_on();
+-
+ guest_enter_irqoff();
+
+ srcu_idx = srcu_read_lock(&vc->kvm->srcu);
+
+ this_cpu_disable_ftrace();
+
++ /*
++ * Interrupts will be enabled once we get into the guest,
++ * so tell lockdep that we're about to enable interrupts.
++ */
++ trace_hardirqs_on();
++
+ trap = __kvmppc_vcore_entry();
+
++ trace_hardirqs_off();
++
+ this_cpu_enable_ftrace();
+
+ srcu_read_unlock(&vc->kvm->srcu, srcu_idx);
+
+- trace_hardirqs_off();
+ set_irq_happened(trap);
+
+ spin_lock(&vc->lock);
diff --git a/patches.suse/NFS-Don-t-interrupt-file-writeout-due-to-fatal-error.patch b/patches.suse/NFS-Don-t-interrupt-file-writeout-due-to-fatal-error.patch
new file mode 100644
index 0000000000..b741ee9d1e
--- /dev/null
+++ b/patches.suse/NFS-Don-t-interrupt-file-writeout-due-to-fatal-error.patch
@@ -0,0 +1,31 @@
+From: Trond Myklebust <trondmy@gmail.com>
+Date: Sun, 7 Apr 2019 13:59:02 -0400
+Subject: [PATCH] NFS: Don't interrupt file writeout due to fatal errors
+Git-commit: 14bebe3c90b326d2a0df78aed5e9de090c71d878
+Patch-mainline: v5.2
+References: git-fixes
+
+When flushing out dirty pages, the fact that we may hit fatal errors
+is not a reason to stop writeback. Those errors are reported through
+fsync(), not through the flush mechanism.
+
+Fixes: a6598813a4c5b ("NFS: Don't write back further requests if there...")
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
+Acked-by: NeilBrown <neilb@suse.com>
+
+---
+ fs/nfs/write.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/nfs/write.c
++++ b/fs/nfs/write.c
+@@ -668,7 +668,7 @@ out:
+ return ret;
+ out_launder:
+ nfs_write_error_remove_page(req);
+- return ret;
++ return 0;
+ }
+
+ static int nfs_do_writepage(struct page *page, struct writeback_control *wbc,
diff --git a/patches.suse/NFS-Don-t-open-code-clearing-of-delegation-state.patch b/patches.suse/NFS-Don-t-open-code-clearing-of-delegation-state.patch
new file mode 100644
index 0000000000..1189f52713
--- /dev/null
+++ b/patches.suse/NFS-Don-t-open-code-clearing-of-delegation-state.patch
@@ -0,0 +1,75 @@
+From: Trond Myklebust <trondmy@gmail.com>
+Date: Wed, 5 Sep 2018 14:07:15 -0400
+Subject: [PATCH] NFS: Don't open code clearing of delegation state
+Git-commit: 9f0c5124f4a82503ee5d55c60b0b9c6afc3af68b
+Patch-mainline: v4.19
+References: git-fixes
+
+Add a helper for the case when the nfs4 open state has been set to use
+a delegation stateid, and we want to revert to using the open stateid.
+
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
+Acked-by: NeilBrown <neilb@suse.com>
+
+---
+ fs/nfs/nfs4proc.c | 21 ++++++++++++---------
+ 1 file changed, 12 insertions(+), 9 deletions(-)
+
+--- a/fs/nfs/nfs4proc.c
++++ b/fs/nfs/nfs4proc.c
+@@ -1605,6 +1605,14 @@ static void nfs_state_set_delegation(str
+ write_sequnlock(&state->seqlock);
+ }
+
++static void nfs_state_clear_delegation(struct nfs4_state *state)
++{
++ write_seqlock(&state->seqlock);
++ nfs4_stateid_copy(&state->stateid, &state->open_stateid);
++ clear_bit(NFS_DELEGATED_STATE, &state->flags);
++ write_sequnlock(&state->seqlock);
++}
++
+ static int update_open_stateid(struct nfs4_state *state,
+ const nfs4_stateid *open_stateid,
+ const nfs4_stateid *delegation,
+@@ -2076,10 +2084,7 @@ int nfs4_open_delegation_recall(struct n
+ if (IS_ERR(opendata))
+ return PTR_ERR(opendata);
+ nfs4_stateid_copy(&opendata->o_arg.u.delegation, stateid);
+- write_seqlock(&state->seqlock);
+- nfs4_stateid_copy(&state->stateid, &state->open_stateid);
+- write_sequnlock(&state->seqlock);
+- clear_bit(NFS_DELEGATED_STATE, &state->flags);
++ nfs_state_clear_delegation(state);
+ switch (type & (FMODE_READ|FMODE_WRITE)) {
+ case FMODE_READ|FMODE_WRITE:
+ case FMODE_WRITE:
+@@ -2525,10 +2530,7 @@ static void nfs_finish_clear_delegation_
+ const nfs4_stateid *stateid)
+ {
+ nfs_remove_bad_delegation(state->inode, stateid);
+- write_seqlock(&state->seqlock);
+- nfs4_stateid_copy(&state->stateid, &state->open_stateid);
+- write_sequnlock(&state->seqlock);
+- clear_bit(NFS_DELEGATED_STATE, &state->flags);
++ nfs_state_clear_delegation(state);
+ }
+
+ static void nfs40_clear_delegation_stateid(struct nfs4_state *state)
+@@ -2596,13 +2598,14 @@ static void nfs41_check_delegation_state
+ delegation = rcu_dereference(NFS_I(state->inode)->delegation);
+ if (delegation == NULL) {
+ rcu_read_unlock();
++ nfs_state_clear_delegation(state);
+ return;
+ }
+
+ nfs4_stateid_copy(&stateid, &delegation->stateid);
+ if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) {
+ rcu_read_unlock();
+- nfs_finish_clear_delegation_stateid(state, &stateid);
++ nfs_state_clear_delegation(state);
+ return;
+ }
+
diff --git a/patches.suse/NFS-Ensure-O_DIRECT-reports-an-error-if-the-bytes-re.patch b/patches.suse/NFS-Ensure-O_DIRECT-reports-an-error-if-the-bytes-re.patch
new file mode 100644
index 0000000000..b193ac05f5
--- /dev/null
+++ b/patches.suse/NFS-Ensure-O_DIRECT-reports-an-error-if-the-bytes-re.patch
@@ -0,0 +1,84 @@
+From: Trond Myklebust <trond.myklebust@hammerspace.com>
+Date: Mon, 12 Aug 2019 18:04:36 -0400
+Subject: [PATCH] NFS: Ensure O_DIRECT reports an error if the bytes
+ read/written is 0
+Git-commit: eb2c50da9e256dbbb3ff27694440e4c1900cfef8
+Patch-mainline: v5.3
+References: git-fixes
+
+If the attempt to resend the I/O results in no bytes being read/written,
+we must ensure that we report the error.
+
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Fixes: 0a00b77b331a ("nfs: mirroring support for direct io")
+Cc: stable@vger.kernel.org # v3.20+
+Acked-by: NeilBrown <neilb@suse.com>
+
+---
+ fs/nfs/direct.c | 27 ++++++++++++++++++---------
+ fs/nfs/pagelist.c | 1 +
+ 2 files changed, 19 insertions(+), 9 deletions(-)
+
+--- a/fs/nfs/direct.c
++++ b/fs/nfs/direct.c
+@@ -397,15 +397,21 @@ static void nfs_direct_read_completion(s
+ unsigned long bytes = 0;
+ struct nfs_direct_req *dreq = hdr->dreq;
+
+- if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
+- goto out_put;
+-
+ spin_lock(&dreq->lock);
+- if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && (hdr->good_bytes == 0))
++ if (test_bit(NFS_IOHDR_ERROR, &hdr->flags))
+ dreq->error = hdr->error;
+- else
++
++ if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) {
++ spin_unlock(&dreq->lock);
++ goto out_put;
++ }
++
++ if (hdr->good_bytes != 0)
+ nfs_direct_good_bytes(dreq, hdr);
+
++ if (test_bit(NFS_IOHDR_EOF, &hdr->flags))
++ dreq->error = 0;
++
+ spin_unlock(&dreq->lock);
+
+ while (!list_empty(&hdr->pages)) {
+@@ -768,16 +774,19 @@ static void nfs_direct_write_completion(
+ bool request_commit = false;
+ struct nfs_page *req = nfs_list_entry(hdr->pages.next);
+
+- if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
+- goto out_put;
+-
+ nfs_init_cinfo_from_dreq(&cinfo, dreq);
+
+ spin_lock(&dreq->lock);
+
+ if (test_bit(NFS_IOHDR_ERROR, &hdr->flags))
+ dreq->error = hdr->error;
+- if (dreq->error == 0) {
++
++ if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) {
++ spin_unlock(&dreq->lock);
++ goto out_put;
++ }
++
++ if (hdr->good_bytes != 0) {
+ nfs_direct_good_bytes(dreq, hdr);
+ if (nfs_write_need_commit(hdr)) {
+ if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES)
+--- a/fs/nfs/pagelist.c
++++ b/fs/nfs/pagelist.c
+@@ -1278,6 +1278,7 @@ int nfs_pageio_resend(struct nfs_pageio_
+ if (!list_empty(&pages)) {
+ int err = desc->pg_error < 0 ? desc->pg_error : -EIO;
+ hdr->completion_ops->error_cleanup(&pages);
++ nfs_set_pgio_error(hdr, err, hdr->io_start);
+ return err;
+ }
+ return 0;
diff --git a/patches.suse/NFS-Fix-regression-whereby-fscache-errors-are-appear.patch b/patches.suse/NFS-Fix-regression-whereby-fscache-errors-are-appear.patch
new file mode 100644
index 0000000000..d6e1c5272b
--- /dev/null
+++ b/patches.suse/NFS-Fix-regression-whereby-fscache-errors-are-appear.patch
@@ -0,0 +1,78 @@
+From: Trond Myklebust <trond.myklebust@hammerspace.com>
+Date: Sat, 3 Aug 2019 13:39:24 -0400
+Subject: [PATCH] NFS: Fix regression whereby fscache errors are appearing on
+ 'nofsc' mounts
+Git-commit: dea1bb35c5f35e0577cfc61f79261d80b8715221
+Patch-mainline: v5.3
+References: git-fixes
+
+People are reporting seeing fscache errors being reported concerning
+duplicate cookies even in cases where they are not setting up fscache
+at all. The rule needs to be that if fscache is not enabled, then it
+should have no side effects at all.
+
+To ensure this is the case, we disable fscache completely on all superblocks
+for which the 'fsc' mount option was not set. In order to avoid issues
+with '-oremount', we also disable the ability to turn fscache on via
+remount.
+
+Fixes: f1fe29b4a02d ("NFS: Use i_writecount to control whether...")
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=200145
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Cc: Steve Dickson <steved@redhat.com>
+Cc: David Howells <dhowells@redhat.com>
+Acked-by: NeilBrown <neilb@suse.com>
+
+---
+ fs/nfs/fscache.c | 7 ++++++-
+ fs/nfs/fscache.h | 2 +-
+ fs/nfs/super.c | 1 +
+ 3 files changed, 8 insertions(+), 2 deletions(-)
+
+--- a/fs/nfs/fscache.c
++++ b/fs/nfs/fscache.c
+@@ -71,6 +71,10 @@ void nfs_fscache_get_super_cookie(struct
+ struct rb_node **p, *parent;
+ int diff;
+
++ nfss->fscache_key = NULL;
++ nfss->fscache = NULL;
++ if (!(nfss->options & NFS_OPTION_FSCACHE))
++ return;
+ if (!uniq) {
+ uniq = "";
+ ulen = 1;
+@@ -180,10 +184,11 @@ void nfs_fscache_release_super_cookie(st
+ */
+ void nfs_fscache_init_inode(struct inode *inode)
+ {
++ struct nfs_server *nfss = NFS_SERVER(inode);
+ struct nfs_inode *nfsi = NFS_I(inode);
+
+ nfsi->fscache = NULL;
+- if (!S_ISREG(inode->i_mode))
++ if (!(nfss->fscache && S_ISREG(inode->i_mode)))
+ return;
+ nfsi->fscache = fscache_acquire_cookie(NFS_SB(inode->i_sb)->fscache,
+ &nfs_fscache_inode_object_def,
+--- a/fs/nfs/fscache.h
++++ b/fs/nfs/fscache.h
+@@ -171,7 +171,7 @@ static inline void nfs_fscache_wait_on_i
+ */
+ static inline const char *nfs_server_fscache_state(struct nfs_server *server)
+ {
+- if (server->fscache && (server->options & NFS_OPTION_FSCACHE))
++ if (server->fscache)
+ return "yes";
+ return "no ";
+ }
+--- a/fs/nfs/super.c
++++ b/fs/nfs/super.c
+@@ -2250,6 +2250,7 @@ nfs_compare_remount_data(struct nfs_serv
+ data->acdirmin != nfss->acdirmin / HZ ||
+ data->acdirmax != nfss->acdirmax / HZ ||
+ data->timeo != (10U * nfss->client->cl_timeout->to_initval / HZ) ||
++ (data->options & NFS_OPTION_FSCACHE) != (nfss->options & NFS_OPTION_FSCACHE) ||
+ data->nfs_server.port != nfss->port ||
+ data->nfs_server.addrlen != nfss->nfs_client->cl_addrlen ||
+ !rpc_cmp_addr((struct sockaddr *)&data->nfs_server.address,
diff --git a/patches.suse/NFS-Forbid-setting-AF_INET6-to-struct-sockaddr_in-si.patch b/patches.suse/NFS-Forbid-setting-AF_INET6-to-struct-sockaddr_in-si.patch
new file mode 100644
index 0000000000..270fdc5b1a
--- /dev/null
+++ b/patches.suse/NFS-Forbid-setting-AF_INET6-to-struct-sockaddr_in-si.patch
@@ -0,0 +1,42 @@
+From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+Date: Sat, 30 Mar 2019 10:21:07 +0900
+Subject: [PATCH] NFS: Forbid setting AF_INET6 to "struct
+ sockaddr_in"->sin_family.
+Git-commit: 7c2bd9a39845bfb6d72ddb55ce737650271f6f96
+Patch-mainline: v5.1
+References: git-fixes
+
+syzbot is reporting uninitialized value at rpc_sockaddr2uaddr() [1]. This
+is because syzbot is setting AF_INET6 to "struct sockaddr_in"->sin_family
+(which is embedded into user-visible "struct nfs_mount_data" structure)
+despite nfs23_validate_mount_data() cannot pass sizeof(struct sockaddr_in6)
+bytes of AF_INET6 address to rpc_sockaddr2uaddr().
+
+Since "struct nfs_mount_data" structure is user-visible, we can't change
+"struct nfs_mount_data" to use "struct sockaddr_storage". Therefore,
+assuming that everybody is using AF_INET family when passing address via
+"struct nfs_mount_data"->addr, reject if its sin_family is not AF_INET.
+
+[1] https://syzkaller.appspot.com/bug?id=599993614e7cbbf66bc2656a919ab2a95fb5d75c
+
+Reported-by: syzbot <syzbot+047a11c361b872896a4f@syzkaller.appspotmail.com>
+Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Acked-by: NeilBrown <neilb@suse.com>
+
+---
+ fs/nfs/super.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/fs/nfs/super.c
++++ b/fs/nfs/super.c
+@@ -2063,7 +2063,8 @@ static int nfs23_validate_mount_data(voi
+ memcpy(sap, &data->addr, sizeof(data->addr));
+ args->nfs_server.addrlen = sizeof(data->addr);
+ args->nfs_server.port = ntohs(data->addr.sin_port);
+- if (!nfs_verify_server_address(sap))
++ if (sap->sa_family != AF_INET ||
++ !nfs_verify_server_address(sap))
+ goto out_no_address;
+
+ if (!(data->flags & NFS_MOUNT_TCP))
diff --git a/patches.suse/NFS-Refactor-nfs_lookup_revalidate.patch b/patches.suse/NFS-Refactor-nfs_lookup_revalidate.patch
new file mode 100644
index 0000000000..12bc6b0a20
--- /dev/null
+++ b/patches.suse/NFS-Refactor-nfs_lookup_revalidate.patch
@@ -0,0 +1,293 @@
+From: Trond Myklebust <trond.myklebust@hammerspace.com>
+Date: Fri, 28 Sep 2018 09:04:05 -0400
+Subject: [PATCH] NFS: Refactor nfs_lookup_revalidate()
+Git-commit: 5ceb9d7fdaaf6d8ced6cd7861cf1deb9cd93fa47
+Patch-mainline: v4.20
+References: git-fixes
+
+Refactor the code in nfs_lookup_revalidate() as a stepping stone towards
+optimising and fixing nfs4_lookup_revalidate().
+
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Acked-by: NeilBrown <neilb@suse.com>
+
+---
+ fs/nfs/dir.c | 222 +++++++++++++++++++++++++++++++++--------------------------
+ 1 file changed, 126 insertions(+), 96 deletions(-)
+
+--- a/fs/nfs/dir.c
++++ b/fs/nfs/dir.c
+@@ -1059,6 +1059,100 @@ int nfs_neg_need_reval(struct inode *dir
+ return !nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU);
+ }
+
++static int
++nfs_lookup_revalidate_done(struct inode *dir, struct dentry *dentry,
++ struct inode *inode, int error)
++{
++ switch (error) {
++ case 1:
++ dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is valid\n",
++ __func__, dentry);
++ return 1;
++ case 0:
++ nfs_mark_for_revalidate(dir);
++ if (inode && S_ISDIR(inode->i_mode)) {
++ /* Purge readdir caches. */
++ nfs_zap_caches(inode);
++ /*
++ * We can't d_drop the root of a disconnected tree:
++ * its d_hash is on the s_anon list and d_drop() would hide
++ * it from shrink_dcache_for_unmount(), leading to busy
++ * inodes on unmount and further oopses.
++ */
++ if (IS_ROOT(dentry))
++ return 1;
++ }
++ dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is invalid\n",
++ __func__, dentry);
++ return 0;
++ }
++ dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) lookup returned error %d\n",
++ __func__, dentry, error);
++ return error;
++}
++
++static int
++nfs_lookup_revalidate_negative(struct inode *dir, struct dentry *dentry,
++ unsigned int flags)
++{
++ int ret = 1;
++ if (nfs_neg_need_reval(dir, dentry, flags)) {
++ if (flags & LOOKUP_RCU)
++ return -ECHILD;
++ ret = 0;
++ }
++ return nfs_lookup_revalidate_done(dir, dentry, NULL, ret);
++}
++
++static int
++nfs_lookup_revalidate_delegated(struct inode *dir, struct dentry *dentry,
++ struct inode *inode)
++{
++ nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
++ return nfs_lookup_revalidate_done(dir, dentry, inode, 1);
++}
++
++static int
++nfs_lookup_revalidate_dentry(struct inode *dir, struct dentry *dentry,
++ struct inode *inode)
++{
++ struct nfs_fh *fhandle;
++ struct nfs_fattr *fattr;
++ struct nfs4_label *label;
++ int ret;
++
++ ret = -ENOMEM;
++ fhandle = nfs_alloc_fhandle();
++ fattr = nfs_alloc_fattr();
++ label = nfs4_label_alloc(NFS_SERVER(inode), GFP_KERNEL);
++ if (fhandle == NULL || fattr == NULL || IS_ERR(label))
++ goto out;
++
++ ret = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label);
++ if (ret < 0) {
++ if (ret == -ESTALE || ret == -ENOENT)
++ ret = 0;
++ goto out;
++ }
++ ret = 0;
++ if (nfs_compare_fh(NFS_FH(inode), fhandle))
++ goto out;
++ if (nfs_refresh_inode(inode, fattr) < 0)
++ goto out;
++
++ nfs_setsecurity(inode, fattr, label);
++ nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
++
++ /* set a readdirplus hint that we had a cache miss */
++ nfs_force_use_readdirplus(dir);
++ ret = 1;
++out:
++ nfs_free_fattr(fattr);
++ nfs_free_fhandle(fhandle);
++ nfs4_label_free(label);
++ return nfs_lookup_revalidate_done(dir, dentry, inode, ret);
++}
++
+ /*
+ * This is called every time the dcache has a lookup hit,
+ * and we should check whether we can really trust that
+@@ -1070,58 +1164,36 @@ int nfs_neg_need_reval(struct inode *dir
+ * If the parent directory is seen to have changed, we throw out the
+ * cached dentry and do a new lookup.
+ */
+-static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
++static int
++nfs_do_lookup_revalidate(struct inode *dir, struct dentry *dentry,
++ unsigned int flags)
+ {
+- struct inode *dir;
+ struct inode *inode;
+- struct dentry *parent;
+- struct nfs_fh *fhandle = NULL;
+- struct nfs_fattr *fattr = NULL;
+- struct nfs4_label *label = NULL;
+ int error;
+
+- if (flags & LOOKUP_RCU) {
+- parent = READ_ONCE(dentry->d_parent);
+- dir = d_inode_rcu(parent);
+- if (!dir)
+- return -ECHILD;
+- } else {
+- parent = dget_parent(dentry);
+- dir = d_inode(parent);
+- }
+ nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE);
+ inode = d_inode(dentry);
+
+- if (!inode) {
+- if (nfs_neg_need_reval(dir, dentry, flags)) {
+- if (flags & LOOKUP_RCU)
+- return -ECHILD;
+- goto out_bad;
+- }
+- goto out_valid;
+- }
++ if (!inode)
++ return nfs_lookup_revalidate_negative(dir, dentry, flags);
+
+ if (is_bad_inode(inode)) {
+- if (flags & LOOKUP_RCU)
+- return -ECHILD;
+ dfprintk(LOOKUPCACHE, "%s: %pd2 has dud inode\n",
+ __func__, dentry);
+ goto out_bad;
+ }
+
+ if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ))
+- goto out_set_verifier;
++ return nfs_lookup_revalidate_delegated(dir, dentry, inode);
+
+ /* Force a full look up iff the parent directory has changed */
+ if (!nfs_is_exclusive_create(dir, flags) &&
+ nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU)) {
+ error = nfs_lookup_verify_inode(inode, flags);
+ if (error) {
+- if (flags & LOOKUP_RCU)
+- return -ECHILD;
+ if (error == -ESTALE)
+- goto out_zap_parent;
+- goto out_error;
++ nfs_zap_caches(dir);
++ goto out_bad;
+ }
+ nfs_advise_use_readdirplus(dir);
+ goto out_valid;
+@@ -1133,81 +1205,39 @@ static int nfs_lookup_revalidate(struct
+ if (NFS_STALE(inode))
+ goto out_bad;
+
+- error = -ENOMEM;
+- fhandle = nfs_alloc_fhandle();
+- fattr = nfs_alloc_fattr();
+- if (fhandle == NULL || fattr == NULL)
+- goto out_error;
+-
+- label = nfs4_label_alloc(NFS_SERVER(inode), GFP_NOWAIT);
+- if (IS_ERR(label))
+- goto out_error;
+-
+ trace_nfs_lookup_revalidate_enter(dir, dentry, flags);
+- error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label);
++ error = nfs_lookup_revalidate_dentry(dir, dentry, inode);
+ trace_nfs_lookup_revalidate_exit(dir, dentry, flags, error);
+- if (error == -ESTALE || error == -ENOENT)
+- goto out_bad;
+- if (error)
+- goto out_error;
+- if (nfs_compare_fh(NFS_FH(inode), fhandle))
+- goto out_bad;
+- if ((error = nfs_refresh_inode(inode, fattr)) != 0)
+- goto out_bad;
+-
+- nfs_setsecurity(inode, fattr, label);
+-
+- nfs_free_fattr(fattr);
+- nfs_free_fhandle(fhandle);
+- nfs4_label_free(label);
++ return error;
++out_valid:
++ return nfs_lookup_revalidate_done(dir, dentry, inode, 1);
++out_bad:
++ if (flags & LOOKUP_RCU)
++ return -ECHILD;
++ return nfs_lookup_revalidate_done(dir, dentry, inode, 0);
++}
+
+- /* set a readdirplus hint that we had a cache miss */
+- nfs_force_use_readdirplus(dir);
++static int
++nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
++{
++ struct dentry *parent;
++ struct inode *dir;
++ int ret;
+
+-out_set_verifier:
+- nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+- out_valid:
+ if (flags & LOOKUP_RCU) {
++ parent = READ_ONCE(dentry->d_parent);
++ dir = d_inode_rcu(parent);
++ if (!dir)
++ return -ECHILD;
++ ret = nfs_do_lookup_revalidate(dir, dentry, flags);
+ if (parent != READ_ONCE(dentry->d_parent))
+ return -ECHILD;
+- } else
++ } else {
++ parent = dget_parent(dentry);
++ ret = nfs_do_lookup_revalidate(d_inode(parent), dentry, flags);
+ dput(parent);
+- dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is valid\n",
+- __func__, dentry);
+- return 1;
+-out_zap_parent:
+- nfs_zap_caches(dir);
+- out_bad:
+- WARN_ON(flags & LOOKUP_RCU);
+- nfs_free_fattr(fattr);
+- nfs_free_fhandle(fhandle);
+- nfs4_label_free(label);
+- nfs_mark_for_revalidate(dir);
+- if (inode && S_ISDIR(inode->i_mode)) {
+- /* Purge readdir caches. */
+- nfs_zap_caches(inode);
+- /*
+- * We can't d_drop the root of a disconnected tree:
+- * its d_hash is on the s_anon list and d_drop() would hide
+- * it from shrink_dcache_for_unmount(), leading to busy
+- * inodes on unmount and further oopses.
+- */
+- if (IS_ROOT(dentry))
+- goto out_valid;
+ }
+- dput(parent);
+- dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is invalid\n",
+- __func__, dentry);
+- return 0;
+-out_error:
+- WARN_ON(flags & LOOKUP_RCU);
+- nfs_free_fattr(fattr);
+- nfs_free_fhandle(fhandle);
+- nfs4_label_free(label);
+- dput(parent);
+- dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) lookup returned error %d\n",
+- __func__, dentry, error);
+- return error;
++ return ret;
+ }
+
+ /*
diff --git a/patches.suse/NFS-Remove-redundant-semicolon.patch b/patches.suse/NFS-Remove-redundant-semicolon.patch
new file mode 100644
index 0000000000..106bbb97f5
--- /dev/null
+++ b/patches.suse/NFS-Remove-redundant-semicolon.patch
@@ -0,0 +1,29 @@
+From: zhangliguang <zhangliguang@linux.alibaba.com>
+Date: Tue, 12 Feb 2019 09:38:33 +0800
+Subject: [PATCH] NFS: Remove redundant semicolon
+Git-commit: 42f72cf368c502c435af4e206e26d651cfb7d9ad
+Patch-mainline: v5.1
+References: git-fixes
+
+This removes redundant semicolon for ending code.
+
+Fixes: c7944ebb9ce9 ("NFSv4: Fix lookup revalidate of regular files")
+Signed-off-by: Liguang Zhang <zhangliguang@linux.alibaba.com>
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Acked-by: NeilBrown <neilb@suse.com>
+
+---
+ fs/nfs/dir.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/nfs/dir.c
++++ b/fs/nfs/dir.c
+@@ -1633,7 +1633,7 @@ nfs4_do_lookup_revalidate(struct inode *
+ reval_dentry:
+ if (flags & LOOKUP_RCU)
+ return -ECHILD;
+- return nfs_lookup_revalidate_dentry(dir, dentry, inode);;
++ return nfs_lookup_revalidate_dentry(dir, dentry, inode);
+
+ full_reval:
+ return nfs_do_lookup_revalidate(dir, dentry, flags);
diff --git a/patches.suse/NFS4-Fix-v4.0-client-state-corruption-when-mount.patch b/patches.suse/NFS4-Fix-v4.0-client-state-corruption-when-mount.patch
new file mode 100644
index 0000000000..bb622e9f65
--- /dev/null
+++ b/patches.suse/NFS4-Fix-v4.0-client-state-corruption-when-mount.patch
@@ -0,0 +1,45 @@
+From: ZhangXiaoxu <zhangxiaoxu5@huawei.com>
+Date: Mon, 6 May 2019 11:57:03 +0800
+Subject: [PATCH] NFS4: Fix v4.0 client state corruption when mount
+Git-commit: f02f3755dbd14fb935d24b14650fff9ba92243b8
+Patch-mainline: v5.2
+References: git-fixes
+
+stat command with soft mount never return after server is stopped.
+
+When alloc a new client, the state of the client will be set to
+NFS4CLNT_LEASE_EXPIRED.
+
+When the server is stopped, the state manager will work, and accord
+the state to recover. But the state is NFS4CLNT_LEASE_EXPIRED, it
+will drain the slot table and lead other task to wait queue, until
+the client recovered. Then the stat command is hung.
+
+When discover server trunking, the client will renew the lease,
+but check the client state, it lead the client state corruption.
+
+So, we need to call state manager to recover it when detect server
+ip trunking.
+
+Signed-off-by: ZhangXiaoxu <zhangxiaoxu5@huawei.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
+Acked-by: NeilBrown <neilb@suse.com>
+
+---
+ fs/nfs/nfs4state.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/fs/nfs/nfs4state.c
++++ b/fs/nfs/nfs4state.c
+@@ -143,6 +143,10 @@ int nfs40_discover_server_trunking(struc
+ /* Sustain the lease, even if it's empty. If the clientid4
+ * goes stale it's of no use for trunking discovery. */
+ nfs4_schedule_state_renewal(*result);
++
++ /* If the client state need to recover, do it. */
++ if (clp->cl_state)
++ nfs4_schedule_state_manager(clp);
+ }
+ out:
+ return status;
diff --git a/patches.suse/NFSv4-Check-the-return-value-of-update_open_stateid.patch b/patches.suse/NFSv4-Check-the-return-value-of-update_open_stateid.patch
new file mode 100644
index 0000000000..1c26a33187
--- /dev/null
+++ b/patches.suse/NFSv4-Check-the-return-value-of-update_open_stateid.patch
@@ -0,0 +1,49 @@
+From: Trond Myklebust <trond.myklebust@hammerspace.com>
+Date: Mon, 29 Jul 2019 18:25:00 +0100
+Subject: [PATCH] NFSv4: Check the return value of update_open_stateid()
+Git-commit: e3c8dc761ead061da2220ee8f8132f729ac3ddfe
+Patch-mainline: v5.3
+References: git-fixes
+
+Ensure that we always check the return value of update_open_stateid()
+so that we can retry if the update of local state failed. This fixes
+infinite looping on state recovery.
+
+Fixes: e23008ec81ef3 ("NFSv4 reduce attribute requests for open reclaim")
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Cc: stable@vger.kernel.org # v3.7+
+Acked-by: NeilBrown <neilb@suse.com>
+
+---
+ fs/nfs/nfs4proc.c | 13 ++++++++-----
+ 1 file changed, 8 insertions(+), 5 deletions(-)
+
+--- a/fs/nfs/nfs4proc.c
++++ b/fs/nfs/nfs4proc.c
+@@ -1814,8 +1814,9 @@ _nfs4_opendata_reclaim_to_nfs4_state(str
+ if (data->o_res.delegation_type != 0)
+ nfs4_opendata_check_deleg(data, state);
+ update:
+- update_open_stateid(state, &data->o_res.stateid, NULL,
+- data->o_arg.fmode);
++ if (!update_open_stateid(state, &data->o_res.stateid,
++ NULL, data->o_arg.fmode))
++ return ERR_PTR(-EAGAIN);
+ atomic_inc(&state->count);
+
+ return state;
+@@ -1847,9 +1848,11 @@ _nfs4_opendata_to_nfs4_state(struct nfs4
+ goto err_put_inode;
+ if (data->o_res.delegation_type != 0)
+ nfs4_opendata_check_deleg(data, state);
+- update_open_stateid(state, &data->o_res.stateid, NULL,
+- data->o_arg.fmode);
+- iput(inode);
++ if (!update_open_stateid(state, &data->o_res.stateid,
++ NULL, data->o_arg.fmode)) {
++ nfs4_put_open_state(state);
++ state = ERR_PTR(-EAGAIN);
++ }
+ out:
+ nfs_release_seqid(data->o_arg.seqid);
+ return state;
diff --git a/patches.suse/NFSv4-Fix-OPEN-CLOSE-race.patch b/patches.suse/NFSv4-Fix-OPEN-CLOSE-race.patch
new file mode 100644
index 0000000000..7765e28ce3
--- /dev/null
+++ b/patches.suse/NFSv4-Fix-OPEN-CLOSE-race.patch
@@ -0,0 +1,335 @@
+From: Trond Myklebust <trond.myklebust@primarydata.com>
+Date: Mon, 6 Nov 2017 15:28:01 -0500
+Subject: [PATCH] NFSv4: Fix OPEN / CLOSE race
+Git-commit: c9399f21c215453b414702758b8c4b7d66605eac
+Patch-mainline: v4.15
+References: git-fixes
+
+Ben Coddington has noted the following race between OPEN and CLOSE
+on a single client.
+
+Process 1 Process 2 Server
+========= ========= ======
+
+1) OPEN file
+2) OPEN file
+3) Process OPEN (1) seqid=1
+4) Process OPEN (2) seqid=2
+5) Reply OPEN (2)
+6) Receive reply (2)
+7) new stateid, seqid=2
+
+8) CLOSE file, using
+ stateid w/ seqid=2
+9) Reply OPEN (1)
+10( Process CLOSE (8)
+11) Reply CLOSE (8)
+12) Forget stateid
+ file closed
+
+13) Receive reply (7)
+14) Forget stateid
+ file closed.
+
+15) Receive reply (1).
+16) New stateid seqid=1
+ is really the same
+ stateid that was
+ closed.
+
+Iow: the reply to the first OPEN is delayed. Since "Process 2" does
+not wait before closing the file, and it does not cache the closed
+stateid, then when the delayed reply is finally received, it is treated
+as setting up a new stateid by the client.
+
+The fix is to ensure that the client processes the OPEN and CLOSE calls
+in the same order in which the server processed them.
+
+This commit ensures that we examine the seqid of the stateid
+returned by OPEN. If it is a new stateid, we assume the seqid
+must be equal to the value 1, and that each state transition
+increments the seqid value by 1 (See RFC7530, Section 9.1.4.2,
+and RFC5661, Section 8.2.2).
+
+If the tracker sees that an OPEN returns with a seqid that is greater
+than the cached seqid + 1, then it bumps a flag to ensure that the
+caller waits for the RPCs carrying the missing seqids to complete.
+
+Note that there can still be pathologies where the server crashes before
+it can even send us the missing seqids. Since the OPEN call is still
+holding a slot when it waits here, that could cause the recovery to
+stall forever. To avoid that, we time out after a 5 second wait.
+
+Reported-by: Benjamin Coddington <bcodding@redhat.com>
+Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
+Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
+Acked-by: NeilBrown <neilb@suse.com>
+
+---
+ fs/nfs/nfs4_fs.h | 3 +
+ fs/nfs/nfs4proc.c | 154 ++++++++++++++++++++++++++++++++++++++++-------------
+ fs/nfs/nfs4state.c | 1
+ 3 files changed, 123 insertions(+), 35 deletions(-)
+
+--- a/fs/nfs/nfs4_fs.h
++++ b/fs/nfs/nfs4_fs.h
+@@ -161,6 +161,7 @@ enum {
+ NFS_STATE_POSIX_LOCKS, /* Posix locks are supported */
+ NFS_STATE_RECOVERY_FAILED, /* OPEN stateid state recovery failed */
+ NFS_STATE_MAY_NOTIFY_LOCK, /* server may CB_NOTIFY_LOCK */
++ NFS_STATE_CHANGE_WAIT, /* A state changing operation is outstanding */
+ };
+
+ struct nfs4_state {
+@@ -184,6 +185,8 @@ struct nfs4_state {
+ unsigned int n_rdwr; /* Number of read/write references */
+ fmode_t state; /* State on the server (R,W, or RW) */
+ atomic_t count;
++
++ wait_queue_head_t waitq;
+ };
+
+
+--- a/fs/nfs/nfs4proc.c
++++ b/fs/nfs/nfs4proc.c
+@@ -1396,6 +1396,25 @@ static bool nfs_open_stateid_recover_ope
+ }
+ #endif /* CONFIG_NFS_V4_1 */
+
++static void nfs_state_log_update_open_stateid(struct nfs4_state *state)
++{
++ if (test_and_clear_bit(NFS_STATE_CHANGE_WAIT, &state->flags))
++ wake_up_all(&state->waitq);
++}
++
++static void nfs_state_log_out_of_order_open_stateid(struct nfs4_state *state,
++ const nfs4_stateid *stateid)
++{
++ u32 state_seqid = be32_to_cpu(state->open_stateid.seqid);
++ u32 stateid_seqid = be32_to_cpu(stateid->seqid);
++
++ if (stateid_seqid == state_seqid + 1U ||
++ (stateid_seqid == 1U && state_seqid == 0xffffffffU))
++ nfs_state_log_update_open_stateid(state);
++ else
++ set_bit(NFS_STATE_CHANGE_WAIT, &state->flags);
++}
++
+ static void nfs_test_and_clear_all_open_stateid(struct nfs4_state *state)
+ {
+ struct nfs_client *clp = state->owner->so_server->nfs_client;
+@@ -1411,18 +1430,32 @@ static void nfs_test_and_clear_all_open_
+ nfs4_state_mark_reclaim_nograce(clp, state);
+ }
+
++/*
++ * Check for whether or not the caller may update the open stateid
++ * to the value passed in by stateid.
++ *
++ * Note: This function relies heavily on the server implementing
++ * RFC7530 Section 9.1.4.2, and RFC5661 Section 8.2.2
++ * correctly.
++ * i.e. The stateid seqids have to be initialised to 1, and
++ * are then incremented on every state transition.
++ */
+ static bool nfs_need_update_open_stateid(struct nfs4_state *state,
+- const nfs4_stateid *stateid, nfs4_stateid *freeme)
++ const nfs4_stateid *stateid)
+ {
+- if (test_and_set_bit(NFS_OPEN_STATE, &state->flags) == 0)
+- return true;
+- if (!nfs4_stateid_match_other(stateid, &state->open_stateid)) {
+- nfs4_stateid_copy(freeme, &state->open_stateid);
+- nfs_test_and_clear_all_open_stateid(state);
++ if (test_bit(NFS_OPEN_STATE, &state->flags) == 0 ||
++ !nfs4_stateid_match_other(stateid, &state->open_stateid)) {
++ if (stateid->seqid == cpu_to_be32(1))
++ nfs_state_log_update_open_stateid(state);
++ else
++ set_bit(NFS_STATE_CHANGE_WAIT, &state->flags);
+ return true;
+ }
+- if (nfs4_stateid_is_newer(stateid, &state->open_stateid))
++
++ if (nfs4_stateid_is_newer(stateid, &state->open_stateid)) {
++ nfs_state_log_out_of_order_open_stateid(state, stateid);
+ return true;
++ }
+ return false;
+ }
+
+@@ -1461,11 +1494,13 @@ static void nfs_clear_open_stateid_locke
+ if (nfs4_stateid_match_other(stateid, &state->open_stateid) &&
+ !nfs4_stateid_is_newer(stateid, &state->open_stateid)) {
+ nfs_resync_open_stateid_locked(state);
+- return;
++ goto out;
+ }
+ if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
+ nfs4_stateid_copy(&state->stateid, stateid);
+ nfs4_stateid_copy(&state->open_stateid, stateid);
++out:
++ nfs_state_log_update_open_stateid(state);
+ }
+
+ static void nfs_clear_open_stateid(struct nfs4_state *state,
+@@ -1482,29 +1517,57 @@ static void nfs_clear_open_stateid(struc
+ }
+
+ static void nfs_set_open_stateid_locked(struct nfs4_state *state,
+- const nfs4_stateid *stateid, fmode_t fmode,
+- nfs4_stateid *freeme)
++ const nfs4_stateid *stateid, nfs4_stateid *freeme)
+ {
+- switch (fmode) {
+- case FMODE_READ:
+- set_bit(NFS_O_RDONLY_STATE, &state->flags);
++ DEFINE_WAIT(wait);
++ int status = 0;
++ for (;;) {
++
++ if (!nfs_need_update_open_stateid(state, stateid))
++ return;
++ if (!test_bit(NFS_STATE_CHANGE_WAIT, &state->flags))
++ break;
++ if (status)
+ break;
+- case FMODE_WRITE:
+- set_bit(NFS_O_WRONLY_STATE, &state->flags);
++ /* Rely on seqids for serialisation with NFSv4.0 */
++ if (!nfs4_has_session(NFS_SERVER(state->inode)->nfs_client))
+ break;
+- case FMODE_READ|FMODE_WRITE:
+- set_bit(NFS_O_RDWR_STATE, &state->flags);
++
++ prepare_to_wait(&state->waitq, &wait, TASK_KILLABLE);
++ /*
++ * Ensure we process the state changes in the same order
++ * in which the server processed them by delaying the
++ * update of the stateid until we are in sequence.
++ */
++ write_sequnlock(&state->seqlock);
++ spin_unlock(&state->owner->so_lock);
++ rcu_read_unlock();
++ if (!signal_pending(current)) {
++ if (schedule_timeout(5*HZ) == 0)
++ status = -EAGAIN;
++ else
++ status = 0;
++ } else
++ status = -EINTR;
++ finish_wait(&state->waitq, &wait);
++ rcu_read_lock();
++ spin_lock(&state->owner->so_lock);
++ write_seqlock(&state->seqlock);
+ }
+- if (!nfs_need_update_open_stateid(state, stateid, freeme))
+- return;
++
++ if (!nfs4_stateid_match_other(stateid, &state->open_stateid)) {
++ nfs4_stateid_copy(freeme, &state->open_stateid);
++ nfs_test_and_clear_all_open_stateid(state);
++ }
++
+ if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
+ nfs4_stateid_copy(&state->stateid, stateid);
+ nfs4_stateid_copy(&state->open_stateid, stateid);
++ nfs_state_log_update_open_stateid(state);
+ }
+
+-static void __update_open_stateid(struct nfs4_state *state,
++static void nfs_state_set_open_stateid(struct nfs4_state *state,
+ const nfs4_stateid *open_stateid,
+- const nfs4_stateid *deleg_stateid,
+ fmode_t fmode,
+ nfs4_stateid *freeme)
+ {
+@@ -1512,17 +1575,34 @@ static void __update_open_stateid(struct
+ * Protect the call to nfs4_state_set_mode_locked and
+ * serialise the stateid update
+ */
+- spin_lock(&state->owner->so_lock);
+ write_seqlock(&state->seqlock);
+- if (deleg_stateid != NULL) {
+- nfs4_stateid_copy(&state->stateid, deleg_stateid);
+- set_bit(NFS_DELEGATED_STATE, &state->flags);
++ nfs_set_open_stateid_locked(state, open_stateid, freeme);
++ switch (fmode) {
++ case FMODE_READ:
++ set_bit(NFS_O_RDONLY_STATE, &state->flags);
++ break;
++ case FMODE_WRITE:
++ set_bit(NFS_O_WRONLY_STATE, &state->flags);
++ break;
++ case FMODE_READ|FMODE_WRITE:
++ set_bit(NFS_O_RDWR_STATE, &state->flags);
+ }
+- if (open_stateid != NULL)
+- nfs_set_open_stateid_locked(state, open_stateid, fmode, freeme);
++ set_bit(NFS_OPEN_STATE, &state->flags);
++ write_sequnlock(&state->seqlock);
++}
++
++static void nfs_state_set_delegation(struct nfs4_state *state,
++ const nfs4_stateid *deleg_stateid,
++ fmode_t fmode)
++{
++ /*
++ * Protect the call to nfs4_state_set_mode_locked and
++ * serialise the stateid update
++ */
++ write_seqlock(&state->seqlock);
++ nfs4_stateid_copy(&state->stateid, deleg_stateid);
++ set_bit(NFS_DELEGATED_STATE, &state->flags);
+ write_sequnlock(&state->seqlock);
+- update_open_stateflags(state, fmode);
+- spin_unlock(&state->owner->so_lock);
+ }
+
+ static int update_open_stateid(struct nfs4_state *state,
+@@ -1540,6 +1620,12 @@ static int update_open_stateid(struct nf
+ fmode &= (FMODE_READ|FMODE_WRITE);
+
+ rcu_read_lock();
++ spin_lock(&state->owner->so_lock);
++ if (open_stateid != NULL) {
++ nfs_state_set_open_stateid(state, open_stateid, fmode, &freeme);
++ ret = 1;
++ }
++
+ deleg_cur = rcu_dereference(nfsi->delegation);
+ if (deleg_cur == NULL)
+ goto no_delegation;
+@@ -1556,18 +1642,16 @@ static int update_open_stateid(struct nf
+ goto no_delegation_unlock;
+
+ nfs_mark_delegation_referenced(deleg_cur);
+- __update_open_stateid(state, open_stateid, &deleg_cur->stateid,
+- fmode, &freeme);
++ nfs_state_set_delegation(state, &deleg_cur->stateid, fmode);
+ ret = 1;
+ no_delegation_unlock:
+ spin_unlock(&deleg_cur->lock);
+ no_delegation:
++ if (ret)
++ update_open_stateflags(state, fmode);
++ spin_unlock(&state->owner->so_lock);
+ rcu_read_unlock();
+
+- if (!ret && open_stateid != NULL) {
+- __update_open_stateid(state, open_stateid, NULL, fmode, &freeme);
+- ret = 1;
+- }
+ if (test_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags))
+ nfs4_schedule_state_manager(clp);
+ if (freeme.type != 0)
+--- a/fs/nfs/nfs4state.c
++++ b/fs/nfs/nfs4state.c
+@@ -639,6 +639,7 @@ nfs4_alloc_open_state(void)
+ INIT_LIST_HEAD(&state->lock_states);
+ spin_lock_init(&state->state_lock);
+ seqlock_init(&state->seqlock);
++ init_waitqueue_head(&state->waitq);
+ return state;
+ }
+
diff --git a/patches.suse/NFSv4-Fix-a-potential-sleep-while-atomic-in-nfs4_do_.patch b/patches.suse/NFSv4-Fix-a-potential-sleep-while-atomic-in-nfs4_do_.patch
new file mode 100644
index 0000000000..44be362616
--- /dev/null
+++ b/patches.suse/NFSv4-Fix-a-potential-sleep-while-atomic-in-nfs4_do_.patch
@@ -0,0 +1,135 @@
+From: Trond Myklebust <trond.myklebust@hammerspace.com>
+Date: Sat, 3 Aug 2019 10:11:27 -0400
+Subject: [PATCH] NFSv4: Fix a potential sleep while atomic in
+ nfs4_do_reclaim()
+Git-commit: c77e22834ae9a11891cb613bd9a551be1b94f2bc
+Patch-mainline: v5.3
+References: git-fixes
+
+John Hubbard reports seeing the following stack trace:
+
+nfs4_do_reclaim
+ rcu_read_lock /* we are now in_atomic() and must not sleep */
+ nfs4_purge_state_owners
+ nfs4_free_state_owner
+ nfs4_destroy_seqid_counter
+ rpc_destroy_wait_queue
+ cancel_delayed_work_sync
+ __cancel_work_timer
+ __flush_work
+ start_flush_work
+ might_sleep:
+ (kernel/workqueue.c:2975: BUG)
+
+The solution is to separate out the freeing of the state owners
+from nfs4_purge_state_owners(), and perform that outside the atomic
+context.
+
+Reported-by: John Hubbard <jhubbard@nvidia.com>
+Fixes: 0aaaf5c424c7f ("NFS: Cache state owners after files are closed")
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Acked-by: NeilBrown <neilb@suse.com>
+
+---
+ fs/nfs/nfs4_fs.h | 3 ++-
+ fs/nfs/nfs4client.c | 5 ++++-
+ fs/nfs/nfs4state.c | 27 ++++++++++++++++++++++-----
+ 3 files changed, 28 insertions(+), 7 deletions(-)
+
+--- a/fs/nfs/nfs4_fs.h
++++ b/fs/nfs/nfs4_fs.h
+@@ -435,7 +435,8 @@ static inline void nfs4_schedule_session
+
+ extern struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *, gfp_t);
+ extern void nfs4_put_state_owner(struct nfs4_state_owner *);
+-extern void nfs4_purge_state_owners(struct nfs_server *);
++extern void nfs4_purge_state_owners(struct nfs_server *, struct list_head *);
++extern void nfs4_free_state_owners(struct list_head *head);
+ extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *);
+ extern void nfs4_put_open_state(struct nfs4_state *);
+ extern void nfs4_close_state(struct nfs4_state *, fmode_t);
+--- a/fs/nfs/nfs4client.c
++++ b/fs/nfs/nfs4client.c
+@@ -738,9 +738,12 @@ out:
+
+ static void nfs4_destroy_server(struct nfs_server *server)
+ {
++ LIST_HEAD(freeme);
++
+ nfs_server_return_all_delegations(server);
+ unset_pnfs_layoutdriver(server);
+- nfs4_purge_state_owners(server);
++ nfs4_purge_state_owners(server, &freeme);
++ nfs4_free_state_owners(&freeme);
+ }
+
+ /*
+--- a/fs/nfs/nfs4state.c
++++ b/fs/nfs/nfs4state.c
+@@ -608,24 +608,39 @@ void nfs4_put_state_owner(struct nfs4_st
+ /**
+ * nfs4_purge_state_owners - Release all cached state owners
+ * @server: nfs_server with cached state owners to release
++ * @head: resulting list of state owners
+ *
+ * Called at umount time. Remaining state owners will be on
+ * the LRU with ref count of zero.
++ * Note that the state owners are not freed, but are added
++ * to the list @head, which can later be used as an argument
++ * to nfs4_free_state_owners.
+ */
+-void nfs4_purge_state_owners(struct nfs_server *server)
++void nfs4_purge_state_owners(struct nfs_server *server, struct list_head *head)
+ {
+ struct nfs_client *clp = server->nfs_client;
+ struct nfs4_state_owner *sp, *tmp;
+- LIST_HEAD(doomed);
+
+ spin_lock(&clp->cl_lock);
+ list_for_each_entry_safe(sp, tmp, &server->state_owners_lru, so_lru) {
+- list_move(&sp->so_lru, &doomed);
++ list_move(&sp->so_lru, head);
+ nfs4_remove_state_owner_locked(sp);
+ }
+ spin_unlock(&clp->cl_lock);
++}
++
++/**
++ * nfs4_purge_state_owners - Release all cached state owners
++ * @head: resulting list of state owners
++ *
++ * Frees a list of state owners that was generated by
++ * nfs4_purge_state_owners
++ */
++void nfs4_free_state_owners(struct list_head *head)
++{
++ struct nfs4_state_owner *sp, *tmp;
+
+- list_for_each_entry_safe(sp, tmp, &doomed, so_lru) {
++ list_for_each_entry_safe(sp, tmp, head, so_lru) {
+ list_del(&sp->so_lru);
+ nfs4_free_state_owner(sp);
+ }
+@@ -1777,12 +1792,13 @@ static int nfs4_do_reclaim(struct nfs_cl
+ struct nfs4_state_owner *sp;
+ struct nfs_server *server;
+ struct rb_node *pos;
++ LIST_HEAD(freeme);
+ int status = 0;
+
+ restart:
+ rcu_read_lock();
+ list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
+- nfs4_purge_state_owners(server);
++ nfs4_purge_state_owners(server, &freeme);
+ spin_lock(&clp->cl_lock);
+ for (pos = rb_first(&server->state_owners);
+ pos != NULL;
+@@ -1811,6 +1827,7 @@ restart:
+ spin_unlock(&clp->cl_lock);
+ }
+ rcu_read_unlock();
++ nfs4_free_state_owners(&freeme);
+ return 0;
+ }
+
diff --git a/patches.suse/NFSv4-Fix-an-Oops-in-nfs4_do_setattr.patch b/patches.suse/NFSv4-Fix-an-Oops-in-nfs4_do_setattr.patch
new file mode 100644
index 0000000000..a3b72c2013
--- /dev/null
+++ b/patches.suse/NFSv4-Fix-an-Oops-in-nfs4_do_setattr.patch
@@ -0,0 +1,32 @@
+From: Trond Myklebust <trond.myklebust@hammerspace.com>
+Date: Sat, 3 Aug 2019 10:28:18 -0400
+Subject: [PATCH] NFSv4: Fix an Oops in nfs4_do_setattr
+Git-commit: 09a54f0ebfe263bc27c90bbd80187b9a93283887
+Patch-mainline: v5.3
+References: git-fixes
+
+If the user specifies an open mode of 3, then we don't have a NFSv4 state
+attached to the context, and so we Oops when we try to dereference it.
+
+Reported-by: Olga Kornievskaia <aglo@umich.edu>
+Fixes: 29b59f9416937 ("NFSv4: change nfs4_do_setattr to take...")
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Cc: stable@vger.kernel.org # v4.10: 991eedb1371dc: NFSv4: Only pass the...
+Cc: stable@vger.kernel.org # v4.10+
+Acked-by: NeilBrown <neilb@suse.com>
+
+---
+ fs/nfs/nfs4proc.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/nfs/nfs4proc.c
++++ b/fs/nfs/nfs4proc.c
+@@ -3027,7 +3027,7 @@ static int _nfs4_do_setattr(struct inode
+
+ if (nfs4_copy_delegation_stateid(inode, FMODE_WRITE, &arg->stateid, &delegation_cred)) {
+ /* Use that stateid */
+- } else if (ctx != NULL) {
++ } else if (ctx != NULL && ctx->state) {
+ struct nfs_lock_context *l_ctx;
+ if (!nfs4_valid_open_stateid(ctx->state))
+ return -EBADF;
diff --git a/patches.suse/NFSv4-Fix-delegation-state-recovery.patch b/patches.suse/NFSv4-Fix-delegation-state-recovery.patch
new file mode 100644
index 0000000000..c12af180a8
--- /dev/null
+++ b/patches.suse/NFSv4-Fix-delegation-state-recovery.patch
@@ -0,0 +1,110 @@
+From: Trond Myklebust <trond.myklebust@hammerspace.com>
+Date: Fri, 19 Jul 2019 14:08:37 -0400
+Subject: [PATCH] NFSv4: Fix delegation state recovery
+Git-commit: 5eb8d18ca0e001c6055da2b7f30d8f6dca23a44f
+Patch-mainline: v5.3
+References: git-fixes
+
+Once we clear the NFS_DELEGATED_STATE flag, we're telling
+nfs_delegation_claim_opens() that we're done recovering all open state
+for that stateid, so we really need to ensure that we test for all
+open modes that are currently cached and recover them before exiting
+nfs4_open_delegation_recall().
+
+Fixes: 24311f884189d ("NFSv4: Recovery of recalled read delegations...")
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Cc: stable@vger.kernel.org # v4.3+
+Acked-by: NeilBrown <neilb@suse.com>
+
+---
+ fs/nfs/delegation.c | 2 +-
+ fs/nfs/delegation.h | 2 +-
+ fs/nfs/nfs4proc.c | 24 ++++++++++++------------
+ 3 files changed, 14 insertions(+), 14 deletions(-)
+
+--- a/fs/nfs/delegation.c
++++ b/fs/nfs/delegation.c
+@@ -152,7 +152,7 @@ again:
+ /* Block nfs4_proc_unlck */
+ mutex_lock(&sp->so_delegreturn_mutex);
+ seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
+- err = nfs4_open_delegation_recall(ctx, state, stateid, type);
++ err = nfs4_open_delegation_recall(ctx, state, stateid);
+ if (!err)
+ err = nfs_delegation_claim_locks(ctx, state, stateid);
+ if (!err && read_seqcount_retry(&sp->so_reclaim_seqcount, seq))
+--- a/fs/nfs/delegation.h
++++ b/fs/nfs/delegation.h
+@@ -59,7 +59,7 @@ void nfs_reap_expired_delegations(struct
+
+ /* NFSv4 delegation-related procedures */
+ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid, int issync);
+-int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid, fmode_t type);
++int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid);
+ int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state, const nfs4_stateid *stateid);
+ bool nfs4_copy_delegation_stateid(struct inode *inode, fmode_t flags, nfs4_stateid *dst, struct rpc_cred **cred);
+
+--- a/fs/nfs/nfs4proc.c
++++ b/fs/nfs/nfs4proc.c
+@@ -2030,12 +2030,10 @@ static int nfs4_handle_delegation_recall
+ case -NFS4ERR_BAD_HIGH_SLOT:
+ case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
+ case -NFS4ERR_DEADSESSION:
+- set_bit(NFS_DELEGATED_STATE, &state->flags);
+ nfs4_schedule_session_recovery(server->nfs_client->cl_session, err);
+ return -EAGAIN;
+ case -NFS4ERR_STALE_CLIENTID:
+ case -NFS4ERR_STALE_STATEID:
+- set_bit(NFS_DELEGATED_STATE, &state->flags);
+ /* Don't recall a delegation if it was lost */
+ nfs4_schedule_lease_recovery(server->nfs_client);
+ return -EAGAIN;
+@@ -2056,7 +2054,6 @@ static int nfs4_handle_delegation_recall
+ return -EAGAIN;
+ case -NFS4ERR_DELAY:
+ case -NFS4ERR_GRACE:
+- set_bit(NFS_DELEGATED_STATE, &state->flags);
+ ssleep(1);
+ return -EAGAIN;
+ case -ENOMEM:
+@@ -2072,8 +2069,7 @@ static int nfs4_handle_delegation_recall
+ }
+
+ int nfs4_open_delegation_recall(struct nfs_open_context *ctx,
+- struct nfs4_state *state, const nfs4_stateid *stateid,
+- fmode_t type)
++ struct nfs4_state *state, const nfs4_stateid *stateid)
+ {
+ struct nfs_server *server = NFS_SERVER(state->inode);
+ struct nfs4_opendata *opendata;
+@@ -2084,19 +2080,23 @@ int nfs4_open_delegation_recall(struct n
+ if (IS_ERR(opendata))
+ return PTR_ERR(opendata);
+ nfs4_stateid_copy(&opendata->o_arg.u.delegation, stateid);
+- nfs_state_clear_delegation(state);
+- switch (type & (FMODE_READ|FMODE_WRITE)) {
+- case FMODE_READ|FMODE_WRITE:
+- case FMODE_WRITE:
++ if (!test_bit(NFS_O_RDWR_STATE, &state->flags)) {
+ err = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE);
+ if (err)
+- break;
++ goto out;
++ }
++ if (!test_bit(NFS_O_WRONLY_STATE, &state->flags)) {
+ err = nfs4_open_recover_helper(opendata, FMODE_WRITE);
+ if (err)
+- break;
+- case FMODE_READ:
++ goto out;
++ }
++ if (!test_bit(NFS_O_RDONLY_STATE, &state->flags)) {
+ err = nfs4_open_recover_helper(opendata, FMODE_READ);
++ if (err)
++ goto out;
+ }
++ nfs_state_clear_delegation(state);
++out:
+ nfs4_opendata_put(opendata);
+ return nfs4_handle_delegation_recall_error(server, state, stateid, NULL, err);
+ }
diff --git a/patches.suse/NFSv4-Fix-lookup-revalidate-of-regular-files.patch b/patches.suse/NFSv4-Fix-lookup-revalidate-of-regular-files.patch
new file mode 100644
index 0000000000..8ff8a001e6
--- /dev/null
+++ b/patches.suse/NFSv4-Fix-lookup-revalidate-of-regular-files.patch
@@ -0,0 +1,148 @@
+From: Trond Myklebust <trond.myklebust@hammerspace.com>
+Date: Fri, 28 Sep 2018 12:42:51 -0400
+Subject: [PATCH] NFSv4: Fix lookup revalidate of regular files
+Git-commit: c7944ebb9ce9461079659e9e6ec5baaf73724b3b
+Patch-mainline: v4.20
+References: git-fixes
+
+If we're revalidating an existing dentry in order to open a file, we need
+to ensure that we check the directory has not changed before we optimise
+away the lookup.
+
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Acked-by: NeilBrown <neilb@suse.com>
+
+---
+ fs/nfs/dir.c | 79 +++++++++++++++++++++++++++++------------------------------
+ 1 file changed, 39 insertions(+), 40 deletions(-)
+
+--- a/fs/nfs/dir.c
++++ b/fs/nfs/dir.c
+@@ -1218,7 +1218,8 @@ out_bad:
+ }
+
+ static int
+-nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
++__nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags,
++ int (*reval)(struct inode *, struct dentry *, unsigned int))
+ {
+ struct dentry *parent;
+ struct inode *dir;
+@@ -1229,17 +1230,22 @@ nfs_lookup_revalidate(struct dentry *den
+ dir = d_inode_rcu(parent);
+ if (!dir)
+ return -ECHILD;
+- ret = nfs_do_lookup_revalidate(dir, dentry, flags);
++ ret = reval(dir, dentry, flags);
+ if (parent != READ_ONCE(dentry->d_parent))
+ return -ECHILD;
+ } else {
+ parent = dget_parent(dentry);
+- ret = nfs_do_lookup_revalidate(d_inode(parent), dentry, flags);
++ ret = reval(d_inode(parent), dentry, flags);
+ dput(parent);
+ }
+ return ret;
+ }
+
++static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
++{
++ return __nfs_lookup_revalidate(dentry, flags, nfs_do_lookup_revalidate);
++}
++
+ /*
+ * A weaker form of d_revalidate for revalidating just the d_inode(dentry)
+ * when we don't really care about the dentry name. This is called when a
+@@ -1588,62 +1594,55 @@ no_open:
+ }
+ EXPORT_SYMBOL_GPL(nfs_atomic_open);
+
+-static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags)
++static int
++nfs4_do_lookup_revalidate(struct inode *dir, struct dentry *dentry,
++ unsigned int flags)
+ {
+ struct inode *inode;
+- int ret = 0;
+
+ if (!(flags & LOOKUP_OPEN) || (flags & LOOKUP_DIRECTORY))
+- goto no_open;
++ goto full_reval;
+ if (d_mountpoint(dentry))
+- goto no_open;
+- if (NFS_SB(dentry->d_sb)->caps & NFS_CAP_ATOMIC_OPEN_V1)
+- goto no_open;
++ goto full_reval;
+
+ inode = d_inode(dentry);
+
+ /* We can't create new files in nfs_open_revalidate(), so we
+ * optimize away revalidation of negative dentries.
+ */
+- if (inode == NULL) {
+- struct dentry *parent;
+- struct inode *dir;
+-
+- if (flags & LOOKUP_RCU) {
+- parent = READ_ONCE(dentry->d_parent);
+- dir = d_inode_rcu(parent);
+- if (!dir)
+- return -ECHILD;
+- } else {
+- parent = dget_parent(dentry);
+- dir = d_inode(parent);
+- }
+- if (!nfs_neg_need_reval(dir, dentry, flags))
+- ret = 1;
+- else if (flags & LOOKUP_RCU)
+- ret = -ECHILD;
+- if (!(flags & LOOKUP_RCU))
+- dput(parent);
+- else if (parent != READ_ONCE(dentry->d_parent))
+- return -ECHILD;
+- goto out;
+- }
++ if (inode == NULL)
++ goto full_reval;
++
++ if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ))
++ return nfs_lookup_revalidate_delegated(dir, dentry, inode);
+
+ /* NFS only supports OPEN on regular files */
+ if (!S_ISREG(inode->i_mode))
+- goto no_open;
++ goto full_reval;
++
+ /* We cannot do exclusive creation on a positive dentry */
+- if (flags & LOOKUP_EXCL)
+- goto no_open;
++ if (flags & (LOOKUP_EXCL | LOOKUP_REVAL))
++ goto reval_dentry;
++
++ /* Check if the directory changed */
++ if (!nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU))
++ goto reval_dentry;
+
+ /* Let f_op->open() actually open (and revalidate) the file */
+- ret = 1;
++ return 1;
++reval_dentry:
++ if (flags & LOOKUP_RCU)
++ return -ECHILD;
++ return nfs_lookup_revalidate_dentry(dir, dentry, inode);;
+
+-out:
+- return ret;
++full_reval:
++ return nfs_do_lookup_revalidate(dir, dentry, flags);
++}
+
+-no_open:
+- return nfs_lookup_revalidate(dentry, flags);
++static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags)
++{
++ return __nfs_lookup_revalidate(dentry, flags,
++ nfs4_do_lookup_revalidate);
+ }
+
+ #endif /* CONFIG_NFSV4 */
diff --git a/patches.suse/NFSv4-Handle-the-special-Linux-file-open-access-mode.patch b/patches.suse/NFSv4-Handle-the-special-Linux-file-open-access-mode.patch
new file mode 100644
index 0000000000..96172c6d7e
--- /dev/null
+++ b/patches.suse/NFSv4-Handle-the-special-Linux-file-open-access-mode.patch
@@ -0,0 +1,47 @@
+From: Trond Myklebust <trond.myklebust@hammerspace.com>
+Date: Thu, 27 Jun 2019 06:41:45 -0400
+Subject: [PATCH] NFSv4: Handle the special Linux file open access mode
+Git-commit: 44942b4e457beda00981f616402a1a791e8c616e
+Patch-mainline: v5.3
+References: git-fixes
+
+According to the open() manpage, Linux reserves the access mode 3
+to mean "check for read and write permission on the file and return
+a file descriptor that can't be used for reading or writing."
+
+Currently, the NFSv4 code will ask the server to open the file,
+and will use an incorrect share access mode of 0. Since it has
+an incorrect share access mode, the client later forgets to send
+a corresponding close, meaning it can leak stateids on the server.
+
+Fixes: ce4ef7c0a8a05 ("NFS: Split out NFS v4 file operations")
+Cc: stable@vger.kernel.org # 3.6+
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Acked-by: NeilBrown <neilb@suse.com>
+
+---
+ fs/nfs/inode.c | 1 +
+ fs/nfs/nfs4file.c | 2 +-
+ 2 files changed, 2 insertions(+), 1 deletion(-)
+
+--- a/fs/nfs/inode.c
++++ b/fs/nfs/inode.c
+@@ -1006,6 +1006,7 @@ int nfs_open(struct inode *inode, struct
+ nfs_fscache_open_file(inode, filp);
+ return 0;
+ }
++EXPORT_SYMBOL_GPL(nfs_open);
+
+ /*
+ * This function is called whenever some part of NFS notices that
+--- a/fs/nfs/nfs4file.c
++++ b/fs/nfs/nfs4file.c
+@@ -49,7 +49,7 @@ nfs4_file_open(struct inode *inode, stru
+ return err;
+
+ if ((openflags & O_ACCMODE) == 3)
+- openflags--;
++ return nfs_open(inode, filp);
+
+ /* We can't create new files here */
+ openflags &= ~(O_CREAT|O_EXCL);
diff --git a/patches.suse/NFSv4-Only-pass-the-delegation-to-setattr-if-we-re-s.patch b/patches.suse/NFSv4-Only-pass-the-delegation-to-setattr-if-we-re-s.patch
new file mode 100644
index 0000000000..b3297c408e
--- /dev/null
+++ b/patches.suse/NFSv4-Only-pass-the-delegation-to-setattr-if-we-re-s.patch
@@ -0,0 +1,57 @@
+From: Trond Myklebust <trond.myklebust@primarydata.com>
+Date: Mon, 9 Apr 2018 11:15:30 -0400
+Subject: [PATCH] NFSv4: Only pass the delegation to setattr if we're sending a
+ truncate
+Git-commit: 991eedb1371dc09b0f9848f59c8898fe63d198c0
+Patch-mainline: v4.18
+References: git-fixes
+
+Even then it isn't really necessary. The reason why we may not want to
+pass in a stateid in other cases is that we cannot use the delegation
+credential.
+
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Acked-by: NeilBrown <neilb@suse.com>
+
+---
+ fs/nfs/nfs4proc.c | 12 +++++++-----
+ 1 file changed, 7 insertions(+), 5 deletions(-)
+
+--- a/fs/nfs/nfs4proc.c
++++ b/fs/nfs/nfs4proc.c
+@@ -3001,7 +3001,6 @@ static int _nfs4_do_setattr(struct inode
+ };
+ struct rpc_cred *delegation_cred = NULL;
+ unsigned long timestamp = jiffies;
+- fmode_t fmode;
+ bool truncate;
+ int status;
+
+@@ -3009,11 +3008,12 @@ static int _nfs4_do_setattr(struct inode
+
+ /* Servers should only apply open mode checks for file size changes */
+ truncate = (arg->iap->ia_valid & ATTR_SIZE) ? true : false;
+- fmode = truncate ? FMODE_WRITE : FMODE_READ;
++ if (!truncate)
++ goto zero_stateid;
+
+- if (nfs4_copy_delegation_stateid(inode, fmode, &arg->stateid, &delegation_cred)) {
++ if (nfs4_copy_delegation_stateid(inode, FMODE_WRITE, &arg->stateid, &delegation_cred)) {
+ /* Use that stateid */
+- } else if (truncate && ctx != NULL) {
++ } else if (ctx != NULL) {
+ struct nfs_lock_context *l_ctx;
+ if (!nfs4_valid_open_stateid(ctx->state))
+ return -EBADF;
+@@ -3025,8 +3025,10 @@ static int _nfs4_do_setattr(struct inode
+ nfs_put_lock_context(l_ctx);
+ if (status == -EIO)
+ return -EBADF;
+- } else
++ } else {
++zero_stateid:
+ nfs4_stateid_copy(&arg->stateid, &zero_stateid);
++ }
+ if (delegation_cred)
+ msg.rpc_cred = delegation_cred;
+
diff --git a/patches.suse/NFSv4-pnfs-Fix-a-page-lock-leak-in-nfs_pageio_resend.patch b/patches.suse/NFSv4-pnfs-Fix-a-page-lock-leak-in-nfs_pageio_resend.patch
new file mode 100644
index 0000000000..4a38c85f6c
--- /dev/null
+++ b/patches.suse/NFSv4-pnfs-Fix-a-page-lock-leak-in-nfs_pageio_resend.patch
@@ -0,0 +1,52 @@
+From: Trond Myklebust <trond.myklebust@hammerspace.com>
+Date: Mon, 12 Aug 2019 15:19:54 -0400
+Subject: [PATCH] NFSv4/pnfs: Fix a page lock leak in nfs_pageio_resend()
+Git-commit: f4340e9314dbfadc48758945f85fc3b16612d06f
+Patch-mainline: v5.3
+References: git-fixes
+
+If the attempt to resend the pages fails, we need to ensure that we
+clean up those pages that were not transmitted.
+
+Fixes: d600ad1f2bdb ("NFS41: pop some layoutget errors to application")
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Cc: stable@vger.kernel.org # v4.5+
+Acked-by: NeilBrown <neilb@suse.com>
+
+---
+ fs/nfs/pagelist.c | 16 +++++++++-------
+ 1 file changed, 9 insertions(+), 7 deletions(-)
+
+--- a/fs/nfs/pagelist.c
++++ b/fs/nfs/pagelist.c
+@@ -1252,21 +1252,23 @@ static void nfs_pageio_complete_mirror(s
+ int nfs_pageio_resend(struct nfs_pageio_descriptor *desc,
+ struct nfs_pgio_header *hdr)
+ {
+- LIST_HEAD(failed);
++ LIST_HEAD(pages);
+
+ desc->pg_io_completion = hdr->io_completion;
+ desc->pg_dreq = hdr->dreq;
+- while (!list_empty(&hdr->pages)) {
+- struct nfs_page *req = nfs_list_entry(hdr->pages.next);
++ list_splice_init(&hdr->pages, &pages);
++ while (!list_empty(&pages)) {
++ struct nfs_page *req = nfs_list_entry(pages.next);
+
+ nfs_list_remove_request(req);
+ if (!nfs_pageio_add_request(desc, req))
+- nfs_list_add_request(req, &failed);
++ break;
+ }
+ nfs_pageio_complete(desc);
+- if (!list_empty(&failed)) {
+- list_move(&failed, &hdr->pages);
+- return desc->pg_error < 0 ? desc->pg_error : -EIO;
++ if (!list_empty(&pages)) {
++ int err = desc->pg_error < 0 ? desc->pg_error : -EIO;
++ hdr->completion_ops->error_cleanup(&pages);
++ return err;
+ }
+ return 0;
+ }
diff --git a/patches.suse/NFSv4.1-Again-fix-a-race-where-CB_NOTIFY_LOCK-fails-.patch b/patches.suse/NFSv4.1-Again-fix-a-race-where-CB_NOTIFY_LOCK-fails-.patch
new file mode 100644
index 0000000000..3fab166d42
--- /dev/null
+++ b/patches.suse/NFSv4.1-Again-fix-a-race-where-CB_NOTIFY_LOCK-fails-.patch
@@ -0,0 +1,89 @@
+From: Yihao Wu <wuyihao@linux.alibaba.com>
+Date: Wed, 22 May 2019 01:57:10 +0800
+Subject: [PATCH] NFSv4.1: Again fix a race where CB_NOTIFY_LOCK fails to wake
+ a waiter
+Git-commit: 52b042ab9948cc367b61f9ca9c18603aa7813c3a
+Patch-mainline: v5.2
+References: git-fixes
+
+Commit b7dbcc0e433f "NFSv4.1: Fix a race where CB_NOTIFY_LOCK fails to wake a waiter"
+found this bug. However it didn't fix it.
+
+This commit replaces schedule_timeout() with wait_woken() and
+default_wake_function() with woken_wake_function() in function
+nfs4_retry_setlk() and nfs4_wake_lock_waiter(). wait_woken() uses
+memory barriers in its implementation to avoid potential race condition
+when putting a process into sleeping state and then waking it up.
+
+Fixes: a1d617d8f134 ("nfs: allow blocking locks to be awoken by lock callbacks")
+Cc: stable@vger.kernel.org #4.9+
+Signed-off-by: Yihao Wu <wuyihao@linux.alibaba.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
+Acked-by: NeilBrown <neilb@suse.com>
+---
+ fs/nfs/nfs4proc.c | 23 +++++++----------------
+ 1 file changed, 7 insertions(+), 16 deletions(-)
+
+--- a/fs/nfs/nfs4proc.c
++++ b/fs/nfs/nfs4proc.c
+@@ -6534,7 +6534,6 @@ struct nfs4_lock_waiter {
+ struct task_struct *task;
+ struct inode *inode;
+ struct nfs_lowner *owner;
+- bool notified;
+ };
+
+ static int
+@@ -6556,11 +6555,11 @@ nfs4_wake_lock_waiter(wait_queue_entry_t
+ if (nfs_compare_fh(NFS_FH(waiter->inode), &cbnl->cbnl_fh))
+ return 0;
+
+- waiter->notified = true;
+-
+ /* override "private" so we can use default_wake_function */
+ wait->private = waiter->task;
+- ret = autoremove_wake_function(wait, mode, flags, key);
++ ret = woken_wake_function(wait, mode, flags, key);
++ if (ret)
++ list_del_init(&wait->entry);
+ wait->private = waiter;
+ return ret;
+ }
+@@ -6569,7 +6568,6 @@ static int
+ nfs4_retry_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
+ {
+ int status = -ERESTARTSYS;
+- unsigned long flags;
+ struct nfs4_lock_state *lsp = request->fl_u.nfs4_fl.owner;
+ struct nfs_server *server = NFS_SERVER(state->inode);
+ struct nfs_client *clp = server->nfs_client;
+@@ -6579,8 +6577,7 @@ nfs4_retry_setlk(struct nfs4_state *stat
+ .s_dev = server->s_dev };
+ struct nfs4_lock_waiter waiter = { .task = current,
+ .inode = state->inode,
+- .owner = &owner,
+- .notified = false };
++ .owner = &owner};
+ wait_queue_entry_t wait;
+
+ /* Don't bother with waitqueue if we don't expect a callback */
+@@ -6598,15 +6595,9 @@ nfs4_retry_setlk(struct nfs4_state *stat
+ break;
+
+ status = -ERESTARTSYS;
+- spin_lock_irqsave(&q->lock, flags);
+- if (waiter.notified) {
+- spin_unlock_irqrestore(&q->lock, flags);
+- continue;
+- }
+- set_current_state(TASK_INTERRUPTIBLE);
+- spin_unlock_irqrestore(&q->lock, flags);
+-
+- freezable_schedule_timeout(NFS4_LOCK_MAXTIMEOUT);
++ freezer_do_not_count();
++ wait_woken(&wait, TASK_INTERRUPTIBLE, NFS4_LOCK_MAXTIMEOUT);
++ freezer_count();
+ }
+
+ finish_wait(q, &wait);
diff --git a/patches.suse/NFSv4.1-Fix-open-stateid-recovery.patch b/patches.suse/NFSv4.1-Fix-open-stateid-recovery.patch
new file mode 100644
index 0000000000..1d1ba42ce9
--- /dev/null
+++ b/patches.suse/NFSv4.1-Fix-open-stateid-recovery.patch
@@ -0,0 +1,168 @@
+From: Trond Myklebust <trond.myklebust@hammerspace.com>
+Date: Mon, 22 Jul 2019 18:32:59 +0100
+Subject: [PATCH] NFSv4.1: Fix open stateid recovery
+Git-commit: 27a30cf64a5cbe2105e4ff9613246b32d584766a
+Patch-mainline: v5.3
+References: git-fixes
+
+The logic for checking in nfs41_check_open_stateid() whether the state
+is supported by a delegation is inverted. In addition, it makes more
+sense to perform that check before we check for expired locks.
+
+Fixes: 8a64c4ef106d1 ("NFSv4.1: Even if the stateid is OK,...")
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Acked-by: NeilBrown <neilb@suse.com>
+
+---
+ fs/nfs/nfs4proc.c | 65 +++++++++++++++++++++++++++++++-----------------------
+ 1 file changed, 38 insertions(+), 27 deletions(-)
+
+--- a/fs/nfs/nfs4proc.c
++++ b/fs/nfs/nfs4proc.c
+@@ -1566,6 +1566,14 @@ static void nfs_set_open_stateid_locked(
+ nfs_state_log_update_open_stateid(state);
+ }
+
++static void nfs_state_clear_open_state_flags(struct nfs4_state *state)
++{
++ clear_bit(NFS_O_RDWR_STATE, &state->flags);
++ clear_bit(NFS_O_WRONLY_STATE, &state->flags);
++ clear_bit(NFS_O_RDONLY_STATE, &state->flags);
++ clear_bit(NFS_OPEN_STATE, &state->flags);
++}
++
+ static void nfs_state_set_open_stateid(struct nfs4_state *state,
+ const nfs4_stateid *open_stateid,
+ fmode_t fmode,
+@@ -1927,13 +1935,7 @@ static int nfs4_open_recover(struct nfs4
+ {
+ int ret;
+
+- /* Don't trigger recovery in nfs_test_and_clear_all_open_stateid */
+- clear_bit(NFS_O_RDWR_STATE, &state->flags);
+- clear_bit(NFS_O_WRONLY_STATE, &state->flags);
+- clear_bit(NFS_O_RDONLY_STATE, &state->flags);
+ /* memory barrier prior to reading state->n_* */
+- clear_bit(NFS_DELEGATED_STATE, &state->flags);
+- clear_bit(NFS_OPEN_STATE, &state->flags);
+ smp_rmb();
+ ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE);
+ if (ret != 0)
+@@ -2009,6 +2011,8 @@ static int nfs4_open_reclaim(struct nfs4
+ ctx = nfs4_state_find_open_context(state);
+ if (IS_ERR(ctx))
+ return -EAGAIN;
++ clear_bit(NFS_DELEGATED_STATE, &state->flags);
++ nfs_state_clear_open_state_flags(state);
+ ret = nfs4_do_open_reclaim(ctx, state);
+ put_nfs_open_context(ctx);
+ return ret;
+@@ -2543,6 +2547,7 @@ static int nfs40_open_expired(struct nfs
+ {
+ /* NFSv4.0 doesn't allow for delegation recovery on open expire */
+ nfs40_clear_delegation_stateid(state);
++ nfs_state_clear_open_state_flags(state);
+ return nfs4_open_expired(sp, state);
+ }
+
+@@ -2585,13 +2590,13 @@ out_free:
+ return -NFS4ERR_EXPIRED;
+ }
+
+-static void nfs41_check_delegation_stateid(struct nfs4_state *state)
++static int nfs41_check_delegation_stateid(struct nfs4_state *state)
+ {
+ struct nfs_server *server = NFS_SERVER(state->inode);
+ nfs4_stateid stateid;
+ struct nfs_delegation *delegation;
+ struct rpc_cred *cred;
+- int status;
++ int status, ret = NFS_OK;
+
+ /* Get the delegation credential for use by test/free_stateid */
+ rcu_read_lock();
+@@ -2599,20 +2604,15 @@ static void nfs41_check_delegation_state
+ if (delegation == NULL) {
+ rcu_read_unlock();
+ nfs_state_clear_delegation(state);
+- return;
++ return NFS_OK;
+ }
+
+ nfs4_stateid_copy(&stateid, &delegation->stateid);
+- if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) {
+- rcu_read_unlock();
+- nfs_state_clear_delegation(state);
+- return;
+- }
+
+ if (!test_and_clear_bit(NFS_DELEGATION_TEST_EXPIRED,
+ &delegation->flags)) {
+ rcu_read_unlock();
+- return;
++ return NFS_OK;
+ }
+
+ cred = get_rpccred(delegation->cred);
+@@ -2621,8 +2621,24 @@ static void nfs41_check_delegation_state
+ trace_nfs4_test_delegation_stateid(state, NULL, status);
+ if (status == -NFS4ERR_EXPIRED || status == -NFS4ERR_BAD_STATEID)
+ nfs_finish_clear_delegation_stateid(state, &stateid);
++ else
++ ret = status;
+
+ put_rpccred(cred);
++ return ret;
++}
++
++static void nfs41_delegation_recover_stateid(struct nfs4_state *state)
++{
++ nfs4_stateid tmp;
++
++ if (test_bit(NFS_DELEGATED_STATE, &state->flags) &&
++ nfs4_copy_delegation_stateid(state->inode, state->state,
++ &tmp, NULL) &&
++ nfs4_stateid_match_other(&state->stateid, &tmp))
++ nfs_state_set_delegation(state, &tmp, state->state);
++ else
++ nfs_state_clear_delegation(state);
+ }
+
+ /**
+@@ -2692,21 +2708,12 @@ static int nfs41_check_open_stateid(stru
+ struct rpc_cred *cred = state->owner->so_cred;
+ int status;
+
+- if (test_bit(NFS_OPEN_STATE, &state->flags) == 0) {
+- if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) {
+- if (nfs4_have_delegation(state->inode, state->state))
+- return NFS_OK;
+- return -NFS4ERR_OPENMODE;
+- }
++ if (test_bit(NFS_OPEN_STATE, &state->flags) == 0)
+ return -NFS4ERR_BAD_STATEID;
+- }
+ status = nfs41_test_and_free_expired_stateid(server, stateid, cred);
+ trace_nfs4_test_open_stateid(state, NULL, status);
+ if (status == -NFS4ERR_EXPIRED || status == -NFS4ERR_BAD_STATEID) {
+- clear_bit(NFS_O_RDONLY_STATE, &state->flags);
+- clear_bit(NFS_O_WRONLY_STATE, &state->flags);
+- clear_bit(NFS_O_RDWR_STATE, &state->flags);
+- clear_bit(NFS_OPEN_STATE, &state->flags);
++ nfs_state_clear_open_state_flags(state);
+ stateid->type = NFS4_INVALID_STATEID_TYPE;
+ }
+ if (status != NFS_OK)
+@@ -2720,7 +2727,11 @@ static int nfs41_open_expired(struct nfs
+ {
+ int status;
+
+- nfs41_check_delegation_stateid(state);
++ status = nfs41_check_delegation_stateid(state);
++ if (status != NFS_OK)
++ return status;
++ nfs41_delegation_recover_stateid(state);
++
+ status = nfs41_check_expired_locks(state);
+ if (status != NFS_OK)
+ return status;
diff --git a/patches.suse/NFSv4.1-Only-reap-expired-delegations.patch b/patches.suse/NFSv4.1-Only-reap-expired-delegations.patch
new file mode 100644
index 0000000000..fab73c27e3
--- /dev/null
+++ b/patches.suse/NFSv4.1-Only-reap-expired-delegations.patch
@@ -0,0 +1,64 @@
+From: Trond Myklebust <trond.myklebust@hammerspace.com>
+Date: Fri, 26 Jul 2019 14:40:53 +0100
+Subject: [PATCH] NFSv4.1: Only reap expired delegations
+Git-commit: ad11408970df79d5f481aa9964e91f183133424c
+Patch-mainline: v5.3
+References: git-fixes
+
+Fix nfs_reap_expired_delegations() to ensure that we only reap delegations
+that are actually expired, rather than triggering on random errors.
+
+Fixes: 45870d6909d5a ("NFSv4.1: Test delegation stateids when server...")
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Acked-by: NeilBrown <neilb@suse.com>
+
+---
+ fs/nfs/delegation.c | 23 +++++++++++++++++------
+ 1 file changed, 17 insertions(+), 6 deletions(-)
+
+--- a/fs/nfs/delegation.c
++++ b/fs/nfs/delegation.c
+@@ -991,6 +991,22 @@ void nfs_mark_test_expired_all_delegatio
+ rcu_read_unlock();
+ }
+
++static void
++nfs_delegation_test_free_expired(struct inode *inode,
++ nfs4_stateid *stateid,
++ struct rpc_cred *cred)
++{
++ struct nfs_server *server = NFS_SERVER(inode);
++ const struct nfs4_minor_version_ops *ops = server->nfs_client->cl_mvops;
++ int status;
++
++ if (!cred)
++ return;
++ status = ops->test_and_free_expired(server, stateid, cred);
++ if (status == -NFS4ERR_EXPIRED || status == -NFS4ERR_BAD_STATEID)
++ nfs_remove_bad_delegation(inode, stateid);
++}
++
+ /**
+ * nfs_reap_expired_delegations - reap expired delegations
+ * @clp: nfs_client to process
+@@ -1002,7 +1018,6 @@ void nfs_mark_test_expired_all_delegatio
+ */
+ void nfs_reap_expired_delegations(struct nfs_client *clp)
+ {
+- const struct nfs4_minor_version_ops *ops = clp->cl_mvops;
+ struct nfs_delegation *delegation;
+ struct nfs_server *server;
+ struct inode *inode;
+@@ -1033,11 +1048,7 @@ restart:
+ nfs4_stateid_copy(&stateid, &delegation->stateid);
+ clear_bit(NFS_DELEGATION_TEST_EXPIRED, &delegation->flags);
+ rcu_read_unlock();
+- if (cred != NULL &&
+- ops->test_and_free_expired(server, &stateid, cred) < 0) {
+- nfs_revoke_delegation(inode, &stateid);
+- nfs_inode_find_state_and_recover(inode, &stateid);
+- }
++ nfs_delegation_test_free_expired(inode, &stateid, cred);
+ put_rpccred(cred);
+ if (nfs4_server_rebooted(clp)) {
+ nfs_inode_mark_test_expired_delegation(server,inode);
diff --git a/patches.suse/PNFS-fallback-to-MDS-if-no-deviceid-found.patch b/patches.suse/PNFS-fallback-to-MDS-if-no-deviceid-found.patch
new file mode 100644
index 0000000000..4876e0d10a
--- /dev/null
+++ b/patches.suse/PNFS-fallback-to-MDS-if-no-deviceid-found.patch
@@ -0,0 +1,32 @@
+From: Olga Kornievskaia <kolga@netapp.com>
+Date: Tue, 7 May 2019 13:41:49 -0400
+Subject: [PATCH] PNFS fallback to MDS if no deviceid found
+Git-commit: b1029c9bc078a6f1515f55dd993b507dcc7e3440
+Patch-mainline: v5.2
+References: git-fixes
+
+If we fail to find a good deviceid while trying to use pNFS, instead of
+propagating an error back, fall back to doing IO to the MDS. Currently,
+the code fails the IO with EINVAL.
+
+Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
+Fixes: 8d40b0f14846f ("NFS filelayout: call GETDEVICEINFO after pnfs_layout_process completes")
+Cc: stable@vger.kernel.org # v4.11+
+Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
+Acked-by: NeilBrown <neilb@suse.com>
+
+---
+ fs/nfs/filelayout/filelayout.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/nfs/filelayout/filelayout.c
++++ b/fs/nfs/filelayout/filelayout.c
+@@ -932,7 +932,7 @@ fl_pnfs_update_layout(struct inode *ino,
+ status = filelayout_check_deviceid(lo, fl, gfp_flags);
+ if (status) {
+ pnfs_put_lseg(lseg);
+- lseg = ERR_PTR(status);
++ lseg = NULL;
+ }
+ out:
+ return lseg;
diff --git a/patches.suse/SUNRPC-Handle-connection-breakages-correctly-in-call.patch b/patches.suse/SUNRPC-Handle-connection-breakages-correctly-in-call.patch
new file mode 100644
index 0000000000..35bf86b3df
--- /dev/null
+++ b/patches.suse/SUNRPC-Handle-connection-breakages-correctly-in-call.patch
@@ -0,0 +1,29 @@
+From: Trond Myklebust <trond.myklebust@hammerspace.com>
+Date: Fri, 16 Aug 2019 08:58:48 -0400
+Subject: [PATCH] SUNRPC: Handle connection breakages correctly in
+ call_status()
+Git-commit: c82e5472c9980e0e483f4b689044150eefaca408
+Patch-mainline: v5.3
+References: git-fixes
+
+If the connection breaks while we're waiting for a reply from the
+server, then we want to immediately try to reconnect.
+
+Fixes: ec6017d90359 ("SUNRPC fix regression in umount of a secure mount")
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Acked-by: NeilBrown <neilb@suse.com>
+
+---
+ net/sunrpc/clnt.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/sunrpc/clnt.c
++++ b/net/sunrpc/clnt.c
+@@ -2177,6 +2177,7 @@ call_status(struct rpc_task *task)
+ case -ECONNABORTED:
+ case -ENOTCONN:
+ rpc_force_rebind(clnt);
++ break;
+ case -EADDRINUSE:
+ rpc_delay(task, 3*HZ);
+ case -EPIPE:
diff --git a/patches.suse/SUNRPC-fix-regression-in-umount-of-a-secure-mount.patch b/patches.suse/SUNRPC-fix-regression-in-umount-of-a-secure-mount.patch
new file mode 100644
index 0000000000..41599d0512
--- /dev/null
+++ b/patches.suse/SUNRPC-fix-regression-in-umount-of-a-secure-mount.patch
@@ -0,0 +1,36 @@
+From: Olga Kornievskaia <kolga@netapp.com>
+Date: Wed, 29 May 2019 10:46:00 -0400
+Subject: [PATCH] SUNRPC fix regression in umount of a secure mount
+Git-commit: ec6017d9035986a36de064f48a63245930bfad6f
+Patch-mainline: v5.2
+References: git-fixes
+
+If call_status returns ENOTCONN, we need to re-establish the connection
+state after. Otherwise the client goes into an infinite loop of call_encode,
+call_transmit, call_status (ENOTCONN), call_encode.
+
+Fixes: c8485e4d63 ("SUNRPC: Handle ECONNREFUSED correctly in xprt_transmit()")
+Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
+Cc: stable@vger.kernel.org # v2.6.29+
+Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
+Acked-by: NeilBrown <neilb@suse.com>
+
+---
+ net/sunrpc/clnt.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/sunrpc/clnt.c
++++ b/net/sunrpc/clnt.c
+@@ -2175,11 +2175,11 @@ call_status(struct rpc_task *task)
+ case -ECONNREFUSED:
+ case -ECONNRESET:
+ case -ECONNABORTED:
++ case -ENOTCONN:
+ rpc_force_rebind(clnt);
+ case -EADDRINUSE:
+ rpc_delay(task, 3*HZ);
+ case -EPIPE:
+- case -ENOTCONN:
+ task->tk_action = call_bind;
+ break;
+ case -ENOBUFS:
diff --git a/patches.suse/SUNRPC-nfs-Fix-return-value-for-nfs4_callback_compou.patch b/patches.suse/SUNRPC-nfs-Fix-return-value-for-nfs4_callback_compou.patch
new file mode 100644
index 0000000000..c33bb8e4f0
--- /dev/null
+++ b/patches.suse/SUNRPC-nfs-Fix-return-value-for-nfs4_callback_compou.patch
@@ -0,0 +1,103 @@
+From: Trond Myklebust <trondmy@gmail.com>
+Date: Tue, 9 Apr 2019 11:46:14 -0400
+Subject: [PATCH] SUNRPC/nfs: Fix return value for nfs4_callback_compound()
+Git-commit: 83dd59a0b9afc3b1a2642fb5c9b0585b1c08768f
+Patch-mainline: v5.2
+References: git-fixes
+
+RPC server procedures are normally expected to return a __be32 encoded
+status value of type 'enum rpc_accept_stat', however at least one function
+wants to return an authentication status of type 'enum rpc_auth_stat'
+in the case where authentication fails.
+This patch adds functionality to allow this.
+
+Fixes: a4e187d83d88 ("NFS: Don't drop CB requests with invalid principals")
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Signed-off-by: J. Bruce Fields <bfields@redhat.com>
+Acked-by: NeilBrown <neilb@suse.com>
+
+---
+ fs/nfs/callback_xdr.c | 2 +-
+ include/linux/sunrpc/svc.h | 2 ++
+ net/sunrpc/svc.c | 27 ++++++++++++++++++++++-----
+ 3 files changed, 25 insertions(+), 6 deletions(-)
+
+--- a/fs/nfs/callback_xdr.c
++++ b/fs/nfs/callback_xdr.c
+@@ -931,7 +931,7 @@ static __be32 nfs4_callback_compound(str
+
+ out_invalidcred:
+ pr_warn_ratelimited("NFS: NFSv4 callback contains invalid cred\n");
+- return rpc_autherr_badcred;
++ return svc_return_autherr(rqstp, rpc_autherr_badcred);
+ }
+
+ /*
+--- a/include/linux/sunrpc/svc.h
++++ b/include/linux/sunrpc/svc.h
+@@ -269,6 +269,7 @@ struct svc_rqst {
+ #define RQ_VICTIM (5) /* about to be shut down */
+ #define RQ_BUSY (6) /* request is busy */
+ #define RQ_DATA (7) /* request has data */
++#define RQ_AUTHERR (8) /* Request status is auth error */
+ unsigned long rq_flags; /* flags field */
+
+ void * rq_argp; /* decoded arguments */
+@@ -493,6 +494,7 @@ void svc_wake_up(struct svc_serv *);
+ void svc_reserve(struct svc_rqst *rqstp, int space);
+ struct svc_pool * svc_pool_for_cpu(struct svc_serv *serv, int cpu);
+ char * svc_print_addr(struct svc_rqst *, char *, size_t);
++__be32 svc_return_autherr(struct svc_rqst *rqstp, __be32 auth_err);
+
+ #define RPC_MAX_ADDRBUFLEN (63U)
+
+--- a/net/sunrpc/svc.c
++++ b/net/sunrpc/svc.c
+@@ -1144,6 +1144,22 @@ void svc_printk(struct svc_rqst *rqstp,
+ static __printf(2,3) void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) {}
+ #endif
+
++__be32
++svc_return_autherr(struct svc_rqst *rqstp, __be32 auth_err)
++{
++ set_bit(RQ_AUTHERR, &rqstp->rq_flags);
++ return auth_err;
++}
++EXPORT_SYMBOL_GPL(svc_return_autherr);
++
++static __be32
++svc_get_autherr(struct svc_rqst *rqstp, __be32 *statp)
++{
++ if (test_and_clear_bit(RQ_AUTHERR, &rqstp->rq_flags))
++ return *statp;
++ return rpc_auth_ok;
++}
++
+ /*
+ * Common routine for processing the RPC request.
+ */
+@@ -1295,11 +1311,9 @@ svc_process_common(struct svc_rqst *rqst
+ procp->pc_release(rqstp, NULL, rqstp->rq_resp);
+ goto dropit;
+ }
+- if (*statp == rpc_autherr_badcred) {
+- if (procp->pc_release)
+- procp->pc_release(rqstp, NULL, rqstp->rq_resp);
+- goto err_bad_auth;
+- }
++ auth_stat = svc_get_autherr(rqstp, statp);
++ if (auth_stat != rpc_auth_ok)
++ goto err_release_bad_auth;
+ if (*statp == rpc_success &&
+ (xdr = procp->pc_encode) &&
+ !xdr(rqstp, resv->iov_base+resv->iov_len, rqstp->rq_resp)) {
+@@ -1357,6 +1371,9 @@ err_bad_rpc:
+ svc_putnl(resv, 2);
+ goto sendit;
+
++err_release_bad_auth:
++ if (procp->pc_release)
++ procp->pc_release(rqstp, NULL, rqstp->rq_resp);
+ err_bad_auth:
+ dprintk("svc: authentication failed (%d)\n", ntohl(auth_stat));
+ serv->sv_stats->rpcbadauth++;
diff --git a/patches.suse/ceph-use-ceph_evict_inode-to-cleanup-inode-s-resource.patch b/patches.suse/ceph-use-ceph_evict_inode-to-cleanup-inode-s-resource.patch
new file mode 100644
index 0000000000..daf9331c08
--- /dev/null
+++ b/patches.suse/ceph-use-ceph_evict_inode-to-cleanup-inode-s-resource.patch
@@ -0,0 +1,90 @@
+From: "Yan, Zheng" <zyan@redhat.com>
+Date: Tue, 1 Oct 2019 17:24:25 -0400
+Subject: ceph: use ceph_evict_inode to cleanup inode's resource
+Git-commit: 87bc5b895d94a0f40fe170d4cf5771c8e8f85d15
+Patch-mainline: v5.3-rc1
+References: bsc#1148133
+
+[ Upstream commit 87bc5b895d94a0f40fe170d4cf5771c8e8f85d15 ]
+
+remove_session_caps() relies on __wait_on_freeing_inode(), to wait for
+freeing inode to remove its caps. But VFS wakes freeing inode waiters
+before calling destroy_inode().
+
+[ jlayton: mainline moved to ->free_inode before the original patch was
+ merged. This backport reinstates ceph_destroy_inode and just
+ has it do the call_rcu call. ]
+
+Cc: stable@vger.kernel.org
+Link: https://tracker.ceph.com/issues/40102
+Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
+Reviewed-by: Jeff Layton <jlayton@redhat.com>
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Acked-by: Luis Henriques <lhenriques@suse.com>
+[luis: cherry-pick from stable linux-v4.19]
+---
+ fs/ceph/inode.c | 10 ++++++++--
+ fs/ceph/super.c | 1 +
+ fs/ceph/super.h | 1 +
+ 3 files changed, 10 insertions(+), 2 deletions(-)
+
+diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
+index 11f19432a74c..c06845237cba 100644
+--- a/fs/ceph/inode.c
++++ b/fs/ceph/inode.c
+@@ -528,13 +528,16 @@ static void ceph_i_callback(struct rcu_head *head)
+ kmem_cache_free(ceph_inode_cachep, ci);
+ }
+
+-void ceph_destroy_inode(struct inode *inode)
++void ceph_evict_inode(struct inode *inode)
+ {
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct ceph_inode_frag *frag;
+ struct rb_node *n;
+
+- dout("destroy_inode %p ino %llx.%llx\n", inode, ceph_vinop(inode));
++ dout("evict_inode %p ino %llx.%llx\n", inode, ceph_vinop(inode));
++
++ truncate_inode_pages_final(&inode->i_data);
++ clear_inode(inode);
+
+ ceph_fscache_unregister_inode_cookie(ci);
+
+@@ -576,7 +579,10 @@ void ceph_destroy_inode(struct inode *inode)
+ ceph_buffer_put(ci->i_xattrs.prealloc_blob);
+
+ ceph_put_string(rcu_dereference_raw(ci->i_layout.pool_ns));
++}
+
++void ceph_destroy_inode(struct inode *inode)
++{
+ call_rcu(&inode->i_rcu, ceph_i_callback);
+ }
+
+diff --git a/fs/ceph/super.c b/fs/ceph/super.c
+index c5cf46e43f2e..ccab249a37f6 100644
+--- a/fs/ceph/super.c
++++ b/fs/ceph/super.c
+@@ -830,6 +830,7 @@ static const struct super_operations ceph_super_ops = {
+ .destroy_inode = ceph_destroy_inode,
+ .write_inode = ceph_write_inode,
+ .drop_inode = ceph_drop_inode,
++ .evict_inode = ceph_evict_inode,
+ .sync_fs = ceph_sync_fs,
+ .put_super = ceph_put_super,
+ .remount_fs = ceph_remount,
+diff --git a/fs/ceph/super.h b/fs/ceph/super.h
+index 018019309790..8d3eabf06d66 100644
+--- a/fs/ceph/super.h
++++ b/fs/ceph/super.h
+@@ -854,6 +854,7 @@ static inline bool __ceph_have_pending_cap_snap(struct ceph_inode_info *ci)
+ extern const struct inode_operations ceph_file_iops;
+
+ extern struct inode *ceph_alloc_inode(struct super_block *sb);
++extern void ceph_evict_inode(struct inode *inode);
+ extern void ceph_destroy_inode(struct inode *inode);
+ extern int ceph_drop_inode(struct inode *inode);
+
+
diff --git a/patches.suse/eeprom-at24-make-spd-world-readable-again.patch b/patches.suse/eeprom-at24-make-spd-world-readable-again.patch
new file mode 100644
index 0000000000..b3ccbb5375
--- /dev/null
+++ b/patches.suse/eeprom-at24-make-spd-world-readable-again.patch
@@ -0,0 +1,37 @@
+From: Jean Delvare <jdelvare@suse.de>
+Date: Sun, 28 Jul 2019 18:41:38 +0200
+Subject: eeprom: at24: make spd world-readable again
+Git-commit: 25e5ef302c24a6fead369c0cfe88c073d7b97ca8
+Patch-mainline: v5.3
+References: git-fixes
+
+The integration of the at24 driver into the nvmem framework broke the
+world-readability of spd EEPROMs. Fix it.
+
+Signed-off-by: Jean Delvare <jdelvare@suse.de>
+Cc: stable@vger.kernel.org
+Fixes: 57d155506dd5 ("eeprom: at24: extend driver to plug into the NVMEM framework")
+Cc: Andrew Lunn <andrew@lunn.ch>
+Cc: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Bartosz Golaszewski <brgl@bgdev.pl>
+Cc: Arnd Bergmann <arnd@arndb.de>
+Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
+[Bartosz: backported the patch to older branches]
+Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/misc/eeprom/at24.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/misc/eeprom/at24.c
++++ b/drivers/misc/eeprom/at24.c
+@@ -769,7 +769,7 @@ static int at24_probe(struct i2c_client
+ at24->nvmem_config.name = dev_name(&client->dev);
+ at24->nvmem_config.dev = &client->dev;
+ at24->nvmem_config.read_only = !writable;
+- at24->nvmem_config.root_only = true;
++ at24->nvmem_config.root_only = !(chip.flags & AT24_FLAG_IRUGO);
+ at24->nvmem_config.owner = THIS_MODULE;
+ at24->nvmem_config.compat = true;
+ at24->nvmem_config.base_dev = &client->dev;
diff --git a/patches.suse/ftrace-x86-remove-possible-deadlock-between-register_kprobe-and-ftrace_run_update_code.patch b/patches.suse/ftrace-x86-remove-possible-deadlock-between-register_kprobe-and-ftrace_run_update_code.patch
deleted file mode 100644
index 7ca190f8d2..0000000000
--- a/patches.suse/ftrace-x86-remove-possible-deadlock-between-register_kprobe-and-ftrace_run_update_code.patch
+++ /dev/null
@@ -1,182 +0,0 @@
-From: Petr Mladek <pmladek@suse.com>
-Date: Thu, 27 Jun 2019 10:13:34 +0200
-Subject: ftrace/x86: Remove possible deadlock between register_kprobe() and
- ftrace_run_update_code()
-Git-commit: d5b844a2cf507fc7642c9ae80a9d585db3065c28
-Patch-mainline: v5.2
-References: bsc#1071995 fate#323487
-
-The commit 9f255b632bf12c4dd7 ("module: Fix livepatch/ftrace module text
-permissions race") causes a possible deadlock between register_kprobe()
-and ftrace_run_update_code() when ftrace is using stop_machine().
-
-The existing dependency chain (in reverse order) is:
-
--> #1 (text_mutex){+.+.}:
- validate_chain.isra.21+0xb32/0xd70
- __lock_acquire+0x4b8/0x928
- lock_acquire+0x102/0x230
- __mutex_lock+0x88/0x908
- mutex_lock_nested+0x32/0x40
- register_kprobe+0x254/0x658
- init_kprobes+0x11a/0x168
- do_one_initcall+0x70/0x318
- kernel_init_freeable+0x456/0x508
- kernel_init+0x22/0x150
- ret_from_fork+0x30/0x34
- kernel_thread_starter+0x0/0xc
-
--> #0 (cpu_hotplug_lock.rw_sem){++++}:
- check_prev_add+0x90c/0xde0
- validate_chain.isra.21+0xb32/0xd70
- __lock_acquire+0x4b8/0x928
- lock_acquire+0x102/0x230
- cpus_read_lock+0x62/0xd0
- stop_machine+0x2e/0x60
- arch_ftrace_update_code+0x2e/0x40
- ftrace_run_update_code+0x40/0xa0
- ftrace_startup+0xb2/0x168
- register_ftrace_function+0x64/0x88
- klp_patch_object+0x1a2/0x290
- klp_enable_patch+0x554/0x980
- do_one_initcall+0x70/0x318
- do_init_module+0x6e/0x250
- load_module+0x1782/0x1990
- __s390x_sys_finit_module+0xaa/0xf0
- system_call+0xd8/0x2d0
-
- Possible unsafe locking scenario:
-
- CPU0 CPU1
- ---- ----
- lock(text_mutex);
- lock(cpu_hotplug_lock.rw_sem);
- lock(text_mutex);
- lock(cpu_hotplug_lock.rw_sem);
-
-It is similar problem that has been solved by the commit 2d1e38f56622b9b
-("kprobes: Cure hotplug lock ordering issues"). Many locks are involved.
-To be on the safe side, text_mutex must become a low level lock taken
-after cpu_hotplug_lock.rw_sem.
-
-This can't be achieved easily with the current ftrace design.
-For example, arm calls set_all_modules_text_rw() already in
-ftrace_arch_code_modify_prepare(), see arch/arm/kernel/ftrace.c.
-This functions is called:
-
- + outside stop_machine() from ftrace_run_update_code()
- + without stop_machine() from ftrace_module_enable()
-
-Fortunately, the problematic fix is needed only on x86_64. It is
-the only architecture that calls set_all_modules_text_rw()
-in ftrace path and supports livepatching at the same time.
-
-Therefore it is enough to move text_mutex handling from the generic
-kernel/trace/ftrace.c into arch/x86/kernel/ftrace.c:
-
- ftrace_arch_code_modify_prepare()
- ftrace_arch_code_modify_post_process()
-
-This patch basically reverts the ftrace part of the problematic
-commit 9f255b632bf12c4dd7 ("module: Fix livepatch/ftrace module
-text permissions race"). And provides x86_64 specific-fix.
-
-Some refactoring of the ftrace code will be needed when livepatching
-is implemented for arm or nds32. These architectures call
-set_all_modules_text_rw() and use stop_machine() at the same time.
-
-Link: http://lkml.kernel.org/r/20190627081334.12793-1-pmladek@suse.com
-
-Fixes: 9f255b632bf12c4dd7 ("module: Fix livepatch/ftrace module text permissions race")
-Acked-by: Thomas Gleixner <tglx@linutronix.de>
-Reported-by: Miroslav Benes <mbenes@suse.cz>
-Reviewed-by: Miroslav Benes <mbenes@suse.cz>
-Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
-Signed-off-by: Petr Mladek <pmladek@suse.com>
-[
- As reviewed by Miroslav Benes <mbenes@suse.cz>, removed return value of
- ftrace_run_update_code() as it is a void function.
-]
-Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
-Acked-by: Miroslav Benes <mbenes@suse.cz>
----
- arch/x86/kernel/ftrace.c | 3 +++
- kernel/trace/ftrace.c | 10 +---------
- 2 files changed, 4 insertions(+), 9 deletions(-)
-
---- a/arch/x86/kernel/ftrace.c
-+++ b/arch/x86/kernel/ftrace.c
-@@ -21,6 +21,7 @@
- #include <linux/init.h>
- #include <linux/list.h>
- #include <linux/module.h>
-+#include <linux/memory.h>
-
- #include <trace/syscall.h>
-
-@@ -34,6 +35,7 @@
-
- int ftrace_arch_code_modify_prepare(void)
- {
-+ mutex_lock(&text_mutex);
- set_kernel_text_rw();
- set_all_modules_text_rw();
- return 0;
-@@ -43,6 +45,7 @@ int ftrace_arch_code_modify_post_process
- {
- set_all_modules_text_ro();
- set_kernel_text_ro();
-+ mutex_unlock(&text_mutex);
- return 0;
- }
-
---- a/kernel/trace/ftrace.c
-+++ b/kernel/trace/ftrace.c
-@@ -33,7 +33,6 @@
- #include <linux/list.h>
- #include <linux/hash.h>
- #include <linux/rcupdate.h>
--#include <linux/memory.h>
-
- #include <trace/events/sched.h>
-
-@@ -2630,12 +2629,10 @@ static void ftrace_run_update_code(int c
- {
- int ret;
-
-- mutex_lock(&text_mutex);
--
- ret = ftrace_arch_code_modify_prepare();
- FTRACE_WARN_ON(ret);
- if (ret)
-- goto out_unlock;
-+ return;
-
- /*
- * By default we use stop_machine() to modify the code.
-@@ -2647,9 +2644,6 @@ static void ftrace_run_update_code(int c
-
- ret = ftrace_arch_code_modify_post_process();
- FTRACE_WARN_ON(ret);
--
--out_unlock:
-- mutex_unlock(&text_mutex);
- }
-
- static void ftrace_run_modify_code(struct ftrace_ops *ops, int command,
-@@ -5415,7 +5409,6 @@ void ftrace_module_enable(struct module
- struct ftrace_page *pg;
-
- mutex_lock(&ftrace_lock);
-- mutex_lock(&text_mutex);
-
- if (ftrace_disabled)
- goto out_unlock;
-@@ -5476,7 +5469,6 @@ void ftrace_module_enable(struct module
- ftrace_arch_code_modify_post_process();
-
- out_unlock:
-- mutex_unlock(&text_mutex);
- mutex_unlock(&ftrace_lock);
- }
-
diff --git a/patches.suse/module-fix-livepatch-ftrace-module-text-permissions-race.patch b/patches.suse/module-fix-livepatch-ftrace-module-text-permissions-race.patch
deleted file mode 100644
index 66088c57bc..0000000000
--- a/patches.suse/module-fix-livepatch-ftrace-module-text-permissions-race.patch
+++ /dev/null
@@ -1,170 +0,0 @@
-From: Josh Poimboeuf <jpoimboe@redhat.com>
-Date: Thu, 13 Jun 2019 20:07:22 -0500
-Subject: module: Fix livepatch/ftrace module text permissions race
-Git-commit: 9f255b632bf12c4dd7fc31caee89aa991ef75176
-Patch-mainline: v5.2-rc5
-References: bsc#1071995 fate#323487
-
-It's possible for livepatch and ftrace to be toggling a module's text
-permissions at the same time, resulting in the following panic:
-
- BUG: unable to handle page fault for address: ffffffffc005b1d9
- #PF: supervisor write access in kernel mode
- #PF: error_code(0x0003) - permissions violation
- PGD 3ea0c067 P4D 3ea0c067 PUD 3ea0e067 PMD 3cc13067 PTE 3b8a1061
- Oops: 0003 [#1] PREEMPT SMP PTI
- CPU: 1 PID: 453 Comm: insmod Tainted: G O K 5.2.0-rc1-a188339ca5 #1
- Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-20181126_142135-anatol 04/01/2014
- RIP: 0010:apply_relocate_add+0xbe/0x14c
- Code: fa 0b 74 21 48 83 fa 18 74 38 48 83 fa 0a 75 40 eb 08 48 83 38 00 74 33 eb 53 83 38 00 75 4e 89 08 89 c8 eb 0a 83 38 00 75 43 <89> 08 48 63 c1 48 39 c8 74 2e eb 48 83 38 00 75 32 48 29 c1 89 08
- RSP: 0018:ffffb223c00dbb10 EFLAGS: 00010246
- RAX: ffffffffc005b1d9 RBX: 0000000000000000 RCX: ffffffff8b200060
- RDX: 000000000000000b RSI: 0000004b0000000b RDI: ffff96bdfcd33000
- RBP: ffffb223c00dbb38 R08: ffffffffc005d040 R09: ffffffffc005c1f0
- R10: ffff96bdfcd33c40 R11: ffff96bdfcd33b80 R12: 0000000000000018
- R13: ffffffffc005c1f0 R14: ffffffffc005e708 R15: ffffffff8b2fbc74
- FS: 00007f5f447beba8(0000) GS:ffff96bdff900000(0000) knlGS:0000000000000000
- CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
- CR2: ffffffffc005b1d9 CR3: 000000003cedc002 CR4: 0000000000360ea0
- DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
- DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
- Call Trace:
- klp_init_object_loaded+0x10f/0x219
- ? preempt_latency_start+0x21/0x57
- klp_enable_patch+0x662/0x809
- ? virt_to_head_page+0x3a/0x3c
- ? kfree+0x8c/0x126
- patch_init+0x2ed/0x1000 [livepatch_test02]
- ? 0xffffffffc0060000
- do_one_initcall+0x9f/0x1c5
- ? kmem_cache_alloc_trace+0xc4/0xd4
- ? do_init_module+0x27/0x210
- do_init_module+0x5f/0x210
- load_module+0x1c41/0x2290
- ? fsnotify_path+0x3b/0x42
- ? strstarts+0x2b/0x2b
- ? kernel_read+0x58/0x65
- __do_sys_finit_module+0x9f/0xc3
- ? __do_sys_finit_module+0x9f/0xc3
- __x64_sys_finit_module+0x1a/0x1c
- do_syscall_64+0x52/0x61
- entry_SYSCALL_64_after_hwframe+0x44/0xa9
-
-The above panic occurs when loading two modules at the same time with
-ftrace enabled, where at least one of the modules is a livepatch module:
-
-CPU0 CPU1
-klp_enable_patch()
- klp_init_object_loaded()
- module_disable_ro()
- ftrace_module_enable()
- ftrace_arch_code_modify_post_process()
- set_all_modules_text_ro()
- klp_write_object_relocations()
- apply_relocate_add()
- *patches read-only code* - BOOM
-
-A similar race exists when toggling ftrace while loading a livepatch
-module.
-
-Fix it by ensuring that the livepatch and ftrace code patching
-operations -- and their respective permissions changes -- are protected
-by the text_mutex.
-
-Link: http://lkml.kernel.org/r/ab43d56ab909469ac5d2520c5d944ad6d4abd476.1560474114.git.jpoimboe@redhat.com
-
-Reported-by: Johannes Erdfelt <johannes@erdfelt.com>
-Fixes: 444d13ff10fb ("modules: add ro_after_init support")
-Acked-by: Jessica Yu <jeyu@kernel.org>
-Reviewed-by: Petr Mladek <pmladek@suse.com>
-Reviewed-by: Miroslav Benes <mbenes@suse.cz>
-Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
-Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
-Acked-by: Miroslav Benes <mbenes@suse.cz>
----
- kernel/livepatch/core.c | 6 ++++++
- kernel/trace/ftrace.c | 10 +++++++++-
- 2 files changed, 15 insertions(+), 1 deletion(-)
-
---- a/kernel/livepatch/core.c
-+++ b/kernel/livepatch/core.c
-@@ -30,6 +30,7 @@
- #include <linux/elf.h>
- #include <linux/moduleloader.h>
- #include <linux/completion.h>
-+#include <linux/memory.h>
- #include <asm/cacheflush.h>
- #include "core.h"
- #include "patch.h"
-@@ -729,16 +730,21 @@ static int klp_init_object_loaded(struct
- struct klp_func *func;
- int ret;
-
-+ mutex_lock(&text_mutex);
-+
- module_disable_ro(patch->mod);
- ret = klp_write_object_relocations(patch->mod, obj);
- if (ret) {
- module_enable_ro(patch->mod, true);
-+ mutex_unlock(&text_mutex);
- return ret;
- }
-
- arch_klp_init_object_loaded(patch, obj);
- module_enable_ro(patch->mod, true);
-
-+ mutex_unlock(&text_mutex);
-+
- klp_for_each_func(obj, func) {
- ret = klp_find_object_symbol(obj->name, func->old_name,
- func->old_sympos,
---- a/kernel/trace/ftrace.c
-+++ b/kernel/trace/ftrace.c
-@@ -33,6 +33,7 @@
- #include <linux/list.h>
- #include <linux/hash.h>
- #include <linux/rcupdate.h>
-+#include <linux/memory.h>
-
- #include <trace/events/sched.h>
-
-@@ -2629,10 +2630,12 @@ static void ftrace_run_update_code(int c
- {
- int ret;
-
-+ mutex_lock(&text_mutex);
-+
- ret = ftrace_arch_code_modify_prepare();
- FTRACE_WARN_ON(ret);
- if (ret)
-- return;
-+ goto out_unlock;
-
- /*
- * By default we use stop_machine() to modify the code.
-@@ -2644,6 +2647,9 @@ static void ftrace_run_update_code(int c
-
- ret = ftrace_arch_code_modify_post_process();
- FTRACE_WARN_ON(ret);
-+
-+out_unlock:
-+ mutex_unlock(&text_mutex);
- }
-
- static void ftrace_run_modify_code(struct ftrace_ops *ops, int command,
-@@ -5409,6 +5415,7 @@ void ftrace_module_enable(struct module
- struct ftrace_page *pg;
-
- mutex_lock(&ftrace_lock);
-+ mutex_lock(&text_mutex);
-
- if (ftrace_disabled)
- goto out_unlock;
-@@ -5469,6 +5476,7 @@ void ftrace_module_enable(struct module
- ftrace_arch_code_modify_post_process();
-
- out_unlock:
-+ mutex_unlock(&text_mutex);
- mutex_unlock(&ftrace_lock);
- }
-
diff --git a/patches.suse/net-ibmvnic-prevent-more-than-one-thread-from-runnin.patch b/patches.suse/net-ibmvnic-prevent-more-than-one-thread-from-runnin.patch
new file mode 100644
index 0000000000..7b24b04511
--- /dev/null
+++ b/patches.suse/net-ibmvnic-prevent-more-than-one-thread-from-runnin.patch
@@ -0,0 +1,180 @@
+From 7ed5b31f4a6695a21f617df07646e9b15c6c1d29 Mon Sep 17 00:00:00 2001
+From: Juliet Kim <julietk@linux.vnet.ibm.com>
+Date: Fri, 20 Sep 2019 16:11:23 -0400
+Subject: [PATCH] net/ibmvnic: prevent more than one thread from running in
+ reset
+
+References: bsc#1152457 ltc#174432
+Patch-mainline: v5.4-rc1
+Git-commit: 7ed5b31f4a6695a21f617df07646e9b15c6c1d29
+
+The current code allows more than one thread to run in reset. This can
+corrupt struct adapter data. Check adapter->resetting before performing
+a reset, if there is another reset running delay (100 msec) before trying
+again.
+
+Signed-off-by: Juliet Kim <julietk@linux.vnet.ibm.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ drivers/net/ethernet/ibm/ibmvnic.c | 40 ++++++++++++++++++++++--------
+ drivers/net/ethernet/ibm/ibmvnic.h | 5 +++-
+ 2 files changed, 34 insertions(+), 11 deletions(-)
+
+diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
+index d7db5cc51f6a..2b073a3c0b84 100644
+--- a/drivers/net/ethernet/ibm/ibmvnic.c
++++ b/drivers/net/ethernet/ibm/ibmvnic.c
+@@ -1207,7 +1207,7 @@ static void ibmvnic_cleanup(struct net_device *netdev)
+ struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+
+ /* ensure that transmissions are stopped if called by do_reset */
+- if (adapter->resetting)
++ if (test_bit(0, &adapter->resetting))
+ netif_tx_disable(netdev);
+ else
+ netif_tx_stop_all_queues(netdev);
+@@ -1428,7 +1428,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
+ u8 proto = 0;
+ netdev_tx_t ret = NETDEV_TX_OK;
+
+- if (adapter->resetting) {
++ if (test_bit(0, &adapter->resetting)) {
+ if (!netif_subqueue_stopped(netdev, skb))
+ netif_stop_subqueue(netdev, queue_num);
+ dev_kfree_skb_any(skb);
+@@ -2054,6 +2054,12 @@ static void __ibmvnic_reset(struct work_struct *work)
+
+ adapter = container_of(work, struct ibmvnic_adapter, ibmvnic_reset);
+
++ if (test_and_set_bit_lock(0, &adapter->resetting)) {
++ schedule_delayed_work(&adapter->ibmvnic_delayed_reset,
++ IBMVNIC_RESET_DELAY);
++ return;
++ }
++
+ reset_state = adapter->state;
+
+ rwi = get_next_rwi(adapter);
+@@ -2095,6 +2101,10 @@ static void __ibmvnic_reset(struct work_struct *work)
+ break;
+
+ rwi = get_next_rwi(adapter);
++
++ if (rwi && (rwi->reset_reason == VNIC_RESET_FAILOVER ||
++ rwi->reset_reason == VNIC_RESET_MOBILITY))
++ adapter->force_reset_recovery = true;
+ }
+
+ if (adapter->wait_for_reset) {
+@@ -2107,7 +2117,16 @@ static void __ibmvnic_reset(struct work_struct *work)
+ free_all_rwi(adapter);
+ }
+
+- adapter->resetting = false;
++ clear_bit_unlock(0, &adapter->resetting);
++}
++
++static void __ibmvnic_delayed_reset(struct work_struct *work)
++{
++ struct ibmvnic_adapter *adapter;
++
++ adapter = container_of(work, struct ibmvnic_adapter,
++ ibmvnic_delayed_reset.work);
++ __ibmvnic_reset(&adapter->ibmvnic_reset);
+ }
+
+ static int ibmvnic_reset(struct ibmvnic_adapter *adapter,
+@@ -2162,7 +2181,6 @@ static int ibmvnic_reset(struct ibmvnic_adapter *adapter,
+ rwi->reset_reason = reason;
+ list_add_tail(&rwi->list, &adapter->rwi_list);
+ spin_unlock_irqrestore(&adapter->rwi_lock, flags);
+- adapter->resetting = true;
+ netdev_dbg(adapter->netdev, "Scheduling reset (reason %d)\n", reason);
+ schedule_work(&adapter->ibmvnic_reset);
+
+@@ -2207,7 +2225,7 @@ static int ibmvnic_poll(struct napi_struct *napi, int budget)
+ u16 offset;
+ u8 flags = 0;
+
+- if (unlikely(adapter->resetting &&
++ if (unlikely(test_bit(0, &adapter->resetting) &&
+ adapter->reset_reason != VNIC_RESET_NON_FATAL)) {
+ enable_scrq_irq(adapter, adapter->rx_scrq[scrq_num]);
+ napi_complete_done(napi, frames_processed);
+@@ -2858,7 +2876,7 @@ static int enable_scrq_irq(struct ibmvnic_adapter *adapter,
+ return 1;
+ }
+
+- if (adapter->resetting &&
++ if (test_bit(0, &adapter->resetting) &&
+ adapter->reset_reason == VNIC_RESET_MOBILITY) {
+ u64 val = (0xff000000) | scrq->hw_irq;
+
+@@ -3408,7 +3426,7 @@ static int ibmvnic_send_crq(struct ibmvnic_adapter *adapter,
+ if (rc) {
+ if (rc == H_CLOSED) {
+ dev_warn(dev, "CRQ Queue closed\n");
+- if (adapter->resetting)
++ if (test_bit(0, &adapter->resetting))
+ ibmvnic_reset(adapter, VNIC_RESET_FATAL);
+ }
+
+@@ -4484,7 +4502,7 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq,
+ case IBMVNIC_CRQ_XPORT_EVENT:
+ netif_carrier_off(netdev);
+ adapter->crq.active = false;
+- if (adapter->resetting)
++ if (test_bit(0, &adapter->resetting))
+ adapter->force_reset_recovery = true;
+ if (gen_crq->cmd == IBMVNIC_PARTITION_MIGRATED) {
+ dev_info(dev, "Migrated, re-enabling adapter\n");
+@@ -4822,7 +4840,7 @@ static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter)
+ return -1;
+ }
+
+- if (adapter->resetting && !adapter->wait_for_reset &&
++ if (test_bit(0, &adapter->resetting) && !adapter->wait_for_reset &&
+ adapter->reset_reason != VNIC_RESET_MOBILITY) {
+ if (adapter->req_rx_queues != old_num_rx_queues ||
+ adapter->req_tx_queues != old_num_tx_queues) {
+@@ -4934,10 +4952,12 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
+ spin_lock_init(&adapter->stats_lock);
+
+ INIT_WORK(&adapter->ibmvnic_reset, __ibmvnic_reset);
++ INIT_DELAYED_WORK(&adapter->ibmvnic_delayed_reset,
++ __ibmvnic_delayed_reset);
+ INIT_LIST_HEAD(&adapter->rwi_list);
+ spin_lock_init(&adapter->rwi_lock);
+ init_completion(&adapter->init_done);
+- adapter->resetting = false;
++ clear_bit(0, &adapter->resetting);
+
+ do {
+ rc = init_crq_queue(adapter);
+diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h
+index 9d3d35cc91d6..ebc39248b334 100644
+--- a/drivers/net/ethernet/ibm/ibmvnic.h
++++ b/drivers/net/ethernet/ibm/ibmvnic.h
+@@ -39,6 +39,8 @@
+ #define IBMVNIC_MAX_LTB_SIZE ((1 << (MAX_ORDER - 1)) * PAGE_SIZE)
+ #define IBMVNIC_BUFFER_HLEN 500
+
++#define IBMVNIC_RESET_DELAY 100
++
+ static const char ibmvnic_priv_flags[][ETH_GSTRING_LEN] = {
+ #define IBMVNIC_USE_SERVER_MAXES 0x1
+ "use-server-maxes"
+@@ -1077,7 +1079,8 @@ struct ibmvnic_adapter {
+ spinlock_t rwi_lock;
+ struct list_head rwi_list;
+ struct work_struct ibmvnic_reset;
+- bool resetting;
++ struct delayed_work ibmvnic_delayed_reset;
++ unsigned long resetting;
+ bool napi_enabled, from_passive_init;
+
+ bool failover_pending;
+--
+2.23.0
+
diff --git a/patches.suse/net-ibmvnic-unlock-rtnl_lock-in-reset-so-linkwatch_e.patch b/patches.suse/net-ibmvnic-unlock-rtnl_lock-in-reset-so-linkwatch_e.patch
new file mode 100644
index 0000000000..b0f53f1990
--- /dev/null
+++ b/patches.suse/net-ibmvnic-unlock-rtnl_lock-in-reset-so-linkwatch_e.patch
@@ -0,0 +1,411 @@
+From b27507bb59ed504d7fa4d6a35f25a8cc39903b54 Mon Sep 17 00:00:00 2001
+From: Juliet Kim <julietk@linux.vnet.ibm.com>
+Date: Fri, 20 Sep 2019 16:11:22 -0400
+Subject: [PATCH] net/ibmvnic: unlock rtnl_lock in reset so linkwatch_event can
+ run
+
+References: bsc#1152457 ltc#174432
+Patch-mainline: v5.4-rc1
+Git-commit: b27507bb59ed504d7fa4d6a35f25a8cc39903b54
+
+Commit a5681e20b541 ("net/ibmnvic: Fix deadlock problem in reset")
+made the change to hold the RTNL lock during a reset to avoid deadlock
+but linkwatch_event is fired during the reset and needs the RTNL lock.
+That keeps linkwatch_event process from proceeding until the reset
+is complete. The reset process cannot tolerate the linkwatch_event
+processing after reset completes, so release the RTNL lock during the
+process to allow a chance for linkwatch_event to run during reset.
+This does not guarantee that the linkwatch_event will be processed as
+soon as link state changes, but is an improvement over the current code
+where linkwatch_event processing is always delayed, which prevents
+transmissions on the device from being deactivated leading transmit
+watchdog timer to time-out.
+
+Release the RTNL lock before link state change and re-acquire after
+the link state change to allow linkwatch_event to grab the RTNL lock
+and run during the reset.
+
+Fixes: a5681e20b541 ("net/ibmnvic: Fix deadlock problem in reset")
+Signed-off-by: Juliet Kim <julietk@linux.vnet.ibm.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ drivers/net/ethernet/ibm/ibmvnic.c | 224 ++++++++++++++++++++---------
+ drivers/net/ethernet/ibm/ibmvnic.h | 1 +
+ 2 files changed, 157 insertions(+), 68 deletions(-)
+
+diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
+index 3816fff75bb5..d7db5cc51f6a 100644
+--- a/drivers/net/ethernet/ibm/ibmvnic.c
++++ b/drivers/net/ethernet/ibm/ibmvnic.c
+@@ -1723,6 +1723,86 @@ static int ibmvnic_set_mac(struct net_device *netdev, void *p)
+ return rc;
+ }
+
++/**
++ * do_change_param_reset returns zero if we are able to keep processing reset
++ * events, or non-zero if we hit a fatal error and must halt.
++ */
++static int do_change_param_reset(struct ibmvnic_adapter *adapter,
++ struct ibmvnic_rwi *rwi,
++ u32 reset_state)
++{
++ struct net_device *netdev = adapter->netdev;
++ int i, rc;
++
++ netdev_dbg(adapter->netdev, "Change param resetting driver (%d)\n",
++ rwi->reset_reason);
++
++ netif_carrier_off(netdev);
++ adapter->reset_reason = rwi->reset_reason;
++
++ ibmvnic_cleanup(netdev);
++
++ if (reset_state == VNIC_OPEN) {
++ rc = __ibmvnic_close(netdev);
++ if (rc)
++ return rc;
++ }
++
++ release_resources(adapter);
++ release_sub_crqs(adapter, 1);
++ release_crq_queue(adapter);
++
++ adapter->state = VNIC_PROBED;
++
++ rc = init_crq_queue(adapter);
++
++ if (rc) {
++ netdev_err(adapter->netdev,
++ "Couldn't initialize crq. rc=%d\n", rc);
++ return rc;
++ }
++
++ rc = ibmvnic_reset_init(adapter);
++ if (rc)
++ return IBMVNIC_INIT_FAILED;
++
++ /* If the adapter was in PROBE state prior to the reset,
++ * exit here.
++ */
++ if (reset_state == VNIC_PROBED)
++ return 0;
++
++ rc = ibmvnic_login(netdev);
++ if (rc) {
++ adapter->state = reset_state;
++ return rc;
++ }
++
++ rc = init_resources(adapter);
++ if (rc)
++ return rc;
++
++ ibmvnic_disable_irqs(adapter);
++
++ adapter->state = VNIC_CLOSED;
++
++ if (reset_state == VNIC_CLOSED)
++ return 0;
++
++ rc = __ibmvnic_open(netdev);
++ if (rc)
++ return IBMVNIC_OPEN_FAILED;
++
++ /* refresh device's multicast list */
++ ibmvnic_set_multi(netdev);
++
++ /* kick napi */
++ for (i = 0; i < adapter->req_rx_queues; i++)
++ napi_schedule(&adapter->napi[i]);
++
++ return 0;
++}
++
+ /**
+ * do_reset returns zero if we are able to keep processing reset events, or
+ * non-zero if we hit a fatal error and must halt.
+@@ -1738,6 +1818,8 @@ static int do_reset(struct ibmvnic_adapter *adapter,
+ netdev_dbg(adapter->netdev, "Re-setting driver (%d)\n",
+ rwi->reset_reason);
+
++ rtnl_lock();
++
+ netif_carrier_off(netdev);
+ adapter->reset_reason = rwi->reset_reason;
+
+@@ -1751,16 +1833,25 @@ static int do_reset(struct ibmvnic_adapter *adapter,
+ if (reset_state == VNIC_OPEN &&
+ adapter->reset_reason != VNIC_RESET_MOBILITY &&
+ adapter->reset_reason != VNIC_RESET_FAILOVER) {
+- rc = __ibmvnic_close(netdev);
++ adapter->state = VNIC_CLOSING;
++
++ /* Release the RTNL lock before link state change and
++ * re-acquire after the link state change to allow
++ * linkwatch_event to grab the RTNL lock and run during
++ * a reset.
++ */
++ rtnl_unlock();
++ rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_DN);
++ rtnl_lock();
+ if (rc)
+- return rc;
+- }
++ goto out;
+
+- if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM ||
+- adapter->wait_for_reset) {
+- release_resources(adapter);
+- release_sub_crqs(adapter, 1);
+- release_crq_queue(adapter);
++ if (adapter->state != VNIC_CLOSING) {
++ rc = -1;
++ goto out;
++ }
++
++ adapter->state = VNIC_CLOSED;
+ }
+
+ if (adapter->reset_reason != VNIC_RESET_NON_FATAL) {
+@@ -1769,9 +1860,7 @@ static int do_reset(struct ibmvnic_adapter *adapter,
+ */
+ adapter->state = VNIC_PROBED;
+
+- if (adapter->wait_for_reset) {
+- rc = init_crq_queue(adapter);
+- } else if (adapter->reset_reason == VNIC_RESET_MOBILITY) {
++ if (adapter->reset_reason == VNIC_RESET_MOBILITY) {
+ rc = ibmvnic_reenable_crq_queue(adapter);
+ release_sub_crqs(adapter, 1);
+ } else {
+@@ -1783,36 +1872,35 @@ static int do_reset(struct ibmvnic_adapter *adapter,
+ if (rc) {
+ netdev_err(adapter->netdev,
+ "Couldn't initialize crq. rc=%d\n", rc);
+- return rc;
++ goto out;
+ }
+
+ rc = ibmvnic_reset_init(adapter);
+- if (rc)
+- return IBMVNIC_INIT_FAILED;
++ if (rc) {
++ rc = IBMVNIC_INIT_FAILED;
++ goto out;
++ }
+
+ /* If the adapter was in PROBE state prior to the reset,
+ * exit here.
+ */
+- if (reset_state == VNIC_PROBED)
+- return 0;
++ if (reset_state == VNIC_PROBED) {
++ rc = 0;
++ goto out;
++ }
+
+ rc = ibmvnic_login(netdev);
+ if (rc) {
+ adapter->state = reset_state;
+- return rc;
++ goto out;
+ }
+
+- if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM ||
+- adapter->wait_for_reset) {
+- rc = init_resources(adapter);
+- if (rc)
+- return rc;
+- } else if (adapter->req_rx_queues != old_num_rx_queues ||
+- adapter->req_tx_queues != old_num_tx_queues ||
+- adapter->req_rx_add_entries_per_subcrq !=
+- old_num_rx_slots ||
+- adapter->req_tx_entries_per_subcrq !=
+- old_num_tx_slots) {
++ if (adapter->req_rx_queues != old_num_rx_queues ||
++ adapter->req_tx_queues != old_num_tx_queues ||
++ adapter->req_rx_add_entries_per_subcrq !=
++ old_num_rx_slots ||
++ adapter->req_tx_entries_per_subcrq !=
++ old_num_tx_slots) {
+ release_rx_pools(adapter);
+ release_tx_pools(adapter);
+ release_napi(adapter);
+@@ -1820,32 +1908,30 @@ static int do_reset(struct ibmvnic_adapter *adapter,
+
+ rc = init_resources(adapter);
+ if (rc)
+- return rc;
++ goto out;
+
+ } else {
+ rc = reset_tx_pools(adapter);
+ if (rc)
+- return rc;
++ goto out;
+
+ rc = reset_rx_pools(adapter);
+ if (rc)
+- return rc;
++ goto out;
+ }
+ ibmvnic_disable_irqs(adapter);
+ }
+ adapter->state = VNIC_CLOSED;
+
+- if (reset_state == VNIC_CLOSED)
+- return 0;
++ if (reset_state == VNIC_CLOSED) {
++ rc = 0;
++ goto out;
++ }
+
+ rc = __ibmvnic_open(netdev);
+ if (rc) {
+- if (list_empty(&adapter->rwi_list))
+- adapter->state = VNIC_CLOSED;
+- else
+- adapter->state = reset_state;
+-
+- return 0;
++ rc = IBMVNIC_OPEN_FAILED;
++ goto out;
+ }
+
+ /* refresh device's multicast list */
+@@ -1855,11 +1941,15 @@ static int do_reset(struct ibmvnic_adapter *adapter,
+ for (i = 0; i < adapter->req_rx_queues; i++)
+ napi_schedule(&adapter->napi[i]);
+
+- if (adapter->reset_reason != VNIC_RESET_FAILOVER &&
+- adapter->reset_reason != VNIC_RESET_CHANGE_PARAM)
++ if (adapter->reset_reason != VNIC_RESET_FAILOVER)
+ call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, netdev);
+
+- return 0;
++ rc = 0;
++
++out:
++ rtnl_unlock();
++
++ return rc;
+ }
+
+ static int do_hard_reset(struct ibmvnic_adapter *adapter,
+@@ -1919,14 +2009,8 @@ static int do_hard_reset(struct ibmvnic_adapter *adapter,
+ return 0;
+
+ rc = __ibmvnic_open(netdev);
+- if (rc) {
+- if (list_empty(&adapter->rwi_list))
+- adapter->state = VNIC_CLOSED;
+- else
+- adapter->state = reset_state;
+-
+- return 0;
+- }
++ if (rc)
++ return IBMVNIC_OPEN_FAILED;
+
+ return 0;
+ }
+@@ -1965,20 +2049,11 @@ static void __ibmvnic_reset(struct work_struct *work)
+ {
+ struct ibmvnic_rwi *rwi;
+ struct ibmvnic_adapter *adapter;
+- bool we_lock_rtnl = false;
+ u32 reset_state;
+ int rc = 0;
+
+ adapter = container_of(work, struct ibmvnic_adapter, ibmvnic_reset);
+
+- /* netif_set_real_num_xx_queues needs to take rtnl lock here
+- * unless wait_for_reset is set, in which case the rtnl lock
+- * has already been taken before initializing the reset
+- */
+- if (!adapter->wait_for_reset) {
+- rtnl_lock();
+- we_lock_rtnl = true;
+- }
+ reset_state = adapter->state;
+
+ rwi = get_next_rwi(adapter);
+@@ -1990,14 +2065,32 @@ static void __ibmvnic_reset(struct work_struct *work)
+ break;
+ }
+
+- if (adapter->force_reset_recovery) {
+- adapter->force_reset_recovery = false;
+- rc = do_hard_reset(adapter, rwi, reset_state);
++ if (rwi->reset_reason == VNIC_RESET_CHANGE_PARAM) {
++ /* CHANGE_PARAM requestor holds rtnl_lock */
++ rc = do_change_param_reset(adapter, rwi, reset_state);
++ } else if (adapter->force_reset_recovery) {
++ /* Transport event occurred during previous reset */
++ if (adapter->wait_for_reset) {
++ /* Previous was CHANGE_PARAM; caller locked */
++ adapter->force_reset_recovery = false;
++ rc = do_hard_reset(adapter, rwi, reset_state);
++ } else {
++ rtnl_lock();
++ adapter->force_reset_recovery = false;
++ rc = do_hard_reset(adapter, rwi, reset_state);
++ rtnl_unlock();
++ }
+ } else {
+ rc = do_reset(adapter, rwi, reset_state);
+ }
+ kfree(rwi);
+- if (rc && rc != IBMVNIC_INIT_FAILED &&
++ if (rc == IBMVNIC_OPEN_FAILED) {
++ if (list_empty(&adapter->rwi_list))
++ adapter->state = VNIC_CLOSED;
++ else
++ adapter->state = reset_state;
++ rc = 0;
++ } else if (rc && rc != IBMVNIC_INIT_FAILED &&
+ !adapter->force_reset_recovery)
+ break;
+
+@@ -2005,7 +2098,6 @@ static void __ibmvnic_reset(struct work_struct *work)
+ }
+
+ if (adapter->wait_for_reset) {
+- adapter->wait_for_reset = false;
+ adapter->reset_done_rc = rc;
+ complete(&adapter->reset_done);
+ }
+@@ -2016,8 +2108,6 @@ static void __ibmvnic_reset(struct work_struct *work)
+ }
+
+ adapter->resetting = false;
+- if (we_lock_rtnl)
+- rtnl_unlock();
+ }
+
+ static int ibmvnic_reset(struct ibmvnic_adapter *adapter,
+@@ -2078,8 +2168,6 @@ static int ibmvnic_reset(struct ibmvnic_adapter *adapter,
+
+ return 0;
+ err:
+- if (adapter->wait_for_reset)
+- adapter->wait_for_reset = false;
+ return -ret;
+ }
+
+diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h
+index 70bd286f8932..9d3d35cc91d6 100644
+--- a/drivers/net/ethernet/ibm/ibmvnic.h
++++ b/drivers/net/ethernet/ibm/ibmvnic.h
+@@ -20,6 +20,7 @@
+ #define IBMVNIC_INVALID_MAP -1
+ #define IBMVNIC_STATS_TIMEOUT 1
+ #define IBMVNIC_INIT_FAILED 2
++#define IBMVNIC_OPEN_FAILED 3
+
+ /* basic structures plus 100 2k buffers */
+ #define IBMVNIC_IO_ENTITLEMENT_DEFAULT 610305
+--
+2.23.0
+
diff --git a/patches.suse/nfsd-Don-t-release-the-callback-slot-unless-it-was-a.patch b/patches.suse/nfsd-Don-t-release-the-callback-slot-unless-it-was-a.patch
new file mode 100644
index 0000000000..54a432c96a
--- /dev/null
+++ b/patches.suse/nfsd-Don-t-release-the-callback-slot-unless-it-was-a.patch
@@ -0,0 +1,81 @@
+From: Trond Myklebust <trondmy@gmail.com>
+Date: Fri, 5 Apr 2019 08:54:37 -0700
+Subject: [PATCH] nfsd: Don't release the callback slot unless it was actually
+ held
+Git-commit: e6abc8caa6deb14be2a206253f7e1c5e37e9515b
+Patch-mainline: v5.1
+References: git-fixes
+
+If there are multiple callbacks queued, waiting for the callback
+slot when the callback gets shut down, then they all currently
+end up acting as if they hold the slot, and call
+nfsd4_cb_sequence_done() resulting in interesting side-effects.
+
+In addition, the 'retry_nowait' path in nfsd4_cb_sequence_done()
+causes a loop back to nfsd4_cb_prepare() without first freeing the
+slot, which causes a deadlock when nfsd41_cb_get_slot() gets called
+a second time.
+
+This patch therefore adds a boolean to track whether or not the
+callback did pick up the slot, so that it can do the right thing
+in these 2 cases.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Signed-off-by: J. Bruce Fields <bfields@redhat.com>
+Acked-by: NeilBrown <neilb@suse.com>
+
+---
+ fs/nfsd/nfs4callback.c | 8 +++++++-
+ fs/nfsd/state.h | 1 +
+ 2 files changed, 8 insertions(+), 1 deletion(-)
+
+--- a/fs/nfsd/nfs4callback.c
++++ b/fs/nfsd/nfs4callback.c
+@@ -931,8 +931,9 @@ static void nfsd4_cb_prepare(struct rpc_
+ cb->cb_seq_status = 1;
+ cb->cb_status = 0;
+ if (minorversion) {
+- if (!nfsd41_cb_get_slot(clp, task))
++ if (!cb->cb_holds_slot && !nfsd41_cb_get_slot(clp, task))
+ return;
++ cb->cb_holds_slot = true;
+ }
+ rpc_call_start(task);
+ }
+@@ -959,6 +960,9 @@ static bool nfsd4_cb_sequence_done(struc
+ return true;
+ }
+
++ if (!cb->cb_holds_slot)
++ goto need_restart;
++
+ switch (cb->cb_seq_status) {
+ case 0:
+ /*
+@@ -996,6 +1000,7 @@ static bool nfsd4_cb_sequence_done(struc
+ cb->cb_seq_status);
+ }
+
++ cb->cb_holds_slot = false;
+ clear_bit(0, &clp->cl_cb_slot_busy);
+ rpc_wake_up_next(&clp->cl_cb_waitq);
+ dprintk("%s: freed slot, new seqid=%d\n", __func__,
+@@ -1203,6 +1208,7 @@ void nfsd4_init_cb(struct nfsd4_callback
+ cb->cb_seq_status = 1;
+ cb->cb_status = 0;
+ cb->cb_need_restart = false;
++ cb->cb_holds_slot = false;
+ }
+
+ void nfsd4_run_cb(struct nfsd4_callback *cb)
+--- a/fs/nfsd/state.h
++++ b/fs/nfsd/state.h
+@@ -69,6 +69,7 @@ struct nfsd4_callback {
+ int cb_seq_status;
+ int cb_status;
+ bool cb_need_restart;
++ bool cb_holds_slot;
+ };
+
+ struct nfsd4_callback_ops {
diff --git a/patches.suse/nvmem-use-the-same-permissions-for-eeprom-as-for-nvmem.patch b/patches.suse/nvmem-use-the-same-permissions-for-eeprom-as-for-nvmem.patch
new file mode 100644
index 0000000000..17bcffdafc
--- /dev/null
+++ b/patches.suse/nvmem-use-the-same-permissions-for-eeprom-as-for-nvmem.patch
@@ -0,0 +1,50 @@
+From: Jean Delvare <jdelvare@suse.de>
+Date: Sun, 28 Jul 2019 18:42:55 +0200
+Subject: [PATCH] nvmem: Use the same permissions for eeprom as for nvmem
+Git-commit: e70d8b287301eb6d7c7761c6171c56af62110ea3
+Patch-mainline: v5.3
+References: git-fixes
+
+The compatibility "eeprom" attribute is currently root-only no
+matter what the configuration says. The "nvmem" attribute does
+respect the setting of the root_only configuration bit, so do the
+same for "eeprom".
+
+Signed-off-by: Jean Delvare <jdelvare@suse.de>
+Fixes: b6c217ab9be6 ("nvmem: Add backwards compatibility support for older EEPROM drivers.")
+Reviewed-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
+Cc: Andrew Lunn <andrew@lunn.ch>
+Cc: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
+Cc: Arnd Bergmann <arnd@arndb.de>
+Link: https://lore.kernel.org/r/20190728184255.563332e6@endymion
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+
+---
+ drivers/nvmem/core.c | 15 +++++++++++----
+ 1 file changed, 11 insertions(+), 4 deletions(-)
+
+--- a/drivers/nvmem/core.c
++++ b/drivers/nvmem/core.c
+@@ -401,10 +401,17 @@ static int nvmem_setup_compat(struct nvm
+ if (!config->base_dev)
+ return -EINVAL;
+
+- if (nvmem->read_only)
+- nvmem->eeprom = bin_attr_ro_root_nvmem;
+- else
+- nvmem->eeprom = bin_attr_rw_root_nvmem;
++ if (nvmem->read_only) {
++ if (config->root_only)
++ nvmem->eeprom = bin_attr_ro_root_nvmem;
++ else
++ nvmem->eeprom = bin_attr_ro_nvmem;
++ } else {
++ if (config->root_only)
++ nvmem->eeprom = bin_attr_rw_root_nvmem;
++ else
++ nvmem->eeprom = bin_attr_rw_nvmem;
++ }
+ nvmem->eeprom.attr.name = "eeprom";
+ nvmem->eeprom.size = nvmem->size;
+ #ifdef CONFIG_DEBUG_LOCK_ALLOC
diff --git a/patches.suse/pNFS-flexfiles-Turn-off-soft-RPC-calls.patch b/patches.suse/pNFS-flexfiles-Turn-off-soft-RPC-calls.patch
new file mode 100644
index 0000000000..0b1eecc87e
--- /dev/null
+++ b/patches.suse/pNFS-flexfiles-Turn-off-soft-RPC-calls.patch
@@ -0,0 +1,42 @@
+From: Trond Myklebust <trond.myklebust@hammerspace.com>
+Date: Thu, 15 Aug 2019 20:18:48 -0400
+Subject: [PATCH] pNFS/flexfiles: Turn off soft RPC calls
+Git-commit: bf2bf9b80e0cd3568ddc85a241abe0dd8b46ebdc
+Patch-mainline: v5.3
+References: git-fixes
+
+The pNFS/flexfiles I/O requests are sent with the SOFTCONN flag set, so
+they automatically time out if the connection breaks. It should
+therefore not be necessary to have the soft flag set in addition.
+
+Fixes: 5f01d9539496 ("nfs41: create NFSv3 DS connection if specified")
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Acked-by: NeilBrown <neilb@suse.com>
+
+---
+ fs/nfs/pnfs_nfs.c | 14 +++++++++-----
+ 1 file changed, 9 insertions(+), 5 deletions(-)
+
+--- a/fs/nfs/pnfs_nfs.c
++++ b/fs/nfs/pnfs_nfs.c
+@@ -650,11 +650,15 @@ static int _nfs4_pnfs_v3_ds_connect(stru
+ /* Add this address as an alias */
+ rpc_clnt_add_xprt(clp->cl_rpcclient, &xprt_args,
+ rpc_clnt_test_and_add_xprt, NULL);
+- } else
+- clp = get_v3_ds_connect(mds_srv,
+- (struct sockaddr *)&da->da_addr,
+- da->da_addrlen, IPPROTO_TCP,
+- timeo, retrans);
++ continue;
++ }
++ clp = get_v3_ds_connect(mds_srv,
++ (struct sockaddr *)&da->da_addr,
++ da->da_addrlen, IPPROTO_TCP,
++ timeo, retrans);
++ if (IS_ERR(clp))
++ continue;
++ clp->cl_rpcclient->cl_softrtry = 0;
+ }
+
+ if (IS_ERR(clp)) {
diff --git a/patches.suse/pnfs-flexfiles-Fix-PTR_ERR-dereferences-in-ff_layout.patch b/patches.suse/pnfs-flexfiles-Fix-PTR_ERR-dereferences-in-ff_layout.patch
new file mode 100644
index 0000000000..ae587d02b3
--- /dev/null
+++ b/patches.suse/pnfs-flexfiles-Fix-PTR_ERR-dereferences-in-ff_layout.patch
@@ -0,0 +1,31 @@
+From: Trond Myklebust <trond.myklebust@hammerspace.com>
+Date: Wed, 17 Jul 2019 13:57:44 -0400
+Subject: [PATCH] pnfs/flexfiles: Fix PTR_ERR() dereferences in
+ ff_layout_track_ds_error
+Git-commit: 8e04fdfadda75a849c649f7e50fe7d97772e1fcb
+Patch-mainline: v5.3
+References: git-fixes
+
+mirror->mirror_ds can be NULL if uninitialised, but can contain
+a PTR_ERR() if call to GETDEVICEINFO failed.
+
+Fixes: 65990d1afbd2 ("pNFS/flexfiles: Fix a deadlock on LAYOUTGET")
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Cc: stable@vger.kernel.org # 4.10+
+Acked-by: NeilBrown <neilb@suse.com>
+
+---
+ fs/nfs/flexfilelayout/flexfilelayoutdev.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
++++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+@@ -305,7 +305,7 @@ int ff_layout_track_ds_error(struct nfs4
+ if (status == 0)
+ return 0;
+
+- if (mirror->mirror_ds == NULL)
++ if (IS_ERR_OR_NULL(mirror->mirror_ds))
+ return -EINVAL;
+
+ dserr = kmalloc(sizeof(*dserr), gfp_flags);
diff --git a/patches.suse/powerpc-64s-Remove-POWER9-DD1-support.patch b/patches.suse/powerpc-64s-Remove-POWER9-DD1-support.patch
index 95fa88db68..f6ac54ccd4 100644
--- a/patches.suse/powerpc-64s-Remove-POWER9-DD1-support.patch
+++ b/patches.suse/powerpc-64s-Remove-POWER9-DD1-support.patch
@@ -165,14 +165,15 @@ Acked-by: Michal Suchanek <msuchanek@suse.de>
--- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
-@@ -43,7 +43,5 @@ extern void radix__flush_tlb_page_psize(
+@@ -43,8 +43,6 @@ extern void radix__flush_tlb_page_psize(
extern void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr);
extern void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr);
extern void radix__flush_tlb_all(void);
-extern void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm,
- unsigned long address);
- #endif
+ extern void radix__flush_tlb_lpid_page(unsigned int lpid,
+ unsigned long addr,
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -209,7 +209,6 @@ static inline void cpu_feature_keys_init
@@ -779,7 +780,7 @@ Acked-by: Michal Suchanek <msuchanek@suse.de>
old_pte = __radix_pte_update(ptep, ~0, 0);
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
-@@ -548,24 +548,6 @@ void radix__flush_tlb_all(void)
+@@ -930,24 +930,6 @@ void radix__flush_tlb_all(void)
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
diff --git a/patches.suse/powerpc-64s-radix-Fix-MADV_-FREE-DONTNEED-TLB-flush-.patch b/patches.suse/powerpc-64s-radix-Fix-MADV_-FREE-DONTNEED-TLB-flush-.patch
new file mode 100644
index 0000000000..0d260438fe
--- /dev/null
+++ b/patches.suse/powerpc-64s-radix-Fix-MADV_-FREE-DONTNEED-TLB-flush-.patch
@@ -0,0 +1,201 @@
+From 02390f66bd2362df114a0a0770d80ec33061f6d1 Mon Sep 17 00:00:00 2001
+From: Nicholas Piggin <npiggin@gmail.com>
+Date: Fri, 15 Jun 2018 11:38:37 +1000
+Subject: [PATCH] powerpc/64s/radix: Fix MADV_[FREE|DONTNEED] TLB flush miss
+ problem with THP
+
+References: bsc#1152161 ltc#181664
+Patch-mainline: v4.18-rc2
+Git-commit: 02390f66bd2362df114a0a0770d80ec33061f6d1
+
+The patch 99baac21e4 ("mm: fix MADV_[FREE|DONTNEED] TLB flush miss
+problem") added a force flush mode to the mmu_gather flush, which
+unconditionally flushes the entire address range being invalidated
+(even if actual ptes only covered a smaller range), to solve a problem
+with concurrent threads invalidating the same PTEs causing them to
+miss TLBs that need flushing.
+
+This does not work with powerpc that invalidates mmu_gather batches
+according to page size. Have powerpc flush all possible page sizes in
+the range if it encounters this concurrency condition.
+
+Patch 4647706ebe ("mm: always flush VMA ranges affected by
+zap_page_range") does add a TLB flush for all page sizes on powerpc for
+the zap_page_range case, but that is to be removed and replaced with
+the mmu_gather flush to avoid redundant flushing. It is also thought to
+not cover other obscure race conditions:
+
+https://lkml.kernel.org/r/BD3A0EBE-ECF4-41D4-87FA-C755EA9AB6BD@gmail.com
+
+Hash does not have a problem because it invalidates TLBs inside the
+page table locks.
+
+Reported-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
+Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/mm/tlb-radix.c | 96 +++++++++++++++++++++++++++++--------
+ 1 file changed, 75 insertions(+), 21 deletions(-)
+
+--- a/arch/powerpc/mm/tlb-radix.c
++++ b/arch/powerpc/mm/tlb-radix.c
+@@ -445,22 +445,17 @@ EXPORT_SYMBOL(radix__flush_tlb_kernel_ra
+ static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
+ static unsigned long tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2;
+
+-void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+- unsigned long end)
++static inline void __radix__flush_tlb_range(struct mm_struct *mm,
++ unsigned long start, unsigned long end,
++ bool flush_all_sizes)
+
+ {
+- struct mm_struct *mm = vma->vm_mm;
+ unsigned long pid;
+ unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift;
+ unsigned long page_size = 1UL << page_shift;
+ unsigned long nr_pages = (end - start) >> page_shift;
+ bool local, full;
+
+-#ifdef CONFIG_HUGETLB_PAGE
+- if (is_vm_hugetlb_page(vma))
+- return radix__flush_hugetlb_tlb_range(vma, start, end);
+-#endif
+-
+ pid = mm->context.id;
+ if (unlikely(pid == MMU_NO_CONTEXT))
+ return;
+@@ -486,37 +481,64 @@ void radix__flush_tlb_range(struct vm_ar
+ _tlbie_pid(pid, RIC_FLUSH_TLB);
+ }
+ } else {
+- bool hflush = false;
++ bool hflush = flush_all_sizes;
++ bool gflush = flush_all_sizes;
+ unsigned long hstart, hend;
++ unsigned long gstart, gend;
+
+-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+- hstart = (start + HPAGE_PMD_SIZE - 1) >> HPAGE_PMD_SHIFT;
+- hend = end >> HPAGE_PMD_SHIFT;
+- if (hstart < hend) {
+- hstart <<= HPAGE_PMD_SHIFT;
+- hend <<= HPAGE_PMD_SHIFT;
++ if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
+ hflush = true;
++
++ if (hflush) {
++ hstart = (start + PMD_SIZE - 1) & PMD_MASK;
++ hend = end & PMD_MASK;
++ if (hstart == hend)
++ hflush = false;
++ }
++
++ if (gflush) {
++ gstart = (start + PUD_SIZE - 1) & PUD_MASK;
++ gend = end & PUD_MASK;
++ if (gstart == gend)
++ gflush = false;
+ }
+-#endif
+
+ asm volatile("ptesync": : :"memory");
+ if (local) {
+ __tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize);
+ if (hflush)
+ __tlbiel_va_range(hstart, hend, pid,
+- HPAGE_PMD_SIZE, MMU_PAGE_2M);
++ PMD_SIZE, MMU_PAGE_2M);
++ if (gflush)
++ __tlbiel_va_range(gstart, gend, pid,
++ PUD_SIZE, MMU_PAGE_1G);
+ asm volatile("ptesync": : :"memory");
+ } else {
+ __tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize);
+ if (hflush)
+ __tlbie_va_range(hstart, hend, pid,
+- HPAGE_PMD_SIZE, MMU_PAGE_2M);
++ PMD_SIZE, MMU_PAGE_2M);
++ if (gflush)
++ __tlbie_va_range(gstart, gend, pid,
++ PUD_SIZE, MMU_PAGE_1G);
+ fixup_tlbie();
+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
+ }
+ }
+ preempt_enable();
+ }
++
++void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
++ unsigned long end)
++
++{
++#ifdef CONFIG_HUGETLB_PAGE
++ if (is_vm_hugetlb_page(vma))
++ return radix__flush_hugetlb_tlb_range(vma, start, end);
++#endif
++
++ __radix__flush_tlb_range(vma->vm_mm, start, end, false);
++}
+ EXPORT_SYMBOL(radix__flush_tlb_range);
+
+ static int radix_get_mmu_psize(int page_size)
+@@ -542,6 +564,8 @@ void radix__tlb_flush(struct mmu_gather
+ int psize = 0;
+ struct mm_struct *mm = tlb->mm;
+ int page_size = tlb->page_size;
++ unsigned long start = tlb->start;
++ unsigned long end = tlb->end;
+
+ /*
+ * if page size is not something we understand, do a full mm flush
+@@ -552,15 +576,45 @@ void radix__tlb_flush(struct mmu_gather
+ */
+ if (tlb->fullmm) {
+ radix__flush_all_mm(mm);
++#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)
++ } else if (mm_tlb_flush_nested(mm)) {
++ /*
++ * If there is a concurrent invalidation that is clearing ptes,
++ * then it's possible this invalidation will miss one of those
++ * cleared ptes and miss flushing the TLB. If this invalidate
++ * returns before the other one flushes TLBs, that can result
++ * in it returning while there are still valid TLBs inside the
++ * range to be invalidated.
++ *
++ * See mm/memory.c:tlb_finish_mmu() for more details.
++ *
++ * The solution to this is ensure the entire range is always
++ * flushed here. The problem for powerpc is that the flushes
++ * are page size specific, so this "forced flush" would not
++ * do the right thing if there are a mix of page sizes in
++ * the range to be invalidated. So use __flush_tlb_range
++ * which invalidates all possible page sizes in the range.
++ *
++ * PWC flush probably is not be required because the core code
++ * shouldn't free page tables in this path, but accounting
++ * for the possibility makes us a bit more robust.
++ *
++ * need_flush_all is an uncommon case because page table
++ * teardown should be done with exclusive locks held (but
++ * after locks are dropped another invalidate could come
++ * in), it could be optimized further if necessary.
++ */
++ if (!tlb->need_flush_all)
++ __radix__flush_tlb_range(mm, start, end, true);
++ else
++ radix__flush_all_mm(mm);
++#endif
+ } else if ( (psize = radix_get_mmu_psize(page_size)) == -1) {
+ if (!tlb->need_flush_all)
+ radix__flush_tlb_mm(mm);
+ else
+ radix__flush_all_mm(mm);
+ } else {
+- unsigned long start = tlb->start;
+- unsigned long end = tlb->end;
+-
+ if (!tlb->need_flush_all)
+ radix__flush_tlb_range_psize(mm, start, end, psize);
+ else
diff --git a/patches.suse/powerpc-64s-radix-Fix-preempt-imbalance-in-TLB-flush.patch b/patches.suse/powerpc-64s-radix-Fix-preempt-imbalance-in-TLB-flush.patch
deleted file mode 100644
index 42f1dfdf49..0000000000
--- a/patches.suse/powerpc-64s-radix-Fix-preempt-imbalance-in-TLB-flush.patch
+++ /dev/null
@@ -1,39 +0,0 @@
-From 26e53d5ebe2e2a5ff7343e820f0ffd69dd503f8e Mon Sep 17 00:00:00 2001
-From: Nicholas Piggin <npiggin@gmail.com>
-Date: Tue, 24 Oct 2017 23:06:52 +1000
-Subject: [PATCH] powerpc/64s/radix: Fix preempt imbalance in TLB flush
-
-References: bsc#1055186
-Patch-mainline: v4.14-rc8
-Git-commit: 26e53d5ebe2e2a5ff7343e820f0ffd69dd503f8e
-
-Fixes: 424de9c6e3f8 ("powerpc/mm/radix: Avoid flushing the PWC on every flush_tlb_range")
-Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
-Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
-Acked-by: Michal Suchanek <msuchanek@suse.de>
----
- arch/powerpc/mm/tlb-radix.c | 2 ++
- 1 file changed, 2 insertions(+)
-
-diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
-index b3e849c4886e..d304028641a2 100644
---- a/arch/powerpc/mm/tlb-radix.c
-+++ b/arch/powerpc/mm/tlb-radix.c
-@@ -360,12 +360,14 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
-
-
- pid = mm ? mm->context.id : 0;
-+ preempt_disable();
- if (unlikely(pid == MMU_NO_CONTEXT))
- goto no_context;
-
- /* 4k page size, just blow the world */
- if (PAGE_SIZE == 0x1000) {
- radix__flush_all_mm(mm);
-+ preempt_enable();
- return;
- }
-
---
-2.13.6
-
diff --git a/patches.suse/powerpc-64s-radix-Implement-_tlbie-l-_va_range-flush.patch b/patches.suse/powerpc-64s-radix-Implement-_tlbie-l-_va_range-flush.patch
new file mode 100644
index 0000000000..104ac2853a
--- /dev/null
+++ b/patches.suse/powerpc-64s-radix-Implement-_tlbie-l-_va_range-flush.patch
@@ -0,0 +1,189 @@
+From d665767e39fa4a9e725f92d77ba2060c5ce273dc Mon Sep 17 00:00:00 2001
+From: Nicholas Piggin <npiggin@gmail.com>
+Date: Tue, 7 Nov 2017 18:53:06 +1100
+Subject: [PATCH] powerpc/64s/radix: Implement _tlbie(l)_va_range flush
+ functions
+
+References: bsc#1152161 ltc#181664
+Patch-mainline: v4.15-rc1
+Git-commit: d665767e39fa4a9e725f92d77ba2060c5ce273dc
+
+Move the barriers and range iteration down into the _tlbie* level,
+which improves readability.
+
+Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/mm/tlb-radix.c | 69 +++++++++++++++++++++----------------
+ 1 file changed, 39 insertions(+), 30 deletions(-)
+
+diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
+index 22b657e4b01a..9916ea2fff43 100644
+--- a/arch/powerpc/mm/tlb-radix.c
++++ b/arch/powerpc/mm/tlb-radix.c
+@@ -85,7 +85,7 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
+ }
+
+ static inline void __tlbiel_va(unsigned long va, unsigned long pid,
+- unsigned long ap, unsigned long ric)
++ unsigned long ap, unsigned long ric)
+ {
+ unsigned long rb,rs,prs,r;
+
+@@ -101,13 +101,28 @@ static inline void __tlbiel_va(unsigned long va, unsigned long pid,
+ }
+
+ static inline void _tlbiel_va(unsigned long va, unsigned long pid,
+- unsigned long ap, unsigned long ric)
++ unsigned long psize, unsigned long ric)
+ {
++ unsigned long ap = mmu_get_ap(psize);
++
+ asm volatile("ptesync": : :"memory");
+ __tlbiel_va(va, pid, ap, ric);
+ asm volatile("ptesync": : :"memory");
+ }
+
++static inline void _tlbiel_va_range(unsigned long start, unsigned long end,
++ unsigned long pid, unsigned long page_size,
++ unsigned long psize)
++{
++ unsigned long addr;
++ unsigned long ap = mmu_get_ap(psize);
++
++ asm volatile("ptesync": : :"memory");
++ for (addr = start; addr < end; addr += page_size)
++ __tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
++ asm volatile("ptesync": : :"memory");
++}
++
+ static inline void __tlbie_va(unsigned long va, unsigned long pid,
+ unsigned long ap, unsigned long ric)
+ {
+@@ -125,13 +140,27 @@ static inline void __tlbie_va(unsigned long va, unsigned long pid,
+ }
+
+ static inline void _tlbie_va(unsigned long va, unsigned long pid,
+- unsigned long ap, unsigned long ric)
++ unsigned long psize, unsigned long ric)
+ {
++ unsigned long ap = mmu_get_ap(psize);
++
+ asm volatile("ptesync": : :"memory");
+ __tlbie_va(va, pid, ap, ric);
+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
+ }
+
++static inline void _tlbie_va_range(unsigned long start, unsigned long end,
++ unsigned long pid, unsigned long page_size,
++ unsigned long psize)
++{
++ unsigned long addr;
++ unsigned long ap = mmu_get_ap(psize);
++
++ asm volatile("ptesync": : :"memory");
++ for (addr = start; addr < end; addr += page_size)
++ __tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
++ asm volatile("eieio; tlbsync; ptesync": : :"memory");
++}
+
+ /*
+ * Base TLB flushing operations:
+@@ -174,12 +203,11 @@ void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmadd
+ int psize)
+ {
+ unsigned long pid;
+- unsigned long ap = mmu_get_ap(psize);
+
+ preempt_disable();
+ pid = mm->context.id;
+ if (pid != MMU_NO_CONTEXT)
+- _tlbiel_va(vmaddr, pid, ap, RIC_FLUSH_TLB);
++ _tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
+ preempt_enable();
+ }
+
+@@ -239,7 +267,6 @@ void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
+ int psize)
+ {
+ unsigned long pid;
+- unsigned long ap = mmu_get_ap(psize);
+
+ pid = mm->context.id;
+ if (unlikely(pid == MMU_NO_CONTEXT))
+@@ -247,9 +274,9 @@ void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
+
+ preempt_disable();
+ if (!mm_is_thread_local(mm))
+- _tlbie_va(vmaddr, pid, ap, RIC_FLUSH_TLB);
++ _tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
+ else
+- _tlbiel_va(vmaddr, pid, ap, RIC_FLUSH_TLB);
++ _tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
+ preempt_enable();
+ }
+
+@@ -336,9 +363,7 @@ void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
+ unsigned long end, int psize)
+ {
+ unsigned long pid;
+- unsigned long addr;
+ bool local;
+- unsigned long ap = mmu_get_ap(psize);
+ unsigned long page_size = 1UL << mmu_psize_defs[psize].shift;
+
+ pid = mm->context.id;
+@@ -354,17 +379,10 @@ void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
+ else
+ _tlbie_pid(pid, RIC_FLUSH_TLB);
+ } else {
+- asm volatile("ptesync": : :"memory");
+- for (addr = start; addr < end; addr += page_size) {
+- if (local)
+- __tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
+- else
+- __tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
+- }
+ if (local)
+- asm volatile("ptesync": : :"memory");
++ _tlbiel_va_range(start, end, pid, page_size, psize);
+ else
+- asm volatile("eieio; tlbsync; ptesync": : :"memory");
++ _tlbie_va_range(start, end, pid, page_size, psize);
+ }
+ preempt_enable();
+ }
+@@ -372,7 +390,6 @@ void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
+ #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
+ {
+- unsigned long ap = mmu_get_ap(mmu_virtual_psize);
+ unsigned long pid, end;
+ bool local;
+
+@@ -395,19 +412,11 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
+ _tlbie_pid(pid, RIC_FLUSH_PWC);
+
+ /* Then iterate the pages */
+- asm volatile("ptesync": : :"memory");
+ end = addr + HPAGE_PMD_SIZE;
+- for (; addr < end; addr += PAGE_SIZE) {
+- if (local)
+- _tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
+- else
+- _tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
+- }
+-
+ if (local)
+- asm volatile("ptesync": : :"memory");
++ _tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize);
+ else
+- asm volatile("eieio; tlbsync; ptesync": : :"memory");
++ _tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize);
+
+ preempt_enable();
+ }
+--
+2.23.0
+
diff --git a/patches.suse/powerpc-64s-radix-Improve-TLB-flushing-for-page-tabl.patch b/patches.suse/powerpc-64s-radix-Improve-TLB-flushing-for-page-tabl.patch
new file mode 100644
index 0000000000..e21795ce3e
--- /dev/null
+++ b/patches.suse/powerpc-64s-radix-Improve-TLB-flushing-for-page-tabl.patch
@@ -0,0 +1,217 @@
+From 0b2f5a8a792755c88bd786f89712a9fac9967b2b Mon Sep 17 00:00:00 2001
+From: Nicholas Piggin <npiggin@gmail.com>
+Date: Tue, 7 Nov 2017 18:53:09 +1100
+Subject: [PATCH] powerpc/64s/radix: Improve TLB flushing for page table
+ freeing
+
+References: bsc#1152161 ltc#181664
+Patch-mainline: v4.15-rc1
+Git-commit: 0b2f5a8a792755c88bd786f89712a9fac9967b2b
+
+Unmaps that free page tables always flush the entire PID, which is
+sub-optimal. Provide TLB range flushing with an additional PWC flush
+that can be use for va range invalidations with PWC flush.
+
+ Time to munmap N pages of memory including last level page table
+ teardown (after mmap, touch), local invalidate:
+ N 1 2 4 8 16 32 64
+ vanilla 3.2us 3.3us 3.4us 3.6us 4.1us 5.2us 7.2us
+ patched 1.4us 1.5us 1.7us 1.9us 2.6us 3.7us 6.2us
+
+ Global invalidate:
+ N 1 2 4 8 16 32 64
+ vanilla 2.2us 2.3us 2.4us 2.6us 3.2us 4.1us 6.2us
+ patched 2.1us 2.5us 3.4us 5.2us 8.7us 15.7us 6.2us
+
+Local invalidates get much better across the board. Global ones have
+the same issue where multiple tlbies for va flush do get slower than
+the single tlbie to invalidate the PID. None of this test captures
+the TLB benefits of avoiding killing everything.
+
+Global gets worse, but it is brought in to line with global invalidate
+for munmap()s that do not free page tables.
+
+Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/mm/tlb-radix.c | 90 +++++++++++++++++++++++++------------
+ 1 file changed, 61 insertions(+), 29 deletions(-)
+
+diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
+index cfa08da534a7..884f4b705b57 100644
+--- a/arch/powerpc/mm/tlb-radix.c
++++ b/arch/powerpc/mm/tlb-radix.c
+@@ -39,6 +39,20 @@ static inline void __tlbiel_pid(unsigned long pid, int set,
+ trace_tlbie(0, 1, rb, rs, ric, prs, r);
+ }
+
++static inline void __tlbie_pid(unsigned long pid, unsigned long ric)
++{
++ unsigned long rb,rs,prs,r;
++
++ rb = PPC_BIT(53); /* IS = 1 */
++ rs = pid << PPC_BITLSHIFT(31);
++ prs = 1; /* process scoped */
++ r = 1; /* raidx format */
++
++ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
++ : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
++ trace_tlbie(0, 0, rb, rs, ric, prs, r);
++}
++
+ /*
+ * We use 128 set in radix mode and 256 set in hpt mode.
+ */
+@@ -70,18 +84,9 @@ static inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
+
+ static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
+ {
+- unsigned long rb,rs,prs,r;
+-
+- rb = PPC_BIT(53); /* IS = 1 */
+- rs = pid << PPC_BITLSHIFT(31);
+- prs = 1; /* process scoped */
+- r = 1; /* raidx format */
+-
+ asm volatile("ptesync": : :"memory");
+- asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+- : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
++ __tlbie_pid(pid, ric);
+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
+- trace_tlbie(0, 0, rb, rs, ric, prs, r);
+ }
+
+ static inline void __tlbiel_va(unsigned long va, unsigned long pid,
+@@ -123,9 +128,11 @@ static inline void _tlbiel_va(unsigned long va, unsigned long pid,
+
+ static inline void _tlbiel_va_range(unsigned long start, unsigned long end,
+ unsigned long pid, unsigned long page_size,
+- unsigned long psize)
++ unsigned long psize, bool also_pwc)
+ {
+ asm volatile("ptesync": : :"memory");
++ if (also_pwc)
++ __tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
+ __tlbiel_va_range(start, end, pid, page_size, psize);
+ asm volatile("ptesync": : :"memory");
+ }
+@@ -169,9 +176,11 @@ static inline void _tlbie_va(unsigned long va, unsigned long pid,
+
+ static inline void _tlbie_va_range(unsigned long start, unsigned long end,
+ unsigned long pid, unsigned long page_size,
+- unsigned long psize)
++ unsigned long psize, bool also_pwc)
+ {
+ asm volatile("ptesync": : :"memory");
++ if (also_pwc)
++ __tlbie_pid(pid, RIC_FLUSH_PWC);
+ __tlbie_va_range(start, end, pid, page_size, psize);
+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
+ }
+@@ -412,13 +421,15 @@ static int radix_get_mmu_psize(int page_size)
+ return psize;
+ }
+
++static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
++ unsigned long end, int psize);
++
+ void radix__tlb_flush(struct mmu_gather *tlb)
+ {
+ int psize = 0;
+ struct mm_struct *mm = tlb->mm;
+ int page_size = tlb->page_size;
+
+- psize = radix_get_mmu_psize(page_size);
+ /*
+ * if page size is not something we understand, do a full mm flush
+ *
+@@ -426,17 +437,28 @@ void radix__tlb_flush(struct mmu_gather *tlb)
+ * that flushes the process table entry cache upon process teardown.
+ * See the comment for radix in arch_exit_mmap().
+ */
+- if (psize != -1 && !tlb->fullmm && !tlb->need_flush_all)
+- radix__flush_tlb_range_psize(mm, tlb->start, tlb->end, psize);
+- else if (tlb->fullmm || tlb->need_flush_all) {
+- tlb->need_flush_all = 0;
++ if (tlb->fullmm) {
+ radix__flush_all_mm(mm);
+- } else
+- radix__flush_tlb_mm(mm);
++ } else if ( (psize = radix_get_mmu_psize(page_size)) == -1) {
++ if (!tlb->need_flush_all)
++ radix__flush_tlb_mm(mm);
++ else
++ radix__flush_all_mm(mm);
++ } else {
++ unsigned long start = tlb->start;
++ unsigned long end = tlb->end;
++
++ if (!tlb->need_flush_all)
++ radix__flush_tlb_range_psize(mm, start, end, psize);
++ else
++ radix__flush_tlb_pwc_range_psize(mm, start, end, psize);
++ }
++ tlb->need_flush_all = 0;
+ }
+
+-void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
+- unsigned long end, int psize)
++static inline void __radix__flush_tlb_range_psize(struct mm_struct *mm,
++ unsigned long start, unsigned long end,
++ int psize, bool also_pwc)
+ {
+ unsigned long pid;
+ unsigned int page_shift = mmu_psize_defs[psize].shift;
+@@ -461,18 +483,30 @@ void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
+
+ if (full) {
+ if (local)
+- _tlbiel_pid(pid, RIC_FLUSH_TLB);
++ _tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
+ else
+- _tlbie_pid(pid, RIC_FLUSH_TLB);
++ _tlbie_pid(pid, also_pwc ? RIC_FLUSH_ALL: RIC_FLUSH_TLB);
+ } else {
+ if (local)
+- _tlbiel_va_range(start, end, pid, page_size, psize);
++ _tlbiel_va_range(start, end, pid, page_size, psize, also_pwc);
+ else
+- _tlbie_va_range(start, end, pid, page_size, psize);
++ _tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
+ }
+ preempt_enable();
+ }
+
++void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
++ unsigned long end, int psize)
++{
++ return __radix__flush_tlb_range_psize(mm, start, end, psize, false);
++}
++
++static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
++ unsigned long end, int psize)
++{
++ __radix__flush_tlb_range_psize(mm, start, end, psize, true);
++}
++
+ #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
+ {
+@@ -494,11 +528,9 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
+ preempt_disable();
+
+ if (mm_is_thread_local(mm)) {
+- _tlbiel_pid(pid, RIC_FLUSH_PWC);
+- _tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize);
++ _tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
+ } else {
+- _tlbie_pid(pid, RIC_FLUSH_PWC);
+- _tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize);
++ _tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
+ }
+
+ preempt_enable();
+--
+2.23.0
+
diff --git a/patches.suse/powerpc-64s-radix-Improve-preempt-handling-in-TLB-co.patch b/patches.suse/powerpc-64s-radix-Improve-preempt-handling-in-TLB-co.patch
new file mode 100644
index 0000000000..62ab163197
--- /dev/null
+++ b/patches.suse/powerpc-64s-radix-Improve-preempt-handling-in-TLB-co.patch
@@ -0,0 +1,164 @@
+From dffe8449c5dd63ff18b47709de75553586582cd8 Mon Sep 17 00:00:00 2001
+From: Nicholas Piggin <npiggin@gmail.com>
+Date: Tue, 24 Oct 2017 23:06:53 +1000
+Subject: [PATCH] powerpc/64s/radix: Improve preempt handling in TLB code
+
+References: bsc#1152161 ltc#181664
+Patch-mainline: v4.15-rc1
+Git-commit: dffe8449c5dd63ff18b47709de75553586582cd8
+
+Preempt should be consistently disabled for mm_is_thread_local tests,
+so bring the rest of these under preempt_disable().
+
+Preempt does not need to be disabled for the mm->context.id tests,
+which allows simplification and removal of gotos.
+
+Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/mm/tlb-radix.c | 48 ++++++++++++++++++-------------------
+ 1 file changed, 23 insertions(+), 25 deletions(-)
+
+diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
+index 3a07d7a5e2fe..67191fe63283 100644
+--- a/arch/powerpc/mm/tlb-radix.c
++++ b/arch/powerpc/mm/tlb-radix.c
+@@ -186,16 +186,15 @@ void radix__flush_tlb_mm(struct mm_struct *mm)
+ {
+ unsigned long pid;
+
+- preempt_disable();
+ pid = mm->context.id;
+ if (unlikely(pid == MMU_NO_CONTEXT))
+- goto no_context;
++ return;
+
++ preempt_disable();
+ if (!mm_is_thread_local(mm))
+ _tlbie_pid(pid, RIC_FLUSH_TLB);
+ else
+ _tlbiel_pid(pid, RIC_FLUSH_TLB);
+-no_context:
+ preempt_enable();
+ }
+ EXPORT_SYMBOL(radix__flush_tlb_mm);
+@@ -204,16 +203,15 @@ void radix__flush_all_mm(struct mm_struct *mm)
+ {
+ unsigned long pid;
+
+- preempt_disable();
+ pid = mm->context.id;
+ if (unlikely(pid == MMU_NO_CONTEXT))
+- goto no_context;
++ return;
+
++ preempt_disable();
+ if (!mm_is_thread_local(mm))
+ _tlbie_pid(pid, RIC_FLUSH_ALL);
+ else
+ _tlbiel_pid(pid, RIC_FLUSH_ALL);
+-no_context:
+ preempt_enable();
+ }
+ EXPORT_SYMBOL(radix__flush_all_mm);
+@@ -230,15 +228,15 @@ void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
+ unsigned long pid;
+ unsigned long ap = mmu_get_ap(psize);
+
+- preempt_disable();
+ pid = mm->context.id;
+ if (unlikely(pid == MMU_NO_CONTEXT))
+- goto bail;
++ return;
++
++ preempt_disable();
+ if (!mm_is_thread_local(mm))
+ _tlbie_va(vmaddr, pid, ap, RIC_FLUSH_TLB);
+ else
+ _tlbiel_va(vmaddr, pid, ap, RIC_FLUSH_TLB);
+-bail:
+ preempt_enable();
+ }
+
+@@ -322,46 +320,44 @@ void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
+ {
+ unsigned long pid;
+ unsigned long addr;
+- int local = mm_is_thread_local(mm);
++ bool local;
+ unsigned long ap = mmu_get_ap(psize);
+ unsigned long page_size = 1UL << mmu_psize_defs[psize].shift;
+
+-
+- preempt_disable();
+ pid = mm->context.id;
+ if (unlikely(pid == MMU_NO_CONTEXT))
+- goto err_out;
++ return;
+
++ preempt_disable();
++ local = mm_is_thread_local(mm);
+ if (end == TLB_FLUSH_ALL ||
+ (end - start) > tlb_single_page_flush_ceiling * page_size) {
+ if (local)
+ _tlbiel_pid(pid, RIC_FLUSH_TLB);
+ else
+ _tlbie_pid(pid, RIC_FLUSH_TLB);
+- goto err_out;
+- }
+- for (addr = start; addr < end; addr += page_size) {
++ } else {
++ for (addr = start; addr < end; addr += page_size) {
+
+- if (local)
+- _tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
+- else
+- _tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
++ if (local)
++ _tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
++ else
++ _tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
++ }
+ }
+-err_out:
+ preempt_enable();
+ }
+
+ #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
+ {
+- int local = mm_is_thread_local(mm);
+ unsigned long ap = mmu_get_ap(mmu_virtual_psize);
+ unsigned long pid, end;
+-
++ bool local;
+
+ pid = mm->context.id;
+ if (unlikely(pid == MMU_NO_CONTEXT))
+- goto no_context;
++ return;
+
+ /* 4k page size, just blow the world */
+ if (PAGE_SIZE == 0x1000) {
+@@ -369,6 +365,8 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
+ return;
+ }
+
++ preempt_disable();
++ local = mm_is_thread_local(mm);
+ /* Otherwise first do the PWC */
+ if (local)
+ _tlbiel_pid(pid, RIC_FLUSH_PWC);
+@@ -383,7 +381,7 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
+ else
+ _tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
+ }
+-no_context:
++
+ preempt_enable();
+ }
+ #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+--
+2.23.0
+
diff --git a/patches.suse/powerpc-64s-radix-Introduce-local-single-page-ceilin.patch b/patches.suse/powerpc-64s-radix-Introduce-local-single-page-ceilin.patch
new file mode 100644
index 0000000000..9689b166d7
--- /dev/null
+++ b/patches.suse/powerpc-64s-radix-Introduce-local-single-page-ceilin.patch
@@ -0,0 +1,99 @@
+From f6f27951fdf84a6edca3ea14077268ad629b57ac Mon Sep 17 00:00:00 2001
+From: Nicholas Piggin <npiggin@gmail.com>
+Date: Tue, 7 Nov 2017 18:53:08 +1100
+Subject: [PATCH] powerpc/64s/radix: Introduce local single page ceiling for
+ TLB range flush
+
+References: bsc#1055117 bsc#1152161 ltc#181664
+Patch-mainline: v4.15-rc1
+Git-commit: f6f27951fdf84a6edca3ea14077268ad629b57ac
+
+The single page flush ceiling is the cut-off point at which we switch
+from invalidating individual pages, to invalidating the entire process
+address space in response to a range flush.
+
+Introduce a local variant of this heuristic because local and global
+tlbie have significantly different properties:
+- Local tlbiel requires 128 instructions to invalidate a PID, global
+ tlbie only 1 instruction.
+- Global tlbie instructions are expensive broadcast operations.
+
+The local ceiling has been made much higher, 2x the number of
+instructions required to invalidate the entire PID (i.e., 256 pages).
+
+ Time to mprotect N pages of memory (after mmap, touch), local invalidate:
+ N 32 34 64 128 256 512
+ vanilla 7.4us 9.0us 14.6us 26.4us 50.2us 98.3us
+ patched 7.4us 7.8us 13.8us 26.4us 51.9us 98.3us
+
+The behaviour of both is identical at N=32 and N=512. Between there,
+the vanilla kernel does a PID invalidate and the patched kernel does
+a va range invalidate.
+
+At N=128, these require the same number of tlbiel instructions, so
+the patched version can be sen to be cheaper when < 128, and more
+expensive when > 128. However this does not well capture the cost
+of invalidated TLB.
+
+The additional cost at 256 pages does not seem prohibitive. It may
+be the case that increasing the limit further would continue to be
+beneficial to avoid invalidating all of the process's TLB entries.
+
+Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/mm/tlb-radix.c | 23 +++++++++++++++++++----
+ 1 file changed, 19 insertions(+), 4 deletions(-)
+
+diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
+index b4b49de551a9..cfa08da534a7 100644
+--- a/arch/powerpc/mm/tlb-radix.c
++++ b/arch/powerpc/mm/tlb-radix.c
+@@ -326,6 +326,7 @@ EXPORT_SYMBOL(radix__flush_tlb_kernel_range);
+ * individual page flushes to full-pid flushes.
+ */
+ static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
++static unsigned long tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2;
+
+ void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+ unsigned long end)
+@@ -348,8 +349,15 @@ void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+ return;
+
+ preempt_disable();
+- local = mm_is_thread_local(mm);
+- full = (end == TLB_FLUSH_ALL || nr_pages > tlb_single_page_flush_ceiling);
++ if (mm_is_thread_local(mm)) {
++ local = true;
++ full = (end == TLB_FLUSH_ALL ||
++ nr_pages > tlb_local_single_page_flush_ceiling);
++ } else {
++ local = false;
++ full = (end == TLB_FLUSH_ALL ||
++ nr_pages > tlb_single_page_flush_ceiling);
++ }
+
+ if (full) {
+ if (local)
+@@ -441,8 +449,15 @@ void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
+ return;
+
+ preempt_disable();
+- local = mm_is_thread_local(mm);
+- full = (end == TLB_FLUSH_ALL || nr_pages > tlb_single_page_flush_ceiling);
++ if (mm_is_thread_local(mm)) {
++ local = true;
++ full = (end == TLB_FLUSH_ALL ||
++ nr_pages > tlb_local_single_page_flush_ceiling);
++ } else {
++ local = false;
++ full = (end == TLB_FLUSH_ALL ||
++ nr_pages > tlb_single_page_flush_ceiling);
++ }
+
+ if (full) {
+ if (local)
+--
+2.21.0
+
diff --git a/patches.suse/powerpc-64s-radix-Optimize-TLB-range-flush-barriers.patch b/patches.suse/powerpc-64s-radix-Optimize-TLB-range-flush-barriers.patch
index f169adaa89..cdf3cb7f59 100644
--- a/patches.suse/powerpc-64s-radix-Optimize-TLB-range-flush-barriers.patch
+++ b/patches.suse/powerpc-64s-radix-Optimize-TLB-range-flush-barriers.patch
@@ -29,18 +29,13 @@ patched 3.4us 14.4us
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
-[mauricfo: backport:
- - hunk 4: update indentation and context lines -- equivalent logic (else / goto err_out)
- - hunk 6: update context lines, add one empty line to match upstream]
-Signed-off-by: Mauricio Faria de Oliveira <mauricfo@linux.vnet.ibm.com>
-
Acked-by: Michal Suchanek <msuchanek@suse.de>
---
- arch/powerpc/mm/tlb-radix.c | 42 +++++++++++++++++++++++++++++++++---------
- 1 file changed, 33 insertions(+), 9 deletions(-)
+ arch/powerpc/mm/tlb-radix.c | 41 +++++++++++++++++++++++++++++--------
+ 1 file changed, 32 insertions(+), 9 deletions(-)
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
-index a71e25a8911d..812b9c338285 100644
+index 63e277b6e60c..22b657e4b01a 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -84,7 +84,7 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
@@ -99,28 +94,28 @@ index a71e25a8911d..812b9c338285 100644
/*
* Base TLB flushing operations:
*
-@@ -342,13 +355,17 @@ void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
+@@ -341,13 +354,17 @@ void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
+ else
_tlbie_pid(pid, RIC_FLUSH_TLB);
- goto err_out;
- }
-+ asm volatile("ptesync": : :"memory");
- for (addr = start; addr < end; addr += page_size) {
+ } else {
++ asm volatile("ptesync": : :"memory");
+ for (addr = start; addr < end; addr += page_size) {
-
- if (local)
-- _tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
-+ __tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
- else
-- _tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
-+ __tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
+ if (local)
+- _tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
++ __tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
+ else
+- _tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
++ __tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
+ }
++ if (local)
++ asm volatile("ptesync": : :"memory");
++ else
++ asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
-+ if (local)
-+ asm volatile("ptesync": : :"memory");
-+ else
-+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
- err_out:
preempt_enable();
}
-@@ -380,6 +397,7 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
+@@ -378,6 +395,7 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
_tlbie_pid(pid, RIC_FLUSH_PWC);
/* Then iterate the pages */
@@ -128,19 +123,18 @@ index a71e25a8911d..812b9c338285 100644
end = addr + HPAGE_PMD_SIZE;
for (; addr < end; addr += PAGE_SIZE) {
if (local)
-@@ -387,6 +405,12 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
- else
+@@ -386,6 +404,11 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
_tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
}
-+
+
+ if (local)
+ asm volatile("ptesync": : :"memory");
+ else
+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
+
- no_context:
preempt_enable();
}
+ #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
--
-2.13.6
+2.23.0
diff --git a/patches.suse/powerpc-64s-radix-Optimize-flush_tlb_range.patch b/patches.suse/powerpc-64s-radix-Optimize-flush_tlb_range.patch
new file mode 100644
index 0000000000..c8ebd7ebe2
--- /dev/null
+++ b/patches.suse/powerpc-64s-radix-Optimize-flush_tlb_range.patch
@@ -0,0 +1,275 @@
+From cbf09c837720f72f5e63ab7a2d331ec6cc9a3417 Mon Sep 17 00:00:00 2001
+From: Nicholas Piggin <npiggin@gmail.com>
+Date: Tue, 7 Nov 2017 18:53:07 +1100
+Subject: [PATCH] powerpc/64s/radix: Optimize flush_tlb_range
+
+References: bsc#1152161 ltc#181664
+Patch-mainline: v4.15-rc1
+Git-commit: cbf09c837720f72f5e63ab7a2d331ec6cc9a3417
+
+Currently for radix, flush_tlb_range flushes the entire PID, because
+the Linux mm code does not tell us about page size here for THP vs
+regular pages. This is quite sub-optimal for small mremap / mprotect
+/ change_protection.
+
+So implement va range flushes with two flush passes, one for each
+page size (regular and THP). The second flush has an order of magnitude
+fewer tlbie instructions than the first, so it is a relatively small
+additional cost.
+
+There is still room for improvement here with some changes to generic
+APIs, particularly if there are mostly THP pages to be invalidated,
+the small page flushes could be reduced.
+
+Time to mprotect 1 page of memory (after mmap, touch):
+vanilla 2.9us 1.8us
+patched 1.2us 1.6us
+
+Time to mprotect 30 pages of memory (after mmap, touch):
+vanilla 8.2us 7.2us
+patched 6.9us 17.9us
+
+Time to mprotect 34 pages of memory (after mmap, touch):
+vanilla 9.1us 8.0us
+patched 9.0us 8.0us
+
+34 pages is the point at which the invalidation switches from va
+to entire PID, which tlbie can do in a single instruction. This is
+why in the case of 30 pages, the new code runs slower for this test.
+This is a deliberate tradeoff already present in the unmap and THP
+promotion code, the idea is that the benefit from avoiding flushing
+entire TLB for this PID on all threads in the system.
+
+Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/mm/tlb-radix.c | 138 +++++++++++++++++++++++++++---------
+ 1 file changed, 103 insertions(+), 35 deletions(-)
+
+diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
+index 9916ea2fff43..b4b49de551a9 100644
+--- a/arch/powerpc/mm/tlb-radix.c
++++ b/arch/powerpc/mm/tlb-radix.c
+@@ -100,6 +100,17 @@ static inline void __tlbiel_va(unsigned long va, unsigned long pid,
+ trace_tlbie(0, 1, rb, rs, ric, prs, r);
+ }
+
++static inline void __tlbiel_va_range(unsigned long start, unsigned long end,
++ unsigned long pid, unsigned long page_size,
++ unsigned long psize)
++{
++ unsigned long addr;
++ unsigned long ap = mmu_get_ap(psize);
++
++ for (addr = start; addr < end; addr += page_size)
++ __tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
++}
++
+ static inline void _tlbiel_va(unsigned long va, unsigned long pid,
+ unsigned long psize, unsigned long ric)
+ {
+@@ -114,12 +125,8 @@ static inline void _tlbiel_va_range(unsigned long start, unsigned long end,
+ unsigned long pid, unsigned long page_size,
+ unsigned long psize)
+ {
+- unsigned long addr;
+- unsigned long ap = mmu_get_ap(psize);
+-
+ asm volatile("ptesync": : :"memory");
+- for (addr = start; addr < end; addr += page_size)
+- __tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
++ __tlbiel_va_range(start, end, pid, page_size, psize);
+ asm volatile("ptesync": : :"memory");
+ }
+
+@@ -139,6 +146,17 @@ static inline void __tlbie_va(unsigned long va, unsigned long pid,
+ trace_tlbie(0, 0, rb, rs, ric, prs, r);
+ }
+
++static inline void __tlbie_va_range(unsigned long start, unsigned long end,
++ unsigned long pid, unsigned long page_size,
++ unsigned long psize)
++{
++ unsigned long addr;
++ unsigned long ap = mmu_get_ap(psize);
++
++ for (addr = start; addr < end; addr += page_size)
++ __tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
++}
++
+ static inline void _tlbie_va(unsigned long va, unsigned long pid,
+ unsigned long psize, unsigned long ric)
+ {
+@@ -153,12 +171,8 @@ static inline void _tlbie_va_range(unsigned long start, unsigned long end,
+ unsigned long pid, unsigned long page_size,
+ unsigned long psize)
+ {
+- unsigned long addr;
+- unsigned long ap = mmu_get_ap(psize);
+-
+ asm volatile("ptesync": : :"memory");
+- for (addr = start; addr < end; addr += page_size)
+- __tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
++ __tlbie_va_range(start, end, pid, page_size, psize);
+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
+ }
+
+@@ -300,17 +314,78 @@ void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end)
+ }
+ EXPORT_SYMBOL(radix__flush_tlb_kernel_range);
+
++#define TLB_FLUSH_ALL -1UL
++
+ /*
+- * Currently, for range flushing, we just do a full mm flush. Because
+- * we use this in code path where we don' track the page size.
++ * Number of pages above which we invalidate the entire PID rather than
++ * flush individual pages, for local and global flushes respectively.
++ *
++ * tlbie goes out to the interconnect and individual ops are more costly.
++ * It also does not iterate over sets like the local tlbiel variant when
++ * invalidating a full PID, so it has a far lower threshold to change from
++ * individual page flushes to full-pid flushes.
+ */
++static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
++
+ void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+ unsigned long end)
+
+ {
+ struct mm_struct *mm = vma->vm_mm;
++ unsigned long pid;
++ unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift;
++ unsigned long page_size = 1UL << page_shift;
++ unsigned long nr_pages = (end - start) >> page_shift;
++ bool local, full;
++
++#ifdef CONFIG_HUGETLB_PAGE
++ if (is_vm_hugetlb_page(vma))
++ return radix__flush_hugetlb_tlb_range(vma, start, end);
++#endif
+
+- radix__flush_tlb_mm(mm);
++ pid = mm->context.id;
++ if (unlikely(pid == MMU_NO_CONTEXT))
++ return;
++
++ preempt_disable();
++ local = mm_is_thread_local(mm);
++ full = (end == TLB_FLUSH_ALL || nr_pages > tlb_single_page_flush_ceiling);
++
++ if (full) {
++ if (local)
++ _tlbiel_pid(pid, RIC_FLUSH_TLB);
++ else
++ _tlbie_pid(pid, RIC_FLUSH_TLB);
++ } else {
++ bool hflush = false;
++ unsigned long hstart, hend;
++
++#ifdef CONFIG_TRANSPARENT_HUGEPAGE
++ hstart = (start + HPAGE_PMD_SIZE - 1) >> HPAGE_PMD_SHIFT;
++ hend = end >> HPAGE_PMD_SHIFT;
++ if (hstart < hend) {
++ hstart <<= HPAGE_PMD_SHIFT;
++ hend <<= HPAGE_PMD_SHIFT;
++ hflush = true;
++ }
++#endif
++
++ asm volatile("ptesync": : :"memory");
++ if (local) {
++ __tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize);
++ if (hflush)
++ __tlbiel_va_range(hstart, hend, pid,
++ HPAGE_PMD_SIZE, MMU_PAGE_2M);
++ asm volatile("ptesync": : :"memory");
++ } else {
++ __tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize);
++ if (hflush)
++ __tlbie_va_range(hstart, hend, pid,
++ HPAGE_PMD_SIZE, MMU_PAGE_2M);
++ asm volatile("eieio; tlbsync; ptesync": : :"memory");
++ }
++ }
++ preempt_enable();
+ }
+ EXPORT_SYMBOL(radix__flush_tlb_range);
+
+@@ -352,19 +427,14 @@ void radix__tlb_flush(struct mmu_gather *tlb)
+ radix__flush_tlb_mm(mm);
+ }
+
+-#define TLB_FLUSH_ALL -1UL
+-/*
+- * Number of pages above which we will do a bcast tlbie. Just a
+- * number at this point copied from x86
+- */
+-static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
+-
+ void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
+ unsigned long end, int psize)
+ {
+ unsigned long pid;
+- bool local;
+- unsigned long page_size = 1UL << mmu_psize_defs[psize].shift;
++ unsigned int page_shift = mmu_psize_defs[psize].shift;
++ unsigned long page_size = 1UL << page_shift;
++ unsigned long nr_pages = (end - start) >> page_shift;
++ bool local, full;
+
+ pid = mm->context.id;
+ if (unlikely(pid == MMU_NO_CONTEXT))
+@@ -372,8 +442,9 @@ void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
+
+ preempt_disable();
+ local = mm_is_thread_local(mm);
+- if (end == TLB_FLUSH_ALL ||
+- (end - start) > tlb_single_page_flush_ceiling * page_size) {
++ full = (end == TLB_FLUSH_ALL || nr_pages > tlb_single_page_flush_ceiling);
++
++ if (full) {
+ if (local)
+ _tlbiel_pid(pid, RIC_FLUSH_TLB);
+ else
+@@ -391,7 +462,6 @@ void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
+ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
+ {
+ unsigned long pid, end;
+- bool local;
+
+ pid = mm->context.id;
+ if (unlikely(pid == MMU_NO_CONTEXT))
+@@ -403,20 +473,18 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
+ return;
+ }
+
++ end = addr + HPAGE_PMD_SIZE;
++
++ /* Otherwise first do the PWC, then iterate the pages. */
+ preempt_disable();
+- local = mm_is_thread_local(mm);
+- /* Otherwise first do the PWC */
+- if (local)
+- _tlbiel_pid(pid, RIC_FLUSH_PWC);
+- else
+- _tlbie_pid(pid, RIC_FLUSH_PWC);
+
+- /* Then iterate the pages */
+- end = addr + HPAGE_PMD_SIZE;
+- if (local)
++ if (mm_is_thread_local(mm)) {
++ _tlbiel_pid(pid, RIC_FLUSH_PWC);
+ _tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize);
+- else
++ } else {
++ _tlbie_pid(pid, RIC_FLUSH_PWC);
+ _tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize);
++ }
+
+ preempt_enable();
+ }
+--
+2.23.0
+
diff --git a/patches.suse/powerpc-Drop-page_is_ram-and-walk_system_ram_range.patch b/patches.suse/powerpc-Drop-page_is_ram-and-walk_system_ram_range.patch
new file mode 100644
index 0000000000..b5de4848f4
--- /dev/null
+++ b/patches.suse/powerpc-Drop-page_is_ram-and-walk_system_ram_range.patch
@@ -0,0 +1,142 @@
+From 26b523356f49a0117c8f9e32ca98aa6d6e496e1a Mon Sep 17 00:00:00 2001
+From: Christophe Leroy <christophe.leroy@c-s.fr>
+Date: Fri, 1 Feb 2019 10:46:52 +0000
+Subject: [PATCH] powerpc: Drop page_is_ram() and walk_system_ram_range()
+
+References: bsc#1065729
+Patch-mainline: v5.1-rc1
+Git-commit: 26b523356f49a0117c8f9e32ca98aa6d6e496e1a
+
+Since commit c40dd2f76644 ("powerpc: Add System RAM to /proc/iomem")
+it is possible to use the generic walk_system_ram_range() and
+the generic page_is_ram().
+
+To enable the use of walk_system_ram_range() by the IBM EHEA ethernet
+driver, we still need an export of the generic function.
+
+As powerpc was the only user of CONFIG_ARCH_HAS_WALK_MEMORY, the
+ifdef around the generic walk_system_ram_range() has become useless
+and can be dropped.
+
+Fixes: c40dd2f76644 ("powerpc: Add System RAM to /proc/iomem")
+Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
+[mpe: Keep the EXPORT_SYMBOL_GPL in powerpc code]
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/Kconfig | 3 ---
+ arch/powerpc/include/asm/page.h | 1 -
+ arch/powerpc/mm/mem.c | 39 +++++----------------------------
+ kernel/resource.c | 4 ----
+ 4 files changed, 6 insertions(+), 41 deletions(-)
+
+diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
+index 9c70c2864657..08908219fba9 100644
+--- a/arch/powerpc/Kconfig
++++ b/arch/powerpc/Kconfig
+@@ -478,9 +478,6 @@ config ARCH_CPU_PROBE_RELEASE
+ config ARCH_ENABLE_MEMORY_HOTPLUG
+ def_bool y
+
+-config ARCH_HAS_WALK_MEMORY
+- def_bool y
+-
+ config ARCH_ENABLE_MEMORY_HOTREMOVE
+ def_bool y
+
+diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
+index 5c5ea2413413..aa4497175bd3 100644
+--- a/arch/powerpc/include/asm/page.h
++++ b/arch/powerpc/include/asm/page.h
+@@ -326,7 +326,6 @@ struct page;
+ extern void clear_user_page(void *page, unsigned long vaddr, struct page *pg);
+ extern void copy_user_page(void *to, void *from, unsigned long vaddr,
+ struct page *p);
+-extern int page_is_ram(unsigned long pfn);
+ extern int devmem_is_allowed(unsigned long pfn);
+
+ #ifdef CONFIG_PPC_SMLPAR
+--- a/arch/powerpc/mm/mem.c
++++ b/arch/powerpc/mm/mem.c
+@@ -80,11 +80,6 @@ static inline pte_t *virt_to_kpte(unsigned long vaddr)
+ #define TOP_ZONE ZONE_NORMAL
+ #endif
+
+-int page_is_ram(unsigned long pfn)
+-{
+- return memblock_is_memory(__pfn_to_phys(pfn));
+-}
+-
+ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
+ unsigned long size, pgprot_t vma_prot)
+ {
+@@ -166,34 +161,6 @@ int arch_remove_memory(u64 start, u64 si
+ #endif
+ #endif /* CONFIG_MEMORY_HOTPLUG */
+
+-/*
+- * walk_memory_resource() needs to make sure there is no holes in a given
+- * memory range. PPC64 does not maintain the memory layout in /proc/iomem.
+- * Instead it maintains it in memblock.memory structures. Walk through the
+- * memory regions, find holes and callback for contiguous regions.
+- */
+-int
+-walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages,
+- void *arg, int (*func)(unsigned long, unsigned long, void *))
+-{
+- struct memblock_region *reg;
+- unsigned long end_pfn = start_pfn + nr_pages;
+- unsigned long tstart, tend;
+- int ret = -1;
+-
+- for_each_memblock(memory, reg) {
+- tstart = max(start_pfn, memblock_region_memory_base_pfn(reg));
+- tend = min(end_pfn, memblock_region_memory_end_pfn(reg));
+- if (tstart >= tend)
+- continue;
+- ret = (*func)(tstart, tend - tstart, arg);
+- if (ret)
+- break;
+- }
+- return ret;
+-}
+-EXPORT_SYMBOL_GPL(walk_system_ram_range);
+-
+ #ifndef CONFIG_NEED_MULTIPLE_NODES
+ void __init initmem_init(void)
+ {
+@@ -581,3 +548,9 @@ int devmem_is_allowed(unsigned long pfn)
+ return 0;
+ }
+ #endif /* CONFIG_STRICT_DEVMEM */
++
++/*
++ * This is defined in kernel/resource.c but only powerpc needs to export it, for
++ * the EHEA driver. Drop this when drivers/net/ethernet/ibm/ehea is removed.
++ */
++EXPORT_SYMBOL_GPL(walk_system_ram_range);
+diff --git a/kernel/resource.c b/kernel/resource.c
+index 915c02e8e5dd..e81b17b53fa5 100644
+--- a/kernel/resource.c
++++ b/kernel/resource.c
+@@ -448,8 +448,6 @@ int walk_mem_res(u64 start, u64 end, void *arg,
+ arg, func);
+ }
+
+-#if !defined(CONFIG_ARCH_HAS_WALK_MEMORY)
+-
+ /*
+ * This function calls the @func callback against all memory ranges of type
+ * System RAM which are marked as IORESOURCE_SYSTEM_RAM and IORESOUCE_BUSY.
+@@ -481,8 +479,6 @@ int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages,
+ return ret;
+ }
+
+-#endif
+-
+ static int __is_ram(unsigned long pfn, unsigned long nr_pages, void *arg)
+ {
+ return 1;
+--
+2.23.0
+
diff --git a/patches.suse/powerpc-book3s64-mm-Don-t-do-tlbie-fixup-for-some-ha.patch b/patches.suse/powerpc-book3s64-mm-Don-t-do-tlbie-fixup-for-some-ha.patch
new file mode 100644
index 0000000000..5dddaa21b8
--- /dev/null
+++ b/patches.suse/powerpc-book3s64-mm-Don-t-do-tlbie-fixup-for-some-ha.patch
@@ -0,0 +1,80 @@
+From 677733e296b5c7a37c47da391fc70a43dc40bd67 Mon Sep 17 00:00:00 2001
+From: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
+Date: Tue, 24 Sep 2019 09:22:51 +0530
+Subject: [PATCH] powerpc/book3s64/mm: Don't do tlbie fixup for some hardware
+ revisions
+
+References: bsc#1152161 ltc#181664
+Patch-mainline: v5.4-rc1
+Git-commit: 677733e296b5c7a37c47da391fc70a43dc40bd67
+
+The store ordering vs tlbie issue mentioned in commit
+a5d4b5891c2f ("powerpc/mm: Fixup tlbie vs store ordering issue on
+POWER9") is fixed for Nimbus 2.3 and Cumulus 1.3 revisions. We don't
+need to apply the fixup if we are running on them
+
+We can only do this on PowerNV. On pseries guest with KVM we still
+don't support redoing the feature fixup after migration. So we should
+be enabling all the workarounds needed, because we can possibly
+migrate between DD 2.3 and DD 2.2
+
+Fixes: a5d4b5891c2f ("powerpc/mm: Fixup tlbie vs store ordering issue on POWER9")
+Cc: stable@vger.kernel.org # v4.16+
+Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/20190924035254.24612-1-aneesh.kumar@linux.ibm.com
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/kernel/dt_cpu_ftrs.c | 30 ++++++++++++++++++++++++++++--
+ 1 file changed, 28 insertions(+), 2 deletions(-)
+
+--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
++++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
+@@ -690,9 +690,35 @@ static bool __init cpufeatures_process_f
+ return true;
+ }
+
++/*
++ * Handle POWER9 broadcast tlbie invalidation issue using
++ * cpu feature flag.
++ */
++static __init void update_tlbie_feature_flag(unsigned long pvr)
++{
++ if (PVR_VER(pvr) == PVR_POWER9) {
++ /*
++ * Set the tlbie feature flag for anything below
++ * Nimbus DD 2.3 and Cumulus DD 1.3
++ */
++ if ((pvr & 0xe000) == 0) {
++ /* Nimbus */
++ if ((pvr & 0xfff) < 0x203)
++ cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_BUG;
++ } else if ((pvr & 0xc000) == 0) {
++ /* Cumulus */
++ if ((pvr & 0xfff) < 0x103)
++ cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_BUG;
++ } else {
++ WARN_ONCE(1, "Unknown PVR");
++ cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_BUG;
++ }
++ }
++}
++
+ static __init void cpufeatures_cpu_quirks(void)
+ {
+- int version = mfspr(SPRN_PVR);
++ unsigned long version = mfspr(SPRN_PVR);
+
+ /*
+ * Not all quirks can be derived from the cpufeatures device tree.
+@@ -711,9 +737,9 @@ static __init void cpufeatures_cpu_quirk
+
+ if ((version & 0xffff0000) == 0x004e0000) {
+ cur_cpu_spec->cpu_features &= ~(CPU_FTR_DAWR);
+- cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_BUG;
+ }
+
++ update_tlbie_feature_flag(version);
+ /*
+ * PKEY was not in the initial base or feature node
+ * specification, but it should become optional in the next
diff --git a/patches.suse/powerpc-book3s64-radix-Rename-CPU_FTR_P9_TLBIE_BUG-f.patch b/patches.suse/powerpc-book3s64-radix-Rename-CPU_FTR_P9_TLBIE_BUG-f.patch
new file mode 100644
index 0000000000..a699eaefb1
--- /dev/null
+++ b/patches.suse/powerpc-book3s64-radix-Rename-CPU_FTR_P9_TLBIE_BUG-f.patch
@@ -0,0 +1,121 @@
+From 09ce98cacd51fcd0fa0af2f79d1e1d3192f4cbb0 Mon Sep 17 00:00:00 2001
+From: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
+Date: Tue, 24 Sep 2019 09:22:52 +0530
+Subject: [PATCH] powerpc/book3s64/radix: Rename CPU_FTR_P9_TLBIE_BUG feature
+ flag
+
+References: bsc#1152161 ltc#181664
+Patch-mainline: v5.4-rc1
+Git-commit: 09ce98cacd51fcd0fa0af2f79d1e1d3192f4cbb0
+
+Rename the #define to indicate this is related to store vs tlbie
+ordering issue. In the next patch, we will be adding another feature
+flag that is used to handles ERAT flush vs tlbie ordering issue.
+
+Fixes: a5d4b5891c2f ("powerpc/mm: Fixup tlbie vs store ordering issue on POWER9")
+Cc: stable@vger.kernel.org # v4.16+
+Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/20190924035254.24612-2-aneesh.kumar@linux.ibm.com
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/include/asm/cputable.h | 4 ++--
+ arch/powerpc/kernel/dt_cpu_ftrs.c | 6 +++---
+ arch/powerpc/kvm/book3s_hv_rm_mmu.c | 2 +-
+ arch/powerpc/mm/hash_native_64.c | 2 +-
+ arch/powerpc/mm/tlb-radix.c | 4 ++--
+ 5 files changed, 9 insertions(+), 9 deletions(-)
+
+--- a/arch/powerpc/include/asm/cputable.h
++++ b/arch/powerpc/include/asm/cputable.h
+@@ -212,7 +212,7 @@ static inline void cpu_feature_keys_init
+ #define CPU_FTR_POWER9_DD2_1 LONG_ASM_CONST(0x0000080000000000)
+ #define CPU_FTR_P9_TM_HV_ASSIST LONG_ASM_CONST(0x0000100000000000)
+ #define CPU_FTR_P9_TM_XER_SO_BUG LONG_ASM_CONST(0x0000200000000000)
+-#define CPU_FTR_P9_TLBIE_BUG LONG_ASM_CONST(0x0000400000000000)
++#define CPU_FTR_P9_TLBIE_STQ_BUG LONG_ASM_CONST(0x0000400000000000)
+
+ #ifndef __ASSEMBLY__
+
+@@ -460,7 +460,7 @@ static inline void cpu_feature_keys_init
+ CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
+ CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_ARCH_207S | \
+ CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | CPU_FTR_PKEY | \
+- CPU_FTR_P9_TLBIE_BUG)
++ CPU_FTR_P9_TLBIE_STQ_BUG)
+ #define CPU_FTRS_POWER9_DD2_0 CPU_FTRS_POWER9
+ #define CPU_FTRS_POWER9_DD2_1 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1)
+ #define CPU_FTRS_POWER9_DD2_2 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1 | \
+--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
++++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
+@@ -704,14 +704,14 @@ static __init void update_tlbie_feature_
+ if ((pvr & 0xe000) == 0) {
+ /* Nimbus */
+ if ((pvr & 0xfff) < 0x203)
+- cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_BUG;
++ cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_STQ_BUG;
+ } else if ((pvr & 0xc000) == 0) {
+ /* Cumulus */
+ if ((pvr & 0xfff) < 0x103)
+- cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_BUG;
++ cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_STQ_BUG;
+ } else {
+ WARN_ONCE(1, "Unknown PVR");
+- cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_BUG;
++ cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_STQ_BUG;
+ }
+ }
+ }
+--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
++++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+@@ -453,7 +453,7 @@ static void do_tlbies(struct kvm *kvm, u
+ "r" (rbvalues[i]), "r" (kvm->arch.lpid));
+ }
+
+- if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) {
++ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+ /*
+ * Need the extra ptesync to make sure we don't
+ * re-order the tlbie
+--- a/arch/powerpc/mm/hash_native_64.c
++++ b/arch/powerpc/mm/hash_native_64.c
+@@ -203,7 +203,7 @@ static inline unsigned long ___tlbie(un
+
+ static inline void fixup_tlbie(unsigned long vpn, int psize, int apsize, int ssize)
+ {
+- if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) {
++ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+ /* Need the extra ptesync to ensure we don't reorder tlbie*/
+ asm volatile("ptesync": : :"memory");
+ ___tlbie(vpn, psize, apsize, ssize);
+--- a/arch/powerpc/mm/tlb-radix.c
++++ b/arch/powerpc/mm/tlb-radix.c
+@@ -218,7 +218,7 @@ static inline void fixup_tlbie(void)
+ unsigned long pid = 0;
+ unsigned long va = ((1UL << 52) - 1);
+
+- if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) {
++ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+ asm volatile("ptesync": : :"memory");
+ __tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
+ }
+@@ -228,7 +228,7 @@ static inline void fixup_tlbie_lpid(unsi
+ {
+ unsigned long va = ((1UL << 52) - 1);
+
+- if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) {
++ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+ asm volatile("ptesync": : :"memory");
+ __tlbie_lpid_va(va, lpid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
+ }
+--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
++++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
+@@ -156,7 +156,7 @@ static void kvmppc_radix_tlbie_page(stru
+ asm volatile("ptesync": : :"memory");
+ asm volatile(PPC_TLBIE_5(%0, %1, 0, 0, 1)
+ : : "r" (addr), "r" (kvm->arch.lpid) : "memory");
+- if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG))
++ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG))
+ asm volatile(PPC_TLBIE_5(%0, %1, 0, 0, 1)
+ : : "r" (addr), "r" (kvm->arch.lpid) : "memory");
+ asm volatile("eieio ; tlbsync ; ptesync": : :"memory");
diff --git a/patches.suse/powerpc-irq-Don-t-WARN-continuously-in-arch_local_ir.patch b/patches.suse/powerpc-irq-Don-t-WARN-continuously-in-arch_local_ir.patch
new file mode 100644
index 0000000000..92ea8222da
--- /dev/null
+++ b/patches.suse/powerpc-irq-Don-t-WARN-continuously-in-arch_local_ir.patch
@@ -0,0 +1,38 @@
+From 0fc12c022ad25532b66bf6f6c818ee1c1d63e702 Mon Sep 17 00:00:00 2001
+From: Michael Ellerman <mpe@ellerman.id.au>
+Date: Mon, 8 Jul 2019 16:02:19 +1000
+Subject: [PATCH] powerpc/irq: Don't WARN continuously in
+ arch_local_irq_restore()
+
+References: bsc#1065729
+Patch-mainline: v5.3-rc1
+Git-commit: 0fc12c022ad25532b66bf6f6c818ee1c1d63e702
+
+When CONFIG_PPC_IRQ_SOFT_MASK_DEBUG is enabled (uncommon), we have a
+series of WARN_ON's in arch_local_irq_restore().
+
+These are "should never happen" conditions, but if they do happen they
+can flood the console and render the system unusable. So switch them
+to WARN_ON_ONCE().
+
+Fixes: e2b36d591720 ("powerpc/64: Don't trace code that runs with the soft irq mask unreconciled")
+Fixes: 9b81c0211c24 ("powerpc/64s: make PACA_IRQ_HARD_DIS track MSR[EE] closely")
+Fixes: 7c0482e3d055 ("powerpc/irq: Fix another case of lazy IRQ state getting out of sync")
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/20190708061046.7075-1-mpe@ellerman.id.au
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/kernel/irq.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/arch/powerpc/kernel/irq.c
++++ b/arch/powerpc/kernel/irq.c
+@@ -265,7 +265,7 @@ notrace void arch_local_irq_restore(unsi
+ * warn if we are wrong. Only do that when IRQ tracing
+ * is enabled as mfmsr() can be costly.
+ */
+- if (WARN_ON(mfmsr() & MSR_EE))
++ if (WARN_ON_ONCE(mfmsr() & MSR_EE))
+ __hard_irq_disable();
+ }
+ #endif /* CONFIG_TRACE_IRQFLAGS */
diff --git a/patches.suse/powerpc-irq-drop-arch_early_irq_init.patch b/patches.suse/powerpc-irq-drop-arch_early_irq_init.patch
new file mode 100644
index 0000000000..4c9fa08a0b
--- /dev/null
+++ b/patches.suse/powerpc-irq-drop-arch_early_irq_init.patch
@@ -0,0 +1,40 @@
+From 607ea5090b3fb61fea1d0bc5278e6c1d40ab5bd6 Mon Sep 17 00:00:00 2001
+From: Christophe Leroy <christophe.leroy@c-s.fr>
+Date: Tue, 8 Jan 2019 11:37:19 +0000
+Subject: [PATCH] powerpc/irq: drop arch_early_irq_init()
+
+References: bsc#1065729
+Patch-mainline: v5.1-rc1
+Git-commit: 607ea5090b3fb61fea1d0bc5278e6c1d40ab5bd6
+
+arch_early_irq_init() does nothing different than the weak
+arch_early_irq_init() in kernel/softirq.c
+
+Fixes: 089fb442f301 ("powerpc: Use ARCH_IRQ_INIT_FLAGS")
+Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
+Acked-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/kernel/irq.c | 5 -----
+ 1 file changed, 5 deletions(-)
+
+diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
+index 916ddc4aac44..bb299613a462 100644
+--- a/arch/powerpc/kernel/irq.c
++++ b/arch/powerpc/kernel/irq.c
+@@ -827,11 +827,6 @@ int irq_choose_cpu(const struct cpumask *mask)
+ }
+ #endif
+
+-int arch_early_irq_init(void)
+-{
+- return 0;
+-}
+-
+ #ifdef CONFIG_PPC64
+ static int __init setup_noirqdistrib(char *str)
+ {
+--
+2.23.0
+
diff --git a/patches.suse/powerpc-mm-Fix-typo-in-comments.patch b/patches.suse/powerpc-mm-Fix-typo-in-comments.patch
index fe1148aec4..c490a9db28 100644
--- a/patches.suse/powerpc-mm-Fix-typo-in-comments.patch
+++ b/patches.suse/powerpc-mm-Fix-typo-in-comments.patch
@@ -26,34 +26,34 @@ Acked-by: Michal Suchanek <msuchanek@suse.de>
asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
-@@ -114,7 +114,7 @@ static inline void __tlbiel_va(unsigned
- rb |= ap << PPC_BITLSHIFT(58);
+@@ -112,7 +112,7 @@ static inline void __tlbie_pid(unsigned
+ rb = PPC_BIT(53); /* IS = 1 */
rs = pid << PPC_BITLSHIFT(31);
prs = 1; /* process scoped */
- r = 1; /* raidx format */
+ r = 1; /* radix format */
- asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
+ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
-@@ -130,7 +130,7 @@ static inline void __tlbie_va(unsigned l
+@@ -128,7 +128,7 @@ static inline void __tlbiel_va(unsigned
rb |= ap << PPC_BITLSHIFT(58);
rs = pid << PPC_BITLSHIFT(31);
prs = 1; /* process scoped */
- r = 1; /* raidx format */
+ r = 1; /* radix format */
- asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+ asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
-@@ -184,7 +184,7 @@ static inline void _tlbie_pid(unsigned l
- rb = PPC_BIT(53); /* IS = 1 */
+@@ -144,7 +144,7 @@ static inline void __tlbie_va(unsigned l
+ rb |= ap << PPC_BITLSHIFT(58);
rs = pid << PPC_BITLSHIFT(31);
prs = 1; /* process scoped */
- r = 1; /* raidx format */
+ r = 1; /* radix format */
- asm volatile("ptesync": : :"memory");
asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
-@@ -528,7 +528,7 @@ void radix__flush_tlb_all(void)
+ : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+@@ -653,7 +653,7 @@ void radix__flush_tlb_all(void)
rb = 0x3 << PPC_BITLSHIFT(53); /* IS = 3 */
prs = 0; /* partition scoped */
diff --git a/patches.suse/powerpc-mm-Fixup-tlbie-vs-mtpidr-mtlpidr-ordering-is.patch b/patches.suse/powerpc-mm-Fixup-tlbie-vs-mtpidr-mtlpidr-ordering-is.patch
new file mode 100644
index 0000000000..4611fb8f73
--- /dev/null
+++ b/patches.suse/powerpc-mm-Fixup-tlbie-vs-mtpidr-mtlpidr-ordering-is.patch
@@ -0,0 +1,347 @@
+From 047e6575aec71d75b765c22111820c4776cd1c43 Mon Sep 17 00:00:00 2001
+From: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
+Date: Tue, 24 Sep 2019 09:22:53 +0530
+Subject: [PATCH] powerpc/mm: Fixup tlbie vs mtpidr/mtlpidr ordering issue on
+ POWER9
+
+References: bsc#1152161 ltc#181664
+Patch-mainline: v5.4-rc1
+Git-commit: 047e6575aec71d75b765c22111820c4776cd1c43
+
+On POWER9, under some circumstances, a broadcast TLB invalidation will
+fail to invalidate the ERAT cache on some threads when there are
+parallel mtpidr/mtlpidr happening on other threads of the same core.
+This can cause stores to continue to go to a page after it's unmapped.
+
+The workaround is to force an ERAT flush using PID=0 or LPID=0 tlbie
+flush. This additional TLB flush will cause the ERAT cache
+invalidation. Since we are using PID=0 or LPID=0, we don't get
+filtered out by the TLB snoop filtering logic.
+
+We need to still follow this up with another tlbie to take care of
+store vs tlbie ordering issue explained in commit:
+a5d4b5891c2f ("powerpc/mm: Fixup tlbie vs store ordering issue on
+POWER9"). The presence of ERAT cache implies we can still get new
+stores and they may miss store queue marking flush.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/20190924035254.24612-3-aneesh.kumar@linux.ibm.com
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/include/asm/cputable.h | 3 +-
+ arch/powerpc/kernel/dt_cpu_ftrs.c | 2 +
+ arch/powerpc/kvm/book3s_hv_rm_mmu.c | 42 ++++++++++----
+ arch/powerpc/mm/hash_native_64.c | 29 +++++++++-
+ arch/powerpc/mm/tlb-radix.c | 80 +++++++++++++++++++++++---
+ 5 files changed, 134 insertions(+), 22 deletions(-)
+
+--- a/arch/powerpc/include/asm/cputable.h
++++ b/arch/powerpc/include/asm/cputable.h
+@@ -213,6 +213,7 @@ static inline void cpu_feature_keys_init
+ #define CPU_FTR_P9_TM_HV_ASSIST LONG_ASM_CONST(0x0000100000000000)
+ #define CPU_FTR_P9_TM_XER_SO_BUG LONG_ASM_CONST(0x0000200000000000)
+ #define CPU_FTR_P9_TLBIE_STQ_BUG LONG_ASM_CONST(0x0000400000000000)
++#define CPU_FTR_P9_TLBIE_ERAT_BUG LONG_ASM_CONST(0x0001000000000000)
+
+ #ifndef __ASSEMBLY__
+
+@@ -460,7 +461,7 @@ static inline void cpu_feature_keys_init
+ CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
+ CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_ARCH_207S | \
+ CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | CPU_FTR_PKEY | \
+- CPU_FTR_P9_TLBIE_STQ_BUG)
++ CPU_FTR_P9_TLBIE_STQ_BUG | CPU_FTR_P9_TLBIE_ERAT_BUG)
+ #define CPU_FTRS_POWER9_DD2_0 CPU_FTRS_POWER9
+ #define CPU_FTRS_POWER9_DD2_1 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1)
+ #define CPU_FTRS_POWER9_DD2_2 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1 | \
+--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
++++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
+@@ -713,6 +713,8 @@ static __init void update_tlbie_feature_
+ WARN_ONCE(1, "Unknown PVR");
+ cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_STQ_BUG;
+ }
++
++ cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_ERAT_BUG;
+ }
+ }
+
+--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
++++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+@@ -435,6 +435,37 @@ static inline int is_mmio_hpte(unsigned
+ (HPTE_R_KEY_HI | HPTE_R_KEY_LO));
+ }
+
++static inline void fixup_tlbie_lpid(unsigned long rb_value, unsigned long lpid)
++{
++
++ if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
++ /* Radix flush for a hash guest */
++
++ unsigned long rb,rs,prs,r,ric;
++
++ rb = PPC_BIT(52); /* IS = 2 */
++ rs = 0; /* lpid = 0 */
++ prs = 0; /* partition scoped */
++ r = 1; /* radix format */
++ ric = 0; /* RIC_FLSUH_TLB */
++
++ /*
++ * Need the extra ptesync to make sure we don't
++ * re-order the tlbie
++ */
++ asm volatile("ptesync": : :"memory");
++ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
++ : : "r"(rb), "i"(r), "i"(prs),
++ "i"(ric), "r"(rs) : "memory");
++ }
++
++ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
++ asm volatile("ptesync": : :"memory");
++ asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : :
++ "r" (rb_value), "r" (lpid));
++ }
++}
++
+ static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
+ long npages, int global, bool need_sync)
+ {
+@@ -453,16 +484,7 @@ static void do_tlbies(struct kvm *kvm, u
+ "r" (rbvalues[i]), "r" (kvm->arch.lpid));
+ }
+
+- if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+- /*
+- * Need the extra ptesync to make sure we don't
+- * re-order the tlbie
+- */
+- asm volatile("ptesync": : :"memory");
+- asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : :
+- "r" (rbvalues[0]), "r" (kvm->arch.lpid));
+- }
+-
++ fixup_tlbie_lpid(rbvalues[i - 1], kvm->arch.lpid);
+ asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+ } else {
+ if (need_sync)
+--- a/arch/powerpc/mm/hash_native_64.c
++++ b/arch/powerpc/mm/hash_native_64.c
+@@ -201,8 +201,31 @@ static inline unsigned long ___tlbie(un
+ return va;
+ }
+
+-static inline void fixup_tlbie(unsigned long vpn, int psize, int apsize, int ssize)
++static inline void fixup_tlbie_vpn(unsigned long vpn, int psize,
++ int apsize, int ssize)
+ {
++ if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
++ /* Radix flush for a hash guest */
++
++ unsigned long rb,rs,prs,r,ric;
++
++ rb = PPC_BIT(52); /* IS = 2 */
++ rs = 0; /* lpid = 0 */
++ prs = 0; /* partition scoped */
++ r = 1; /* radix format */
++ ric = 0; /* RIC_FLSUH_TLB */
++
++ /*
++ * Need the extra ptesync to make sure we don't
++ * re-order the tlbie
++ */
++ asm volatile("ptesync": : :"memory");
++ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
++ : : "r"(rb), "i"(r), "i"(prs),
++ "i"(ric), "r"(rs) : "memory");
++ }
++
++
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+ /* Need the extra ptesync to ensure we don't reorder tlbie*/
+ asm volatile("ptesync": : :"memory");
+@@ -287,7 +310,7 @@ static inline void tlbie(unsigned long v
+ asm volatile("ptesync": : :"memory");
+ } else {
+ __tlbie(vpn, psize, apsize, ssize);
+- fixup_tlbie(vpn, psize, apsize, ssize);
++ fixup_tlbie_vpn(vpn, psize, apsize, ssize);
+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
+ }
+ if (lock_tlbie && !use_local)
+@@ -824,7 +847,7 @@ static void native_flush_hash_range(unsi
+ /*
+ * Just do one more with the last used values.
+ */
+- fixup_tlbie(vpn, psize, psize, ssize);
++ fixup_tlbie_vpn(vpn, psize, psize, ssize);
+ asm volatile("eieio; tlbsync; ptesync":::"memory");
+
+ if (lock_tlbie)
+--- a/arch/powerpc/mm/tlb-radix.c
++++ b/arch/powerpc/mm/tlb-radix.c
+@@ -213,21 +213,82 @@ static inline void __tlbie_lpid_va(unsig
+ trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
+ }
+
+-static inline void fixup_tlbie(void)
++
++static inline void fixup_tlbie_va(unsigned long va, unsigned long pid,
++ unsigned long ap)
+ {
+- unsigned long pid = 0;
++ if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
++ asm volatile("ptesync": : :"memory");
++ __tlbie_va(va, 0, ap, RIC_FLUSH_TLB);
++ }
++
++ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
++ asm volatile("ptesync": : :"memory");
++ __tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
++ }
++}
++
++static inline void fixup_tlbie_va_range(unsigned long va, unsigned long pid,
++ unsigned long ap)
++{
++ if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
++ asm volatile("ptesync": : :"memory");
++ __tlbie_pid(0, RIC_FLUSH_TLB);
++ }
++
++ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
++ asm volatile("ptesync": : :"memory");
++ __tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
++ }
++}
++
++static inline void fixup_tlbie_pid(unsigned long pid)
++{
++ /*
++ * We can use any address for the invalidation, pick one which is
++ * probably unused as an optimisation.
++ */
+ unsigned long va = ((1UL << 52) - 1);
+
++ if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
++ asm volatile("ptesync": : :"memory");
++ __tlbie_pid(0, RIC_FLUSH_TLB);
++ }
++
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+ asm volatile("ptesync": : :"memory");
+ __tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
+ }
+ }
+
++
++static inline void fixup_tlbie_lpid_va(unsigned long va, unsigned long lpid,
++ unsigned long ap)
++{
++ if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
++ asm volatile("ptesync": : :"memory");
++ __tlbie_lpid_va(va, 0, ap, RIC_FLUSH_TLB);
++ }
++
++ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
++ asm volatile("ptesync": : :"memory");
++ __tlbie_lpid_va(va, lpid, ap, RIC_FLUSH_TLB);
++ }
++}
++
+ static inline void fixup_tlbie_lpid(unsigned long lpid)
+ {
++ /*
++ * We can use any address for the invalidation, pick one which is
++ * probably unused as an optimisation.
++ */
+ unsigned long va = ((1UL << 52) - 1);
+
++ if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
++ asm volatile("ptesync": : :"memory");
++ __tlbie_lpid(0, RIC_FLUSH_TLB);
++ }
++
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+ asm volatile("ptesync": : :"memory");
+ __tlbie_lpid_va(va, lpid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
+@@ -275,6 +336,7 @@ static inline void _tlbie_pid(unsigned l
+ switch (ric) {
+ case RIC_FLUSH_TLB:
+ __tlbie_pid(pid, RIC_FLUSH_TLB);
++ fixup_tlbie_pid(pid);
+ break;
+ case RIC_FLUSH_PWC:
+ __tlbie_pid(pid, RIC_FLUSH_PWC);
+@@ -282,8 +344,8 @@ static inline void _tlbie_pid(unsigned l
+ case RIC_FLUSH_ALL:
+ default:
+ __tlbie_pid(pid, RIC_FLUSH_ALL);
++ fixup_tlbie_pid(pid);
+ }
+- fixup_tlbie();
+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
+ }
+
+@@ -327,6 +389,7 @@ static inline void _tlbie_lpid(unsigned
+ switch (ric) {
+ case RIC_FLUSH_TLB:
+ __tlbie_lpid(lpid, RIC_FLUSH_TLB);
++ fixup_tlbie_lpid(lpid);
+ break;
+ case RIC_FLUSH_PWC:
+ __tlbie_lpid(lpid, RIC_FLUSH_PWC);
+@@ -334,8 +397,8 @@ static inline void _tlbie_lpid(unsigned
+ case RIC_FLUSH_ALL:
+ default:
+ __tlbie_lpid(lpid, RIC_FLUSH_ALL);
++ fixup_tlbie_lpid(lpid);
+ }
+- fixup_tlbie_lpid(lpid);
+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
+ }
+
+@@ -408,6 +471,8 @@ static inline void __tlbie_va_range(unsi
+
+ for (addr = start; addr < end; addr += page_size)
+ __tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
++
++ fixup_tlbie_va_range(addr - page_size, pid, ap);
+ }
+
+ static inline void _tlbie_va(unsigned long va, unsigned long pid,
+@@ -417,7 +482,7 @@ static inline void _tlbie_va(unsigned lo
+
+ asm volatile("ptesync": : :"memory");
+ __tlbie_va(va, pid, ap, ric);
+- fixup_tlbie();
++ fixup_tlbie_va(va, pid, ap);
+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
+ }
+
+@@ -428,7 +493,7 @@ static inline void _tlbie_lpid_va(unsign
+
+ asm volatile("ptesync": : :"memory");
+ __tlbie_lpid_va(va, lpid, ap, ric);
+- fixup_tlbie_lpid(lpid);
++ fixup_tlbie_lpid_va(va, lpid, ap);
+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
+ }
+
+@@ -440,7 +505,6 @@ static inline void _tlbie_va_range(unsig
+ if (also_pwc)
+ __tlbie_pid(pid, RIC_FLUSH_PWC);
+ __tlbie_va_range(start, end, pid, page_size, psize);
+- fixup_tlbie();
+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
+ }
+
+@@ -677,7 +741,7 @@ static inline void __radix__flush_tlb_ra
+ if (gflush)
+ __tlbie_va_range(gstart, gend, pid,
+ PUD_SIZE, MMU_PAGE_1G);
+- fixup_tlbie();
++
+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
+ }
+ }
diff --git a/patches.suse/powerpc-mm-Fixup-tlbie-vs-store-ordering-issue-on-PO.patch b/patches.suse/powerpc-mm-Fixup-tlbie-vs-store-ordering-issue-on-PO.patch
index ecb0038e9a..75966561d2 100644
--- a/patches.suse/powerpc-mm-Fixup-tlbie-vs-store-ordering-issue-on-PO.patch
+++ b/patches.suse/powerpc-mm-Fixup-tlbie-vs-store-ordering-issue-on-PO.patch
@@ -20,23 +20,6 @@ Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
[mpe: Enable the feature in the DT CPU features code for all Power9,
rename the feature to CPU_FTR_P9_TLBIE_BUG per benh.]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
-[mauricfo: backport: tlb-radix.c :: cover '__tlbie_va()'
- - hunk 2: update context lines (includes the inline assembly still in
- _tlbie_pid() in SLES15 as __tlbie_pie() does not exist yet.
- equivalent logic: the fixup is still between inline assembly
- statements PPC_TLBIE_5 and the syncs)
- - hunk 3: update context line 1
- - hunk 4: move fixup_tlbie() into radix__flush_tlb_range_psize():
- SLES15 does not have __tlbie_va_range(), in upstream its
- callers are __radix__flush_tlb_range_psize() (which here
- calls __tlbie_va() directly, so add the fixup after that)
- and radix__flush_tlb_collapsed_pmd() (in SLES15 it calls
- the wrapper directly, so it's already fixed up).
- - hunk 5: removed -- the modified function radix__flush_tlb_range()
- in SLES15 just redirects to a full flush, which calls the
- _tlbie_pid() wrapper, which is already fixed up.]
-Signed-off-by: Mauricio Faria de Oliveira <mauricfo@linux.vnet.ibm.com>
-
Acked-by: Michal Suchanek <msuchanek@suse.de>
---
arch/powerpc/include/asm/cputable.h | 3 ++-
@@ -49,7 +32,7 @@ Acked-by: Michal Suchanek <msuchanek@suse.de>
7 files changed, 50 insertions(+), 2 deletions(-)
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
-index a2c5c95..2e2bacb 100644
+index a2c5c95882cf..2e2bacbdf6ed 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -203,6 +203,7 @@ static inline void cpu_feature_keys_init(void) { }
@@ -70,7 +53,7 @@ index a2c5c95..2e2bacb 100644
(~CPU_FTR_SAO))
#define CPU_FTRS_POWER9_DD2_0 CPU_FTRS_POWER9
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
-index 0bcfb0f..8ca5d5b7 100644
+index 0bcfb0f256e1..8ca5d5b74618 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -709,6 +709,9 @@ static __init void cpufeatures_cpu_quirks(void)
@@ -84,7 +67,7 @@ index 0bcfb0f..8ca5d5b7 100644
static void __init cpufeatures_setup_finished(void)
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
-index 0c85481..0837b97 100644
+index 0c854816e653..0837b9738d76 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -157,6 +157,9 @@ static void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr,
@@ -98,7 +81,7 @@ index 0c85481..0837b97 100644
}
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
-index 8888e62..e1c083f 100644
+index 8888e625a999..e1c083fbe434 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -473,6 +473,17 @@ static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
@@ -120,7 +103,7 @@ index 8888e62..e1c083f 100644
kvm->arch.tlbie_lock = 0;
} else {
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
-index a0675e9..656933c 100644
+index a0675e91ad7d..656933c85925 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -201,6 +201,15 @@ static inline unsigned long ___tlbie(unsigned long vpn, int psize,
@@ -168,7 +151,7 @@ index a0675e9..656933c 100644
if (lock_tlbie)
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
-index 28c980e..adf469f 100644
+index 28c980eb4422..adf469f312f2 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -481,6 +481,7 @@ void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
@@ -180,7 +163,7 @@ index 28c980e..adf469f 100644
}
EXPORT_SYMBOL_GPL(mmu_partition_table_set_entry);
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
-index 74354c2..a07f537 100644
+index 74354c26d316..a07f5372a4bf 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -151,6 +151,17 @@ static inline void __tlbie_va(unsigned long va, unsigned long pid,
@@ -202,33 +185,37 @@ index 74354c2..a07f537 100644
* We use 128 set in radix mode and 256 set in hpt mode.
*/
@@ -200,6 +211,7 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
- asm volatile("ptesync": : :"memory");
- asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
- : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+ default:
+ __tlbie_pid(pid, RIC_FLUSH_ALL);
+ }
+ fixup_tlbie();
asm volatile("eieio; tlbsync; ptesync": : :"memory");
- trace_tlbie(0, 0, rb, rs, ric, prs, r);
}
+
@@ -253,6 +265,7 @@ static inline void _tlbie_va(unsigned long va, unsigned long pid,
- {
+
asm volatile("ptesync": : :"memory");
__tlbie_va(va, pid, ap, ric);
+ fixup_tlbie();
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
-@@ -264,8 +277,10 @@ static inline void _tlbie_va_range(unsigned long start, unsigned long end,
- for (addr = start; addr < end; addr += page_size) {
- if (local)
- __tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
-- else
-+ else {
- __tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
+@@ -264,6 +277,7 @@ static inline void _tlbie_va_range(unsigned long start, unsigned long end,
+ if (also_pwc)
+ __tlbie_pid(pid, RIC_FLUSH_PWC);
+ __tlbie_va_range(start, end, pid, page_size, psize);
++ fixup_tlbie();
+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
+ }
+
+@@ -498,6 +512,7 @@ void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+ if (hflush)
+ __tlbie_va_range(hstart, hend, pid,
+ HPAGE_PMD_SIZE, MMU_PAGE_2M);
+ fixup_tlbie();
-+ }
+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
+ }
}
- if (local)
- asm volatile("ptesync": : :"memory");
--
-cgit v1.1
+2.23.0
diff --git a/patches.suse/powerpc-mm-Simplify-page_is_ram-by-using-memblock_is.patch b/patches.suse/powerpc-mm-Simplify-page_is_ram-by-using-memblock_is.patch
new file mode 100644
index 0000000000..ed98ea264a
--- /dev/null
+++ b/patches.suse/powerpc-mm-Simplify-page_is_ram-by-using-memblock_is.patch
@@ -0,0 +1,43 @@
+From 2615c93e5f52db62586112793d889face99eb905 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= <j.neuschaefer@gmx.net>
+Date: Wed, 28 Mar 2018 02:25:40 +0200
+Subject: [PATCH] powerpc/mm: Simplify page_is_ram by using memblock_is_memory
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+References: bsc#1065729
+Patch-mainline: v4.17-rc1
+Git-commit: 2615c93e5f52db62586112793d889face99eb905
+
+Instead of open-coding the search in page_is_ram, call memblock_is_memory.
+
+Signed-off-by: Jonathan Neuschäfer <j.neuschaefer@gmx.net>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/mm/mem.c | 8 +-------
+ 1 file changed, 1 insertion(+), 7 deletions(-)
+
+diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
+index e2f5025b03b0..8f335cf052f8 100644
+--- a/arch/powerpc/mm/mem.c
++++ b/arch/powerpc/mm/mem.c
+@@ -85,13 +85,7 @@ int page_is_ram(unsigned long pfn)
+ #ifndef CONFIG_PPC64 /* XXX for now */
+ return pfn < max_pfn;
+ #else
+- unsigned long paddr = (pfn << PAGE_SHIFT);
+- struct memblock_region *reg;
+-
+- for_each_memblock(memory, reg)
+- if (paddr >= reg->base && paddr < (reg->base + reg->size))
+- return 1;
+- return 0;
++ return memblock_is_memory(__pfn_to_phys(pfn));
+ #endif
+ }
+
+--
+2.23.0
+
diff --git a/patches.suse/powerpc-mm-Use-memblock-API-for-PPC32-page_is_ram.patch b/patches.suse/powerpc-mm-Use-memblock-API-for-PPC32-page_is_ram.patch
new file mode 100644
index 0000000000..6def0c2e0c
--- /dev/null
+++ b/patches.suse/powerpc-mm-Use-memblock-API-for-PPC32-page_is_ram.patch
@@ -0,0 +1,42 @@
+From f65e67c7e3308c0af08080782d79a8cb95c44929 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= <j.neuschaefer@gmx.net>
+Date: Wed, 28 Mar 2018 02:25:41 +0200
+Subject: [PATCH] powerpc/mm: Use memblock API for PPC32 page_is_ram
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+References: bsc#1065729
+Patch-mainline: v4.17-rc1
+Git-commit: f65e67c7e3308c0af08080782d79a8cb95c44929
+
+To support accurate checking for different blocks of memory on PPC32,
+use the same memblock-based approach that's already used on PPC64 also
+on PPC32.
+
+Signed-off-by: Jonathan Neuschäfer <j.neuschaefer@gmx.net>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/mm/mem.c | 4 ----
+ 1 file changed, 4 deletions(-)
+
+diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
+index 8f335cf052f8..737f8a4632cc 100644
+--- a/arch/powerpc/mm/mem.c
++++ b/arch/powerpc/mm/mem.c
+@@ -82,11 +82,7 @@ static inline pte_t *virt_to_kpte(unsigned long vaddr)
+
+ int page_is_ram(unsigned long pfn)
+ {
+-#ifndef CONFIG_PPC64 /* XXX for now */
+- return pfn < max_pfn;
+-#else
+ return memblock_is_memory(__pfn_to_phys(pfn));
+-#endif
+ }
+
+ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
+--
+2.23.0
+
diff --git a/patches.suse/powerpc-mm-Workaround-Nest-MMU-bug-with-TLB-invalida.patch b/patches.suse/powerpc-mm-Workaround-Nest-MMU-bug-with-TLB-invalida.patch
index 422f153063..fbba0209c3 100644
--- a/patches.suse/powerpc-mm-Workaround-Nest-MMU-bug-with-TLB-invalida.patch
+++ b/patches.suse/powerpc-mm-Workaround-Nest-MMU-bug-with-TLB-invalida.patch
@@ -22,30 +22,41 @@ Signed-off-by: Balbir Singh <bsingharora@gmail.com>
[balbirs: fixed spelling and coding style to quiesce checkpatch.pl]
Tested-by: Balbir Singh <bsingharora@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
-[mauricfo: backport: essentially, cover '_tlbie_pid(pid, RIC_FLUSH_TLB)'
- - hunk 1: removed -- not required since there's no __tlbie_pid()
- in SLES 15, so _tlbie_pid() is called directly using
- the compile-time constraint.
- - hunk 3: update context lines.
- - hunk 4: update context lines (to match a different function;
- in SLES 15, original target radix__flush_tlb_range()
- just calls radix__flush_tlb_mm() which is already
- patched; but there is another instance of _tlbie_pid(
- (pid, RIC_FLUSH_TLB) in radix__flush_tlb_range_psize(),
- so cover that one; equivalent logic (else / goto err_out).
- - hunk 5: removed -- this function does not exist in SLES 15.]
-Signed-off-by: Mauricio Faria de Oliveira <mauricfo@linux.vnet.ibm.com>
-
Acked-by: Michal Suchanek <msuchanek@suse.de>
---
- arch/powerpc/mm/tlb-radix.c | 29 +++++++++++++++++++++++------
- 1 file changed, 23 insertions(+), 6 deletions(-)
+ arch/powerpc/mm/tlb-radix.c | 50 +++++++++++++++++++++++++++++++------
+ 1 file changed, 43 insertions(+), 7 deletions(-)
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
-index f136cb4c4255..c26a92cfa4a6 100644
+index 71d1b19ad1c0..8e4c6cb4a808 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
-@@ -261,6 +261,16 @@ void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmadd
+@@ -151,7 +151,23 @@ static inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
+ static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
+ {
+ asm volatile("ptesync": : :"memory");
+- __tlbie_pid(pid, ric);
++
++ /*
++ * Workaround the fact that the "ric" argument to __tlbie_pid
++ * must be a compile-time contraint to match the "i" constraint
++ * in the asm statement.
++ */
++ switch (ric) {
++ case RIC_FLUSH_TLB:
++ __tlbie_pid(pid, RIC_FLUSH_TLB);
++ break;
++ case RIC_FLUSH_PWC:
++ __tlbie_pid(pid, RIC_FLUSH_PWC);
++ break;
++ case RIC_FLUSH_ALL:
++ default:
++ __tlbie_pid(pid, RIC_FLUSH_ALL);
++ }
+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
+ }
+
+@@ -311,6 +327,16 @@ void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmadd
}
EXPORT_SYMBOL(radix__local_flush_tlb_page);
@@ -62,10 +73,10 @@ index f136cb4c4255..c26a92cfa4a6 100644
#ifdef CONFIG_SMP
void radix__flush_tlb_mm(struct mm_struct *mm)
{
-@@ -271,9 +281,12 @@ void radix__flush_tlb_mm(struct mm_struct *mm)
- if (unlikely(pid == MMU_NO_CONTEXT))
- goto no_context;
+@@ -321,9 +347,12 @@ void radix__flush_tlb_mm(struct mm_struct *mm)
+ return;
+ preempt_disable();
- if (!mm_is_thread_local(mm))
- _tlbie_pid(pid, RIC_FLUSH_TLB);
- else
@@ -76,12 +87,12 @@ index f136cb4c4255..c26a92cfa4a6 100644
+ _tlbie_pid(pid, RIC_FLUSH_TLB);
+ } else
_tlbiel_pid(pid, RIC_FLUSH_TLB);
- no_context:
preempt_enable();
-@@ -415,10 +428,14 @@ void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
+ }
+@@ -435,10 +464,14 @@ void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+ }
- if (end == TLB_FLUSH_ALL ||
- (end - start) > tlb_single_page_flush_ceiling * page_size) {
+ if (full) {
- if (local)
+ if (local) {
_tlbiel_pid(pid, RIC_FLUSH_TLB);
@@ -93,9 +104,19 @@ index f136cb4c4255..c26a92cfa4a6 100644
+ else
+ _tlbie_pid(pid, RIC_FLUSH_TLB);
+ }
- goto err_out;
+ } else {
+ bool hflush = false;
+ unsigned long hstart, hend;
+@@ -548,6 +581,9 @@ static inline void __radix__flush_tlb_range_psize(struct mm_struct *mm,
}
- asm volatile("ptesync": : :"memory");
+
+ if (full) {
++ if (!local && mm_needs_flush_escalation(mm))
++ also_pwc = true;
++
+ if (local)
+ _tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
+ else
--
-2.13.6
+2.23.0
diff --git a/patches.suse/powerpc-mm-radix-Drop-unneeded-NULL-check.patch b/patches.suse/powerpc-mm-radix-Drop-unneeded-NULL-check.patch
new file mode 100644
index 0000000000..b5f79c5d8e
--- /dev/null
+++ b/patches.suse/powerpc-mm-radix-Drop-unneeded-NULL-check.patch
@@ -0,0 +1,112 @@
+From 6773027205ea4ccf17055d7f0a0cd813f22fe127 Mon Sep 17 00:00:00 2001
+From: Michael Ellerman <mpe@ellerman.id.au>
+Date: Mon, 16 Oct 2017 12:41:00 +0530
+Subject: [PATCH] powerpc/mm/radix: Drop unneeded NULL check
+
+References: bsc#1152161 ltc#181664
+Patch-mainline: v4.15-rc1
+Git-commit: 6773027205ea4ccf17055d7f0a0cd813f22fe127
+
+We call these functions with non-NULL mm or vma. Hence we can skip the
+NULL check in these functions. We also remove now unused function
+__local_flush_hugetlb_page().
+
+Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+[mpe: Drop the checks with is_vm_hugetlb_page() as noticed by Nick]
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/include/asm/hugetlb.h | 6 ------
+ arch/powerpc/mm/tlb-radix.c | 22 ++++++++++------------
+ 2 files changed, 10 insertions(+), 18 deletions(-)
+
+diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
+index b8a0fb442c64..795d825c2edd 100644
+--- a/arch/powerpc/include/asm/hugetlb.h
++++ b/arch/powerpc/include/asm/hugetlb.h
+@@ -40,12 +40,6 @@ static inline void flush_hugetlb_page(struct vm_area_struct *vma,
+ return radix__flush_hugetlb_page(vma, vmaddr);
+ }
+
+-static inline void __local_flush_hugetlb_page(struct vm_area_struct *vma,
+- unsigned long vmaddr)
+-{
+- if (radix_enabled())
+- return radix__local_flush_hugetlb_page(vma, vmaddr);
+-}
+ #else
+
+ static inline pte_t *hugepd_page(hugepd_t hpd)
+diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
+index 5a1f46eff3a2..3a07d7a5e2fe 100644
+--- a/arch/powerpc/mm/tlb-radix.c
++++ b/arch/powerpc/mm/tlb-radix.c
+@@ -164,7 +164,7 @@ void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmadd
+ unsigned long ap = mmu_get_ap(psize);
+
+ preempt_disable();
+- pid = mm ? mm->context.id : 0;
++ pid = mm->context.id;
+ if (pid != MMU_NO_CONTEXT)
+ _tlbiel_va(vmaddr, pid, ap, RIC_FLUSH_TLB);
+ preempt_enable();
+@@ -174,11 +174,10 @@ void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmadd
+ {
+ #ifdef CONFIG_HUGETLB_PAGE
+ /* need the return fix for nohash.c */
+- if (vma && is_vm_hugetlb_page(vma))
+- return __local_flush_hugetlb_page(vma, vmaddr);
++ if (is_vm_hugetlb_page(vma))
++ return radix__local_flush_hugetlb_page(vma, vmaddr);
+ #endif
+- radix__local_flush_tlb_page_psize(vma ? vma->vm_mm : NULL, vmaddr,
+- mmu_virtual_psize);
++ radix__local_flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
+ }
+ EXPORT_SYMBOL(radix__local_flush_tlb_page);
+
+@@ -232,7 +231,7 @@ void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
+ unsigned long ap = mmu_get_ap(psize);
+
+ preempt_disable();
+- pid = mm ? mm->context.id : 0;
++ pid = mm->context.id;
+ if (unlikely(pid == MMU_NO_CONTEXT))
+ goto bail;
+ if (!mm_is_thread_local(mm))
+@@ -246,11 +245,10 @@ void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
+ void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
+ {
+ #ifdef CONFIG_HUGETLB_PAGE
+- if (vma && is_vm_hugetlb_page(vma))
+- return flush_hugetlb_page(vma, vmaddr);
++ if (is_vm_hugetlb_page(vma))
++ return radix__flush_hugetlb_page(vma, vmaddr);
+ #endif
+- radix__flush_tlb_page_psize(vma ? vma->vm_mm : NULL, vmaddr,
+- mmu_virtual_psize);
++ radix__flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
+ }
+ EXPORT_SYMBOL(radix__flush_tlb_page);
+
+@@ -330,7 +328,7 @@ void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
+
+
+ preempt_disable();
+- pid = mm ? mm->context.id : 0;
++ pid = mm->context.id;
+ if (unlikely(pid == MMU_NO_CONTEXT))
+ goto err_out;
+
+@@ -361,7 +359,7 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
+ unsigned long pid, end;
+
+
+- pid = mm ? mm->context.id : 0;
++ pid = mm->context.id;
+ if (unlikely(pid == MMU_NO_CONTEXT))
+ goto no_context;
+
+--
+2.23.0
+
diff --git a/patches.suse/powerpc-mm-radix-Move-the-functions-that-does-the-ac.patch b/patches.suse/powerpc-mm-radix-Move-the-functions-that-does-the-ac.patch
index a5552326b7..f9d22d2256 100644
--- a/patches.suse/powerpc-mm-radix-Move-the-functions-that-does-the-ac.patch
+++ b/patches.suse/powerpc-mm-radix-Move-the-functions-that-does-the-ac.patch
@@ -12,23 +12,17 @@ No functionality change. Just code movement to ease code changes later
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
-[mauricfo: backport:
- - hunk 1: update context line 1
- - hunk 2: update context lines, update whitespace in 2nd remove line
- - hunk 3: update context lines]
-Signed-off-by: Mauricio Faria de Oliveira <mauricfo@linux.vnet.ibm.com>
-
Acked-by: Michal Suchanek <msuchanek@suse.de>
---
- arch/powerpc/mm/tlb-radix.c | 64 ++++++++++++++++++++++-----------------------
+ arch/powerpc/mm/tlb-radix.c | 64 ++++++++++++++++++-------------------
1 file changed, 32 insertions(+), 32 deletions(-)
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
-index e47ee8c..74354c2 100644
+index e47ee8c867c5..74354c26d316 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -119,6 +119,38 @@ static inline void __tlbie_pid(unsigned long pid, unsigned long ric)
- trace_tlbie(0, 1, rb, rs, ric, prs, r);
+ trace_tlbie(0, 0, rb, rs, ric, prs, r);
}
+static inline void __tlbiel_va(unsigned long va, unsigned long pid,
@@ -67,11 +61,11 @@ index e47ee8c..74354c2 100644
* We use 128 set in radix mode and 256 set in hpt mode.
*/
@@ -171,22 +203,6 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
- trace_tlbie(0, 0, rb, rs, ric, prs, r);
+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
-static inline void __tlbiel_va(unsigned long va, unsigned long pid,
-- unsigned long ap, unsigned long ric)
+- unsigned long ap, unsigned long ric)
-{
- unsigned long rb,rs,prs,r;
-
@@ -86,9 +80,9 @@ index e47ee8c..74354c2 100644
- trace_tlbie(0, 1, rb, rs, ric, prs, r);
-}
-
- static inline void _tlbiel_va(unsigned long va, unsigned long pid,
- unsigned long ap, unsigned long ric)
- {
+ static inline void __tlbiel_va_range(unsigned long start, unsigned long end,
+ unsigned long pid, unsigned long page_size,
+ unsigned long psize)
@@ -219,22 +235,6 @@ static inline void _tlbiel_va_range(unsigned long start, unsigned long end,
asm volatile("ptesync": : :"memory");
}
@@ -109,9 +103,9 @@ index e47ee8c..74354c2 100644
- trace_tlbie(0, 0, rb, rs, ric, prs, r);
-}
-
- static inline void _tlbie_va(unsigned long va, unsigned long pid,
- unsigned long ap, unsigned long ric)
- {
+ static inline void __tlbie_va_range(unsigned long start, unsigned long end,
+ unsigned long pid, unsigned long page_size,
+ unsigned long psize)
--
-cgit v1.1
+2.23.0
diff --git a/patches.suse/powerpc-mm-radix-implement-LPID-based-TLB-flushes-to.patch b/patches.suse/powerpc-mm-radix-implement-LPID-based-TLB-flushes-to.patch
new file mode 100644
index 0000000000..372c04a747
--- /dev/null
+++ b/patches.suse/powerpc-mm-radix-implement-LPID-based-TLB-flushes-to.patch
@@ -0,0 +1,296 @@
+From 0078778a86b14f85bf50e96d9ddeb3b70b55805d Mon Sep 17 00:00:00 2001
+From: Nicholas Piggin <npiggin@gmail.com>
+Date: Wed, 9 May 2018 12:20:18 +1000
+Subject: [PATCH] powerpc/mm/radix: implement LPID based TLB flushes to be used
+ by KVM
+
+References: bsc#1152161 ltc#181664
+Patch-mainline: v4.18-rc1
+Git-commit: 0078778a86b14f85bf50e96d9ddeb3b70b55805d
+
+Implement a local TLB flush for invalidating an LPID with variants for
+process or partition scope. And a global TLB flush for invalidating
+a partition scoped page of an LPID.
+
+These will be used by KVM in subsequent patches.
+
+Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ .../include/asm/book3s/64/tlbflush-radix.h | 7 +
+ arch/powerpc/mm/tlb-radix.c | 207 ++++++++++++++++++
+ 2 files changed, 214 insertions(+)
+
+diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
+index 19b45ba6caf9..ef5c3f2994c9 100644
+--- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
++++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
+@@ -51,4 +51,11 @@ extern void radix__flush_tlb_all(void);
+ extern void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm,
+ unsigned long address);
+
++extern void radix__flush_tlb_lpid_page(unsigned int lpid,
++ unsigned long addr,
++ unsigned long page_size);
++extern void radix__flush_pwc_lpid(unsigned int lpid);
++extern void radix__local_flush_tlb_lpid(unsigned int lpid);
++extern void radix__local_flush_tlb_lpid_guest(unsigned int lpid);
++
+ #endif
+diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
+index a5d7309c2d05..5ac3206c51cc 100644
+--- a/arch/powerpc/mm/tlb-radix.c
++++ b/arch/powerpc/mm/tlb-radix.c
+@@ -118,6 +118,53 @@ static inline void __tlbie_pid(unsigned long pid, unsigned long ric)
+ trace_tlbie(0, 0, rb, rs, ric, prs, r);
+ }
+
++static inline void __tlbiel_lpid(unsigned long lpid, int set,
++ unsigned long ric)
++{
++ unsigned long rb,rs,prs,r;
++
++ rb = PPC_BIT(52); /* IS = 2 */
++ rb |= set << PPC_BITLSHIFT(51);
++ rs = 0; /* LPID comes from LPIDR */
++ prs = 0; /* partition scoped */
++ r = 1; /* radix format */
++
++ asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
++ : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
++ trace_tlbie(lpid, 1, rb, rs, ric, prs, r);
++}
++
++static inline void __tlbie_lpid(unsigned long lpid, unsigned long ric)
++{
++ unsigned long rb,rs,prs,r;
++
++ rb = PPC_BIT(52); /* IS = 2 */
++ rs = lpid;
++ prs = 0; /* partition scoped */
++ r = 1; /* radix format */
++
++ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
++ : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
++ trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
++}
++
++static inline void __tlbiel_lpid_guest(unsigned long lpid, int set,
++ unsigned long ric)
++{
++ unsigned long rb,rs,prs,r;
++
++ rb = PPC_BIT(52); /* IS = 2 */
++ rb |= set << PPC_BITLSHIFT(51);
++ rs = 0; /* LPID comes from LPIDR */
++ prs = 1; /* process scoped */
++ r = 1; /* radix format */
++
++ asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
++ : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
++ trace_tlbie(lpid, 1, rb, rs, ric, prs, r);
++}
++
++
+ static inline void __tlbiel_va(unsigned long va, unsigned long pid,
+ unsigned long ap, unsigned long ric)
+ {
+@@ -150,6 +197,22 @@ static inline void __tlbie_va(unsigned long va, unsigned long pid,
+ trace_tlbie(0, 0, rb, rs, ric, prs, r);
+ }
+
++static inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid,
++ unsigned long ap, unsigned long ric)
++{
++ unsigned long rb,rs,prs,r;
++
++ rb = va & ~(PPC_BITMASK(52, 63));
++ rb |= ap << PPC_BITLSHIFT(58);
++ rs = lpid;
++ prs = 0; /* partition scoped */
++ r = 1; /* radix format */
++
++ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
++ : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
++ trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
++}
++
+ static inline void fixup_tlbie(void)
+ {
+ unsigned long pid = 0;
+@@ -161,6 +224,16 @@ static inline void fixup_tlbie(void)
+ }
+ }
+
++static inline void fixup_tlbie_lpid(unsigned long lpid)
++{
++ unsigned long va = ((1UL << 52) - 1);
++
++ if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) {
++ asm volatile("ptesync": : :"memory");
++ __tlbie_lpid_va(va, lpid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
++ }
++}
++
+ /*
+ * We use 128 set in radix mode and 256 set in hpt mode.
+ */
+@@ -214,6 +287,86 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
+ }
+
++static inline void _tlbiel_lpid(unsigned long lpid, unsigned long ric)
++{
++ int set;
++
++ VM_BUG_ON(mfspr(SPRN_LPID) != lpid);
++
++ asm volatile("ptesync": : :"memory");
++
++ /*
++ * Flush the first set of the TLB, and if we're doing a RIC_FLUSH_ALL,
++ * also flush the entire Page Walk Cache.
++ */
++ __tlbiel_lpid(lpid, 0, ric);
++
++ /* For PWC, only one flush is needed */
++ if (ric == RIC_FLUSH_PWC) {
++ asm volatile("ptesync": : :"memory");
++ return;
++ }
++
++ /* For the remaining sets, just flush the TLB */
++ for (set = 1; set < POWER9_TLB_SETS_RADIX ; set++)
++ __tlbiel_lpid(lpid, set, RIC_FLUSH_TLB);
++
++ asm volatile("ptesync": : :"memory");
++ asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory");
++}
++
++static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric)
++{
++ asm volatile("ptesync": : :"memory");
++
++ /*
++ * Workaround the fact that the "ric" argument to __tlbie_pid
++ * must be a compile-time contraint to match the "i" constraint
++ * in the asm statement.
++ */
++ switch (ric) {
++ case RIC_FLUSH_TLB:
++ __tlbie_lpid(lpid, RIC_FLUSH_TLB);
++ break;
++ case RIC_FLUSH_PWC:
++ __tlbie_lpid(lpid, RIC_FLUSH_PWC);
++ break;
++ case RIC_FLUSH_ALL:
++ default:
++ __tlbie_lpid(lpid, RIC_FLUSH_ALL);
++ }
++ fixup_tlbie_lpid(lpid);
++ asm volatile("eieio; tlbsync; ptesync": : :"memory");
++}
++
++static inline void _tlbiel_lpid_guest(unsigned long lpid, unsigned long ric)
++{
++ int set;
++
++ VM_BUG_ON(mfspr(SPRN_LPID) != lpid);
++
++ asm volatile("ptesync": : :"memory");
++
++ /*
++ * Flush the first set of the TLB, and if we're doing a RIC_FLUSH_ALL,
++ * also flush the entire Page Walk Cache.
++ */
++ __tlbiel_lpid_guest(lpid, 0, ric);
++
++ /* For PWC, only one flush is needed */
++ if (ric == RIC_FLUSH_PWC) {
++ asm volatile("ptesync": : :"memory");
++ return;
++ }
++
++ /* For the remaining sets, just flush the TLB */
++ for (set = 1; set < POWER9_TLB_SETS_RADIX ; set++)
++ __tlbiel_lpid_guest(lpid, set, RIC_FLUSH_TLB);
++
++ asm volatile("ptesync": : :"memory");
++}
++
++
+ static inline void __tlbiel_va_range(unsigned long start, unsigned long end,
+ unsigned long pid, unsigned long page_size,
+ unsigned long psize)
+@@ -268,6 +421,17 @@ static inline void _tlbie_va(unsigned long va, unsigned long pid,
+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
+ }
+
++static inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid,
++ unsigned long psize, unsigned long ric)
++{
++ unsigned long ap = mmu_get_ap(psize);
++
++ asm volatile("ptesync": : :"memory");
++ __tlbie_lpid_va(va, lpid, ap, ric);
++ fixup_tlbie_lpid(lpid);
++ asm volatile("eieio; tlbsync; ptesync": : :"memory");
++}
++
+ static inline void _tlbie_va_range(unsigned long start, unsigned long end,
+ unsigned long pid, unsigned long page_size,
+ unsigned long psize, bool also_pwc)
+@@ -534,6 +698,49 @@ static int radix_get_mmu_psize(int page_size)
+ return psize;
+ }
+
++/*
++ * Flush partition scoped LPID address translation for all CPUs.
++ */
++void radix__flush_tlb_lpid_page(unsigned int lpid,
++ unsigned long addr,
++ unsigned long page_size)
++{
++ int psize = radix_get_mmu_psize(page_size);
++
++ _tlbie_lpid_va(addr, lpid, psize, RIC_FLUSH_TLB);
++}
++EXPORT_SYMBOL_GPL(radix__flush_tlb_lpid_page);
++
++/*
++ * Flush partition scoped PWC from LPID for all CPUs.
++ */
++void radix__flush_pwc_lpid(unsigned int lpid)
++{
++ _tlbie_lpid(lpid, RIC_FLUSH_PWC);
++}
++EXPORT_SYMBOL_GPL(radix__flush_pwc_lpid);
++
++/*
++ * Flush partition scoped translations from LPID (=LPIDR)
++ */
++void radix__local_flush_tlb_lpid(unsigned int lpid)
++{
++ _tlbiel_lpid(lpid, RIC_FLUSH_ALL);
++}
++EXPORT_SYMBOL_GPL(radix__local_flush_tlb_lpid);
++
++/*
++ * Flush process scoped translations from LPID (=LPIDR).
++ * Important difference, the guest normally manages its own translations,
++ * but some cases e.g., vCPU CPU migration require KVM to flush.
++ */
++void radix__local_flush_tlb_lpid_guest(unsigned int lpid)
++{
++ _tlbiel_lpid_guest(lpid, RIC_FLUSH_ALL);
++}
++EXPORT_SYMBOL_GPL(radix__local_flush_tlb_lpid_guest);
++
++
+ static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
+ unsigned long end, int psize);
+
+--
+2.23.0
+
diff --git a/patches.suse/powerpc-module64-Fix-comment-in-R_PPC64_ENTRY-handli.patch b/patches.suse/powerpc-module64-Fix-comment-in-R_PPC64_ENTRY-handli.patch
new file mode 100644
index 0000000000..28fb519b43
--- /dev/null
+++ b/patches.suse/powerpc-module64-Fix-comment-in-R_PPC64_ENTRY-handli.patch
@@ -0,0 +1,35 @@
+From 2fb0a2c989837c976b68233496bbaefb47cd3d6f Mon Sep 17 00:00:00 2001
+From: Michael Ellerman <mpe@ellerman.id.au>
+Date: Sat, 6 Jul 2019 00:18:53 +1000
+Subject: [PATCH] powerpc/module64: Fix comment in R_PPC64_ENTRY handling
+
+References: bsc#1065729
+Patch-mainline: v5.3-rc1
+Git-commit: 2fb0a2c989837c976b68233496bbaefb47cd3d6f
+
+The comment here is wrong, the addi reads from r2 not r12. The code is
+correct, 0x38420000 = addi r2,r2,0.
+
+Fixes: a61674bdfc7c ("powerpc/module: Handle R_PPC64_ENTRY relocations")
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/kernel/module_64.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
+index 8661eea78503..4bf81a111179 100644
+--- a/arch/powerpc/kernel/module_64.c
++++ b/arch/powerpc/kernel/module_64.c
+@@ -719,7 +719,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
+ /*
+ * If found, replace it with:
+ * addis r2, r12, (.TOC.-func)@ha
+- * addi r2, r12, (.TOC.-func)@l
++ * addi r2, r2, (.TOC.-func)@l
+ */
+ ((uint32_t *)location)[0] = 0x3c4c0000 + PPC_HA(value);
+ ((uint32_t *)location)[1] = 0x38420000 + PPC_LO(value);
+--
+2.23.0
+
diff --git a/patches.suse/powerpc-powernv-Fix-compile-without-CONFIG_TRACEPOIN.patch b/patches.suse/powerpc-powernv-Fix-compile-without-CONFIG_TRACEPOIN.patch
new file mode 100644
index 0000000000..2102496cb4
--- /dev/null
+++ b/patches.suse/powerpc-powernv-Fix-compile-without-CONFIG_TRACEPOIN.patch
@@ -0,0 +1,35 @@
+From 1702877621ff1c8a737857b71d379510267a17db Mon Sep 17 00:00:00 2001
+From: Alexey Kardashevskiy <aik@ozlabs.ru>
+Date: Tue, 12 Mar 2019 16:07:12 +1100
+Subject: [PATCH] powerpc/powernv: Fix compile without CONFIG_TRACEPOINTS
+
+References: bsc#1065729
+Patch-mainline: v5.1-rc1
+Git-commit: 1702877621ff1c8a737857b71d379510267a17db
+
+The functions returns s64 but the return statement is missing.
+This adds the missing return statement.
+
+Fixes: 75d9fc7fd94e ("powerpc/powernv: move OPAL call wrapper tracing and interrupt handling to C")
+Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/platforms/powernv/opal-call.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/arch/powerpc/platforms/powernv/opal-call.c b/arch/powerpc/platforms/powernv/opal-call.c
+index 578757d403ab..daad8c45c8e7 100644
+--- a/arch/powerpc/platforms/powernv/opal-call.c
++++ b/arch/powerpc/platforms/powernv/opal-call.c
+@@ -86,6 +86,7 @@ static s64 __opal_call_trace(s64 a0, s64 a1, s64 a2, s64 a3,
+ s64 a4, s64 a5, s64 a6, s64 a7,
+ unsigned long opcode, unsigned long msr)
+ {
++ return 0;
+ }
+
+ #define DO_TRACE false
+--
+2.23.0
+
diff --git a/patches.suse/powerpc-powernv-move-OPAL-call-wrapper-tracing-and-i.patch b/patches.suse/powerpc-powernv-move-OPAL-call-wrapper-tracing-and-i.patch
new file mode 100644
index 0000000000..9546a24e46
--- /dev/null
+++ b/patches.suse/powerpc-powernv-move-OPAL-call-wrapper-tracing-and-i.patch
@@ -0,0 +1,719 @@
+From 75d9fc7fd94eb43cdf0bec04499a27ced780af19 Mon Sep 17 00:00:00 2001
+From: Nicholas Piggin <npiggin@gmail.com>
+Date: Tue, 26 Feb 2019 19:30:35 +1000
+Subject: [PATCH] powerpc/powernv: move OPAL call wrapper tracing and interrupt
+ handling to C
+
+References: bsc#1065729
+Patch-mainline: v5.1-rc1
+Git-commit: 75d9fc7fd94eb43cdf0bec04499a27ced780af19
+
+The OPAL call wrapper gets interrupt disabling wrong. It disables
+interrupts just by clearing MSR[EE], which has two problems:
+
+- It doesn't call into the IRQ tracing subsystem, which means tracing
+ across OPAL calls does not always notice IRQs have been disabled.
+
+- It doesn't go through the IRQ soft-mask code, which causes a minor
+ bug. MSR[EE] can not be restored by saving the MSR then clearing
+ MSR[EE], because a racing interrupt while soft-masked could clear
+ MSR[EE] between the two steps. This can cause MSR[EE] to be
+ incorrectly enabled when the OPAL call returns. Fortunately that
+ should only result in another masked interrupt being taken to
+ disable MSR[EE] again, but it's a bit sloppy.
+
+The existing code also saves MSR to PACA, which is not re-entrant if
+there is a nested OPAL call from different MSR contexts, which can
+happen these days with SRESET interrupts on bare metal.
+
+To fix these issues, move the tracing and IRQ handling code to C, and
+call into asm just for the low level call when everything is ready to
+go. Save the MSR on stack rather than PACA.
+
+Performance cost is kept to a minimum with a few optimisations:
+
+- The endian switch upon return is combined with the MSR restore,
+ which avoids an expensive context synchronizing operation for LE
+ kernels. This makes up for the additional mtmsrd to enable
+ interrupts with local_irq_enable().
+
+- blr is now used to return from the opal_* functions that are called
+ as C functions, to avoid link stack corruption. This requires a
+ skiboot fix as well to keep the call stack balanced.
+
+A NULL call is more costly after this, (410ns->430ns on POWER9), but
+OPAL calls are generally not performance critical at this scale.
+
+Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/include/asm/asm-prototypes.h | 10 +-
+ arch/powerpc/platforms/powernv/Makefile | 5 +-
+ arch/powerpc/platforms/powernv/opal-call.c | 283 ++++++++++++++
+ .../powerpc/platforms/powernv/opal-wrappers.S | 344 ++----------------
+ 4 files changed, 328 insertions(+), 314 deletions(-)
+ create mode 100644 arch/powerpc/platforms/powernv/opal-call.c
+
+--- a/arch/powerpc/include/asm/asm-prototypes.h
++++ b/arch/powerpc/include/asm/asm-prototypes.h
+@@ -38,13 +38,10 @@ extern struct static_key hcall_tracepoin
+ void __trace_hcall_entry(unsigned long opcode, unsigned long *args);
+ void __trace_hcall_exit(long opcode, unsigned long retval,
+ unsigned long *retbuf);
+-/* OPAL tracing */
+-#ifdef HAVE_JUMP_LABEL
+-extern struct static_key opal_tracepoint_key;
+-#endif
+-
+-void __trace_opal_entry(unsigned long opcode, unsigned long *args);
+-void __trace_opal_exit(long opcode, unsigned long retval);
++/* OPAL */
++int64_t __opal_call(int64_t a0, int64_t a1, int64_t a2, int64_t a3,
++ int64_t a4, int64_t a5, int64_t a6, int64_t a7,
++ int64_t opcode, uint64_t msr);
+
+ /* VMX copying */
+ int enter_vmx_usercopy(void);
+--- a/arch/powerpc/platforms/powernv/Makefile
++++ b/arch/powerpc/platforms/powernv/Makefile
+@@ -1,5 +1,5 @@
+-obj-y += setup.o opal-wrappers.o opal.o opal-async.o idle.o
+-obj-y += opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o
++obj-y += setup.o opal-call.o opal-wrappers.o opal.o opal-async.o
++obj-y += idle.o opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o
+ obj-y += rng.o opal-elog.o opal-dump.o opal-sysparam.o opal-sensor.o
+ obj-y += opal-msglog.o opal-hmi.o opal-power.o opal-irqchip.o
+ obj-y += opal-kmsg.o opal-powercap.o opal-psr.o opal-sensor-groups.o
+@@ -10,6 +10,5 @@ obj-$(CONFIG_CXL_BASE) += pci-cxl.o
+ obj-$(CONFIG_EEH) += eeh-powernv.o
+ obj-$(CONFIG_PPC_SCOM) += opal-xscom.o
+ obj-$(CONFIG_MEMORY_FAILURE) += opal-memory-errors.o
+-obj-$(CONFIG_TRACEPOINTS) += opal-tracepoints.o
+ obj-$(CONFIG_OPAL_PRD) += opal-prd.o
+ obj-$(CONFIG_PERF_EVENTS) += opal-imc.o
+--- /dev/null
++++ b/arch/powerpc/platforms/powernv/opal-call.c
+@@ -0,0 +1,273 @@
++// SPDX-License-Identifier: GPL-2.0
++#include <linux/percpu.h>
++#include <linux/jump_label.h>
++#include <asm/opal-api.h>
++#include <asm/trace.h>
++#include <asm/asm-prototypes.h>
++
++#ifdef CONFIG_TRACEPOINTS
++/*
++ * Since the tracing code might execute OPAL calls we need to guard against
++ * recursion.
++ */
++static DEFINE_PER_CPU(unsigned int, opal_trace_depth);
++
++static void __trace_opal_entry(s64 a0, s64 a1, s64 a2, s64 a3,
++ s64 a4, s64 a5, s64 a6, s64 a7,
++ unsigned long opcode)
++{
++ unsigned int *depth;
++ unsigned long args[8];
++
++ depth = this_cpu_ptr(&opal_trace_depth);
++
++ if (*depth)
++ return;
++
++ args[0] = a0;
++ args[1] = a1;
++ args[2] = a2;
++ args[3] = a3;
++ args[4] = a4;
++ args[5] = a5;
++ args[6] = a6;
++ args[7] = a7;
++
++ (*depth)++;
++ trace_opal_entry(opcode, &args[0]);
++ (*depth)--;
++}
++
++static void __trace_opal_exit(unsigned long opcode, unsigned long retval)
++{
++ unsigned int *depth;
++
++ depth = this_cpu_ptr(&opal_trace_depth);
++
++ if (*depth)
++ return;
++
++ (*depth)++;
++ trace_opal_exit(opcode, retval);
++ (*depth)--;
++}
++
++static DEFINE_STATIC_KEY_FALSE(opal_tracepoint_key);
++
++int opal_tracepoint_regfunc(void)
++{
++ static_branch_inc(&opal_tracepoint_key);
++ return 0;
++}
++
++void opal_tracepoint_unregfunc(void)
++{
++ static_branch_dec(&opal_tracepoint_key);
++}
++
++static s64 __opal_call_trace(s64 a0, s64 a1, s64 a2, s64 a3,
++ s64 a4, s64 a5, s64 a6, s64 a7,
++ unsigned long opcode, unsigned long msr)
++{
++ s64 ret;
++
++ __trace_opal_entry(a0, a1, a2, a3, a4, a5, a6, a7, opcode);
++ ret = __opal_call(a0, a1, a2, a3, a4, a5, a6, a7, opcode, msr);
++ __trace_opal_exit(opcode, ret);
++
++ return ret;
++}
++
++#define DO_TRACE (static_branch_unlikely(&opal_tracepoint_key))
++
++#else /* CONFIG_TRACEPOINTS */
++
++static s64 __opal_call_trace(s64 a0, s64 a1, s64 a2, s64 a3,
++ s64 a4, s64 a5, s64 a6, s64 a7,
++ unsigned long opcode, unsigned long msr)
++{
++}
++
++#define DO_TRACE false
++#endif /* CONFIG_TRACEPOINTS */
++
++static int64_t opal_call(int64_t a0, int64_t a1, int64_t a2, int64_t a3,
++ int64_t a4, int64_t a5, int64_t a6, int64_t a7, int64_t opcode)
++{
++ unsigned long flags;
++ unsigned long msr = mfmsr();
++ bool mmu = (msr & (MSR_IR|MSR_DR));
++ int64_t ret;
++
++ msr &= ~MSR_EE;
++
++ if (unlikely(!mmu))
++ return __opal_call(a0, a1, a2, a3, a4, a5, a6, a7, opcode, msr);
++
++ local_save_flags(flags);
++ hard_irq_disable();
++
++ if (DO_TRACE) {
++ ret = __opal_call_trace(a0, a1, a2, a3, a4, a5, a6, a7, opcode, msr);
++ } else {
++ ret = __opal_call(a0, a1, a2, a3, a4, a5, a6, a7, opcode, msr);
++ }
++
++ local_irq_restore(flags);
++
++ return ret;
++}
++
++#define OPAL_CALL(name, opcode) \
++int64_t name(int64_t a0, int64_t a1, int64_t a2, int64_t a3, \
++ int64_t a4, int64_t a5, int64_t a6, int64_t a7) \
++{ \
++ return opal_call(a0, a1, a2, a3, a4, a5, a6, a7, opcode); \
++}
++
++OPAL_CALL(opal_invalid_call, OPAL_INVALID_CALL);
++OPAL_CALL(opal_console_write, OPAL_CONSOLE_WRITE);
++OPAL_CALL(opal_console_read, OPAL_CONSOLE_READ);
++OPAL_CALL(opal_console_write_buffer_space, OPAL_CONSOLE_WRITE_BUFFER_SPACE);
++OPAL_CALL(opal_rtc_read, OPAL_RTC_READ);
++OPAL_CALL(opal_rtc_write, OPAL_RTC_WRITE);
++OPAL_CALL(opal_cec_power_down, OPAL_CEC_POWER_DOWN);
++OPAL_CALL(opal_cec_reboot, OPAL_CEC_REBOOT);
++OPAL_CALL(opal_cec_reboot2, OPAL_CEC_REBOOT2);
++OPAL_CALL(opal_read_nvram, OPAL_READ_NVRAM);
++OPAL_CALL(opal_write_nvram, OPAL_WRITE_NVRAM);
++OPAL_CALL(opal_handle_interrupt, OPAL_HANDLE_INTERRUPT);
++OPAL_CALL(opal_poll_events, OPAL_POLL_EVENTS);
++OPAL_CALL(opal_pci_set_hub_tce_memory, OPAL_PCI_SET_HUB_TCE_MEMORY);
++OPAL_CALL(opal_pci_set_phb_tce_memory, OPAL_PCI_SET_PHB_TCE_MEMORY);
++OPAL_CALL(opal_pci_config_read_byte, OPAL_PCI_CONFIG_READ_BYTE);
++OPAL_CALL(opal_pci_config_read_half_word, OPAL_PCI_CONFIG_READ_HALF_WORD);
++OPAL_CALL(opal_pci_config_read_word, OPAL_PCI_CONFIG_READ_WORD);
++OPAL_CALL(opal_pci_config_write_byte, OPAL_PCI_CONFIG_WRITE_BYTE);
++OPAL_CALL(opal_pci_config_write_half_word, OPAL_PCI_CONFIG_WRITE_HALF_WORD);
++OPAL_CALL(opal_pci_config_write_word, OPAL_PCI_CONFIG_WRITE_WORD);
++OPAL_CALL(opal_set_xive, OPAL_SET_XIVE);
++OPAL_CALL(opal_get_xive, OPAL_GET_XIVE);
++OPAL_CALL(opal_register_exception_handler, OPAL_REGISTER_OPAL_EXCEPTION_HANDLER);
++OPAL_CALL(opal_pci_eeh_freeze_status, OPAL_PCI_EEH_FREEZE_STATUS);
++OPAL_CALL(opal_pci_eeh_freeze_clear, OPAL_PCI_EEH_FREEZE_CLEAR);
++OPAL_CALL(opal_pci_eeh_freeze_set, OPAL_PCI_EEH_FREEZE_SET);
++OPAL_CALL(opal_pci_err_inject, OPAL_PCI_ERR_INJECT);
++OPAL_CALL(opal_pci_shpc, OPAL_PCI_SHPC);
++OPAL_CALL(opal_pci_phb_mmio_enable, OPAL_PCI_PHB_MMIO_ENABLE);
++OPAL_CALL(opal_pci_set_phb_mem_window, OPAL_PCI_SET_PHB_MEM_WINDOW);
++OPAL_CALL(opal_pci_map_pe_mmio_window, OPAL_PCI_MAP_PE_MMIO_WINDOW);
++OPAL_CALL(opal_pci_set_phb_table_memory, OPAL_PCI_SET_PHB_TABLE_MEMORY);
++OPAL_CALL(opal_pci_set_pe, OPAL_PCI_SET_PE);
++OPAL_CALL(opal_pci_set_peltv, OPAL_PCI_SET_PELTV);
++OPAL_CALL(opal_pci_set_mve, OPAL_PCI_SET_MVE);
++OPAL_CALL(opal_pci_set_mve_enable, OPAL_PCI_SET_MVE_ENABLE);
++OPAL_CALL(opal_pci_get_xive_reissue, OPAL_PCI_GET_XIVE_REISSUE);
++OPAL_CALL(opal_pci_set_xive_reissue, OPAL_PCI_SET_XIVE_REISSUE);
++OPAL_CALL(opal_pci_set_xive_pe, OPAL_PCI_SET_XIVE_PE);
++OPAL_CALL(opal_get_xive_source, OPAL_GET_XIVE_SOURCE);
++OPAL_CALL(opal_get_msi_32, OPAL_GET_MSI_32);
++OPAL_CALL(opal_get_msi_64, OPAL_GET_MSI_64);
++OPAL_CALL(opal_start_cpu, OPAL_START_CPU);
++OPAL_CALL(opal_query_cpu_status, OPAL_QUERY_CPU_STATUS);
++OPAL_CALL(opal_write_oppanel, OPAL_WRITE_OPPANEL);
++OPAL_CALL(opal_pci_map_pe_dma_window, OPAL_PCI_MAP_PE_DMA_WINDOW);
++OPAL_CALL(opal_pci_map_pe_dma_window_real, OPAL_PCI_MAP_PE_DMA_WINDOW_REAL);
++OPAL_CALL(opal_pci_reset, OPAL_PCI_RESET);
++OPAL_CALL(opal_pci_get_hub_diag_data, OPAL_PCI_GET_HUB_DIAG_DATA);
++OPAL_CALL(opal_pci_get_phb_diag_data, OPAL_PCI_GET_PHB_DIAG_DATA);
++OPAL_CALL(opal_pci_fence_phb, OPAL_PCI_FENCE_PHB);
++OPAL_CALL(opal_pci_reinit, OPAL_PCI_REINIT);
++OPAL_CALL(opal_pci_mask_pe_error, OPAL_PCI_MASK_PE_ERROR);
++OPAL_CALL(opal_set_slot_led_status, OPAL_SET_SLOT_LED_STATUS);
++OPAL_CALL(opal_get_epow_status, OPAL_GET_EPOW_STATUS);
++OPAL_CALL(opal_get_dpo_status, OPAL_GET_DPO_STATUS);
++OPAL_CALL(opal_set_system_attention_led, OPAL_SET_SYSTEM_ATTENTION_LED);
++OPAL_CALL(opal_pci_next_error, OPAL_PCI_NEXT_ERROR);
++OPAL_CALL(opal_pci_poll, OPAL_PCI_POLL);
++OPAL_CALL(opal_pci_msi_eoi, OPAL_PCI_MSI_EOI);
++OPAL_CALL(opal_pci_get_phb_diag_data2, OPAL_PCI_GET_PHB_DIAG_DATA2);
++OPAL_CALL(opal_xscom_read, OPAL_XSCOM_READ);
++OPAL_CALL(opal_xscom_write, OPAL_XSCOM_WRITE);
++OPAL_CALL(opal_lpc_read, OPAL_LPC_READ);
++OPAL_CALL(opal_lpc_write, OPAL_LPC_WRITE);
++OPAL_CALL(opal_return_cpu, OPAL_RETURN_CPU);
++OPAL_CALL(opal_reinit_cpus, OPAL_REINIT_CPUS);
++OPAL_CALL(opal_read_elog, OPAL_ELOG_READ);
++OPAL_CALL(opal_send_ack_elog, OPAL_ELOG_ACK);
++OPAL_CALL(opal_get_elog_size, OPAL_ELOG_SIZE);
++OPAL_CALL(opal_resend_pending_logs, OPAL_ELOG_RESEND);
++OPAL_CALL(opal_write_elog, OPAL_ELOG_WRITE);
++OPAL_CALL(opal_validate_flash, OPAL_FLASH_VALIDATE);
++OPAL_CALL(opal_manage_flash, OPAL_FLASH_MANAGE);
++OPAL_CALL(opal_update_flash, OPAL_FLASH_UPDATE);
++OPAL_CALL(opal_resync_timebase, OPAL_RESYNC_TIMEBASE);
++OPAL_CALL(opal_check_token, OPAL_CHECK_TOKEN);
++OPAL_CALL(opal_dump_init, OPAL_DUMP_INIT);
++OPAL_CALL(opal_dump_info, OPAL_DUMP_INFO);
++OPAL_CALL(opal_dump_info2, OPAL_DUMP_INFO2);
++OPAL_CALL(opal_dump_read, OPAL_DUMP_READ);
++OPAL_CALL(opal_dump_ack, OPAL_DUMP_ACK);
++OPAL_CALL(opal_get_msg, OPAL_GET_MSG);
++OPAL_CALL(opal_write_oppanel_async, OPAL_WRITE_OPPANEL_ASYNC);
++OPAL_CALL(opal_check_completion, OPAL_CHECK_ASYNC_COMPLETION);
++OPAL_CALL(opal_dump_resend_notification, OPAL_DUMP_RESEND);
++OPAL_CALL(opal_sync_host_reboot, OPAL_SYNC_HOST_REBOOT);
++OPAL_CALL(opal_sensor_read, OPAL_SENSOR_READ);
++OPAL_CALL(opal_get_param, OPAL_GET_PARAM);
++OPAL_CALL(opal_set_param, OPAL_SET_PARAM);
++OPAL_CALL(opal_handle_hmi, OPAL_HANDLE_HMI);
++OPAL_CALL(opal_config_cpu_idle_state, OPAL_CONFIG_CPU_IDLE_STATE);
++OPAL_CALL(opal_slw_set_reg, OPAL_SLW_SET_REG);
++OPAL_CALL(opal_register_dump_region, OPAL_REGISTER_DUMP_REGION);
++OPAL_CALL(opal_unregister_dump_region, OPAL_UNREGISTER_DUMP_REGION);
++OPAL_CALL(opal_pci_set_phb_cxl_mode, OPAL_PCI_SET_PHB_CAPI_MODE);
++OPAL_CALL(opal_tpo_write, OPAL_WRITE_TPO);
++OPAL_CALL(opal_tpo_read, OPAL_READ_TPO);
++OPAL_CALL(opal_ipmi_send, OPAL_IPMI_SEND);
++OPAL_CALL(opal_ipmi_recv, OPAL_IPMI_RECV);
++OPAL_CALL(opal_i2c_request, OPAL_I2C_REQUEST);
++OPAL_CALL(opal_flash_read, OPAL_FLASH_READ);
++OPAL_CALL(opal_flash_write, OPAL_FLASH_WRITE);
++OPAL_CALL(opal_flash_erase, OPAL_FLASH_ERASE);
++OPAL_CALL(opal_prd_msg, OPAL_PRD_MSG);
++OPAL_CALL(opal_leds_get_ind, OPAL_LEDS_GET_INDICATOR);
++OPAL_CALL(opal_leds_set_ind, OPAL_LEDS_SET_INDICATOR);
++OPAL_CALL(opal_console_flush, OPAL_CONSOLE_FLUSH);
++OPAL_CALL(opal_get_device_tree, OPAL_GET_DEVICE_TREE);
++OPAL_CALL(opal_pci_get_presence_state, OPAL_PCI_GET_PRESENCE_STATE);
++OPAL_CALL(opal_pci_get_power_state, OPAL_PCI_GET_POWER_STATE);
++OPAL_CALL(opal_pci_set_power_state, OPAL_PCI_SET_POWER_STATE);
++OPAL_CALL(opal_int_get_xirr, OPAL_INT_GET_XIRR);
++OPAL_CALL(opal_int_set_cppr, OPAL_INT_SET_CPPR);
++OPAL_CALL(opal_int_eoi, OPAL_INT_EOI);
++OPAL_CALL(opal_int_set_mfrr, OPAL_INT_SET_MFRR);
++OPAL_CALL(opal_pci_tce_kill, OPAL_PCI_TCE_KILL);
++OPAL_CALL(opal_nmmu_set_ptcr, OPAL_NMMU_SET_PTCR);
++OPAL_CALL(opal_xive_reset, OPAL_XIVE_RESET);
++OPAL_CALL(opal_xive_get_irq_info, OPAL_XIVE_GET_IRQ_INFO);
++OPAL_CALL(opal_xive_get_irq_config, OPAL_XIVE_GET_IRQ_CONFIG);
++OPAL_CALL(opal_xive_set_irq_config, OPAL_XIVE_SET_IRQ_CONFIG);
++OPAL_CALL(opal_xive_get_queue_info, OPAL_XIVE_GET_QUEUE_INFO);
++OPAL_CALL(opal_xive_set_queue_info, OPAL_XIVE_SET_QUEUE_INFO);
++OPAL_CALL(opal_xive_donate_page, OPAL_XIVE_DONATE_PAGE);
++OPAL_CALL(opal_xive_alloc_vp_block, OPAL_XIVE_ALLOCATE_VP_BLOCK);
++OPAL_CALL(opal_xive_free_vp_block, OPAL_XIVE_FREE_VP_BLOCK);
++OPAL_CALL(opal_xive_allocate_irq, OPAL_XIVE_ALLOCATE_IRQ);
++OPAL_CALL(opal_xive_free_irq, OPAL_XIVE_FREE_IRQ);
++OPAL_CALL(opal_xive_get_vp_info, OPAL_XIVE_GET_VP_INFO);
++OPAL_CALL(opal_xive_set_vp_info, OPAL_XIVE_SET_VP_INFO);
++OPAL_CALL(opal_xive_sync, OPAL_XIVE_SYNC);
++OPAL_CALL(opal_xive_dump, OPAL_XIVE_DUMP);
++OPAL_CALL(opal_npu_init_context, OPAL_NPU_INIT_CONTEXT);
++OPAL_CALL(opal_npu_destroy_context, OPAL_NPU_DESTROY_CONTEXT);
++OPAL_CALL(opal_npu_map_lpar, OPAL_NPU_MAP_LPAR);
++OPAL_CALL(opal_imc_counters_init, OPAL_IMC_COUNTERS_INIT);
++OPAL_CALL(opal_imc_counters_start, OPAL_IMC_COUNTERS_START);
++OPAL_CALL(opal_imc_counters_stop, OPAL_IMC_COUNTERS_STOP);
++OPAL_CALL(opal_pci_set_p2p, OPAL_PCI_SET_P2P);
++OPAL_CALL(opal_get_powercap, OPAL_GET_POWERCAP);
++OPAL_CALL(opal_set_powercap, OPAL_SET_POWERCAP);
++OPAL_CALL(opal_get_power_shift_ratio, OPAL_GET_POWER_SHIFT_RATIO);
++OPAL_CALL(opal_set_power_shift_ratio, OPAL_SET_POWER_SHIFT_RATIO);
++OPAL_CALL(opal_sensor_group_clear, OPAL_SENSOR_GROUP_CLEAR);
+--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
++++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
+@@ -15,307 +15,53 @@
+ #include <asm/asm-offsets.h>
+ #include <asm/opal.h>
+
+- .section ".text"
+-
+-#ifdef CONFIG_TRACEPOINTS
+-#ifdef HAVE_JUMP_LABEL
+-#define OPAL_BRANCH(LABEL) \
+- ARCH_STATIC_BRANCH(LABEL, opal_tracepoint_key)
+-#else
+-
+- .section ".toc","aw"
+-
+- .globl opal_tracepoint_refcount
+-opal_tracepoint_refcount:
+- .llong 0
+-
+- .section ".text"
++ .section ".text"
+
+ /*
+- * We branch around this in early init by using an unconditional cpu
+- * feature.
++ * r3-r10 - OPAL call arguments
++ * STK_PARAM(R11) - OPAL opcode
++ * STK_PARAM(R12) - MSR to restore
+ */
+-#define OPAL_BRANCH(LABEL) \
+-BEGIN_FTR_SECTION; \
+- b 1f; \
+-END_FTR_SECTION(0, 1); \
+- ld r11,opal_tracepoint_refcount@toc(r2); \
+- cmpdi r11,0; \
+- bne- LABEL; \
+-1:
+-
+-#endif
+-
+-#else
+-#define OPAL_BRANCH(LABEL)
+-#endif
+-
+-/*
+- * DO_OPAL_CALL assumes:
+- * r0 = opal call token
+- * r12 = msr
+- * LR has been saved
+- */
+-#define DO_OPAL_CALL() \
+- mfcr r11; \
+- stw r11,8(r1); \
+- li r11,0; \
+- ori r11,r11,MSR_EE; \
+- std r12,PACASAVEDMSR(r13); \
+- andc r12,r12,r11; \
+- mtmsrd r12,1; \
+- LOAD_REG_ADDR(r11,opal_return); \
+- mtlr r11; \
+- li r11,MSR_DR|MSR_IR|MSR_LE;\
+- andc r12,r12,r11; \
+- mtspr SPRN_HSRR1,r12; \
+- LOAD_REG_ADDR(r11,opal); \
+- ld r12,8(r11); \
+- ld r2,0(r11); \
+- mtspr SPRN_HSRR0,r12; \
++_GLOBAL_TOC(__opal_call)
++ mflr r0
++ std r0,PPC_LR_STKOFF(r1)
++ ld r12,STK_PARAM(R12)(r1)
++ li r0,MSR_IR|MSR_DR|MSR_LE
++ andc r12,r12,r0
++ LOAD_REG_ADDR(r11, opal_return)
++ mtlr r11
++ LOAD_REG_ADDR(r11, opal)
++ ld r2,0(r11)
++ ld r11,8(r11)
++ mtspr SPRN_HSRR0,r11
++ mtspr SPRN_HSRR1,r12
++ /* set token to r0 */
++ ld r0,STK_PARAM(R11)(r1)
+ hrfid
+
+-#define OPAL_CALL(name, token) \
+- _GLOBAL_TOC(name); \
+- mfmsr r12; \
+- mflr r0; \
+- andi. r11,r12,MSR_IR|MSR_DR; \
+- std r0,PPC_LR_STKOFF(r1); \
+- li r0,token; \
+- beq opal_real_call; \
+- OPAL_BRANCH(opal_tracepoint_entry) \
+- DO_OPAL_CALL()
+-
+-
+ opal_return:
+ /*
+- * Fixup endian on OPAL return... we should be able to simplify
+- * this by instead converting the below trampoline to a set of
+- * bytes (always BE) since MSR:LE will end up fixed up as a side
+- * effect of the rfid.
++ * Restore MSR on OPAL return. The MSR is set to big-endian.
+ */
+- FIXUP_ENDIAN
+- ld r2,PACATOC(r13);
+- lwz r4,8(r1);
+- ld r5,PPC_LR_STKOFF(r1);
+- ld r6,PACASAVEDMSR(r13);
+- mtspr SPRN_SRR0,r5;
+- mtspr SPRN_SRR1,r6;
+- mtcr r4;
+- rfid
+-
+-opal_real_call:
+- mfcr r11
+- stw r11,8(r1)
+- /* Set opal return address */
+- LOAD_REG_ADDR(r11, opal_return_realmode)
+- mtlr r11
+- li r11,MSR_LE
+- andc r12,r12,r11
+- mtspr SPRN_HSRR1,r12
+- LOAD_REG_ADDR(r11,opal)
+- ld r12,8(r11)
+- ld r2,0(r11)
+- mtspr SPRN_HSRR0,r12
+- hrfid
+-
+-opal_return_realmode:
+- FIXUP_ENDIAN
+- ld r2,PACATOC(r13);
+- lwz r11,8(r1);
+- ld r12,PPC_LR_STKOFF(r1)
+- mtcr r11;
+- mtlr r12
+- blr
+-
+-#ifdef CONFIG_TRACEPOINTS
+-opal_tracepoint_entry:
+- stdu r1,-STACKFRAMESIZE(r1)
+- std r0,STK_REG(R23)(r1)
+- std r3,STK_REG(R24)(r1)
+- std r4,STK_REG(R25)(r1)
+- std r5,STK_REG(R26)(r1)
+- std r6,STK_REG(R27)(r1)
+- std r7,STK_REG(R28)(r1)
+- std r8,STK_REG(R29)(r1)
+- std r9,STK_REG(R30)(r1)
+- std r10,STK_REG(R31)(r1)
+- mr r3,r0
+- addi r4,r1,STK_REG(R24)
+- bl __trace_opal_entry
+- ld r0,STK_REG(R23)(r1)
+- ld r3,STK_REG(R24)(r1)
+- ld r4,STK_REG(R25)(r1)
+- ld r5,STK_REG(R26)(r1)
+- ld r6,STK_REG(R27)(r1)
+- ld r7,STK_REG(R28)(r1)
+- ld r8,STK_REG(R29)(r1)
+- ld r9,STK_REG(R30)(r1)
+- ld r10,STK_REG(R31)(r1)
+-
+- /* setup LR so we return via tracepoint_return */
+- LOAD_REG_ADDR(r11,opal_tracepoint_return)
+- std r11,16(r1)
+-
+- mfmsr r12
+- DO_OPAL_CALL()
+-
+-opal_tracepoint_return:
+- std r3,STK_REG(R31)(r1)
+- mr r4,r3
+- ld r3,STK_REG(R23)(r1)
+- bl __trace_opal_exit
+- ld r3,STK_REG(R31)(r1)
+- addi r1,r1,STACKFRAMESIZE
+- ld r0,16(r1)
++#ifdef __BIG_ENDIAN__
++ ld r11,STK_PARAM(R12)(r1)
++ mtmsrd r11
++#else
++ /* Endian can only be switched with rfi, must byte reverse MSR load */
++ .short 0x4039 /* li r10,STK_PARAM(R12) */
++ .byte (STK_PARAM(R12) >> 8) & 0xff
++ .byte STK_PARAM(R12) & 0xff
++
++ .long 0x280c6a7d /* ldbrx r11,r10,r1 */
++ .long 0x05009f42 /* bcl 20,31,$+4 */
++ .long 0xa602487d /* mflr r10 */
++ .long 0x14004a39 /* addi r10,r10,20 */
++ .long 0xa64b5a7d /* mthsrr0 r10 */
++ .long 0xa64b7b7d /* mthsrr1 r11 */
++ .long 0x2402004c /* hrfid */
++#endif
++ ld r2,PACATOC(r13)
++ ld r0,PPC_LR_STKOFF(r1)
+ mtlr r0
+ blr
+-#endif
+-
+
+-OPAL_CALL(opal_invalid_call, OPAL_INVALID_CALL);
+-OPAL_CALL(opal_console_write, OPAL_CONSOLE_WRITE);
+-OPAL_CALL(opal_console_read, OPAL_CONSOLE_READ);
+-OPAL_CALL(opal_console_write_buffer_space, OPAL_CONSOLE_WRITE_BUFFER_SPACE);
+-OPAL_CALL(opal_rtc_read, OPAL_RTC_READ);
+-OPAL_CALL(opal_rtc_write, OPAL_RTC_WRITE);
+-OPAL_CALL(opal_cec_power_down, OPAL_CEC_POWER_DOWN);
+-OPAL_CALL(opal_cec_reboot, OPAL_CEC_REBOOT);
+-OPAL_CALL(opal_cec_reboot2, OPAL_CEC_REBOOT2);
+-OPAL_CALL(opal_read_nvram, OPAL_READ_NVRAM);
+-OPAL_CALL(opal_write_nvram, OPAL_WRITE_NVRAM);
+-OPAL_CALL(opal_handle_interrupt, OPAL_HANDLE_INTERRUPT);
+-OPAL_CALL(opal_poll_events, OPAL_POLL_EVENTS);
+-OPAL_CALL(opal_pci_set_hub_tce_memory, OPAL_PCI_SET_HUB_TCE_MEMORY);
+-OPAL_CALL(opal_pci_set_phb_tce_memory, OPAL_PCI_SET_PHB_TCE_MEMORY);
+-OPAL_CALL(opal_pci_config_read_byte, OPAL_PCI_CONFIG_READ_BYTE);
+-OPAL_CALL(opal_pci_config_read_half_word, OPAL_PCI_CONFIG_READ_HALF_WORD);
+-OPAL_CALL(opal_pci_config_read_word, OPAL_PCI_CONFIG_READ_WORD);
+-OPAL_CALL(opal_pci_config_write_byte, OPAL_PCI_CONFIG_WRITE_BYTE);
+-OPAL_CALL(opal_pci_config_write_half_word, OPAL_PCI_CONFIG_WRITE_HALF_WORD);
+-OPAL_CALL(opal_pci_config_write_word, OPAL_PCI_CONFIG_WRITE_WORD);
+-OPAL_CALL(opal_set_xive, OPAL_SET_XIVE);
+-OPAL_CALL(opal_get_xive, OPAL_GET_XIVE);
+-OPAL_CALL(opal_register_exception_handler, OPAL_REGISTER_OPAL_EXCEPTION_HANDLER);
+-OPAL_CALL(opal_pci_eeh_freeze_status, OPAL_PCI_EEH_FREEZE_STATUS);
+-OPAL_CALL(opal_pci_eeh_freeze_clear, OPAL_PCI_EEH_FREEZE_CLEAR);
+-OPAL_CALL(opal_pci_eeh_freeze_set, OPAL_PCI_EEH_FREEZE_SET);
+-OPAL_CALL(opal_pci_err_inject, OPAL_PCI_ERR_INJECT);
+-OPAL_CALL(opal_pci_shpc, OPAL_PCI_SHPC);
+-OPAL_CALL(opal_pci_phb_mmio_enable, OPAL_PCI_PHB_MMIO_ENABLE);
+-OPAL_CALL(opal_pci_set_phb_mem_window, OPAL_PCI_SET_PHB_MEM_WINDOW);
+-OPAL_CALL(opal_pci_map_pe_mmio_window, OPAL_PCI_MAP_PE_MMIO_WINDOW);
+-OPAL_CALL(opal_pci_set_phb_table_memory, OPAL_PCI_SET_PHB_TABLE_MEMORY);
+-OPAL_CALL(opal_pci_set_pe, OPAL_PCI_SET_PE);
+-OPAL_CALL(opal_pci_set_peltv, OPAL_PCI_SET_PELTV);
+-OPAL_CALL(opal_pci_set_mve, OPAL_PCI_SET_MVE);
+-OPAL_CALL(opal_pci_set_mve_enable, OPAL_PCI_SET_MVE_ENABLE);
+-OPAL_CALL(opal_pci_get_xive_reissue, OPAL_PCI_GET_XIVE_REISSUE);
+-OPAL_CALL(opal_pci_set_xive_reissue, OPAL_PCI_SET_XIVE_REISSUE);
+-OPAL_CALL(opal_pci_set_xive_pe, OPAL_PCI_SET_XIVE_PE);
+-OPAL_CALL(opal_get_xive_source, OPAL_GET_XIVE_SOURCE);
+-OPAL_CALL(opal_get_msi_32, OPAL_GET_MSI_32);
+-OPAL_CALL(opal_get_msi_64, OPAL_GET_MSI_64);
+-OPAL_CALL(opal_start_cpu, OPAL_START_CPU);
+-OPAL_CALL(opal_query_cpu_status, OPAL_QUERY_CPU_STATUS);
+-OPAL_CALL(opal_write_oppanel, OPAL_WRITE_OPPANEL);
+-OPAL_CALL(opal_pci_map_pe_dma_window, OPAL_PCI_MAP_PE_DMA_WINDOW);
+-OPAL_CALL(opal_pci_map_pe_dma_window_real, OPAL_PCI_MAP_PE_DMA_WINDOW_REAL);
+-OPAL_CALL(opal_pci_reset, OPAL_PCI_RESET);
+-OPAL_CALL(opal_pci_get_hub_diag_data, OPAL_PCI_GET_HUB_DIAG_DATA);
+-OPAL_CALL(opal_pci_get_phb_diag_data, OPAL_PCI_GET_PHB_DIAG_DATA);
+-OPAL_CALL(opal_pci_fence_phb, OPAL_PCI_FENCE_PHB);
+-OPAL_CALL(opal_pci_reinit, OPAL_PCI_REINIT);
+-OPAL_CALL(opal_pci_mask_pe_error, OPAL_PCI_MASK_PE_ERROR);
+-OPAL_CALL(opal_set_slot_led_status, OPAL_SET_SLOT_LED_STATUS);
+-OPAL_CALL(opal_get_epow_status, OPAL_GET_EPOW_STATUS);
+-OPAL_CALL(opal_get_dpo_status, OPAL_GET_DPO_STATUS);
+-OPAL_CALL(opal_set_system_attention_led, OPAL_SET_SYSTEM_ATTENTION_LED);
+-OPAL_CALL(opal_pci_next_error, OPAL_PCI_NEXT_ERROR);
+-OPAL_CALL(opal_pci_poll, OPAL_PCI_POLL);
+-OPAL_CALL(opal_pci_msi_eoi, OPAL_PCI_MSI_EOI);
+-OPAL_CALL(opal_pci_get_phb_diag_data2, OPAL_PCI_GET_PHB_DIAG_DATA2);
+-OPAL_CALL(opal_xscom_read, OPAL_XSCOM_READ);
+-OPAL_CALL(opal_xscom_write, OPAL_XSCOM_WRITE);
+-OPAL_CALL(opal_lpc_read, OPAL_LPC_READ);
+-OPAL_CALL(opal_lpc_write, OPAL_LPC_WRITE);
+-OPAL_CALL(opal_return_cpu, OPAL_RETURN_CPU);
+-OPAL_CALL(opal_reinit_cpus, OPAL_REINIT_CPUS);
+-OPAL_CALL(opal_read_elog, OPAL_ELOG_READ);
+-OPAL_CALL(opal_send_ack_elog, OPAL_ELOG_ACK);
+-OPAL_CALL(opal_get_elog_size, OPAL_ELOG_SIZE);
+-OPAL_CALL(opal_resend_pending_logs, OPAL_ELOG_RESEND);
+-OPAL_CALL(opal_write_elog, OPAL_ELOG_WRITE);
+-OPAL_CALL(opal_validate_flash, OPAL_FLASH_VALIDATE);
+-OPAL_CALL(opal_manage_flash, OPAL_FLASH_MANAGE);
+-OPAL_CALL(opal_update_flash, OPAL_FLASH_UPDATE);
+-OPAL_CALL(opal_resync_timebase, OPAL_RESYNC_TIMEBASE);
+-OPAL_CALL(opal_check_token, OPAL_CHECK_TOKEN);
+-OPAL_CALL(opal_dump_init, OPAL_DUMP_INIT);
+-OPAL_CALL(opal_dump_info, OPAL_DUMP_INFO);
+-OPAL_CALL(opal_dump_info2, OPAL_DUMP_INFO2);
+-OPAL_CALL(opal_dump_read, OPAL_DUMP_READ);
+-OPAL_CALL(opal_dump_ack, OPAL_DUMP_ACK);
+-OPAL_CALL(opal_get_msg, OPAL_GET_MSG);
+-OPAL_CALL(opal_write_oppanel_async, OPAL_WRITE_OPPANEL_ASYNC);
+-OPAL_CALL(opal_check_completion, OPAL_CHECK_ASYNC_COMPLETION);
+-OPAL_CALL(opal_dump_resend_notification, OPAL_DUMP_RESEND);
+-OPAL_CALL(opal_sync_host_reboot, OPAL_SYNC_HOST_REBOOT);
+-OPAL_CALL(opal_sensor_read, OPAL_SENSOR_READ);
+-OPAL_CALL(opal_get_param, OPAL_GET_PARAM);
+-OPAL_CALL(opal_set_param, OPAL_SET_PARAM);
+-OPAL_CALL(opal_handle_hmi, OPAL_HANDLE_HMI);
+-OPAL_CALL(opal_config_cpu_idle_state, OPAL_CONFIG_CPU_IDLE_STATE);
+-OPAL_CALL(opal_slw_set_reg, OPAL_SLW_SET_REG);
+-OPAL_CALL(opal_register_dump_region, OPAL_REGISTER_DUMP_REGION);
+-OPAL_CALL(opal_unregister_dump_region, OPAL_UNREGISTER_DUMP_REGION);
+-OPAL_CALL(opal_pci_set_phb_cxl_mode, OPAL_PCI_SET_PHB_CAPI_MODE);
+-OPAL_CALL(opal_tpo_write, OPAL_WRITE_TPO);
+-OPAL_CALL(opal_tpo_read, OPAL_READ_TPO);
+-OPAL_CALL(opal_ipmi_send, OPAL_IPMI_SEND);
+-OPAL_CALL(opal_ipmi_recv, OPAL_IPMI_RECV);
+-OPAL_CALL(opal_i2c_request, OPAL_I2C_REQUEST);
+-OPAL_CALL(opal_flash_read, OPAL_FLASH_READ);
+-OPAL_CALL(opal_flash_write, OPAL_FLASH_WRITE);
+-OPAL_CALL(opal_flash_erase, OPAL_FLASH_ERASE);
+-OPAL_CALL(opal_prd_msg, OPAL_PRD_MSG);
+-OPAL_CALL(opal_leds_get_ind, OPAL_LEDS_GET_INDICATOR);
+-OPAL_CALL(opal_leds_set_ind, OPAL_LEDS_SET_INDICATOR);
+-OPAL_CALL(opal_console_flush, OPAL_CONSOLE_FLUSH);
+-OPAL_CALL(opal_get_device_tree, OPAL_GET_DEVICE_TREE);
+-OPAL_CALL(opal_pci_get_presence_state, OPAL_PCI_GET_PRESENCE_STATE);
+-OPAL_CALL(opal_pci_get_power_state, OPAL_PCI_GET_POWER_STATE);
+-OPAL_CALL(opal_pci_set_power_state, OPAL_PCI_SET_POWER_STATE);
+-OPAL_CALL(opal_int_get_xirr, OPAL_INT_GET_XIRR);
+-OPAL_CALL(opal_int_set_cppr, OPAL_INT_SET_CPPR);
+-OPAL_CALL(opal_int_eoi, OPAL_INT_EOI);
+-OPAL_CALL(opal_int_set_mfrr, OPAL_INT_SET_MFRR);
+-OPAL_CALL(opal_pci_tce_kill, OPAL_PCI_TCE_KILL);
+-OPAL_CALL(opal_nmmu_set_ptcr, OPAL_NMMU_SET_PTCR);
+-OPAL_CALL(opal_xive_reset, OPAL_XIVE_RESET);
+-OPAL_CALL(opal_xive_get_irq_info, OPAL_XIVE_GET_IRQ_INFO);
+-OPAL_CALL(opal_xive_get_irq_config, OPAL_XIVE_GET_IRQ_CONFIG);
+-OPAL_CALL(opal_xive_set_irq_config, OPAL_XIVE_SET_IRQ_CONFIG);
+-OPAL_CALL(opal_xive_get_queue_info, OPAL_XIVE_GET_QUEUE_INFO);
+-OPAL_CALL(opal_xive_set_queue_info, OPAL_XIVE_SET_QUEUE_INFO);
+-OPAL_CALL(opal_xive_donate_page, OPAL_XIVE_DONATE_PAGE);
+-OPAL_CALL(opal_xive_alloc_vp_block, OPAL_XIVE_ALLOCATE_VP_BLOCK);
+-OPAL_CALL(opal_xive_free_vp_block, OPAL_XIVE_FREE_VP_BLOCK);
+-OPAL_CALL(opal_xive_allocate_irq, OPAL_XIVE_ALLOCATE_IRQ);
+-OPAL_CALL(opal_xive_free_irq, OPAL_XIVE_FREE_IRQ);
+-OPAL_CALL(opal_xive_get_vp_info, OPAL_XIVE_GET_VP_INFO);
+-OPAL_CALL(opal_xive_set_vp_info, OPAL_XIVE_SET_VP_INFO);
+-OPAL_CALL(opal_xive_sync, OPAL_XIVE_SYNC);
+-OPAL_CALL(opal_xive_dump, OPAL_XIVE_DUMP);
+-OPAL_CALL(opal_npu_init_context, OPAL_NPU_INIT_CONTEXT);
+-OPAL_CALL(opal_npu_destroy_context, OPAL_NPU_DESTROY_CONTEXT);
+-OPAL_CALL(opal_npu_map_lpar, OPAL_NPU_MAP_LPAR);
+-OPAL_CALL(opal_imc_counters_init, OPAL_IMC_COUNTERS_INIT);
+-OPAL_CALL(opal_imc_counters_start, OPAL_IMC_COUNTERS_START);
+-OPAL_CALL(opal_imc_counters_stop, OPAL_IMC_COUNTERS_STOP);
+-OPAL_CALL(opal_pci_set_p2p, OPAL_PCI_SET_P2P);
+-OPAL_CALL(opal_get_powercap, OPAL_GET_POWERCAP);
+-OPAL_CALL(opal_set_powercap, OPAL_SET_POWERCAP);
+-OPAL_CALL(opal_get_power_shift_ratio, OPAL_GET_POWER_SHIFT_RATIO);
+-OPAL_CALL(opal_set_power_shift_ratio, OPAL_SET_POWER_SHIFT_RATIO);
+-OPAL_CALL(opal_sensor_group_clear, OPAL_SENSOR_GROUP_CLEAR);
diff --git a/patches.suse/powerpc-powernv-npu-Remove-obsolete-comment-about-TC.patch b/patches.suse/powerpc-powernv-npu-Remove-obsolete-comment-about-TC.patch
new file mode 100644
index 0000000000..20cdc76df2
--- /dev/null
+++ b/patches.suse/powerpc-powernv-npu-Remove-obsolete-comment-about-TC.patch
@@ -0,0 +1,34 @@
+From 797eadd9c80ca3b3f913ccde29f8a6015f9974f9 Mon Sep 17 00:00:00 2001
+From: Alexey Kardashevskiy <aik@ozlabs.ru>
+Date: Mon, 14 Jan 2019 11:41:38 +1100
+Subject: [PATCH] powerpc/powernv/npu: Remove obsolete comment about
+ TCE_KILL_INVAL_ALL
+
+References: bsc#1065729
+Patch-mainline: v5.1-rc1
+Git-commit: 797eadd9c80ca3b3f913ccde29f8a6015f9974f9
+
+TCE_KILL_INVAL_ALL has moved long ago but the comment was forgotted so
+finish the move and remove the comment.
+
+Fixes: 0bbcdb437da0c4a "powerpc/powernv/npu: TCE Kill helpers cleanup"
+Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/platforms/powernv/npu-dma.c | 4 ----
+ 1 file changed, 4 deletions(-)
+
+--- a/arch/powerpc/platforms/powernv/npu-dma.c
++++ b/arch/powerpc/platforms/powernv/npu-dma.c
+@@ -46,10 +46,6 @@ static DEFINE_SPINLOCK(npu_context_lock)
+ */
+ #define ATSD_THRESHOLD (2*1024*1024)
+
+-/*
+- * Other types of TCE cache invalidation are not functional in the
+- * hardware.
+- */
+ static struct pci_dev *get_pci_dev(struct device_node *dn)
+ {
+ return PCI_DN(dn)->pcidev;
diff --git a/patches.suse/powerpc-pseries-Call-H_BLOCK_REMOVE-when-supported.patch b/patches.suse/powerpc-pseries-Call-H_BLOCK_REMOVE-when-supported.patch
new file mode 100644
index 0000000000..8be5656f5b
--- /dev/null
+++ b/patches.suse/powerpc-pseries-Call-H_BLOCK_REMOVE-when-supported.patch
@@ -0,0 +1,137 @@
+From 59545ebe331917afcb1ebbd6f3e5dc3ed51beb05 Mon Sep 17 00:00:00 2001
+From: Laurent Dufour <ldufour@linux.ibm.com>
+Date: Fri, 20 Sep 2019 15:05:23 +0200
+Subject: [PATCH] powerpc/pseries: Call H_BLOCK_REMOVE when supported
+
+References: bsc#1109158
+Patch-mainline: v5.4-rc1
+Git-commit: 59545ebe331917afcb1ebbd6f3e5dc3ed51beb05
+
+Depending on the hardware and the hypervisor, the hcall H_BLOCK_REMOVE
+may not be able to process all the page sizes for a segment base page
+size, as reported by the TLB Invalidate Characteristics.
+
+For each pair of base segment page size and actual page size, this
+characteristic tells us the size of the block the hcall supports.
+
+In the case, the hcall is not supporting a pair of base segment page
+size, actual page size, it is returning H_PARAM which leads to a panic
+like this:
+
+ kernel BUG at /home/srikar/work/linux.git/arch/powerpc/platforms/pseries/lpar.c:466!
+ Oops: Exception in kernel mode, sig: 5 [#1]
+ BE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries
+ Modules linked in:
+ CPU: 28 PID: 583 Comm: modprobe Not tainted 5.2.0-master #5
+ NIP: c0000000000be8dc LR: c0000000000be880 CTR: 0000000000000000
+ REGS: c0000007e77fb130 TRAP: 0700 Not tainted (5.2.0-master)
+ MSR: 8000000000029032 <SF,EE,ME,IR,DR,RI> CR: 42224824 XER: 20000000
+ CFAR: c0000000000be8fc IRQMASK: 0
+ GPR00: 0000000022224828 c0000007e77fb3c0 c000000001434d00 0000000000000005
+ GPR04: 9000000004fa8c00 0000000000000000 0000000000000003 0000000000000001
+ GPR08: c0000007e77fb450 0000000000000000 0000000000000001 ffffffffffffffff
+ GPR12: c0000007e77fb450 c00000000edfcb80 0000cd7d3ea30000 c0000000016022b0
+ GPR16: 00000000000000b0 0000cd7d3ea30000 0000000000000001 c080001f04f00105
+ GPR20: 0000000000000003 0000000000000004 c000000fbeb05f58 c000000001602200
+ GPR24: 0000000000000000 0000000000000004 8800000000000000 c000000000c5d148
+ GPR28: c000000000000000 8000000000000000 a000000000000000 c0000007e77fb580
+ NIP [c0000000000be8dc] .call_block_remove+0x12c/0x220
+ LR [c0000000000be880] .call_block_remove+0xd0/0x220
+ Call Trace:
+ 0xc000000fb8c00240 (unreliable)
+ .pSeries_lpar_flush_hash_range+0x578/0x670
+ .flush_hash_range+0x44/0x100
+ .__flush_tlb_pending+0x3c/0xc0
+ .zap_pte_range+0x7ec/0x830
+ .unmap_page_range+0x3f4/0x540
+ .unmap_vmas+0x94/0x120
+ .exit_mmap+0xac/0x1f0
+ .mmput+0x9c/0x1f0
+ .do_exit+0x388/0xd60
+ .do_group_exit+0x54/0x100
+ .__se_sys_exit_group+0x14/0x20
+ system_call+0x5c/0x70
+ Instruction dump:
+ 39400001 38a00000 4800003c 60000000 60420000 7fa9e800 38e00000 419e0014
+ 7d29d278 7d290074 7929d182 69270001 <0b070000> 7d495378 394a0001 7fa93040
+
+The call to H_BLOCK_REMOVE should only be made for the supported pair
+of base segment page size, actual page size and using the correct
+maximum block size.
+
+Due to the required complexity in do_block_remove() and
+call_block_remove(), and the fact that currently a block size of 8 is
+returned by the hypervisor, we are only supporting 8 size block to the
+H_BLOCK_REMOVE hcall.
+
+In order to identify this limitation easily in the code, a local
+define HBLKR_SUPPORTED_SIZE defining the currently supported block
+size, and a dedicated checking helper is_supported_hlbkr() are
+introduced.
+
+For regular pages and hugetlb, the assumption is made that the page
+size is equal to the base page size. For THP the page size is assumed
+to be 16M.
+
+Fixes: ba2dd8a26baa ("powerpc/pseries/mm: call H_BLOCK_REMOVE")
+Signed-off-by: Laurent Dufour <ldufour@linux.ibm.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/20190920130523.20441-3-ldufour@linux.ibm.com
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/platforms/pseries/lpar.c | 23 +++++++++++++++++++++--
+ 1 file changed, 21 insertions(+), 2 deletions(-)
+
+--- a/arch/powerpc/platforms/pseries/lpar.c
++++ b/arch/powerpc/platforms/pseries/lpar.c
+@@ -73,6 +73,13 @@ EXPORT_SYMBOL(plpar_hcall_norets);
+ */
+ static int hblkrm_size[MMU_PAGE_COUNT][MMU_PAGE_COUNT] __ro_after_init;
+
++/*
++ * Due to the involved complexity, and that the current hypervisor is only
++ * returning this value or 0, we are limiting the support of the H_BLOCK_REMOVE
++ * buffer size to 8 size block.
++ */
++#define HBLKRM_SUPPORTED_BLOCK_SIZE 8
++
+ void vpa_init(int cpu)
+ {
+ int hwcpu = get_hard_smp_processor_id(cpu);
+@@ -434,6 +441,17 @@ static void pSeries_lpar_hpte_invalidate
+ #define HBLKR_CTRL_ERRNOTFOUND 0x8800000000000000UL
+ #define HBLKR_CTRL_ERRBUSY 0xa000000000000000UL
+
++/*
++ * Returned true if we are supporting this block size for the specified segment
++ * base page size and actual page size.
++ *
++ * Currently, we only support 8 size block.
++ */
++static inline bool is_supported_hlbkrm(int bpsize, int psize)
++{
++ return (hblkrm_size[bpsize][psize] == HBLKRM_SUPPORTED_BLOCK_SIZE);
++}
++
+ /**
+ * H_BLOCK_REMOVE caller.
+ * @idx should point to the latest @param entry set with a PTEX.
+@@ -593,7 +611,8 @@ static inline void __pSeries_lpar_hugepa
+ if (lock_tlbie)
+ spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
+
+- if (firmware_has_feature(FW_FEATURE_BLOCK_REMOVE))
++ /* Assuming THP size is 16M */
++ if (is_supported_hlbkrm(psize, MMU_PAGE_16M))
+ hugepage_block_invalidate(slot, vpn, count, psize, ssize);
+ else
+ hugepage_bulk_invalidate(slot, vpn, count, psize, ssize);
+@@ -911,7 +930,7 @@ static void pSeries_lpar_flush_hash_rang
+ if (lock_tlbie)
+ spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
+
+- if (firmware_has_feature(FW_FEATURE_BLOCK_REMOVE)) {
++ if (is_supported_hlbkrm(batch->psize, batch->psize)) {
+ do_block_remove(number, batch, param);
+ goto out;
+ }
diff --git a/patches.suse/powerpc-pseries-Read-TLB-Block-Invalidate-Characteri.patch b/patches.suse/powerpc-pseries-Read-TLB-Block-Invalidate-Characteri.patch
new file mode 100644
index 0000000000..63c7152d18
--- /dev/null
+++ b/patches.suse/powerpc-pseries-Read-TLB-Block-Invalidate-Characteri.patch
@@ -0,0 +1,207 @@
+From 1211ee61b4a8e60d6dc77211cdcf01906915bfba Mon Sep 17 00:00:00 2001
+From: Laurent Dufour <ldufour@linux.ibm.com>
+Date: Fri, 20 Sep 2019 15:05:22 +0200
+Subject: [PATCH] powerpc/pseries: Read TLB Block Invalidate Characteristics
+
+References: bsc#1109158
+Patch-mainline: v5.4-rc1
+Git-commit: 1211ee61b4a8e60d6dc77211cdcf01906915bfba
+
+The PAPR document specifies the TLB Block Invalidate Characteristics
+which tells for each pair of segment base page size, actual page size,
+the size of the block the hcall H_BLOCK_REMOVE supports.
+
+These characteristics are loaded at boot time in a new table
+hblkr_size. The table is separate from the mmu_psize_def because this
+is specific to the pseries platform.
+
+A new init function, pseries_lpar_read_hblkrm_characteristics() is
+added to read the characteristics. It is called from
+pSeries_setup_arch().
+
+Fixes: ba2dd8a26baa ("powerpc/pseries/mm: call H_BLOCK_REMOVE")
+Signed-off-by: Laurent Dufour <ldufour@linux.ibm.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/20190920130523.20441-2-ldufour@linux.ibm.com
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/platforms/pseries/lpar.c | 140 +++++++++++++++++++++++
+ arch/powerpc/platforms/pseries/pseries.h | 1 +
+ arch/powerpc/platforms/pseries/setup.c | 1 +
+ 3 files changed, 142 insertions(+)
+
+--- a/arch/powerpc/platforms/pseries/lpar.c
++++ b/arch/powerpc/platforms/pseries/lpar.c
+@@ -64,6 +64,15 @@ EXPORT_SYMBOL(plpar_hcall);
+ EXPORT_SYMBOL(plpar_hcall9);
+ EXPORT_SYMBOL(plpar_hcall_norets);
+
++/*
++ * H_BLOCK_REMOVE supported block size for this page size in segment who's base
++ * page size is that page size.
++ *
++ * The first index is the segment base page size, the second one is the actual
++ * page size.
++ */
++static int hblkrm_size[MMU_PAGE_COUNT][MMU_PAGE_COUNT] __ro_after_init;
++
+ void vpa_init(int cpu)
+ {
+ int hwcpu = get_hard_smp_processor_id(cpu);
+@@ -753,6 +762,137 @@ static void do_block_remove(unsigned lon
+ }
+
+ /*
++ * TLB Block Invalidate Characteristics
++ *
++ * These characteristics define the size of the block the hcall H_BLOCK_REMOVE
++ * is able to process for each couple segment base page size, actual page size.
++ *
++ * The ibm,get-system-parameter properties is returning a buffer with the
++ * following layout:
++ *
++ * [ 2 bytes size of the RTAS buffer (excluding these 2 bytes) ]
++ * -----------------
++ * TLB Block Invalidate Specifiers:
++ * [ 1 byte LOG base 2 of the TLB invalidate block size being specified ]
++ * [ 1 byte Number of page sizes (N) that are supported for the specified
++ * TLB invalidate block size ]
++ * [ 1 byte Encoded segment base page size and actual page size
++ * MSB=0 means 4k segment base page size and actual page size
++ * MSB=1 the penc value in mmu_psize_def ]
++ * ...
++ * -----------------
++ * Next TLB Block Invalidate Specifiers...
++ * -----------------
++ * [ 0 ]
++ */
++static inline void set_hblkrm_bloc_size(int bpsize, int psize,
++ unsigned int block_size)
++{
++ if (block_size > hblkrm_size[bpsize][psize])
++ hblkrm_size[bpsize][psize] = block_size;
++}
++
++/*
++ * Decode the Encoded segment base page size and actual page size.
++ * PAPR specifies:
++ * - bit 7 is the L bit
++ * - bits 0-5 are the penc value
++ * If the L bit is 0, this means 4K segment base page size and actual page size
++ * otherwise the penc value should be read.
++ */
++#define HBLKRM_L_MASK 0x80
++#define HBLKRM_PENC_MASK 0x3f
++static inline void __init check_lp_set_hblkrm(unsigned int lp,
++ unsigned int block_size)
++{
++ unsigned int bpsize, psize;
++
++ /* First, check the L bit, if not set, this means 4K */
++ if ((lp & HBLKRM_L_MASK) == 0) {
++ set_hblkrm_bloc_size(MMU_PAGE_4K, MMU_PAGE_4K, block_size);
++ return;
++ }
++
++ lp &= HBLKRM_PENC_MASK;
++ for (bpsize = 0; bpsize < MMU_PAGE_COUNT; bpsize++) {
++ struct mmu_psize_def *def = &mmu_psize_defs[bpsize];
++
++ for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
++ if (def->penc[psize] == lp) {
++ set_hblkrm_bloc_size(bpsize, psize, block_size);
++ return;
++ }
++ }
++ }
++}
++
++#define SPLPAR_TLB_BIC_TOKEN 50
++
++/*
++ * The size of the TLB Block Invalidate Characteristics is variable. But at the
++ * maximum it will be the number of possible page sizes *2 + 10 bytes.
++ * Currently MMU_PAGE_COUNT is 16, which means 42 bytes. Use a cache line size
++ * (128 bytes) for the buffer to get plenty of space.
++ */
++#define SPLPAR_TLB_BIC_MAXLENGTH 128
++
++void __init pseries_lpar_read_hblkrm_characteristics(void)
++{
++ unsigned char local_buffer[SPLPAR_TLB_BIC_MAXLENGTH];
++ int call_status, len, idx, bpsize;
++
++ spin_lock(&rtas_data_buf_lock);
++ memset(rtas_data_buf, 0, RTAS_DATA_BUF_SIZE);
++ call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1,
++ NULL,
++ SPLPAR_TLB_BIC_TOKEN,
++ __pa(rtas_data_buf),
++ RTAS_DATA_BUF_SIZE);
++ memcpy(local_buffer, rtas_data_buf, SPLPAR_TLB_BIC_MAXLENGTH);
++ local_buffer[SPLPAR_TLB_BIC_MAXLENGTH - 1] = '\0';
++ spin_unlock(&rtas_data_buf_lock);
++
++ if (call_status != 0) {
++ pr_warn("%s %s Error calling get-system-parameter (0x%x)\n",
++ __FILE__, __func__, call_status);
++ return;
++ }
++
++ /*
++ * The first two (2) bytes of the data in the buffer are the length of
++ * the returned data, not counting these first two (2) bytes.
++ */
++ len = be16_to_cpu(*((u16 *)local_buffer)) + 2;
++ if (len > SPLPAR_TLB_BIC_MAXLENGTH) {
++ pr_warn("%s too large returned buffer %d", __func__, len);
++ return;
++ }
++
++ idx = 2;
++ while (idx < len) {
++ u8 block_shift = local_buffer[idx++];
++ u32 block_size;
++ unsigned int npsize;
++
++ if (!block_shift)
++ break;
++
++ block_size = 1 << block_shift;
++
++ for (npsize = local_buffer[idx++];
++ npsize > 0 && idx < len; npsize--)
++ check_lp_set_hblkrm((unsigned int) local_buffer[idx++],
++ block_size);
++ }
++
++ for (bpsize = 0; bpsize < MMU_PAGE_COUNT; bpsize++)
++ for (idx = 0; idx < MMU_PAGE_COUNT; idx++)
++ if (hblkrm_size[bpsize][idx])
++ pr_info("H_BLOCK_REMOVE supports base psize:%d psize:%d block size:%d",
++ bpsize, idx, hblkrm_size[bpsize][idx]);
++}
++
++/*
+ * Take a spinlock around flushes to avoid bouncing the hypervisor tlbie
+ * lock.
+ */
+--- a/arch/powerpc/platforms/pseries/pseries.h
++++ b/arch/powerpc/platforms/pseries/pseries.h
+@@ -103,5 +103,6 @@ static inline unsigned long cmo_get_page
+ int dlpar_workqueue_init(void);
+
+ void pseries_setup_rfi_flush(void);
++void pseries_lpar_read_hblkrm_characteristics(void);
+
+ #endif /* _PSERIES_PSERIES_H */
+--- a/arch/powerpc/platforms/pseries/setup.c
++++ b/arch/powerpc/platforms/pseries/setup.c
+@@ -759,6 +759,7 @@ static void __init pSeries_setup_arch(vo
+
+ pseries_setup_rfi_flush();
+ setup_stf_barrier();
++ pseries_lpar_read_hblkrm_characteristics();
+
+ /* By default, only probe PCI (can be overridden by rtas_pci) */
+ pci_add_flags(PCI_PROBE_ONLY);
diff --git a/patches.suse/powerpc-pseries-memory-hotplug-Fix-return-value-type.patch b/patches.suse/powerpc-pseries-memory-hotplug-Fix-return-value-type.patch
new file mode 100644
index 0000000000..cbdad2434d
--- /dev/null
+++ b/patches.suse/powerpc-pseries-memory-hotplug-Fix-return-value-type.patch
@@ -0,0 +1,186 @@
+From b45e9d761ba2d60044b610297e3ef9f947ac157f Mon Sep 17 00:00:00 2001
+From: YueHaibing <yuehaibing@huawei.com>
+Date: Tue, 9 Oct 2018 21:59:13 +0800
+Subject: [PATCH] powerpc/pseries/memory-hotplug: Fix return value type of
+ find_aa_index
+
+References: bsc#1065729
+Patch-mainline: v4.20-rc1
+Git-commit: b45e9d761ba2d60044b610297e3ef9f947ac157f
+
+The variable 'aa_index' is defined as an unsigned value in
+update_lmb_associativity_index(), but find_aa_index() may return -1
+when dlpar_clone_property() fails. So change find_aa_index() to return
+a bool, which indicates whether 'aa_index' was found or not.
+
+[in 4.12 there is additional indirection so need to pass aa_index twice]
+
+Fixes: c05a5a40969e ("powerpc/pseries: Dynamic add entires to associativity lookup array")
+Signed-off-by: YueHaibing <yuehaibing@huawei.com>
+Reviewed-by: Nathan Fontenot nfont@linux.vnet.ibm.com>
+[mpe: Tweak changelog, rename is_found to just found]
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ .../platforms/pseries/hotplug-memory.c | 61 +++++++++----------
+ 1 file changed, 28 insertions(+), 33 deletions(-)
+
+--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
++++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
+@@ -101,11 +101,12 @@ static struct property *dlpar_clone_prop
+ return new_prop;
+ }
+
+-static u32 find_aa_index(struct device_node *dr_node,
+- struct property *ala_prop, const u32 *lmb_assoc)
++static bool find_aa_index(struct device_node *dr_node,
++ struct property *ala_prop,
++ const u32 *lmb_assoc, u32 *aa_index)
+ {
+- u32 *assoc_arrays;
+- u32 aa_index;
++ u32 *assoc_arrays, new_prop_size;
++ struct property *new_prop;
+ int aa_arrays, aa_array_entries, aa_array_sz;
+ int i, index;
+
+@@ -121,75 +122,68 @@ static u32 find_aa_index(struct device_n
+ aa_array_entries = be32_to_cpu(assoc_arrays[1]);
+ aa_array_sz = aa_array_entries * sizeof(u32);
+
+- aa_index = -1;
+ for (i = 0; i < aa_arrays; i++) {
+ index = (i * aa_array_entries) + 2;
+
+ if (memcmp(&assoc_arrays[index], &lmb_assoc[1], aa_array_sz))
+ continue;
+
+- aa_index = i;
+- break;
++ *aa_index = i;
++ return true;
+ }
+
+- if (aa_index == -1) {
+- struct property *new_prop;
+- u32 new_prop_size;
+-
+- new_prop_size = ala_prop->length + aa_array_sz;
+- new_prop = dlpar_clone_property(ala_prop, new_prop_size);
+- if (!new_prop)
+- return -1;
+-
+- assoc_arrays = new_prop->value;
+-
+- /* increment the number of entries in the lookup array */
+- assoc_arrays[0] = cpu_to_be32(aa_arrays + 1);
+-
+- /* copy the new associativity into the lookup array */
+- index = aa_arrays * aa_array_entries + 2;
+- memcpy(&assoc_arrays[index], &lmb_assoc[1], aa_array_sz);
+-
+- of_update_property(dr_node, new_prop);
+-
+- /*
+- * The associativity lookup array index for this lmb is
+- * number of entries - 1 since we added its associativity
+- * to the end of the lookup array.
+- */
+- aa_index = be32_to_cpu(assoc_arrays[0]) - 1;
+- }
++ new_prop_size = ala_prop->length + aa_array_sz;
++ new_prop = dlpar_clone_property(ala_prop, new_prop_size);
++ if (!new_prop)
++ return false;
++
++ assoc_arrays = new_prop->value;
++
++ /* increment the number of entries in the lookup array */
++ assoc_arrays[0] = cpu_to_be32(aa_arrays + 1);
+
+- return aa_index;
++ /* copy the new associativity into the lookup array */
++ index = aa_arrays * aa_array_entries + 2;
++ memcpy(&assoc_arrays[index], &lmb_assoc[1], aa_array_sz);
++
++ of_update_property(dr_node, new_prop);
++
++ /*
++ * The associativity lookup array index for this lmb is
++ * number of entries - 1 since we added its associativity
++ * to the end of the lookup array.
++ */
++ *aa_index = be32_to_cpu(assoc_arrays[0]) - 1;
++ return true;
+ }
+
+-static u32 lookup_lmb_associativity_index(struct drmem_lmb *lmb)
++static u32 lookup_lmb_associativity_index(struct drmem_lmb *lmb, u32 *aa_index)
+ {
+ struct device_node *parent, *lmb_node, *dr_node;
+ struct property *ala_prop;
+ const u32 *lmb_assoc;
+- u32 aa_index;
++ bool found;
+
+ parent = of_find_node_by_path("/");
+ if (!parent)
+- return -ENODEV;
++ return false;
+
+ lmb_node = dlpar_configure_connector(cpu_to_be32(lmb->drc_index),
+ parent);
+ of_node_put(parent);
+ if (!lmb_node)
+- return -EINVAL;
++ return false;
+
+ lmb_assoc = of_get_property(lmb_node, "ibm,associativity", NULL);
+ if (!lmb_assoc) {
+ dlpar_free_cc_nodes(lmb_node);
+- return -ENODEV;
++ return false;
+ }
+
+ dr_node = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
+ if (!dr_node) {
+ dlpar_free_cc_nodes(lmb_node);
+- return -ENODEV;
++ return false;
+ }
+
+ ala_prop = of_find_property(dr_node, "ibm,associativity-lookup-arrays",
+@@ -197,26 +191,27 @@ static u32 lookup_lmb_associativity_inde
+ if (!ala_prop) {
+ of_node_put(dr_node);
+ dlpar_free_cc_nodes(lmb_node);
+- return -ENODEV;
++ return false;
+ }
+
+- aa_index = find_aa_index(dr_node, ala_prop, lmb_assoc);
++ found = find_aa_index(dr_node, ala_prop, lmb_assoc, aa_index);
+
+ dlpar_free_cc_nodes(lmb_node);
+- return aa_index;
++ return found;
+ }
+
+ static int dlpar_add_device_tree_lmb(struct drmem_lmb *lmb)
+ {
+ int rc, aa_index;
++ bool found;
+
+ lmb->flags |= DRCONF_MEM_ASSIGNED;
+
+- aa_index = lookup_lmb_associativity_index(lmb);
+- if (aa_index < 0) {
++ found = lookup_lmb_associativity_index(lmb, &aa_index);
++ if (!found) {
+ pr_err("Couldn't find associativity index for drc index %x\n",
+ lmb->drc_index);
+- return aa_index;
++ return -ENODEV;
+ }
+
+ lmb->aa_index = aa_index;
diff --git a/patches.suse/powerpc-xive-Fix-bogus-error-code-returned-by-OPAL.patch b/patches.suse/powerpc-xive-Fix-bogus-error-code-returned-by-OPAL.patch
new file mode 100644
index 0000000000..2cd9631a4f
--- /dev/null
+++ b/patches.suse/powerpc-xive-Fix-bogus-error-code-returned-by-OPAL.patch
@@ -0,0 +1,92 @@
+From 6ccb4ac2bf8a35c694ead92f8ac5530a16e8f2c8 Mon Sep 17 00:00:00 2001
+From: Greg Kurz <groug@kaod.org>
+Date: Wed, 11 Sep 2019 17:52:18 +0200
+Subject: [PATCH] powerpc/xive: Fix bogus error code returned by OPAL
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+References: bsc#1065729
+Patch-mainline: v5.4-rc1
+Git-commit: 6ccb4ac2bf8a35c694ead92f8ac5530a16e8f2c8
+
+There's a bug in skiboot that causes the OPAL_XIVE_ALLOCATE_IRQ call
+to return the 32-bit value 0xffffffff when OPAL has run out of IRQs.
+Unfortunatelty, OPAL return values are signed 64-bit entities and
+errors are supposed to be negative. If that happens, the linux code
+confusingly treats 0xffffffff as a valid IRQ number and panics at some
+point.
+
+A fix was recently merged in skiboot:
+
+e97391ae2bb5 ("xive: fix return value of opal_xive_allocate_irq()")
+
+but we need a workaround anyway to support older skiboots already
+in the field.
+
+Internally convert 0xffffffff to OPAL_RESOURCE which is the usual error
+returned upon resource exhaustion.
+
+Cc: stable@vger.kernel.org # v4.12+
+Signed-off-by: Greg Kurz <groug@kaod.org>
+Reviewed-by: Cédric Le Goater <clg@kaod.org>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/156821713818.1985334.14123187368108582810.stgit@bahia.lan
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/include/asm/opal.h | 2 +-
+ arch/powerpc/platforms/powernv/opal-call.c | 2 +-
+ arch/powerpc/sysdev/xive/native.c | 11 +++++++++++
+ 3 files changed, 13 insertions(+), 2 deletions(-)
+
+diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
+index 57bd029c715e..d5a0807d21db 100644
+--- a/arch/powerpc/include/asm/opal.h
++++ b/arch/powerpc/include/asm/opal.h
+@@ -272,7 +272,7 @@ int64_t opal_xive_get_vp_info(uint64_t vp,
+ int64_t opal_xive_set_vp_info(uint64_t vp,
+ uint64_t flags,
+ uint64_t report_cl_pair);
+-int64_t opal_xive_allocate_irq(uint32_t chip_id);
++int64_t opal_xive_allocate_irq_raw(uint32_t chip_id);
+ int64_t opal_xive_free_irq(uint32_t girq);
+ int64_t opal_xive_sync(uint32_t type, uint32_t id);
+ int64_t opal_xive_dump(uint32_t type, uint32_t id);
+diff --git a/arch/powerpc/platforms/powernv/opal-call.c b/arch/powerpc/platforms/powernv/opal-call.c
+index 29ca523c1c79..dccdc9df5213 100644
+--- a/arch/powerpc/platforms/powernv/opal-call.c
++++ b/arch/powerpc/platforms/powernv/opal-call.c
+@@ -257,7 +257,7 @@ OPAL_CALL(opal_xive_set_queue_info, OPAL_XIVE_SET_QUEUE_INFO);
+ OPAL_CALL(opal_xive_donate_page, OPAL_XIVE_DONATE_PAGE);
+ OPAL_CALL(opal_xive_alloc_vp_block, OPAL_XIVE_ALLOCATE_VP_BLOCK);
+ OPAL_CALL(opal_xive_free_vp_block, OPAL_XIVE_FREE_VP_BLOCK);
+-OPAL_CALL(opal_xive_allocate_irq, OPAL_XIVE_ALLOCATE_IRQ);
++OPAL_CALL(opal_xive_allocate_irq_raw, OPAL_XIVE_ALLOCATE_IRQ);
+ OPAL_CALL(opal_xive_free_irq, OPAL_XIVE_FREE_IRQ);
+ OPAL_CALL(opal_xive_get_vp_info, OPAL_XIVE_GET_VP_INFO);
+ OPAL_CALL(opal_xive_set_vp_info, OPAL_XIVE_SET_VP_INFO);
+diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c
+index 4b61e44f0171..e9481468ebd8 100644
+--- a/arch/powerpc/sysdev/xive/native.c
++++ b/arch/powerpc/sysdev/xive/native.c
+@@ -245,6 +245,17 @@ static bool xive_native_match(struct device_node *node)
+ return of_device_is_compatible(node, "ibm,opal-xive-vc");
+ }
+
++static s64 opal_xive_allocate_irq(u32 chip_id)
++{
++ s64 irq = opal_xive_allocate_irq_raw(chip_id);
++
++ /*
++ * Old versions of skiboot can incorrectly return 0xffffffff to
++ * indicate no space, fix it up here.
++ */
++ return irq == 0xffffffff ? OPAL_RESOURCE : irq;
++}
++
+ #ifdef CONFIG_SMP
+ static int xive_native_get_ipi(unsigned int cpu, struct xive_cpu *xc)
+ {
+--
+2.23.0
+
diff --git a/patches.suse/ppp-Fix-memory-leak-in-ppp_write.patch b/patches.suse/ppp-Fix-memory-leak-in-ppp_write.patch
new file mode 100644
index 0000000000..e76351037a
--- /dev/null
+++ b/patches.suse/ppp-Fix-memory-leak-in-ppp_write.patch
@@ -0,0 +1,59 @@
+From: Takeshi Misawa <jeliantsurux@gmail.com>
+Date: Sun, 22 Sep 2019 16:45:31 +0900
+Subject: ppp: Fix memory leak in ppp_write
+Git-commit: 4c247de564f1ff614d11b3bb5313fb70d7b9598b
+Patch-mainline: 5.4-rc1
+References: git-fixes
+
+When ppp is closing, __ppp_xmit_process() failed to enqueue skb
+and skb allocated in ppp_write() is leaked.
+
+syzbot reported :
+BUG: memory leak
+unreferenced object 0xffff88812a17bc00 (size 224):
+ comm "syz-executor673", pid 6952, jiffies 4294942888 (age 13.040s)
+ hex dump (first 32 bytes):
+ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
+ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
+ backtrace:
+ [<00000000d110fff9>] kmemleak_alloc_recursive include/linux/kmemleak.h:43 [inline]
+ [<00000000d110fff9>] slab_post_alloc_hook mm/slab.h:522 [inline]
+ [<00000000d110fff9>] slab_alloc_node mm/slab.c:3262 [inline]
+ [<00000000d110fff9>] kmem_cache_alloc_node+0x163/0x2f0 mm/slab.c:3574
+ [<000000002d616113>] __alloc_skb+0x6e/0x210 net/core/skbuff.c:197
+ [<000000000167fc45>] alloc_skb include/linux/skbuff.h:1055 [inline]
+ [<000000000167fc45>] ppp_write+0x48/0x120 drivers/net/ppp/ppp_generic.c:502
+ [<000000009ab42c0b>] __vfs_write+0x43/0xa0 fs/read_write.c:494
+ [<00000000086b2e22>] vfs_write fs/read_write.c:558 [inline]
+ [<00000000086b2e22>] vfs_write+0xee/0x210 fs/read_write.c:542
+ [<00000000a2b70ef9>] ksys_write+0x7c/0x130 fs/read_write.c:611
+ [<00000000ce5e0fdd>] __do_sys_write fs/read_write.c:623 [inline]
+ [<00000000ce5e0fdd>] __se_sys_write fs/read_write.c:620 [inline]
+ [<00000000ce5e0fdd>] __x64_sys_write+0x1e/0x30 fs/read_write.c:620
+ [<00000000d9d7b370>] do_syscall_64+0x76/0x1a0 arch/x86/entry/common.c:296
+ [<0000000006e6d506>] entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+Fix this by freeing skb, if ppp is closing.
+
+Fixes: 6d066734e9f0 ("ppp: avoid loop in xmit recursion detection code")
+Reported-and-tested-by: syzbot+d9c8bf24e56416d7ce2c@syzkaller.appspotmail.com
+Signed-off-by: Takeshi Misawa <jeliantsurux@gmail.com>
+Reviewed-by: Guillaume Nault <gnault@redhat.com>
+Tested-by: Guillaume Nault <gnault@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Jiri Slaby <jslaby@suse.cz>
+---
+ drivers/net/ppp/ppp_generic.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/net/ppp/ppp_generic.c
++++ b/drivers/net/ppp/ppp_generic.c
+@@ -1432,6 +1432,8 @@ static void __ppp_xmit_process(struct pp
+ netif_wake_queue(ppp->dev);
+ else
+ netif_stop_queue(ppp->dev);
++ } else {
++ kfree_skb(skb);
+ }
+ ppp_xmit_unlock(ppp);
+ }
diff --git a/patches.suse/scsi-scsi_dh_rdac-zero-cdb-in-send_mode_select.patch b/patches.suse/scsi-scsi_dh_rdac-zero-cdb-in-send_mode_select.patch
new file mode 100644
index 0000000000..397b8d6a78
--- /dev/null
+++ b/patches.suse/scsi-scsi_dh_rdac-zero-cdb-in-send_mode_select.patch
@@ -0,0 +1,44 @@
+From: Martin Wilck <Martin.Wilck@suse.com>
+Date: Wed, 4 Sep 2019 15:52:29 +0000
+Subject: scsi: scsi_dh_rdac: zero cdb in send_mode_select()
+Patch-mainline: v5.4-rc1
+Git-commit: 57adf5d4cfd3198aa480e7c94a101fc8c4e6109d
+References: bsc#1149313
+
+cdb in send_mode_select() is not zeroed and is only partially filled in
+rdac_failover_get(), which leads to some random data getting to the
+device. Users have reported storage responding to such commands with
+INVALID FIELD IN CDB. Code before commit 327825574132 was not affected, as
+it called blk_rq_set_block_pc().
+
+Fix this by zeroing out the cdb first.
+
+Identified & fix proposed by HPE.
+
+Fixes: 327825574132 ("scsi_dh_rdac: switch to scsi_execute_req_flags()")
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20190904155205.1666-1-martin.wilck@suse.com
+Signed-off-by: Martin Wilck <mwilck@suse.com>
+Acked-by: Ales Novak <alnovak@suse.cz>
+Reviewed-by: Shane Seymour <shane.seymour@hpe.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+---
+ drivers/scsi/device_handler/scsi_dh_rdac.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/drivers/scsi/device_handler/scsi_dh_rdac.c b/drivers/scsi/device_handler/scsi_dh_rdac.c
+index 65f1fe3..5efc959 100644
+--- a/drivers/scsi/device_handler/scsi_dh_rdac.c
++++ b/drivers/scsi/device_handler/scsi_dh_rdac.c
+@@ -546,6 +546,8 @@ static void send_mode_select(struct work_struct *work)
+ spin_unlock(&ctlr->ms_lock);
+
+ retry:
++ memset(cdb, 0, sizeof(cdb));
++
+ data_size = rdac_failover_get(ctlr, &list, cdb);
+
+ RDAC_LOG(RDAC_LOG_FAILOVER, sdev, "array %s, ctlr %d, "
+--
+2.23.0
+
diff --git a/series.conf b/series.conf
index f83aed14e3..5578a00731 100644
--- a/series.conf
+++ b/series.conf
@@ -7882,7 +7882,6 @@
patches.suse/0002-mmc-dw_mmc-Fix-the-DTO-timeout-calculation.patch
patches.suse/powerpc-perf-Fix-IMC-allocation-routine.patch
patches.suse/powerpc-Fix-check-for-copy-paste-instructions-in-ali.patch
- patches.suse/powerpc-64s-radix-Fix-preempt-imbalance-in-TLB-flush.patch
patches.suse/powerpc-perf-Fix-core-imc-hotplug-callback-failure.patch
patches.suse/scsi-qla2xxx-Fix-oops-in-qla2x00_probe_one-error-pat.patch
patches.suse/KVM-VMX-Do-not-fully-reset-PI-descriptor-on-vCPU-res.patch
@@ -10155,8 +10154,10 @@
patches.suse/powerpc-powernv-Enable-TM-without-suspend-if-possibl.patch
patches.suse/powerpc-tm-P9-disable-transactionally-suspended-sigc.patch
patches.suse/powerpc-xmon-Check-before-calling-xive-functions.patch
+ patches.suse/powerpc-mm-radix-Drop-unneeded-NULL-check.patch
patches.suse/powerpc-perf-hv-24x7-Fix-incorrect-comparison-in-mem.patch
patches.suse/KVM-PPC-Book3S-HV-Handle-host-system-reset-in-guest-.patch
+ patches.suse/powerpc-64s-radix-Improve-preempt-handling-in-TLB-co.patch
patches.suse/powerpc-64s-radix-Fix-process-table-entry-cache-inva.patch
patches.suse/powerpc-pseries-vio-Dispose-of-virq-mapping-on-vdevice-unregister.patch
patches.suse/powerpc-ipic-Fix-status-get-and-status-clear.patch
@@ -10176,6 +10177,10 @@
patches.suse/powerpc-powernv-cpufreq-Fix-the-frequency-read-by-pr.patch
patches.suse/powerpc-powernv-ioda-Remove-explicit-max-window-size.patch
patches.suse/powerpc-64s-radix-Optimize-TLB-range-flush-barriers.patch
+ patches.suse/powerpc-64s-radix-Implement-_tlbie-l-_va_range-flush.patch
+ patches.suse/powerpc-64s-radix-Optimize-flush_tlb_range.patch
+ patches.suse/powerpc-64s-radix-Introduce-local-single-page-ceilin.patch
+ patches.suse/powerpc-64s-radix-Improve-TLB-flushing-for-page-tabl.patch
patches.suse/powerpc-mm-radix-Fix-crashes-on-Power9-DD1-with-radix.patch
patches.suse/powerpc-kprobes-Disable-preemption-before-invoking-p.patch
patches.suse/powerpc-powernv-idle-Round-up-latency-and-residency-.patch
@@ -10353,6 +10358,7 @@
patches.suse/NFSv4.1-Fix-up-replays-of-interrupted-requests.patch
patches.suse/NFS-Avoid-RCU-usage-in-tracepoints.patch
patches.suse/sunrpc-Fix-rpc_task_begin-trace-point.patch
+ patches.suse/NFSv4-Fix-OPEN-CLOSE-race.patch
patches.suse/NFS-Fix-a-typo-in-nfs_rename.patch
patches.suse/0002-nfs-Fix-ugly-referral-attributes.patch
patches.suse/NFS-Fix-typo-in-nomigration-mount-option.patch
@@ -15720,6 +15726,8 @@
patches.suse/4.4.134-183-powerpc-mpic-Check-if-cpu_possible-in-mpic_ph.patch
patches.suse/powerpc-setup-Add-cpu_to_phys_id-array.patch
patches.suse/powerpc-kvm-Fix-guest-boot-failure-on-Power9-since-D.patch
+ patches.suse/powerpc-mm-Simplify-page_is_ram-by-using-memblock_is.patch
+ patches.suse/powerpc-mm-Use-memblock-API-for-PPC32-page_is_ram.patch
patches.suse/powerpc-kexec_file-Fix-error-code-when-trying-to-loa.patch
patches.suse/powerpc-eeh-Fix-race-with-driver-un-bind.patch
patches.suse/powerpc-64s-idle-avoid-sync-for-KVM-state-when-wakin.patch
@@ -17509,6 +17517,7 @@
patches.suse/powerpc-powernv-cpuidle-Init-all-present-cpus-for-de.patch
patches.suse/powerpc-livepatch-fix-build-error-with-kprobes-disabled.patch
patches.suse/cxl-Configure-PSL-to-not-use-APC-virtual-machines.patch
+ patches.suse/powerpc-mm-radix-implement-LPID-based-TLB-flushes-to.patch
patches.suse/mm-powerpc-x86-define-VM_PKEY_BITx-bits-if-CONFIG_AR.patch
patches.suse/mm-powerpc-x86-introduce-an-additional-vma-bit-for-p.patch
patches.suse/mm-pkeys-Remove-include-of-asm-mmu_context.h-from-pk.patch
@@ -17929,6 +17938,7 @@
patches.suse/nfsd-fix-potential-use-after-free-in-nfsd4_decode_ge.patch
patches.suse/0001-rculist-add-list_for_each_entry_from_rcu.patch
patches.suse/0003-NFS-Avoid-quadratic-search-when-freeing-delegations.patch
+ patches.suse/NFSv4-Only-pass-the-delegation-to-setattr-if-we-re-s.patch
patches.suse/0004-NFSv4-Fix-possible-1-byte-stack-overflow-in-nfs_idma.patch
patches.suse/pnfs-Don-t-release-the-sequence-slot-until-we-ve-pro.patch
patches.suse/xprtrdma-Return-ENOBUFS-when-no-pages-are-available.patch
@@ -18110,6 +18120,7 @@
patches.suse/0017-arm64-dma-mapping-clear-buffers-allocated-with-FORCE.patch
patches.suse/0001-arm64-kpti-Use-early_param-for-kpti-command-line-opt.patch
patches.suse/0020-arm64-mm-Ensure-writes-to-swapper-are-ordered-wrt-su.patch
+ patches.suse/powerpc-64s-radix-Fix-MADV_-FREE-DONTNEED-TLB-flush-.patch
patches.suse/powerpc-64s-Fix-DT-CPU-features-Power9-DD2.1-logic.patch
patches.suse/block-fix-timeout-changes-for-legacy-request-drivers.patch
patches.suse/0001-block-sed-opal-Fix-a-couple-off-by-one-bugs.patch
@@ -19865,6 +19876,7 @@
patches.suse/Revert-cdc-acm-implement-put_char-and-flush_chars.patch
patches.suse/0001-xen-manage-don-t-complain-about-an-empty-value-in-co.patch
patches.suse/NFSv4.1-fix-infinite-loop-on-I-O.patch
+ patches.suse/NFS-Don-t-open-code-clearing-of-delegation-state.patch
patches.suse/fs-cifs-suppress-a-string-overflow-warning.patch
patches.suse/fs-cifs-require-sha512.patch
patches.suse/cifs-prevent-integer-overflow-in-nxt_dir_entry.patch
@@ -20674,6 +20686,8 @@
patches.suse/cifs-update-internal-module-version-number-for-cifs-ko-to-2-14.patch
patches.suse/NFSv4.1-Fix-the-r-wsize-checking.patch
patches.suse/NFS-Fix-dentry-revalidation-on-NFSv4-lookup.patch
+ patches.suse/NFS-Refactor-nfs_lookup_revalidate.patch
+ patches.suse/NFSv4-Fix-lookup-revalidate-of-regular-files.patch
patches.suse/nfs-Fix-a-missed-page-unlock-after-pg_doio.patch
patches.suse/sunrpc-safely-reallow-resvport-min-max-inversion.patch
patches.suse/xprtrdma-Reset-credit-grant-properly-after-a-disconn.patch
@@ -20704,6 +20718,7 @@
patches.suse/powerpc-process-Fix-sparse-address-space-warnings.patch
patches.suse/powerpc-boot-Expose-Kconfig-symbols-to-wrapper.patch
patches.suse/powerpc-boot-Fix-opal-console-in-boot-wrapper.patch
+ patches.suse/powerpc-pseries-memory-hotplug-Fix-return-value-type.patch
patches.suse/powerpc-pseries-mobility-Extend-start-stop-topology-.patch
patches.suse/powerpc-Detect-the-presence-of-big-cores-via-ibm-thr.patch
patches.suse/powerpc-Use-cpu_smallcore_sibling_mask-at-SMT-level-.patch
@@ -22564,12 +22579,15 @@
patches.suse/btrfs-ensure-that-a-dup-or-raid1-block-group-has-exactly-two-stripes.patch
patches.suse/xfs-don-t-overflow-xattr-listent-buffer.patch
patches.suse/xfs-fix-reporting-supported-extra-file-attributes-fo.patch
+ patches.suse/powerpc-irq-drop-arch_early_irq_init.patch
patches.suse/powerpc-powernv-Remove-never-used-pnv_power9_force_s.patch
+ patches.suse/powerpc-powernv-npu-Remove-obsolete-comment-about-TC.patch
patches.suse/powerpc-pseries-Perform-full-re-add-of-CPU-for-topol.patch
patches.suse/powerpc-perf-add-mem-access-events-to-sysfs.patch
patches.suse/powerpc-64s-clear-on-stack-exception-marker-upon-exception-return.patch
patches.suse/powerpc-livepatch-relax-reliable-stack-tracer-checks-for-first-frame.patch
patches.suse/powerpc-livepatch-small-cleanups-in-save_stack_trace_tsk_reliable.patch
+ patches.suse/powerpc-Drop-page_is_ram-and-walk_system_ram_range.patch
patches.suse/powerpc-ptrace-Simplify-vr_get-set-to-avoid-GCC-warn.patch
patches.suse/powerpc-pseries-export-timebase-register-sample-in-l.patch
patches.suse/powerpc-Fix-32-bit-KVM-PR-lockup-and-host-crash-with.patch
@@ -22578,6 +22596,7 @@
patches.suse/powerpc-powernv-Don-t-reprogram-SLW-image-on-every-K.patch
patches.suse/powerpc-hugetlb-Handle-mmap_min_addr-correctly-in-ge.patch
patches.suse/powerpc-mm-hash-Handle-mmap_min_addr-correctly-in-ge.patch
+ patches.suse/powerpc-powernv-move-OPAL-call-wrapper-tracing-and-i.patch
patches.suse/powerpc-xmon-Fix-opcode-being-uninitialized-in-print.patch
patches.suse/powerpc-powernv-Make-opal-log-only-readable-by-root.patch
patches.suse/powerpc-powernv-ioda-Fix-locked_vm-counting-for-memo.patch
@@ -22830,6 +22849,7 @@
patches.suse/NFS-Fix-an-I-O-request-leakage-in-nfs_do_recoalesce.patch
patches.suse/NFS-Don-t-recoalesce-on-error-in-nfs_pageio_complete.patch
patches.suse/fs-nfs-Fix-nfs_parse_devname-to-not-modify-it-s-argu.patch
+ patches.suse/NFS-Remove-redundant-semicolon.patch
patches.suse/NFS-Fix-a-soft-lockup-in-the-delegation-recovery-cod.patch
patches.suse/NFS-pnfs-Bulk-destroy-of-layouts-needs-to-be-safe-w..patch
patches.suse/NFSv4.1-Reinitialise-sequence-results-before-retrans.patch
@@ -22948,6 +22968,7 @@
patches.suse/cifs-fix-incorrect-handling-of-smb2_set_sparse-return-in-smb3_sim.patch
patches.suse/SMB3-Allow-SMB3-FSCTL-queries-to-be-sent-to-server-from-tools.patch
patches.suse/CIFS-fix-POSIX-lock-leak-and-invalid-ptr-deref.patch
+ patches.suse/powerpc-powernv-Fix-compile-without-CONFIG_TRACEPOIN.patch
patches.suse/It-s-wrong-to-add-len-to-sector_nr-in-raid10-reshape.patch
patches.suse/md-Fix-failed-allocation-of-md_register_thread.patch
patches.suse/nvme-fc-reject-reconnect-if-io-queue-count-is-reduce.patch
@@ -23205,6 +23226,7 @@
patches.suse/0001-PCI-pciehp-Ignore-Link-State-Changes-after-powering-.patch
patches.suse/clk-x86-Add-system-specific-quirk-to-mark-clocks-as-.patch
patches.suse/platform-x86-pmc_atom-Drop-__initconst-on-dmi-table.patch
+ patches.suse/NFS-Forbid-setting-AF_INET6-to-struct-sockaddr_in-si.patch
patches.suse/virtio-blk-limit-number-of-hw-queues-by-nr_cpu_ids.patch
patches.suse/blk-mq-introduce-blk_mq_complete_request_sync.patch
patches.suse/nvme-cancel-request-synchronously.patch
@@ -23286,6 +23308,7 @@
patches.suse/ALSA-core-Fix-card-races-between-register-and-discon.patch
patches.suse/ALSA-hda-realtek-add-two-more-pin-configuration-sets.patch
patches.suse/scsi-core-set-result-when-the-command-cannot-be-dispatched
+ patches.suse/nfsd-Don-t-release-the-callback-slot-unless-it-was-a.patch
patches.suse/ipv4-set-the-tcp_min_rtt_wlen-range-from-0-to-one-da.patch
patches.suse/mlxsw-spectrum-Fix-autoneg-status-in-ethtool.patch
patches.suse/stmmac-pci-Adjust-IOT2000-matching.patch
@@ -23755,7 +23778,10 @@
patches.suse/dmaengine-axi-dmac-Don-t-check-the-number-of-frames-.patch
patches.suse/dmaengine-tegra210-dma-free-dma-controller-in-remove.patch
patches.suse/RDMA-rxe-Consider-skb-reserve-space-based-on-netdev-.patch
+ patches.suse/NFS-Don-t-interrupt-file-writeout-due-to-fatal-error.patch
patches.suse/NFS-make-nfs_match_client-killable.patch
+ patches.suse/PNFS-fallback-to-MDS-if-no-deviceid-found.patch
+ patches.suse/NFS4-Fix-v4.0-client-state-corruption-when-mount.patch
patches.suse/NFS-Fix-a-double-unlock-from-nfs_match-get_client.patch
patches.suse/i2c-piix4-add-hygon-dhyana-smbus-support.patch
patches.suse/rtc-don-t-reference-bogus-function-pointer-in-kdoc.patch
@@ -23865,6 +23891,7 @@
patches.suse/x86_64-allow-breakpoints-to-emulate-call-instructions.patch
patches.suse/ftrace-x86_64-emulate-call-function-while-updating-in-breakpoint-handler.patch
patches.suse/tracing-fix-partial-reading-of-trace-event-s-id-file.patch
+ patches.suse/SUNRPC-nfs-Fix-return-value-for-nfs4_callback_compou.patch
patches.suse/0001-xenbus-drop-useless-LIST_HEAD-in-xenbus_write_watch-.patch
patches.suse/power-supply-axp288_charger-Fix-unchecked-return-val.patch
patches.suse/power-supply-max14656-fix-potential-use-before-alloc.patch
@@ -23899,6 +23926,7 @@
patches.suse/kvm-x86-skip-efer-vs-guest-cpuid-checks-for-host-initiated-writes
patches.suse/KVM-polling-add-architecture-backend-to-disable-poll.patch
patches.suse/KVM-s390-provide-kvm_arch_no_poll-function.patch
+ patches.suse/KVM-PPC-Book3S-HV-Fix-lockdep-warning-when-entering-.patch
patches.suse/KVM-PPC-Book3S-HV-Avoid-lockdep-debugging-in-TCE-rea.patch
patches.suse/KVM-PPC-Book3S-HV-Handle-virtual-mode-in-XIVE-VCPU-p.patch
patches.suse/ALSA-hda-realtek-Fixup-headphone-noise-via-runtime-s.patch
@@ -24034,6 +24062,8 @@
patches.suse/powerpc-perf-Fix-MMCRA-corruption-by-bhrb_filter.patch
patches.suse/efi-x86-Add-missing-error-handling-to-old_memmap-1-1.patch
patches.suse/x86-cpu-amd-don-t-force-the-cpb-cap-when-running-under-a-hypervisor.patch
+ patches.suse/SUNRPC-fix-regression-in-umount-of-a-secure-mount.patch
+ patches.suse/NFSv4.1-Again-fix-a-race-where-CB_NOTIFY_LOCK-fails-.patch
patches.suse/fuse-fallocate-fix-return-with-locked-inode.patch
patches.suse/s390-qeth-fix-vlan-attribute-in-bridge_hostnotify-udev-event
patches.suse/Fix-memory-leak-in-sctp_process_init.patch
@@ -24093,7 +24123,6 @@
patches.suse/drm-i915-sdvo-Implement-proper-HDMI-audio-support-fo.patch
patches.suse/drm-i915-perf-fix-whitelist-on-Gen10.patch
patches.suse/cgroup-use-css_tryget-instead-of-css_tryget_online-in-task_get_css.patch
- patches.suse/module-fix-livepatch-ftrace-module-text-permissions-race.patch
patches.suse/USB-usb-storage-Add-new-ID-to-ums-realtek.patch
patches.suse/USB-Fix-chipmunk-like-voice-when-using-Logitech-C270.patch
patches.suse/0001-usb-dwc2-host-Fix-wMaxPacketSize-handling-fix-webcam.patch
@@ -24191,7 +24220,6 @@
patches.suse/cpu-speculation-warn-on-unsupported-mitigations-parameter.patch
patches.suse/cifs-fix-crash-querying-symlinks-stored-as-reparse-points.patch
patches.suse/Bluetooth-Fix-faulty-expression-for-minimum-encrypti.patch
- patches.suse/ftrace-x86-remove-possible-deadlock-between-register_kprobe-and-ftrace_run_update_code.patch
patches.suse/tracing-snapshot-resize-spare-buffer-if-size-changed.patch
patches.suse/ptrace-Fix-ptracer_cred-handling-for-PTRACE_TRACEME.patch
patches.suse/ALSA-hda-realtek-Add-quirks-for-several-Clevo-notebo.patch
@@ -24486,6 +24514,8 @@
patches.suse/powerpc-mm-Fix-node-look-up-with-numa-off-boot.patch
patches.suse/powerpc-mm-Consolidate-numa_enable-check-and-min_com.patch
patches.suse/powerpc-mm-Handle-page-table-allocation-failures.patch
+ patches.suse/powerpc-module64-Fix-comment-in-R_PPC64_ENTRY-handli.patch
+ patches.suse/powerpc-irq-Don-t-WARN-continuously-in-arch_local_ir.patch
patches.suse/mailbox-handle-failed-named-mailbox-channel-request.patch
patches.suse/platform-x86-asus-wmi-Only-Tell-EC-the-OS-will-handl.patch
patches.suse/platform-x86-pmc_atom-Add-CB4063-Beckhoff-Automation.patch
@@ -24541,6 +24571,7 @@
patches.suse/ceph-hold-i_ceph_lock-when-removing-caps-for-freeing-inode.patch
patches.suse/ceph-don-t-blindly-unregister-session-that-is-in-opening-state.patch
patches.suse/ceph-remove-request-from-waiting-list-before-unregister.patch
+ patches.suse/ceph-use-ceph_evict_inode-to-cleanup-inode-s-resource.patch
patches.suse/cifs-Use-kmemdup-in-SMB2_ioctl_init-.patch
patches.suse/fs-cifs-Drop-unlikely-before-IS_ERR-_OR_NULL-.patch
patches.suse/SMB3-Add-SMB3-1-1-GCM-to-negotiated-crypto-algorigthms.patch
@@ -24568,7 +24599,9 @@
patches.suse/cifs-fix-crash-in-cifs_dfs_do_automount.patch
patches.suse/cifs-fix-crash-in-smb2_compound_op-smb2_set_next_command-.patch
patches.suse/smb3-smbdirect-no-longer-experimental.patch
+ patches.suse/NFSv4-Handle-the-special-Linux-file-open-access-mode.patch
patches.suse/NFS-Cleanup-if-nfs_match_client-is-interrupted.patch
+ patches.suse/pnfs-flexfiles-Fix-PTR_ERR-dereferences-in-ff_layout.patch
patches.suse/dm-zoned-fix-zone-state-management-race.patch
patches.suse/net-neigh-fix-multiple-neigh-timer-scheduling.patch
patches.suse/0015-ISDN-hfcsusb-checking-idx-of-ep-configuration.patch
@@ -24661,6 +24694,7 @@
patches.suse/mm-migrate-Fix-reference-check-race-between-__find_get_block-and-migration.patch
patches.suse/coredump-split-pipe-command-whitespace-before-expand.patch
patches.suse/mm-migrate-c-initialize-pud_entry-in-migrate_vma.patch
+ patches.suse/eeprom-at24-make-spd-world-readable-again.patch
patches.suse/0001-x86-speculation-Prepare-entry-code-for-Spectre-v1-sw.patch
patches.suse/0002-x86-speculation-Enable-Spectre-v1-swapgs-mitigations.patch
patches.suse/x86-speculation-swapgs-exclude-ATOMs-from-speculating-through-SWAPGS.patch
@@ -24712,6 +24746,13 @@
patches.suse/SMB3-Fix-potential-memory-leak-when-processing-compound-chain.patch
patches.suse/smb3-send-CAP_DFS-capability-during-session-setup.patch
patches.suse/SMB3-Kernel-oops-mounting-a-encryptData-share-with-CONFIG_DEBUG_VIR.patch
+ patches.suse/NFSv4-Fix-delegation-state-recovery.patch
+ patches.suse/NFSv4.1-Fix-open-stateid-recovery.patch
+ patches.suse/NFSv4.1-Only-reap-expired-delegations.patch
+ patches.suse/NFSv4-Check-the-return-value-of-update_open_stateid.patch
+ patches.suse/NFSv4-Fix-a-potential-sleep-while-atomic-in-nfs4_do_.patch
+ patches.suse/NFSv4-Fix-an-Oops-in-nfs4_do_setattr.patch
+ patches.suse/NFS-Fix-regression-whereby-fscache-errors-are-appear.patch
patches.suse/crypto-ccp-Fix-oops-by-properly-managing-allocated-s.patch
patches.suse/crypto-ccp-Add-support-for-valid-authsize-values-les.patch
patches.suse/crypto-ccp-Ignore-tag-length-when-decrypting-GCM-cip.patch
@@ -24739,6 +24780,7 @@
patches.suse/usb-iowarrior-fix-deadlock-on-disconnect.patch
patches.suse/iio-adc-max9611-Fix-misuse-of-GENMASK-macro.patch
patches.suse/driver_core-Fix_use-after-free_and_double_free_on_glue.patch
+ patches.suse/nvmem-use-the-same-permissions-for-eeprom-as-for-nvmem.patch
patches.suse/dax-dax_layout_busy_page-should-not-unmap-cow-pages.patch
patches.suse/mm-hmm-fix-bad-subpage-pointer-in-try_to_unmap_one.patch
patches.suse/mm-memcontrol-c-fix-use-after-free-in-mem_cgroup_iter.patch
@@ -24823,6 +24865,10 @@
patches.suse/r8152-Set-memory-to-all-0xFFs-on-failed-reg-reads.patch
patches.suse/ALSA-line6-Fix-memory-leak-at-line6_init_pcm-error-p.patch
patches.suse/ALSA-seq-Fix-potential-concurrent-access-to-the-dele.patch
+ patches.suse/NFSv4-pnfs-Fix-a-page-lock-leak-in-nfs_pageio_resend.patch
+ patches.suse/NFS-Ensure-O_DIRECT-reports-an-error-if-the-bytes-re.patch
+ patches.suse/pNFS-flexfiles-Turn-off-soft-RPC-calls.patch
+ patches.suse/SUNRPC-Handle-connection-breakages-correctly-in-call.patch
patches.suse/0001-drm-i915-Don-t-deballoon-unused-ggtt-drm_mm_node-in-.patch
patches.suse/mmc-sdhci-of-at91-add-quirk-for-broken-HS200.patch
patches.suse/mmc-core-Fix-init-of-SD-cards-reporting-an-invalid-V.patch
@@ -24942,6 +24988,7 @@
patches.suse/powerpc-rtas-use-device-model-APIs-and-serialization.patch
patches.suse/powerpc-64s-support-nospectre_v2-cmdline-option.patch
patches.suse/powerpc-pseries-correctly-track-irq-state-in-default.patch
+ patches.suse/powerpc-xive-Fix-bogus-error-code-returned-by-OPAL.patch
patches.suse/powerpc-dump-kernel-log-before-carrying-out-fadump-o.patch
patches.suse/clk-sunxi-ng-v3s-add-the-missing-PLL_DDR1.patch
patches.suse/clk-sunxi-ng-v3s-add-missing-clock-slices-for-MMC2-m.patch
@@ -25045,6 +25092,7 @@
patches.suse/scsi-qla2xxx-Fix-stuck-login-session.patch
patches.suse/scsi-qla2xxx-Fix-stale-session.patch
patches.suse/scsi-qla2xxx-Update-driver-version-to-10.01.00.19-k.patch
+ patches.suse/scsi-scsi_dh_rdac-zero-cdb-in-send_mode_select.patch
patches.suse/mtd-spi-nor-Fix-Cadence-QSPI-RCU-Schedule-Stall.patch
patches.suse/ext4-set-error-return-correctly-when-ext4_htree_stor.patch
patches.suse/ext4-fix-warning-inside-ext4_convert_unwritten_exten.patch
@@ -25053,6 +25101,17 @@
patches.suse/power-reset-gpio-restart-Fix-typo-when-gpio-reset-is.patch
patches.suse/livepatch-nullify-obj-mod-in-klp_module_coming-s-error-path.patch
patches.suse/suse-hv-PCI-hv-Detect-and-fix-Hyper-V-PCI-domain-number-coll.patch
+ patches.suse/powerpc-pseries-Read-TLB-Block-Invalidate-Characteri.patch
+ patches.suse/powerpc-pseries-Call-H_BLOCK_REMOVE-when-supported.patch
+ patches.suse/powerpc-book3s64-mm-Don-t-do-tlbie-fixup-for-some-ha.patch
+ patches.suse/powerpc-book3s64-radix-Rename-CPU_FTR_P9_TLBIE_BUG-f.patch
+ patches.suse/powerpc-mm-Fixup-tlbie-vs-mtpidr-mtlpidr-ordering-is.patch
+ patches.suse/net-ibmvnic-unlock-rtnl_lock-in-reset-so-linkwatch_e.patch
+ patches.suse/net-ibmvnic-prevent-more-than-one-thread-from-runnin.patch
+ patches.suse/ppp-Fix-memory-leak-in-ppp_write.patch
+ patches.suse/0001-btrfs-relocation-fix-use-after-free-on-dead-relocati.patch
+ patches.suse/0001-btrfs-qgroup-Fix-the-wrong-target-io_tree-when-freei.patch
+ patches.suse/0002-btrfs-qgroup-Fix-reserved-data-space-leak-if-we-have.patch
# jejb/scsi for-next
patches.suse/scsi-qla2xxx-Fix-Nport-ID-display-value.patch
@@ -25851,6 +25910,8 @@
patches.kabi/kabi-fix-struct-ufs_reg-removal-of-unused-field
+ patches.kabi/NFSv4-Fix-OPEN-CLOSE-race.patch
+
########################################################
# You'd better have a good reason for adding a patch
# below here.