Home Home > GIT Browse > SLE12-SP5-AZURE
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Luis Henriques <lhenriques@suse.com> 2019-08-27 11:51:08 +0100
committer: Luis Henriques <lhenriques@suse.com> 2019-09-05 10:08:48 +0100
commit: b49713399f2c110401705908c8546ee5778df2c2 (patch)
tree: ead763d97cf3ceb4c43cfcc9b0dba920d5cfabca
parent: ba10d886a8e24156f043b74aa84624525bdf2d8c (diff)
libceph: fix PG split vs OSD (re)connect race (bsc#1148133).
-rw-r--r-- patches.suse/libceph-fix-pg-split-vs-osd-reconnect-race.patch | 71
-rw-r--r-- series.conf | 1
2 files changed, 72 insertions, 0 deletions
diff --git a/patches.suse/libceph-fix-pg-split-vs-osd-reconnect-race.patch b/patches.suse/libceph-fix-pg-split-vs-osd-reconnect-race.patch
new file mode 100644
index 0000000000..211e407664
--- /dev/null
+++ b/patches.suse/libceph-fix-pg-split-vs-osd-reconnect-race.patch
@@ -0,0 +1,71 @@
+From: Ilya Dryomov <idryomov@gmail.com>
+Date: Tue, 20 Aug 2019 16:40:33 +0200
+Subject: libceph: fix PG split vs OSD (re)connect race
+Git-commit: a561372405cf6bc6f14239b3a9e57bb39f2788b0
+Patch-mainline: v5.3-rc6
+References: bsc#1148133
+
+We can't rely on ->peer_features in calc_target() because it may be
+called both when the OSD session is established and open and when it's
+not. ->peer_features is not valid unless the OSD session is open. If
+this happens on a PG split (pg_num increase), that could mean we don't
+resend a request that should have been resent, hanging the client
+indefinitely.
+
+In userspace this was fixed by looking at require_osd_release and
+get_xinfo[osd].features fields of the osdmap. However these fields
+belong to the OSD section of the osdmap, which the kernel doesn't
+decode (only the client section is decoded).
+
+Instead, let's drop this feature check. It effectively checks for
+luminous, so only pre-luminous OSDs would be affected in that on a PG
+split the kernel might resend a request that should not have been
+resent. Duplicates can occur in other scenarios, so both sides should
+already be prepared for them: see dup/replay logic on the OSD side and
+retry_attempt check on the client side.
+
+Cc: stable@vger.kernel.org
+Fixes: 7de030d6b10a ("libceph: resend on PG splits if OSD has RESEND_ON_SPLIT")
+Link: https://tracker.ceph.com/issues/41162
+Reported-by: Jerry Lee <leisurelysw24@gmail.com>
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Tested-by: Jerry Lee <leisurelysw24@gmail.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Acked-by: Luis Henriques <lhenriques@suse.com>
+---
+ net/ceph/osd_client.c | 9 ++++-----
+ 1 file changed, 4 insertions(+), 5 deletions(-)
+
+diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
+index 0b2df09b2554..78ae6e8c953d 100644
+--- a/net/ceph/osd_client.c
++++ b/net/ceph/osd_client.c
+@@ -1496,7 +1496,7 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
+ struct ceph_osds up, acting;
+ bool force_resend = false;
+ bool unpaused = false;
+- bool legacy_change;
++ bool legacy_change = false;
+ bool split = false;
+ bool sort_bitwise = ceph_osdmap_flag(osdc, CEPH_OSDMAP_SORTBITWISE);
+ bool recovery_deletes = ceph_osdmap_flag(osdc,
+@@ -1584,15 +1584,14 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
+ t->osd = acting.primary;
+ }
+
+- if (unpaused || legacy_change || force_resend ||
+- (split && con && CEPH_HAVE_FEATURE(con->peer_features,
+- RESEND_ON_SPLIT)))
++ if (unpaused || legacy_change || force_resend || split)
+ ct_res = CALC_TARGET_NEED_RESEND;
+ else
+ ct_res = CALC_TARGET_NO_ACTION;
+
+ out:
+- dout("%s t %p -> ct_res %d osd %d\n", __func__, t, ct_res, t->osd);
++ dout("%s t %p -> %d%d%d%d ct_res %d osd%d\n", __func__, t, unpaused,
++ legacy_change, force_resend, split, ct_res, t->osd);
+ return ct_res;
+ }
+
+
diff --git a/series.conf b/series.conf
index f4407bc8b0..9564b6ef9a 100644
--- a/series.conf
+++ b/series.conf
@@ -24131,6 +24131,7 @@
patches.suse/ceph-fix-buffer-free-while-holding-i_ceph_lock-in-_ceph_build_xattrs_blob.patch
patches.suse/ceph-fix-buffer-free-while-holding-i_ceph_lock-in-fill_inode.patch
patches.suse/ceph-don-t-try-fill-file_lock-on-unsuccessful-getfilelock-reply.patch
+ patches.suse/libceph-fix-pg-split-vs-osd-reconnect-race.patch
patches.suse/vfs-fix-page-locking-deadlocks-when-deduping-files.patch
patches.suse/fs-xfs-Fix-return-code-of-xfs_break_leased_layouts.patch
patches.suse/Revert-dm-bufio-fix-deadlock-with-loop-device.patch