Home Home > GIT Browse > stable
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJeff Mahoney <jeffm@suse.com>2019-03-25 13:31:48 -0400
committerJeff Mahoney <jeffm@suse.com>2019-03-25 13:31:48 -0400
commit76ff396be28057270c6c5b1ea30196334e695252 (patch)
tree4084d29bb7a885b197c70008cb05731355bb8221
parentc52890c381f3f7f25d4fd47afab2feee60567d79 (diff)
btrfs: honor path->skip_locking in backref code (bsc#1099312).stable
-rw-r--r--patches.suse/btrfs-honor-path-skip_locking-in-backref-code.patch156
-rw-r--r--series.conf2
2 files changed, 158 insertions, 0 deletions
diff --git a/patches.suse/btrfs-honor-path-skip_locking-in-backref-code.patch b/patches.suse/btrfs-honor-path-skip_locking-in-backref-code.patch
new file mode 100644
index 0000000000..43599f2fc1
--- /dev/null
+++ b/patches.suse/btrfs-honor-path-skip_locking-in-backref-code.patch
@@ -0,0 +1,156 @@
+From: Josef Bacik <josef@toxicpanda.com>
+Subject: btrfs: honor path->skip_locking in backref code
+Git-commit: 38e3eebff643db725633657d1d87a3be019d1018
+Patch-mainline: v5.1-rc1
+References: bsc#1099312
+
+Qgroups will do the old roots lookup at delayed ref time, which could be
+while walking down the extent root while running a delayed ref. This
+should be fine, except we specifically lock eb's in the backref walking
+code irrespective of path->skip_locking, which deadlocks the system.
+Fix up the backref code to honor path->skip_locking, nobody will be
+modifying the commit_root when we're searching so it's completely safe
+to do.
+
+This happens since fb235dc06fac ("btrfs: qgroup: Move half of the qgroup
+accounting time out of commit trans"), kernel may lockup with quota
+enabled.
+
+There is one backref trace triggered by snapshot dropping along with
+write operation in the source subvolume. The example can be reliably
+reproduced:
+
+ btrfs-cleaner D 0 4062 2 0x80000000
+ Call Trace:
+ schedule+0x32/0x90
+ btrfs_tree_read_lock+0x93/0x130 [btrfs]
+ find_parent_nodes+0x29b/0x1170 [btrfs]
+ btrfs_find_all_roots_safe+0xa8/0x120 [btrfs]
+ btrfs_find_all_roots+0x57/0x70 [btrfs]
+ btrfs_qgroup_trace_extent_post+0x37/0x70 [btrfs]
+ btrfs_qgroup_trace_leaf_items+0x10b/0x140 [btrfs]
+ btrfs_qgroup_trace_subtree+0xc8/0xe0 [btrfs]
+ do_walk_down+0x541/0x5e3 [btrfs]
+ walk_down_tree+0xab/0xe7 [btrfs]
+ btrfs_drop_snapshot+0x356/0x71a [btrfs]
+ btrfs_clean_one_deleted_snapshot+0xb8/0xf0 [btrfs]
+ cleaner_kthread+0x12b/0x160 [btrfs]
+ kthread+0x112/0x130
+ ret_from_fork+0x27/0x50
+
+When dropping snapshots with qgroup enabled, we will trigger backref
+walk.
+
+However such backref walk at that timing is pretty dangerous, as if one
+of the parent nodes get WRITE locked by other thread, we could cause a
+dead lock.
+
+For example:
+
+ FS 260 FS 261 (Dropped)
+ node A node B
+ / \ / \
+ node C node D node E
+ / \ / \ / \
+ leaf F|leaf G|leaf H|leaf I|leaf J|leaf K
+
+The lock sequence would be:
+
+ Thread A (cleaner) | Thread B (other writer)
+-----------------------------------------------------------------------
+write_lock(B) |
+write_lock(D) |
+^^^ called by walk_down_tree() |
+ | write_lock(A)
+ | write_lock(D) << Stall
+read_lock(H) << for backref walk |
+read_lock(D) << lock owner is |
+ the same thread A |
+ so read lock is OK |
+read_lock(A) << Stall |
+
+So thread A hold write lock D, and needs read lock A to unlock.
+While thread B holds write lock A, while needs lock D to unlock.
+
+This will cause a deadlock.
+
+This is not only limited to snapshot dropping case. As the backref
+walk, even only happens on commit trees, is breaking the normal top-down
+locking order, makes it deadlock prone.
+
+Fixes: fb235dc06fac ("btrfs: qgroup: Move half of the qgroup accounting time out of commit trans")
+CC: stable@vger.kernel.org # 4.14+
+Reported-and-tested-by: David Sterba <dsterba@suse.com>
+Reported-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: Josef Bacik <josef@toxicpanda.com>
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+[ rebase to latest branch and fix lock assert bug in btrfs/007 ]
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+[ copy logs and deadlock analysis from Qu's patch ]
+Signed-off-by: David Sterba <dsterba@suse.com>
+Acked-by: Jeff Mahoney <jeffm@suse.com>
+---
+ fs/btrfs/backref.c | 20 +++++++++++++-------
+ 1 file changed, 13 insertions(+), 7 deletions(-)
+
+diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
+index 136454dbb4af..11459fe84a29 100644
+--- a/fs/btrfs/backref.c
++++ b/fs/btrfs/backref.c
+@@ -712,7 +712,7 @@ static int resolve_indirect_refs(struct btrfs_fs_info *fs_info,
+ * read tree blocks and add keys where required.
+ */
+ static int add_missing_keys(struct btrfs_fs_info *fs_info,
+- struct preftrees *preftrees)
++ struct preftrees *preftrees, bool lock)
+ {
+ struct prelim_ref *ref;
+ struct extent_buffer *eb;
+@@ -737,12 +737,14 @@ static int add_missing_keys(struct btrfs_fs_info *fs_info,
+ free_extent_buffer(eb);
+ return -EIO;
+ }
+- btrfs_tree_read_lock(eb);
++ if (lock)
++ btrfs_tree_read_lock(eb);
+ if (btrfs_header_level(eb) == 0)
+ btrfs_item_key_to_cpu(eb, &ref->key_for_search, 0);
+ else
+ btrfs_node_key_to_cpu(eb, &ref->key_for_search, 0);
+- btrfs_tree_read_unlock(eb);
++ if (lock)
++ btrfs_tree_read_unlock(eb);
+ free_extent_buffer(eb);
+ prelim_ref_insert(fs_info, &preftrees->indirect, ref, NULL);
+ cond_resched();
+@@ -1227,7 +1229,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
+
+ btrfs_release_path(path);
+
+- ret = add_missing_keys(fs_info, &preftrees);
++ ret = add_missing_keys(fs_info, &preftrees, path->skip_locking == 0);
+ if (ret)
+ goto out;
+
+@@ -1288,11 +1290,16 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
+ ret = -EIO;
+ goto out;
+ }
+- btrfs_tree_read_lock(eb);
+- btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
++
++ if (!path->skip_locking) {
++ btrfs_tree_read_lock(eb);
++ btrfs_set_lock_blocking_rw(eb,
++ BTRFS_READ_LOCK);
++ }
+ ret = find_extent_in_eb(eb, bytenr,
+ *extent_item_pos, &eie, ignore_offset);
+- btrfs_tree_read_unlock_blocking(eb);
++ if (!path->skip_locking)
++ btrfs_tree_read_unlock_blocking(eb);
+ free_extent_buffer(eb);
+ if (ret < 0)
+ goto out;
+
diff --git a/series.conf b/series.conf
index 2242680474..01cf6c6711 100644
--- a/series.conf
+++ b/series.conf
@@ -591,6 +591,8 @@
# btrfs
########################################################
+ patches.suse/btrfs-honor-path-skip_locking-in-backref-code.patch
+
# Not upstream yet
patches.suse/uapi-add-a-compatibility-layer-between-linux-uio-h-and-glibc