author     Kernel Build Daemon <kbuild@suse.de>   2019-04-23 07:22:31 +0200
committer  Kernel Build Daemon <kbuild@suse.de>   2019-04-23 07:22:31 +0200
commit     0fa8e39f58196d502d8f49517345cde9fc106fe1
tree       f06949e898000b17ef0e712e962ac48c580716c7
parent     92c8dc212d7a36a9026d420a934de3ac43b1404d
parent     0ad013771ca6d42852157449ced8dfc08dde5960

Merge branch 'SLE12-SP3' into openSUSE-42.3
-rw-r--r--  patches.arch/cpu-speculation-add-mitigations-cmdline-option.patch | 161
-rw-r--r--  patches.arch/perf-x86-amd-add-event-map-for-amd-family-17h.patch | 97
-rw-r--r--  patches.arch/powerpc-speculation-support-mitigations-cmdline-option.patch | 117
-rw-r--r--  patches.arch/s390-speculation-support-mitigations-cmdline-option.patch | 91
-rw-r--r--  patches.arch/x86-speculation-support-mitigations-cmdline-option.patch | 148
-rw-r--r--  patches.fixes/0001-btrfs-Fix-bound-checking-in-qgroup_trace_new_subtree.patch | 44
-rw-r--r--  patches.fixes/0001-btrfs-qgroup-Search-commit-root-for-rescan-to-avoid-.patch | 104
-rw-r--r--  patches.fixes/0001-net-sysfs-call-dev_hold-if-kobject_init_and_add-succ.patch | 64
-rw-r--r--  patches.fixes/fuse-continue-to-send-FUSE_RELEASEDIR-when-FUSE_OPEN-returns-ENOSYS.patch | 135
-rw-r--r--  patches.fixes/fuse-fix-possibly-missed-wake-up-after-abort.patch | 58
-rw-r--r--  patches.suse/0001-btrfs-qgroup-Introduce-trace-event-to-analyse-the-nu.patch | 82
-rw-r--r--  patches.suse/0001-btrfs-relocation-Delay-reloc-tree-deletion-after-mer.patch | 245
-rw-r--r--  patches.suse/0002-btrfs-qgroup-Introduce-function-to-trace-two-swaped-.patch | 237
-rw-r--r--  patches.suse/0002-btrfs-qgroup-Refactor-btrfs_qgroup_trace_subtree_swa.patch | 138
-rw-r--r--  patches.suse/0003-btrfs-qgroup-Introduce-function-to-find-all-new-tree.patch | 184
-rw-r--r--  patches.suse/0003-btrfs-qgroup-Introduce-per-root-swapped-blocks-infra.patch | 415
-rw-r--r--  patches.suse/0004-btrfs-qgroup-Use-delayed-subtree-rescan-for-balance.patch | 226
-rw-r--r--  patches.suse/0004-btrfs-qgroup-Use-generation-aware-subtree-swap-to-ma.patch | 213
-rw-r--r--  patches.suse/0005-btrfs-qgroup-Cleanup-old-subtree-swap-code.patch | 139
-rw-r--r--  patches.suse/0005-btrfs-qgroup-Don-t-trace-subtree-if-we-re-dropping-r.patch | 67
-rw-r--r--  patches.suse/0006-btrfs-qgroup-Only-trace-data-extents-in-leaves-if-we.patch | 188
-rw-r--r--  patches.suse/btrfs-btrfs-use-the-new-VFS-super_block_dev.patch | 14
-rw-r--r--  patches.suse/btrfs-quota-Set-rescan-progress-to-u64-1-if-we-hit-l.patch | 4
-rw-r--r--  series.conf | 23
24 files changed, 3185 insertions, 9 deletions
diff --git a/patches.arch/cpu-speculation-add-mitigations-cmdline-option.patch b/patches.arch/cpu-speculation-add-mitigations-cmdline-option.patch
new file mode 100644
index 0000000000..d7ab0da829
--- /dev/null
+++ b/patches.arch/cpu-speculation-add-mitigations-cmdline-option.patch
@@ -0,0 +1,161 @@
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Fri, 12 Apr 2019 15:39:28 -0500
+Subject: cpu/speculation: Add 'mitigations=' cmdline option
+Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git
+Patch-mainline: Queued for v5.2
+Git-commit: 98af8452945c55652de68536afdde3b520fec429
+References: bsc#1112178
+
+Keeping track of the number of mitigations for all the CPU speculation
+bugs has become overwhelming for many users. It's getting more and more
+complicated to decide which mitigations are needed for a given
+architecture. Complicating matters is the fact that each arch tends to
+have its own custom way to mitigate the same vulnerability.
+
+Most users fall into a few basic categories:
+
+a) they want all mitigations off;
+
+b) they want all reasonable mitigations on, with SMT enabled even if
+ it's vulnerable; or
+
+c) they want all reasonable mitigations on, with SMT disabled if
+ vulnerable.
+
+Define a set of curated, arch-independent options, each of which is an
+aggregation of existing options:
+
+- mitigations=off: Disable all mitigations.
+
+- mitigations=auto: [default] Enable all the default mitigations, but
+ leave SMT enabled, even if it's vulnerable.
+
+- mitigations=auto,nosmt: Enable all the default mitigations, disabling
+ SMT if needed by a mitigation.
+
+Currently, these options are placeholders which don't actually do
+anything. They will be fleshed out in upcoming patches.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Jiri Kosina <jkosina@suse.cz> (on x86)
+Reviewed-by: Jiri Kosina <jkosina@suse.cz>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: "H . Peter Anvin" <hpa@zytor.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Jiri Kosina <jikos@kernel.org>
+Cc: Waiman Long <longman@redhat.com>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Jon Masters <jcm@redhat.com>
+Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Cc: Paul Mackerras <paulus@samba.org>
+Cc: Michael Ellerman <mpe@ellerman.id.au>
+Cc: linuxppc-dev@lists.ozlabs.org
+Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
+Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
+Cc: linux-s390@vger.kernel.org
+Cc: Catalin Marinas <catalin.marinas@arm.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: linux-arm-kernel@lists.infradead.org
+Cc: linux-arch@vger.kernel.org
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Tyler Hicks <tyhicks@canonical.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Randy Dunlap <rdunlap@infradead.org>
+Cc: Steven Price <steven.price@arm.com>
+Cc: Phil Auld <pauld@redhat.com>
+Link: https://lkml.kernel.org/r/b07a8ef9b7c5055c3a4637c87d07c296d5016fe0.1555085500.git.jpoimboe@redhat.com
+
+Acked-by: Borislav Petkov <bp@suse.de>
+---
+ Documentation/kernel-parameters.txt | 24 ++++++++++++++++++++++++
+ include/linux/cpu.h | 24 ++++++++++++++++++++++++
+ kernel/cpu.c | 15 +++++++++++++++
+ 3 files changed, 63 insertions(+)
+
+--- a/Documentation/kernel-parameters.txt
++++ b/Documentation/kernel-parameters.txt
+@@ -2314,6 +2314,30 @@ bytes respectively. Such letter suffixes
+ in the "bleeding edge" mini2440 support kernel at
+ http://repo.or.cz/w/linux-2.6/mini2440.git
+
++ mitigations=
++ Control optional mitigations for CPU vulnerabilities.
++ This is a set of curated, arch-independent options, each
++ of which is an aggregation of existing arch-specific
++ options.
++
++ off
++ Disable all optional CPU mitigations. This
++ improves system performance, but it may also
++ expose users to several CPU vulnerabilities.
++
++ auto (default)
++ Mitigate all CPU vulnerabilities, but leave SMT
++ enabled, even if it's vulnerable. This is for
++ users who don't want to be surprised by SMT
++ getting disabled across kernel upgrades, or who
++ have other ways of avoiding SMT-based attacks.
++ This is the default behavior.
++
++ auto,nosmt
++ Mitigate all CPU vulnerabilities, disabling SMT
++ if needed. This is for users who always want to
++ be fully mitigated, even if it means losing SMT.
++
+ mminit_loglevel=
+ [KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this
+ parameter allows control of the logging verbosity for
+--- a/include/linux/cpu.h
++++ b/include/linux/cpu.h
+@@ -314,4 +314,28 @@ static inline void cpu_smt_check_topolog
+ static inline void cpu_smt_check_topology(void) { }
+ #endif
+
++/*
++ * These are used for a global "mitigations=" cmdline option for toggling
++ * optional CPU mitigations.
++ */
++enum cpu_mitigations {
++ CPU_MITIGATIONS_OFF,
++ CPU_MITIGATIONS_AUTO,
++ CPU_MITIGATIONS_AUTO_NOSMT,
++};
++
++extern enum cpu_mitigations cpu_mitigations;
++
++/* mitigations=off */
++static inline bool cpu_mitigations_off(void)
++{
++ return cpu_mitigations == CPU_MITIGATIONS_OFF;
++}
++
++/* mitigations=auto,nosmt */
++static inline bool cpu_mitigations_auto_nosmt(void)
++{
++ return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT;
++}
++
+ #endif /* _LINUX_CPU_H_ */
+--- a/kernel/cpu.c
++++ b/kernel/cpu.c
+@@ -1206,3 +1206,18 @@ void init_cpu_online(const struct cpumas
+ {
+ cpumask_copy(to_cpumask(cpu_online_bits), src);
+ }
++
++enum cpu_mitigations cpu_mitigations __ro_after_init = CPU_MITIGATIONS_AUTO;
++
++static int __init mitigations_parse_cmdline(char *arg)
++{
++ if (!strcmp(arg, "off"))
++ cpu_mitigations = CPU_MITIGATIONS_OFF;
++ else if (!strcmp(arg, "auto"))
++ cpu_mitigations = CPU_MITIGATIONS_AUTO;
++ else if (!strcmp(arg, "auto,nosmt"))
++ cpu_mitigations = CPU_MITIGATIONS_AUTO_NOSMT;
++
++ return 0;
++}
++early_param("mitigations", mitigations_parse_cmdline);
diff --git a/patches.arch/perf-x86-amd-add-event-map-for-amd-family-17h.patch b/patches.arch/perf-x86-amd-add-event-map-for-amd-family-17h.patch
new file mode 100644
index 0000000000..56d4402a9a
--- /dev/null
+++ b/patches.arch/perf-x86-amd-add-event-map-for-amd-family-17h.patch
@@ -0,0 +1,97 @@
+From: Kim Phillips <kim.phillips@amd.com>
+Date: Thu, 21 Mar 2019 21:15:22 +0000
+Subject: perf/x86/amd: Add event map for AMD Family 17h
+Git-commit: 3fe3331bb285700ab2253dbb07f8e478fcea2f1b
+Patch-mainline: v5.1-rc6
+References: bsc#1114648
+
+Family 17h differs from prior families by:
+
+ - Does not support an L2 cache miss event
+ - It has re-enumerated PMC counters for:
+   - L2 cache references
+   - front & back end stalled cycles
+
+So we add a new amd_f17h_perfmon_event_map[] so that the generic
+perf event names will resolve to the correct h/w events on
+family 17h and above processors.
+
+Reference sections 2.1.13.3.3 (stalls) and 2.1.13.3.6 (L2):
+
+ https://www.amd.com/system/files/TechDocs/54945_PPR_Family_17h_Models_00h-0Fh.pdf
+
+Signed-off-by: Kim Phillips <kim.phillips@amd.com>
+Cc: <stable@vger.kernel.org> # v4.9+
+Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Janakarajan Natarajan <Janakarajan.Natarajan@amd.com>
+Cc: Jiri Olsa <jolsa@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Martin Liška <mliska@suse.cz>
+Cc: Namhyung Kim <namhyung@kernel.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Pu Wen <puwen@hygon.cn>
+Cc: Suravee Suthikulpanit <Suravee.Suthikulpanit@amd.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-kernel@vger.kernel.org
+Fixes: e40ed1542dd7 ("perf/x86: Add perf support for AMD family-17h processors")
+[ Improved the formatting a bit. ]
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Acked-by: Borislav Petkov <bp@suse.de>
+---
+ arch/x86/kernel/cpu/perf_event_amd.c | 35 ++++++++++++++++++++++++++---------
+ 1 file changed, 26 insertions(+), 9 deletions(-)
+
+--- a/arch/x86/kernel/cpu/perf_event_amd.c
++++ b/arch/x86/kernel/cpu/perf_event_amd.c
+@@ -113,22 +113,39 @@ static __initconst const u64 amd_hw_cach
+ };
+
+ /*
+- * AMD Performance Monitor K7 and later.
++ * AMD Performance Monitor K7 and later, up to and including Family 16h:
+ */
+ static const u64 amd_perfmon_event_map[] =
+ {
+- [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
+- [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
+- [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080,
+- [PERF_COUNT_HW_CACHE_MISSES] = 0x0081,
+- [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
+- [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
+- [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00d0, /* "Decoder empty" event */
+- [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x00d1, /* "Dispatch stalls" event */
++ [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
++ [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
++ [PERF_COUNT_HW_CACHE_REFERENCES] = 0x077d,
++ [PERF_COUNT_HW_CACHE_MISSES] = 0x077e,
++ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
++ [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
++ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00d0, /* "Decoder empty" event */
++ [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x00d1, /* "Dispatch stalls" event */
++};
++
++/*
++ * AMD Performance Monitor Family 17h and later:
++ */
++static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] =
++{
++ [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
++ [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
++ [PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60,
++ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
++ [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
++ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x0287,
++ [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x0187,
+ };
+
+ static u64 amd_pmu_event_map(int hw_event)
+ {
++ if (boot_cpu_data.x86 >= 0x17)
++ return amd_f17h_perfmon_event_map[hw_event];
++
+ return amd_perfmon_event_map[hw_event];
+ }
+
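
Nothing changes for consumers of the generic event names; the remap is internal to the kernel. As a minimal userspace sketch of what that means in practice (plain perf_event_open() usage, nothing here is specific to this patch): a request for PERF_COUNT_HW_CACHE_REFERENCES keeps working unchanged, the kernel just resolves it to the Family 17h encoding (0xff60) instead of the pre-17h one (0x077d).

#include <linux/perf_event.h>
#include <asm/unistd.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
        struct perf_event_attr attr;
        uint64_t count;
        int fd;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = PERF_TYPE_HARDWARE;
        attr.config = PERF_COUNT_HW_CACHE_REFERENCES;   /* remapped per CPU family */
        attr.disabled = 1;

        /* measure the calling thread on any CPU */
        fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
        if (fd < 0) {
                perror("perf_event_open");
                return 1;
        }

        ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
        /* ... run the workload of interest here ... */
        ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

        if (read(fd, &count, sizeof(count)) == sizeof(count))
                printf("cache references: %llu\n", (unsigned long long)count);
        return 0;
}
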
diff --git a/patches.arch/powerpc-speculation-support-mitigations-cmdline-option.patch b/patches.arch/powerpc-speculation-support-mitigations-cmdline-option.patch
new file mode 100644
index 0000000000..d23c836b51
--- /dev/null
+++ b/patches.arch/powerpc-speculation-support-mitigations-cmdline-option.patch
@@ -0,0 +1,117 @@
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Fri, 12 Apr 2019 15:39:30 -0500
+Subject: powerpc/speculation: Support 'mitigations=' cmdline option
+Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git
+Git-commit: 782e69efb3dfed6e8360bc612e8c7827a901a8f9
+Patch-mainline: Queued for v5.2
+References: bsc#1112178
+
+Configure powerpc CPU runtime speculation bug mitigations in accordance
+with the 'mitigations=' cmdline option. This affects Meltdown, Spectre
+v1, Spectre v2, and Speculative Store Bypass.
+
+The default behavior is unchanged.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Jiri Kosina <jkosina@suse.cz> (on x86)
+Reviewed-by: Jiri Kosina <jkosina@suse.cz>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: "H . Peter Anvin" <hpa@zytor.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Jiri Kosina <jikos@kernel.org>
+Cc: Waiman Long <longman@redhat.com>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Jon Masters <jcm@redhat.com>
+Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Cc: Paul Mackerras <paulus@samba.org>
+Cc: Michael Ellerman <mpe@ellerman.id.au>
+Cc: linuxppc-dev@lists.ozlabs.org
+Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
+Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
+Cc: linux-s390@vger.kernel.org
+Cc: Catalin Marinas <catalin.marinas@arm.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: linux-arm-kernel@lists.infradead.org
+Cc: linux-arch@vger.kernel.org
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Tyler Hicks <tyhicks@canonical.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Randy Dunlap <rdunlap@infradead.org>
+Cc: Steven Price <steven.price@arm.com>
+Cc: Phil Auld <pauld@redhat.com>
+Link: https://lkml.kernel.org/r/245a606e1a42a558a310220312d9b6adb9159df6.1555085500.git.jpoimboe@redhat.com
+
+Acked-by: Borislav Petkov <bp@suse.de>
+---
+ Documentation/kernel-parameters.txt | 9 +++++----
+ arch/powerpc/kernel/security.c | 5 +++--
+ arch/powerpc/kernel/setup_64.c | 2 +-
+ 3 files changed, 9 insertions(+), 7 deletions(-)
+
+--- a/arch/powerpc/kernel/security.c
++++ b/arch/powerpc/kernel/security.c
+@@ -8,6 +8,7 @@
+ #include <linux/device.h>
+ #include <linux/seq_buf.h>
+ #include <linux/debugfs.h>
++#include <linux/cpu.h>
+
+ #include <asm/asm-prototypes.h>
+ #include <asm/code-patching.h>
+@@ -52,7 +53,7 @@ void setup_barrier_nospec(void)
+ enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
+ security_ftr_enabled(SEC_FTR_BNDS_CHK_SPEC_BAR);
+
+- if (!no_nospec)
++ if (!no_nospec && !cpu_mitigations_off())
+ enable_barrier_nospec(enable);
+ }
+
+@@ -274,7 +275,7 @@ void setup_stf_barrier(void)
+
+ stf_enabled_flush_types = type;
+
+- if (!no_stf_barrier)
++ if (!no_stf_barrier && !cpu_mitigations_off())
+ stf_barrier_enable(enable);
+ }
+
+--- a/arch/powerpc/kernel/setup_64.c
++++ b/arch/powerpc/kernel/setup_64.c
+@@ -973,7 +973,7 @@ void setup_rfi_flush(enum l1d_flush_type
+
+ enabled_flush_types = types;
+
+- if (!no_rfi_flush)
++ if (!no_rfi_flush && !cpu_mitigations_off())
+ rfi_flush_enable(enable);
+ }
+
+--- a/Documentation/kernel-parameters.txt
++++ b/Documentation/kernel-parameters.txt
+@@ -2315,7 +2315,7 @@ bytes respectively. Such letter suffixes
+ http://repo.or.cz/w/linux-2.6/mini2440.git
+
+ mitigations=
+- [X86] Control optional mitigations for CPU
++ [X86,PPC] Control optional mitigations for CPU
+ vulnerabilities. This is a set of curated,
+ arch-independent options, each of which is an
+ aggregation of existing arch-specific options.
+@@ -2324,10 +2324,11 @@ bytes respectively. Such letter suffixes
+ Disable all optional CPU mitigations. This
+ improves system performance, but it may also
+ expose users to several CPU vulnerabilities.
+- Equivalent to: nopti [X86]
+- nospectre_v2 [X86]
++ Equivalent to: nopti [X86,PPC]
++ nospectre_v1 [PPC]
++ nospectre_v2 [X86,PPC]
+ spectre_v2_user=off [X86]
+- spec_store_bypass_disable=off [X86]
++ spec_store_bypass_disable=off [X86,PPC]
+ l1tf=off [X86]
+
+ auto (default)
diff --git a/patches.arch/s390-speculation-support-mitigations-cmdline-option.patch b/patches.arch/s390-speculation-support-mitigations-cmdline-option.patch
new file mode 100644
index 0000000000..80a0f1f66f
--- /dev/null
+++ b/patches.arch/s390-speculation-support-mitigations-cmdline-option.patch
@@ -0,0 +1,91 @@
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Fri, 12 Apr 2019 15:39:31 -0500
+Subject: s390/speculation: Support 'mitigations=' cmdline option
+Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git
+Git-commit: 0336e04a6520bdaefdb0769d2a70084fa52e81ed
+Patch-mainline: Queued for v5.2
+References: bsc#1112178
+
+Configure s390 runtime CPU speculation bug mitigations in accordance
+with the 'mitigations=' cmdline option. This affects Spectre v1 and
+Spectre v2.
+
+The default behavior is unchanged.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Jiri Kosina <jkosina@suse.cz> (on x86)
+Reviewed-by: Jiri Kosina <jkosina@suse.cz>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: "H . Peter Anvin" <hpa@zytor.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Jiri Kosina <jikos@kernel.org>
+Cc: Waiman Long <longman@redhat.com>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Jon Masters <jcm@redhat.com>
+Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Cc: Paul Mackerras <paulus@samba.org>
+Cc: Michael Ellerman <mpe@ellerman.id.au>
+Cc: linuxppc-dev@lists.ozlabs.org
+Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
+Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
+Cc: linux-s390@vger.kernel.org
+Cc: Catalin Marinas <catalin.marinas@arm.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: linux-arm-kernel@lists.infradead.org
+Cc: linux-arch@vger.kernel.org
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Tyler Hicks <tyhicks@canonical.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Randy Dunlap <rdunlap@infradead.org>
+Cc: Steven Price <steven.price@arm.com>
+Cc: Phil Auld <pauld@redhat.com>
+Link: https://lkml.kernel.org/r/e4a161805458a5ec88812aac0307ae3908a030fc.1555085500.git.jpoimboe@redhat.com
+
+Acked-by: Borislav Petkov <bp@suse.de>
+---
+ Documentation/kernel-parameters.txt | 5 +++--
+ arch/s390/kernel/nospec-branch.c | 3 ++-
+ 2 files changed, 5 insertions(+), 3 deletions(-)
+
+--- a/arch/s390/kernel/nospec-branch.c
++++ b/arch/s390/kernel/nospec-branch.c
+@@ -2,6 +2,7 @@
+ #include <linux/module.h>
+ #include <linux/device.h>
+ #include <asm/facility.h>
++#include <linux/cpu.h>
+ #include <asm/nospec-branch.h>
+
+ static int __init nobp_setup_early(char *str)
+@@ -59,7 +60,7 @@ early_param("nospectre_v2", nospectre_v2
+
+ void __init nospec_auto_detect(void)
+ {
+- if (test_facility(156)) {
++ if (test_facility(156) || cpu_mitigations_off()) {
+ /*
+ * The machine supports etokens.
+ * Disable expolines and disable nobp.
+--- a/Documentation/kernel-parameters.txt
++++ b/Documentation/kernel-parameters.txt
+@@ -2315,7 +2315,7 @@ bytes respectively. Such letter suffixes
+ http://repo.or.cz/w/linux-2.6/mini2440.git
+
+ mitigations=
+- [X86,PPC] Control optional mitigations for CPU
++ [X86,PPC,S390] Control optional mitigations for CPU
+ vulnerabilities. This is a set of curated,
+ arch-independent options, each of which is an
+ aggregation of existing arch-specific options.
+@@ -2326,7 +2326,8 @@ bytes respectively. Such letter suffixes
+ expose users to several CPU vulnerabilities.
+ Equivalent to: nopti [X86,PPC]
+ nospectre_v1 [PPC]
+- nospectre_v2 [X86,PPC]
++ nobp=0 [S390]
++ nospectre_v2 [X86,PPC,S390]
+ spectre_v2_user=off [X86]
+ spec_store_bypass_disable=off [X86,PPC]
+ l1tf=off [X86]
diff --git a/patches.arch/x86-speculation-support-mitigations-cmdline-option.patch b/patches.arch/x86-speculation-support-mitigations-cmdline-option.patch
new file mode 100644
index 0000000000..1f2caf41d3
--- /dev/null
+++ b/patches.arch/x86-speculation-support-mitigations-cmdline-option.patch
@@ -0,0 +1,148 @@
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Fri, 12 Apr 2019 15:39:29 -0500
+Subject: x86/speculation: Support 'mitigations=' cmdline option
+Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git
+Git-commit: d68be4c4d31295ff6ae34a8ddfaa4c1a8ff42812
+Patch-mainline: Queued for v5.2
+References: bsc#1112178
+
+Configure x86 runtime CPU speculation bug mitigations in accordance with
+the 'mitigations=' cmdline option. This affects Meltdown, Spectre v2,
+Speculative Store Bypass, and L1TF.
+
+The default behavior is unchanged.
+
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Jiri Kosina <jkosina@suse.cz> (on x86)
+Reviewed-by: Jiri Kosina <jkosina@suse.cz>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: "H . Peter Anvin" <hpa@zytor.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Jiri Kosina <jikos@kernel.org>
+Cc: Waiman Long <longman@redhat.com>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Jon Masters <jcm@redhat.com>
+Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Cc: Paul Mackerras <paulus@samba.org>
+Cc: Michael Ellerman <mpe@ellerman.id.au>
+Cc: linuxppc-dev@lists.ozlabs.org
+Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
+Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
+Cc: linux-s390@vger.kernel.org
+Cc: Catalin Marinas <catalin.marinas@arm.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: linux-arm-kernel@lists.infradead.org
+Cc: linux-arch@vger.kernel.org
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Tyler Hicks <tyhicks@canonical.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Randy Dunlap <rdunlap@infradead.org>
+Cc: Steven Price <steven.price@arm.com>
+Cc: Phil Auld <pauld@redhat.com>
+Link: https://lkml.kernel.org/r/6616d0ae169308516cfdf5216bedd169f8a8291b.1555085500.git.jpoimboe@redhat.com
+
+Acked-by: Borislav Petkov <bp@suse.de>
+---
+ Documentation/kernel-parameters.txt | 16 +++++++++++-----
+ arch/x86/kernel/cpu/bugs.c | 11 +++++++++--
+ arch/x86/mm/kaiser.c | 4 +++-
+ 3 files changed, 23 insertions(+), 8 deletions(-)
+
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -287,7 +287,8 @@ static enum spectre_v2_mitigation_cmd __
+ int ret, i;
+ enum spectre_v2_mitigation_cmd cmd = SPECTRE_V2_CMD_AUTO;
+
+- if (cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
++ if (cmdline_find_option_bool(boot_command_line, "nospectre_v2") ||
++ cpu_mitigations_off())
+ return SPECTRE_V2_CMD_NONE;
+ else {
+ ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg));
+@@ -453,7 +454,8 @@ static enum ssb_mitigation_cmd __init ss
+ char arg[20];
+ int ret, i;
+
+- if (cmdline_find_option_bool(boot_command_line, "nospec_store_bypass_disable")) {
++ if (cmdline_find_option_bool(boot_command_line, "nospec_store_bypass_disable") ||
++ cpu_mitigations_off()) {
+ return SPEC_STORE_BYPASS_CMD_NONE;
+ } else {
+ ret = cmdline_find_option(boot_command_line, "spec_store_bypass_disable",
+@@ -697,6 +699,11 @@ static void __init l1tf_select_mitigatio
+ if (!boot_cpu_has_bug(X86_BUG_L1TF))
+ return;
+
++ if (cpu_mitigations_off())
++ l1tf_mitigation = L1TF_MITIGATION_OFF;
++ else if (cpu_mitigations_auto_nosmt())
++ l1tf_mitigation = L1TF_MITIGATION_FLUSH_NOSMT;
++
+ override_cache_bits(&boot_cpu_data);
+
+ switch (l1tf_mitigation) {
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -10,6 +10,7 @@
+ #include <linux/mm.h>
+ #include <linux/uaccess.h>
+ #include <linux/ftrace.h>
++#include <linux/cpu.h>
+
+ #undef pr_fmt
+ #define pr_fmt(fmt) "Kernel/User page tables isolation: " fmt
+@@ -291,7 +292,8 @@ void __init kaiser_check_boottime_disabl
+ goto skip;
+ }
+
+- if (cmdline_find_option_bool(boot_command_line, "nopti"))
++ if (cmdline_find_option_bool(boot_command_line, "nopti") ||
++ cpu_mitigations_off())
+ goto disable;
+
+ skip:
+--- a/Documentation/kernel-parameters.txt
++++ b/Documentation/kernel-parameters.txt
+@@ -2315,15 +2315,20 @@ bytes respectively. Such letter suffixes
+ http://repo.or.cz/w/linux-2.6/mini2440.git
+
+ mitigations=
+- Control optional mitigations for CPU vulnerabilities.
+- This is a set of curated, arch-independent options, each
+- of which is an aggregation of existing arch-specific
+- options.
++ [X86] Control optional mitigations for CPU
++ vulnerabilities. This is a set of curated,
++ arch-independent options, each of which is an
++ aggregation of existing arch-specific options.
+
+ off
+ Disable all optional CPU mitigations. This
+ improves system performance, but it may also
+ expose users to several CPU vulnerabilities.
++ Equivalent to: nopti [X86]
++ nospectre_v2 [X86]
++ spectre_v2_user=off [X86]
++ spec_store_bypass_disable=off [X86]
++ l1tf=off [X86]
+
+ auto (default)
+ Mitigate all CPU vulnerabilities, but leave SMT
+@@ -2331,12 +2336,13 @@ bytes respectively. Such letter suffixes
+ users who don't want to be surprised by SMT
+ getting disabled across kernel upgrades, or who
+ have other ways of avoiding SMT-based attacks.
+- This is the default behavior.
++ Equivalent to: (default behavior)
+
+ auto,nosmt
+ Mitigate all CPU vulnerabilities, disabling SMT
+ if needed. This is for users who always want to
+ be fully mitigated, even if it means losing SMT.
++ Equivalent to: l1tf=flush,nosmt [X86]
+
+ mminit_loglevel=
+ [KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this
diff --git a/patches.fixes/0001-btrfs-Fix-bound-checking-in-qgroup_trace_new_subtree.patch b/patches.fixes/0001-btrfs-Fix-bound-checking-in-qgroup_trace_new_subtree.patch
new file mode 100644
index 0000000000..79ca4177cf
--- /dev/null
+++ b/patches.fixes/0001-btrfs-Fix-bound-checking-in-qgroup_trace_new_subtree.patch
@@ -0,0 +1,44 @@
+From 7ff2c2a1a71e83f74574b8001ea88deb3c166ad7 Mon Sep 17 00:00:00 2001
+From: Nikolay Borisov <nborisov@suse.com>
+Date: Mon, 18 Mar 2019 17:45:19 +0200
+Patch-mainline: v5.0
+Git-commit: 7ff2c2a1a71e83f74574b8001ea88deb3c166ad7
+References: pending fix for bsc#1063638
+Subject: [PATCH] btrfs: Fix bound checking in qgroup_trace_new_subtree_blocks
+
+If 'cur_level' is 7 then the bounds check at the top of the function
+will actually pass. Later on, it's possible to dereference
+ds_path->nodes[cur_level+1], which will be an out-of-bounds access.
+
+The correct check will be cur_level >= BTRFS_MAX_LEVEL - 1 .
+
+Fixes-coverty-id: 1440918
+Fixes-coverty-id: 1440911
+Fixes: ea49f3e73c4b ("btrfs: qgroup: Introduce function to find all new tree blocks of reloc tree")
+CC: stable@vger.kernel.org # 4.20+
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: Nikolay Borisov <nborisov@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+---
+ fs/btrfs/qgroup.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
+index eb680b715dd6..e659d9d61107 100644
+--- a/fs/btrfs/qgroup.c
++++ b/fs/btrfs/qgroup.c
+@@ -1922,8 +1922,8 @@ static int qgroup_trace_new_subtree_blocks(struct btrfs_trans_handle* trans,
+ int i;
+
+ /* Level sanity check */
+- if (cur_level < 0 || cur_level >= BTRFS_MAX_LEVEL ||
+- root_level < 0 || root_level >= BTRFS_MAX_LEVEL ||
++ if (cur_level < 0 || cur_level >= BTRFS_MAX_LEVEL - 1 ||
++ root_level < 0 || root_level >= BTRFS_MAX_LEVEL - 1 ||
+ root_level < cur_level) {
+ btrfs_err_rl(fs_info,
+ "%s: bad levels, cur_level=%d root_level=%d",
+--
+2.21.0
+
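
The fix is a fence-post adjustment: BTRFS_MAX_LEVEL is 8, so nodes[] has valid indices 0..7 and any code that peeks at nodes[cur_level + 1] must already reject cur_level == 7. A standalone illustration of the same check, with invented names (fake_path, peek_parent), compiles as ordinary userspace C:

#include <stdio.h>
#include <stddef.h>

#define BTRFS_MAX_LEVEL 8       /* same constant the kernel uses */

struct fake_path {
        void *nodes[BTRFS_MAX_LEVEL];   /* valid indices: 0 .. 7 */
};

static int peek_parent(const struct fake_path *path, int cur_level)
{
        /*
         * The old check was "cur_level >= BTRFS_MAX_LEVEL", which lets
         * cur_level == 7 through even though nodes[8] does not exist.
         */
        if (cur_level < 0 || cur_level >= BTRFS_MAX_LEVEL - 1)
                return -1;              /* rejected: no parent slot */

        /* cur_level is at most 6 here, so cur_level + 1 stays in bounds */
        return path->nodes[cur_level + 1] != NULL;
}

int main(void)
{
        struct fake_path p = { { NULL } };

        printf("cur_level=6 -> %d (in range)\n", peek_parent(&p, 6));
        printf("cur_level=7 -> %d (rejected)\n", peek_parent(&p, 7));
        return 0;
}
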
diff --git a/patches.fixes/0001-btrfs-qgroup-Search-commit-root-for-rescan-to-avoid-.patch b/patches.fixes/0001-btrfs-qgroup-Search-commit-root-for-rescan-to-avoid-.patch
new file mode 100644
index 0000000000..5ed1e1342d
--- /dev/null
+++ b/patches.fixes/0001-btrfs-qgroup-Search-commit-root-for-rescan-to-avoid-.patch
@@ -0,0 +1,104 @@
+From b6debf15d4753e0075a85ff119b0bb3c7e172782 Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Mon, 14 May 2018 09:38:12 +0800
+Git-commit: b6debf15d4753e0075a85ff119b0bb3c7e172782
+Patch-mainline: v4.18
+References: bsc#1129326
+Subject: [PATCH 1/2] btrfs: qgroup: Search commit root for rescan to avoid
+ missing extent
+
+When doing qgroup rescan using the following script (modified from
+btrfs/017 test case), we can sometimes hit qgroup corruption.
+
+------
+umount $dev &> /dev/null
+umount $mnt &> /dev/null
+
+mkfs.btrfs -f -n 64k $dev
+mount $dev $mnt
+
+extent_size=8192
+
+xfs_io -f -d -c "pwrite 0 $extent_size" $mnt/foo > /dev/null
+btrfs subvolume snapshot $mnt $mnt/snap
+
+xfs_io -f -c "reflink $mnt/foo" $mnt/foo-reflink > /dev/null
+xfs_io -f -c "reflink $mnt/foo" $mnt/snap/foo-reflink > /dev/null
+xfs_io -f -c "reflink $mnt/foo" $mnt/snap/foo-reflink2 > /dev/unll
+btrfs quota enable $mnt
+
+ # -W is the new option to only wait for rescan without starting a new one
+btrfs quota rescan -W $mnt
+btrfs qgroup show -prce $mnt
+umount $mnt
+
+ # Need to patch btrfs-progs to report qgroup mismatch as error
+btrfs check $dev || _fail
+------
+
+On a fast machine, we can hit corruption where the accounting of some
+tree blocks is missed:
+------
+qgroupid rfer excl max_rfer max_excl parent child
+-------- ---- ---- -------- -------- ------ -----
+0/5 8.00KiB 0.00B none none --- ---
+0/257 8.00KiB 0.00B none none --- ---
+------
+
+This is due to the fact that we're always searching commit root for
+btrfs_find_all_roots() at qgroup_rescan_leaf(), but the leaf we get is
+from current transaction, not commit root.
+
+And if our tree blocks get modified in current transaction, we won't
+find any owner in commit root, thus causing the corruption.
+
+Fix it by making qgroup_rescan_leaf() search the commit root of the
+extent tree.
+
+Reported-by: Nikolay Borisov <nborisov@suse.com>
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: Nikolay Borisov <nborisov@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+---
+ fs/btrfs/qgroup.c | 9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+--- a/fs/btrfs/qgroup.c
++++ b/fs/btrfs/qgroup.c
+@@ -2609,7 +2609,6 @@ qgroup_rescan_leaf(struct btrfs_fs_info
+ struct btrfs_key found;
+ struct extent_buffer *scratch_leaf = NULL;
+ struct ulist *roots = NULL;
+- struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem);
+ u64 num_bytes;
+ bool done;
+ int slot;
+@@ -2646,7 +2645,6 @@ qgroup_rescan_leaf(struct btrfs_fs_info
+ btrfs_header_nritems(path->nodes[0]) - 1);
+ fs_info->qgroup_rescan_progress.objectid = found.objectid + 1;
+
+- btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
+ scratch_leaf = btrfs_clone_extent_buffer(path->nodes[0]);
+ if (!scratch_leaf) {
+ ret = -ENOMEM;
+@@ -2685,7 +2683,6 @@ out:
+ btrfs_tree_read_unlock_blocking(scratch_leaf);
+ free_extent_buffer(scratch_leaf);
+ }
+- btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
+
+ if (done && !ret)
+ ret = 1;
+@@ -2704,6 +2701,12 @@ static void btrfs_qgroup_rescan_worker(s
+ path = btrfs_alloc_path();
+ if (!path)
+ goto out;
++ /*
++ * Rescan should only search for commit root, and any later difference
++ * should be recorded by qgroup
++ */
++ path->search_commit_root = 1;
++ path->skip_locking = 1;
+
+ err = 0;
+ while (!err && !btrfs_fs_closing(fs_info)) {
diff --git a/patches.fixes/0001-net-sysfs-call-dev_hold-if-kobject_init_and_add-succ.patch b/patches.fixes/0001-net-sysfs-call-dev_hold-if-kobject_init_and_add-succ.patch
new file mode 100644
index 0000000000..c8312e7a01
--- /dev/null
+++ b/patches.fixes/0001-net-sysfs-call-dev_hold-if-kobject_init_and_add-succ.patch
@@ -0,0 +1,64 @@
+From: YueHaibing <yuehaibing@huawei.com>
+Subject: net-sysfs: call dev_hold if kobject_init_and_add success
+Patch-mainline: v5.1-rc3
+Git-commit: a3e23f719f5c4a38ffb3d30c8d7632a4ed8ccd9e
+References: git-fixes
+
+In netdev_queue_add_kobject and rx_queue_add_kobject,
+if sysfs_create_group fails, kobject_put will call
+netdev_queue_release to decrease the dev refcount; however,
+dev_hold has not been called. So we will see this while
+unregistering the dev:
+
+unregister_netdevice: waiting for bcsh0 to become free. Usage count = -1
+
+Reported-by: Hulk Robot <hulkci@huawei.com>
+Fixes: d0d668371679 ("net: don't decrement kobj reference count on init failure")
+Signed-off-by: YueHaibing <yuehaibing@huawei.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Acked-by: Denis Kirjanov <dkirjanov@suse.com>
+---
+ net/core/net-sysfs.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
+index 37488e5130fd..edafd949ec64 100644
+--- a/net/core/net-sysfs.c
++++ b/net/core/net-sysfs.c
+@@ -898,6 +898,8 @@ static int rx_queue_add_kobject(struct net_device *dev, int index)
+ if (error)
+ return error;
+
++ dev_hold(queue->dev);
++
+ if (dev->sysfs_rx_queue_group) {
+ error = sysfs_create_group(kobj, dev->sysfs_rx_queue_group);
+ if (error) {
+@@ -907,7 +909,6 @@ static int rx_queue_add_kobject(struct net_device *dev, int index)
+ }
+
+ kobject_uevent(kobj, KOBJ_ADD);
+- dev_hold(queue->dev);
+
+ return error;
+ }
+@@ -1289,6 +1290,8 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index)
+ if (error)
+ return error;
+
++ dev_hold(queue->dev);
++
+ #ifdef CONFIG_BQL
+ error = sysfs_create_group(kobj, &dql_group);
+ if (error) {
+@@ -1298,7 +1301,6 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index)
+ #endif
+
+ kobject_uevent(kobj, KOBJ_ADD);
+- dev_hold(queue->dev);
+
+ return 0;
+ }
+--
+2.12.3
+
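
The rule this fix restores is generic kobject hygiene: once kobject_init_and_add() has succeeded, every later failure path ends in kobject_put(), which runs the release callback, and that callback drops a device reference, so the matching dev_hold() has to happen before anything else can fail. Reduced to a compilable toy (all names are invented stand-ins; get_ref()/put_ref() play the role of dev_hold() and the release callback):

#include <stdio.h>

struct device { int refs; };
struct queue  { struct device *dev; int registered; };

static void get_ref(struct device *d) { d->refs++; }
static void put_ref(struct device *d) { d->refs--; }

/* tearing down the object always runs its release hook, which drops a ref */
static void unregister_queue(struct queue *q)
{
        q->registered = 0;
        put_ref(q->dev);
}

static int add_queue(struct queue *q, int sysfs_group_fails)
{
        q->registered = 1;      /* kobject_init_and_add() succeeded */
        get_ref(q->dev);        /* fix: take the ref before anything can fail */

        if (sysfs_group_fails) {        /* sysfs_create_group() error path */
                unregister_queue(q);    /* release hook drops the ref we took */
                return -1;              /* refcount stays balanced */
        }
        return 0;
}

int main(void)
{
        struct device dev = { .refs = 1 };
        struct queue q = { .dev = &dev };

        add_queue(&q, 1);       /* simulate the failing sysfs_create_group() */
        printf("device refs after failed add: %d (expected 1)\n", dev.refs);
        return 0;
}
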
diff --git a/patches.fixes/fuse-continue-to-send-FUSE_RELEASEDIR-when-FUSE_OPEN-returns-ENOSYS.patch b/patches.fixes/fuse-continue-to-send-FUSE_RELEASEDIR-when-FUSE_OPEN-returns-ENOSYS.patch
new file mode 100644
index 0000000000..6851785f41
--- /dev/null
+++ b/patches.fixes/fuse-continue-to-send-FUSE_RELEASEDIR-when-FUSE_OPEN-returns-ENOSYS.patch
@@ -0,0 +1,135 @@
+From 2e64ff154ce6ce9a8dc0f9556463916efa6ff460 Mon Sep 17 00:00:00 2001
+From: Chad Austin <chadaustin@fb.com>
+Date: Mon Dec 10 10:54:52 2018 -0800
+Subject: [PATCH] fuse: continue to send FUSE_RELEASEDIR when FUSE_OPEN returns ENOSYS
+Git-commit: 2e64ff154ce6ce9a8dc0f9556463916efa6ff460
+References: git-fixes
+Patch-mainline: v4.20-rc7
+
+When FUSE_OPEN returns ENOSYS, the no_open bit is set on the connection.
+
+Because the FUSE_RELEASE and FUSE_RELEASEDIR paths share code, this
+incorrectly caused the FUSE_RELEASEDIR request to be dropped and never sent
+to userspace.
+
+Pass an isdir bool to distinguish between FUSE_RELEASE and FUSE_RELEASEDIR
+inside of fuse_file_put.
+
+Fixes: 7678ac50615d ("fuse: support clients that don't implement 'open'")
+Cc: <stable@vger.kernel.org> # v3.14
+Signed-off-by: Chad Austin <chadaustin@fb.com>
+Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
+Acked-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
+
+---
+ fs/fuse/dir.c | 2 +-
+ fs/fuse/file.c | 19 ++++++++++---------
+ fs/fuse/fuse_i.h | 2 +-
+ 3 files changed, 12 insertions(+), 11 deletions(-)
+
+--- a/fs/fuse/dir.c
++++ b/fs/fuse/dir.c
+@@ -1403,7 +1403,7 @@ static int fuse_dir_open(struct inode *i
+
+ static int fuse_dir_release(struct inode *inode, struct file *file)
+ {
+- fuse_release_common(file, FUSE_RELEASEDIR);
++ fuse_release_common(file, true);
+
+ return 0;
+ }
+--- a/fs/fuse/file.c
++++ b/fs/fuse/file.c
+@@ -86,12 +86,12 @@ static void fuse_release_end(struct fuse
+ iput(req->misc.release.inode);
+ }
+
+-static void fuse_file_put(struct fuse_file *ff, bool sync)
++static void fuse_file_put(struct fuse_file *ff, bool sync, bool isdir)
+ {
+ if (atomic_dec_and_test(&ff->count)) {
+ struct fuse_req *req = ff->reserved_req;
+
+- if (ff->fc->no_open) {
++ if (ff->fc->no_open && !isdir) {
+ /*
+ * Drop the release request when client does not
+ * implement 'open'
+@@ -244,10 +244,11 @@ static void fuse_prepare_release(struct
+ req->in.args[0].value = inarg;
+ }
+
+-void fuse_release_common(struct file *file, int opcode)
++void fuse_release_common(struct file *file, bool isdir)
+ {
+ struct fuse_file *ff;
+ struct fuse_req *req;
++ int opcode = isdir ? FUSE_RELEASEDIR : FUSE_RELEASE;
+
+ ff = file->private_data;
+ if (unlikely(!ff))
+@@ -274,7 +275,7 @@ void fuse_release_common(struct file *fi
+ * synchronous RELEASE is allowed (and desirable) in this case
+ * because the server can be trusted not to screw up.
+ */
+- fuse_file_put(ff, ff->fc->destroy_req != NULL);
++ fuse_file_put(ff, ff->fc->destroy_req != NULL, isdir);
+ }
+
+ static int fuse_open(struct inode *inode, struct file *file)
+@@ -290,7 +291,7 @@ static int fuse_release(struct inode *in
+ if (fc->writeback_cache)
+ write_inode_now(inode, 1);
+
+- fuse_release_common(file, FUSE_RELEASE);
++ fuse_release_common(file, false);
+
+ /* return value is ignored by VFS */
+ return 0;
+@@ -817,7 +818,7 @@ static void fuse_readpages_end(struct fu
+ page_cache_release(page);
+ }
+ if (req->ff)
+- fuse_file_put(req->ff, false);
++ fuse_file_put(req->ff, false, false);
+ }
+
+ static void fuse_send_readpages(struct fuse_req *req, struct file *file)
+@@ -1467,7 +1468,7 @@ static void fuse_writepage_free(struct f
+ __free_page(req->pages[i]);
+
+ if (req->ff)
+- fuse_file_put(req->ff, false);
++ fuse_file_put(req->ff, false, false);
+ }
+
+ static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
+@@ -1624,7 +1625,7 @@ int fuse_write_inode(struct inode *inode
+ ff = __fuse_write_file_get(fc, fi);
+ err = fuse_flush_times(inode, ff);
+ if (ff)
+- fuse_file_put(ff, 0);
++ fuse_file_put(ff, false, false);
+
+ return err;
+ }
+@@ -1937,7 +1938,7 @@ static int fuse_writepages(struct addres
+ err = 0;
+ }
+ if (data.ff)
+- fuse_file_put(data.ff, false);
++ fuse_file_put(data.ff, false, false);
+
+ kfree(data.orig_pages);
+ out:
+--- a/fs/fuse/fuse_i.h
++++ b/fs/fuse/fuse_i.h
+@@ -730,7 +730,7 @@ void fuse_sync_release(struct fuse_file
+ /**
+ * Send RELEASE or RELEASEDIR request
+ */
+-void fuse_release_common(struct file *file, int opcode);
++void fuse_release_common(struct file *file, bool isdir);
+
+ /**
+ * Send FSYNC or FSYNCDIR request
diff --git a/patches.fixes/fuse-fix-possibly-missed-wake-up-after-abort.patch b/patches.fixes/fuse-fix-possibly-missed-wake-up-after-abort.patch
new file mode 100644
index 0000000000..365bd1fbf6
--- /dev/null
+++ b/patches.fixes/fuse-fix-possibly-missed-wake-up-after-abort.patch
@@ -0,0 +1,58 @@
+From 2d84a2d19b6150c6dbac1e6ebad9c82e4c123772 Mon Sep 17 00:00:00 2001
+From: Miklos Szeredi <mszeredi@redhat.com>
+Date: Fri Nov 9 15:52:16 2018 +0100
+Subject: [PATCH] fuse: fix possibly missed wake-up after abort
+Git-commit: 2d84a2d19b6150c6dbac1e6ebad9c82e4c123772
+References: git-fixes
+Patch-mainline: v4.20-rc3
+
+In current fuse_drop_waiting() implementation it's possible that
+fuse_wait_aborted() will not be woken up in the unlikely case that
+fuse_abort_conn() + fuse_wait_aborted() runs in between checking
+fc->connected and calling atomic_dec(&fc->num_waiting).
+
+Do the atomic_dec_and_test() unconditionally, which also provides the
+necessary barrier against reordering with the fc->connected check.
+
+The explicit smp_mb() in fuse_wait_aborted() is not actually needed, since
+the spin_unlock() in fuse_abort_conn() provides the necessary RELEASE
+barrier after resetting fc->connected. However, this is not a performance
+sensitive path, and adding the explicit barrier makes it easier to
+document.
+
+Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
+Fixes: b8f95e5d13f5 ("fuse: umount should wait for all requests")
+Cc: <stable@vger.kernel.org> #v4.19
+Acked-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
+
+diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
+index 6fe330c..a5e516a 100644
+
+--- a/fs/fuse/dev.c
++++ b/fs/fuse/dev.c
+@@ -165,9 +165,13 @@ static bool fuse_block_alloc(struct fuse_conn *fc, bool for_background)
+
+ static void fuse_drop_waiting(struct fuse_conn *fc)
+ {
+- if (fc->connected) {
+- atomic_dec(&fc->num_waiting);
+- } else if (atomic_dec_and_test(&fc->num_waiting)) {
++ /*
++ * lockless check of fc->connected is okay, because atomic_dec_and_test()
++ * provides a memory barrier matched with the one in fuse_wait_aborted()
++ * to ensure no wake-up is missed.
++ */
++ if (atomic_dec_and_test(&fc->num_waiting) &&
++ !READ_ONCE(fc->connected)) {
+ /* wake up aborters */
+ wake_up_all(&fc->blocked_waitq);
+ }
+@@ -2221,6 +2225,8 @@ EXPORT_SYMBOL_GPL(fuse_abort_conn);
+
+ void fuse_wait_aborted(struct fuse_conn *fc)
+ {
++ /* matches implicit memory barrier in fuse_drop_waiting() */
++ smp_mb();
+ wait_event(fc->blocked_waitq, atomic_read(&fc->num_waiting) == 0);
+ }
+
diff --git a/patches.suse/0001-btrfs-qgroup-Introduce-trace-event-to-analyse-the-nu.patch b/patches.suse/0001-btrfs-qgroup-Introduce-trace-event-to-analyse-the-nu.patch
new file mode 100644
index 0000000000..195c05ebbb
--- /dev/null
+++ b/patches.suse/0001-btrfs-qgroup-Introduce-trace-event-to-analyse-the-nu.patch
@@ -0,0 +1,82 @@
+From c337e7b02f71c4b2f6f2138807a284d2c4e1ac5e Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Thu, 27 Sep 2018 14:42:29 +0800
+Patch-mainline: v4.19
+Git-commit: c337e7b02f71c4b2f6f2138807a284d2c4e1ac5e
+References: bsc#1063638 dependency
+Subject: [PATCH 1/6] btrfs: qgroup: Introduce trace event to analyse the
+ number of dirty extents accounted
+
+Number of qgroup dirty extents is directly linked to the performance
+overhead, so add a new trace event, trace_qgroup_num_dirty_extents(), to
+record how many dirty extents are processed in
+btrfs_qgroup_account_extents().
+
+This will be pretty handy to analyze later balance performance
+improvement.
+
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+---
+ fs/btrfs/qgroup.c | 4 ++++
+ include/trace/events/btrfs.h | 21 +++++++++++++++++++++
+ 2 files changed, 25 insertions(+)
+
+--- a/fs/btrfs/qgroup.c
++++ b/fs/btrfs/qgroup.c
+@@ -2049,6 +2049,7 @@ int btrfs_qgroup_account_extents(struct
+ struct btrfs_delayed_ref_root *delayed_refs;
+ struct ulist *new_roots = NULL;
+ struct rb_node *node;
++ u64 num_dirty_extents = 0;
+ u64 qgroup_to_skip;
+ int ret = 0;
+
+@@ -2058,6 +2059,7 @@ int btrfs_qgroup_account_extents(struct
+ record = rb_entry(node, struct btrfs_qgroup_extent_record,
+ node);
+
++ num_dirty_extents++;
+ trace_btrfs_qgroup_account_extents(fs_info, record);
+
+ if (!ret) {
+@@ -2102,6 +2104,8 @@ cleanup:
+ kfree(record);
+
+ }
++ trace_qgroup_num_dirty_extents(fs_info, trans->transid,
++ num_dirty_extents);
+ return ret;
+ }
+
+--- a/include/trace/events/btrfs.h
++++ b/include/trace/events/btrfs.h
+@@ -1439,6 +1439,27 @@ DEFINE_EVENT(btrfs_qgroup_extent, btrfs_
+ TP_ARGS(fs_info, rec)
+ );
+
++TRACE_EVENT(qgroup_num_dirty_extents,
++
++ TP_PROTO(const struct btrfs_fs_info *fs_info, u64 transid,
++ u64 num_dirty_extents),
++
++ TP_ARGS(fs_info, transid, num_dirty_extents),
++
++ TP_STRUCT__entry_btrfs(
++ __field( u64, transid )
++ __field( u64, num_dirty_extents )
++ ),
++
++ TP_fast_assign_btrfs(fs_info,
++ __entry->transid = transid;
++ __entry->num_dirty_extents = num_dirty_extents;
++ ),
++
++ TP_printk_btrfs("transid=%llu num_dirty_extents=%llu",
++ __entry->transid, __entry->num_dirty_extents)
++);
++
+ TRACE_EVENT(btrfs_qgroup_account_extent,
+
+ TP_PROTO(const struct btrfs_fs_info *fs_info, u64 bytenr,
diff --git a/patches.suse/0001-btrfs-relocation-Delay-reloc-tree-deletion-after-mer.patch b/patches.suse/0001-btrfs-relocation-Delay-reloc-tree-deletion-after-mer.patch
new file mode 100644
index 0000000000..61229b1a96
--- /dev/null
+++ b/patches.suse/0001-btrfs-relocation-Delay-reloc-tree-deletion-after-mer.patch
@@ -0,0 +1,245 @@
+From d2311e69857815ae2f728b48e6730f833a617092 Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Wed, 23 Jan 2019 15:15:14 +0800
+Patch-mainline: v5.1-rc1
+Git-commit: d2311e69857815ae2f728b48e6730f833a617092
+References: bsc#1063638
+Subject: [PATCH 1/5] btrfs: relocation: Delay reloc tree deletion after
+ merge_reloc_roots
+
+Relocation code will drop btrfs_root::reloc_root as soon as
+merge_reloc_root() finishes.
+
+However later qgroup code will need to access btrfs_root::reloc_root
+after merge_reloc_root() for delayed subtree rescan.
+
+So alter the timing of resetting btrfs_root::reloc_root, make it
+happen after transaction commit.
+
+With this patch, we will introduce a new btrfs_root::state,
+BTRFS_ROOT_DEAD_RELOC_TREE, to inform users of btrfs_root::reloc_root
+that although btrfs_root::reloc_root is still non-NULL, it is not
+used any more.
+
+The lifespan of btrfs_root::reloc tree will become:
+ Old behavior | New
+------------------------------------------------------------------------
+btrfs_init_reloc_root() --- | btrfs_init_reloc_root() ---
+ set reloc_root | | set reloc_root |
+ | | |
+ | | |
+merge_reloc_root() | | merge_reloc_root() |
+|- btrfs_update_reloc_root() --- | |- btrfs_update_reloc_root() -+-
+ clear btrfs_root::reloc_root | set ROOT_DEAD_RELOC_TREE |
+ | record root into dirty |
+ | roots rbtree |
+ | |
+ | reloc_block_group() Or |
+ | btrfs_recover_relocation() |
+ | | After transaction commit |
+ | |- clean_dirty_subvols() ---
+ | clear btrfs_root::reloc_root
+
+During ROOT_DEAD_RELOC_TREE set lifespan, the only user of
+btrfs_root::reloc_tree should be qgroup.
+
+Since reloc root needs a longer life-span, this patch will also delay
+btrfs_drop_snapshot() call.
+Now btrfs_drop_snapshot() is called in clean_dirty_subvols().
+
+This patch will increase the size of btrfs_root by 16 bytes.
+
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+---
+ fs/btrfs/ctree.h | 10 +++++
+ fs/btrfs/disk-io.c | 1
+ fs/btrfs/relocation.c | 85 ++++++++++++++++++++++++++++++++++++++++----------
+ 3 files changed, 79 insertions(+), 17 deletions(-)
+
+--- a/fs/btrfs/ctree.h
++++ b/fs/btrfs/ctree.h
+@@ -1121,6 +1121,8 @@ struct btrfs_subvolume_writers {
+ #define BTRFS_ROOT_MULTI_LOG_TASKS 8
+ #define BTRFS_ROOT_DIRTY 9
+
++#define BTRFS_ROOT_DEAD_RELOC_TREE 11
++
+ /*
+ * in ram representation of the tree. extent_root is used for all allocations
+ * and for the extent tree extent_root root.
+@@ -1250,6 +1252,14 @@ struct btrfs_root {
+ u64 nr_ordered_extents;
+
+ /*
++ * Not empty if this subvolume root has gone through tree block swap
++ * (relocation)
++ *
++ * Will be used by reloc_control::dirty_subvol_roots.
++ */
++ struct list_head reloc_dirty_list;
++
++ /*
+ * Number of currently running SEND ioctls to prevent
+ * manipulation with the read-only status via SUBVOL_SETFLAGS
+ */
+--- a/fs/btrfs/disk-io.c
++++ b/fs/btrfs/disk-io.c
+@@ -1247,6 +1247,7 @@ static void __setup_root(u32 nodesize, u
+ INIT_LIST_HEAD(&root->delalloc_root);
+ INIT_LIST_HEAD(&root->ordered_extents);
+ INIT_LIST_HEAD(&root->ordered_root);
++ INIT_LIST_HEAD(&root->reloc_dirty_list);
+ INIT_LIST_HEAD(&root->logged_list[0]);
+ INIT_LIST_HEAD(&root->logged_list[1]);
+ spin_lock_init(&root->orphan_lock);
+--- a/fs/btrfs/relocation.c
++++ b/fs/btrfs/relocation.c
+@@ -174,6 +174,8 @@ struct reloc_control {
+ struct mapping_tree reloc_root_tree;
+ /* list of reloc trees */
+ struct list_head reloc_roots;
++ /* list of subvolume trees that get relocated */
++ struct list_head dirty_subvol_roots;
+ /* size of metadata reservation for merging reloc trees */
+ u64 merging_rsv_size;
+ /* size of relocated tree nodes */
+@@ -1505,15 +1507,17 @@ int btrfs_update_reloc_root(struct btrfs
+ struct btrfs_root_item *root_item;
+ int ret;
+
+- if (!root->reloc_root)
++ if (test_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state) ||
++ !root->reloc_root)
+ goto out;
+
+ reloc_root = root->reloc_root;
+ root_item = &reloc_root->root_item;
+
++ /* root->reloc_root will stay until current relocation finished */
+ if (root->fs_info->reloc_ctl->merge_reloc_tree &&
+ btrfs_root_refs(root_item) == 0) {
+- root->reloc_root = NULL;
++ set_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state);
+ __del_reloc_root(reloc_root);
+ }
+
+@@ -2154,6 +2158,58 @@ static int find_next_key(struct btrfs_pa
+ }
+
+ /*
++ * Insert current subvolume into reloc_control::dirty_subvol_roots
++ */
++static void insert_dirty_subvol(struct btrfs_trans_handle *trans,
++ struct reloc_control *rc,
++ struct btrfs_root *root)
++{
++ struct btrfs_root *reloc_root = root->reloc_root;
++ struct btrfs_root_item *reloc_root_item;
++
++ /* @root must be a subvolume tree root with a valid reloc tree */
++ ASSERT(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
++ ASSERT(reloc_root);
++
++ reloc_root_item = &reloc_root->root_item;
++ memset(&reloc_root_item->drop_progress, 0,
++ sizeof(reloc_root_item->drop_progress));
++ reloc_root_item->drop_level = 0;
++ btrfs_set_root_refs(reloc_root_item, 0);
++ btrfs_update_reloc_root(trans, root);
++
++ if (list_empty(&root->reloc_dirty_list)) {
++ btrfs_grab_fs_root(root);
++ list_add_tail(&root->reloc_dirty_list, &rc->dirty_subvol_roots);
++ }
++}
++
++static int clean_dirty_subvols(struct reloc_control *rc)
++{
++ struct btrfs_root *root;
++ struct btrfs_root *next;
++ int ret = 0;
++
++ list_for_each_entry_safe(root, next, &rc->dirty_subvol_roots,
++ reloc_dirty_list) {
++ struct btrfs_root *reloc_root = root->reloc_root;
++
++ clear_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state);
++ list_del_init(&root->reloc_dirty_list);
++ root->reloc_root = NULL;
++ if (reloc_root) {
++ int ret2;
++
++ ret2 = btrfs_drop_snapshot(reloc_root, NULL, 0, 1);
++ if (ret2 < 0 && !ret)
++ ret = ret2;
++ }
++ btrfs_put_fs_root(root);
++ }
++ return ret;
++}
++
++/*
+ * merge the relocated tree blocks in reloc tree with corresponding
+ * fs tree.
+ */
+@@ -2291,13 +2347,8 @@ static noinline_for_stack int merge_relo
+ out:
+ btrfs_free_path(path);
+
+- if (err == 0) {
+- memset(&root_item->drop_progress, 0,
+- sizeof(root_item->drop_progress));
+- root_item->drop_level = 0;
+- btrfs_set_root_refs(root_item, 0);
+- btrfs_update_reloc_root(trans, root);
+- }
++ if (err == 0)
++ insert_dirty_subvol(trans, rc, root);
+
+ if (trans)
+ btrfs_end_transaction_throttle(trans, root);
+@@ -2452,14 +2503,6 @@ again:
+ last_snap = btrfs_root_rtransid(&reloc_root->root_item);
+ otransid = btrfs_root_otransid(&reloc_root->root_item);
+ objectid = reloc_root->root_key.offset;
+-
+- ret = btrfs_drop_snapshot(reloc_root, rc->block_rsv, 0, 1);
+- if (ret < 0) {
+- if (list_empty(&reloc_root->root_list))
+- list_add_tail(&reloc_root->root_list,
+- &reloc_roots);
+- goto out;
+- }
+ }
+
+ if (found) {
+@@ -4183,6 +4226,9 @@ restart:
+ goto out_free;
+ }
+ btrfs_commit_transaction(trans, rc->extent_root);
++ ret = clean_dirty_subvols(rc);
++ if (ret < 0 && !err)
++ err = ret;
+ out_free:
+ btrfs_free_block_rsv(rc->extent_root, rc->block_rsv);
+ btrfs_free_path(path);
+@@ -4277,6 +4323,7 @@ static struct reloc_control *alloc_reloc
+ return NULL;
+
+ INIT_LIST_HEAD(&rc->reloc_roots);
++ INIT_LIST_HEAD(&rc->dirty_subvol_roots);
+ backref_cache_init(&rc->backref_cache);
+ mapping_tree_init(&rc->reloc_root_tree);
+ extent_io_tree_init(&rc->processed_blocks,
+@@ -4551,6 +4598,10 @@ int btrfs_recover_relocation(struct btrf
+ goto out_free;
+ }
+ err = btrfs_commit_transaction(trans, rc->extent_root);
++
++ ret = clean_dirty_subvols(rc);
++ if (ret < 0 && !err)
++ err = ret;
+ out_free:
+ kfree(rc);
+ out:
diff --git a/patches.suse/0002-btrfs-qgroup-Introduce-function-to-trace-two-swaped-.patch b/patches.suse/0002-btrfs-qgroup-Introduce-function-to-trace-two-swaped-.patch
new file mode 100644
index 0000000000..dcf2a31757
--- /dev/null
+++ b/patches.suse/0002-btrfs-qgroup-Introduce-function-to-trace-two-swaped-.patch
@@ -0,0 +1,237 @@
+From 25982561db7f43f29305704f9f24ff36ea7d5671 Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Thu, 27 Sep 2018 14:42:30 +0800
+Patch-mainline: v4.19
+Git-commit: 25982561db7f43f29305704f9f24ff36ea7d5671
+References: bsc#1063638
+Subject: [PATCH 2/6] btrfs: qgroup: Introduce function to trace two swaped
+ extents
+
+Introduce a new function, qgroup_trace_extent_swap(), which will be used
+later for balance qgroup speedup.
+
+The basic idea of balance is swapping tree blocks between reloc tree and
+the real file tree.
+
+The swap will happen in highest tree block, but there may be a lot of
+tree blocks involved.
+
+For example:
+ OO = Old tree blocks
+ NN = New tree blocks allocated during balance
+
+ File tree (257) Reloc tree for 257
+L2 OO NN
+ / \ / \
+L1 OO OO (a) OO NN (a)
+ / \ / \ / \ / \
+L0 OO OO OO OO OO OO NN NN
+ (b) (c) (b) (c)
+
+When calling qgroup_trace_extent_swap(), we will pass:
+@src_eb = OO(a)
+@dst_path = [ nodes[1] = NN(a), nodes[0] = NN(c) ]
+@dst_level = 0
+@root_level = 1
+
+In that case, qgroup_trace_extent_swap() will search from OO(a) to
+reach OO(c), then mark both OO(c) and NN(c) as qgroup dirty.
+
+The main work of qgroup_trace_extent_swap() can be split into 3 parts:
+
+1) Tree search from @src_eb
+ It should act as a simplified btrfs_search_slot().
+ The key for search can be extracted from @dst_path->nodes[dst_level]
+ (first key).
+
+2) Mark the final tree blocks in @src_path and @dst_path qgroup dirty
+ NOTE: In above case, OO(a) and NN(a) won't be marked qgroup dirty.
+ They should be marked during the previous (@dst_level = 1) iteration.
+
+3) Mark file extents in leaves dirty
+ We don't have a good way to pick out new file extents only.
+ So we still follow the old method of scanning all file extents in
+ the leaf.
+
+This function can free us from keeping two paths, thus later we only need
+to care about how to iterate all new tree blocks in reloc tree.
+
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+[ copy changelog to function comment ]
+Signed-off-by: David Sterba <dsterba@suse.com>
+---
+ fs/btrfs/qgroup.c | 163 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 163 insertions(+)
+
+--- a/fs/btrfs/qgroup.c
++++ b/fs/btrfs/qgroup.c
+@@ -1629,6 +1629,169 @@ static int adjust_slots_upwards(struct b
+ return 0;
+ }
+
++/*
++ * Helper function to trace a subtree tree block swap.
++ *
++ * The swap will happen in highest tree block, but there may be a lot of
++ * tree blocks involved.
++ *
++ * For example:
++ * OO = Old tree blocks
++ * NN = New tree blocks allocated during balance
++ *
++ * File tree (257) Reloc tree for 257
++ * L2 OO NN
++ * / \ / \
++ * L1 OO OO (a) OO NN (a)
++ * / \ / \ / \ / \
++ * L0 OO OO OO OO OO OO NN NN
++ * (b) (c) (b) (c)
++ *
++ * When calling qgroup_trace_extent_swap(), we will pass:
++ * @src_eb = OO(a)
++ * @dst_path = [ nodes[1] = NN(a), nodes[0] = NN(c) ]
++ * @dst_level = 0
++ * @root_level = 1
++ *
++ * In that case, qgroup_trace_extent_swap() will search from OO(a) to
++ * reach OO(c), then mark both OO(c) and NN(c) as qgroup dirty.
++ *
++ * The main work of qgroup_trace_extent_swap() can be split into 3 parts:
++ *
++ * 1) Tree search from @src_eb
++ * It should act as a simplified btrfs_search_slot().
++ * The key for search can be extracted from @dst_path->nodes[dst_level]
++ * (first key).
++ *
++ * 2) Mark the final tree blocks in @src_path and @dst_path qgroup dirty
++ * NOTE: In above case, OO(a) and NN(a) won't be marked qgroup dirty.
++ * They should be marked during the previous (@dst_level = 1) iteration.
++ *
++ * 3) Mark file extents in leaves dirty
++ * We don't have a good way to pick out new file extents only.
++ * So we still follow the old method of scanning all file extents in
++ * the leaf.
++ *
++ * This function can free us from keeping two paths, thus later we only need
++ * to care about how to iterate all new tree blocks in reloc tree.
++ */
++static int qgroup_trace_extent_swap(struct btrfs_trans_handle* trans,
++ struct extent_buffer *src_eb,
++ struct btrfs_path *dst_path,
++ int dst_level, int root_level)
++{
++ struct btrfs_key key;
++ struct btrfs_path *src_path;
++ struct btrfs_fs_info *fs_info = trans->fs_info;
++ u32 nodesize = fs_info->tree_root->nodesize;
++ int cur_level = root_level;
++ int ret;
++
++ BUG_ON(dst_level > root_level);
++ /* Level mismatch */
++ if (btrfs_header_level(src_eb) != root_level)
++ return -EINVAL;
++
++ src_path = btrfs_alloc_path();
++ if (!src_path) {
++ ret = -ENOMEM;
++ goto out;
++ }
++
++ if (dst_level)
++ btrfs_node_key_to_cpu(dst_path->nodes[dst_level], &key, 0);
++ else
++ btrfs_item_key_to_cpu(dst_path->nodes[dst_level], &key, 0);
++
++ /* For src_path */
++ extent_buffer_get(src_eb);
++ src_path->nodes[root_level] = src_eb;
++ src_path->slots[root_level] = dst_path->slots[root_level];
++ src_path->locks[root_level] = 0;
++
++ /* A simplified version of btrfs_search_slot() */
++ while (cur_level >= dst_level) {
++ struct btrfs_key src_key;
++ struct btrfs_key dst_key;
++
++ if (src_path->nodes[cur_level] == NULL) {
++ struct btrfs_key first_key;
++ struct extent_buffer *eb;
++ int parent_slot;
++ u64 child_gen;
++ u64 child_bytenr;
++
++ eb = src_path->nodes[cur_level + 1];
++ parent_slot = src_path->slots[cur_level + 1];
++ child_bytenr = btrfs_node_blockptr(eb, parent_slot);
++ child_gen = btrfs_node_ptr_generation(eb, parent_slot);
++ btrfs_node_key_to_cpu(eb, &first_key, parent_slot);
++
++ eb = read_tree_block(fs_info->tree_root, child_bytenr,
++ child_gen, cur_level, &first_key);
++ if (IS_ERR(eb)) {
++ ret = PTR_ERR(eb);
++ goto out;
++ } else if (!extent_buffer_uptodate(eb)) {
++ free_extent_buffer(eb);
++ ret = -EIO;
++ goto out;
++ }
++
++ src_path->nodes[cur_level] = eb;
++
++ btrfs_tree_read_lock(eb);
++ btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
++ src_path->locks[cur_level] = BTRFS_READ_LOCK_BLOCKING;
++ }
++
++ src_path->slots[cur_level] = dst_path->slots[cur_level];
++ if (cur_level) {
++ btrfs_node_key_to_cpu(dst_path->nodes[cur_level],
++ &dst_key, dst_path->slots[cur_level]);
++ btrfs_node_key_to_cpu(src_path->nodes[cur_level],
++ &src_key, src_path->slots[cur_level]);
++ } else {
++ btrfs_item_key_to_cpu(dst_path->nodes[cur_level],
++ &dst_key, dst_path->slots[cur_level]);
++ btrfs_item_key_to_cpu(src_path->nodes[cur_level],
++ &src_key, src_path->slots[cur_level]);
++ }
++ /* Content mismatch, something went wrong */
++ if (btrfs_comp_cpu_keys(&dst_key, &src_key)) {
++ ret = -ENOENT;
++ goto out;
++ }
++ cur_level--;
++ }
++
++ /*
++ * Now both @dst_path and @src_path have been populated, record the tree
++ * blocks for qgroup accounting.
++ */
++ ret = btrfs_qgroup_trace_extent(trans, fs_info,
++ src_path->nodes[dst_level]->start, nodesize, GFP_NOFS);
++ if (ret < 0)
++ goto out;
++ ret = btrfs_qgroup_trace_extent(trans, fs_info,
++ dst_path->nodes[dst_level]->start, nodesize, GFP_NOFS);
++ if (ret < 0)
++ goto out;
++
++ /* Record leaf file extents */
++ if (dst_level == 0) {
++ ret = btrfs_qgroup_trace_leaf_items(trans, fs_info->tree_root,
++ src_path->nodes[0]);
++ if (ret < 0)
++ goto out;
++ ret = btrfs_qgroup_trace_leaf_items(trans, fs_info->tree_root,
++ dst_path->nodes[0]);
++ }
++out:
++ btrfs_free_path(src_path);
++ return ret;
++}
++
+ int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct extent_buffer *root_eb,
diff --git a/patches.suse/0002-btrfs-qgroup-Refactor-btrfs_qgroup_trace_subtree_swa.patch b/patches.suse/0002-btrfs-qgroup-Refactor-btrfs_qgroup_trace_subtree_swa.patch
new file mode 100644
index 0000000000..ef28076dec
--- /dev/null
+++ b/patches.suse/0002-btrfs-qgroup-Refactor-btrfs_qgroup_trace_subtree_swa.patch
@@ -0,0 +1,138 @@
+From 5aea1a4fcf1e4fe3daea6f18fb66cbe49439bd8e Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Wed, 23 Jan 2019 15:15:15 +0800
+Patch-mainline: v5.1
+Git-commit: 5aea1a4fcf1e4fe3daea6f18fb66cbe49439bd8e
+References: bsc#1063638
+Subject: [PATCH 2/5] btrfs: qgroup: Refactor btrfs_qgroup_trace_subtree_swap
+
+Refactor btrfs_qgroup_trace_subtree_swap() into
+qgroup_trace_subtree_swap(), which only needs two extent buffers and a
+few booleans to control the behavior.
+
+This provides the basis for later delayed subtree scan work.
+
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+---
+ fs/btrfs/qgroup.c | 77 ++++++++++++++++++++++++++++++++++-------------
+ 1 file changed, 56 insertions(+), 21 deletions(-)
+
+diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
+index 4e473a998219..7a6948989655 100644
+--- a/fs/btrfs/qgroup.c
++++ b/fs/btrfs/qgroup.c
+@@ -2017,6 +2017,59 @@ static int qgroup_trace_new_subtree_blocks(struct btrfs_trans_handle* trans,
+ return ret;
+ }
+
++static int qgroup_trace_subtree_swap(struct btrfs_trans_handle *trans,
++ struct extent_buffer *src_eb,
++ struct extent_buffer *dst_eb,
++ u64 last_snapshot, bool trace_leaf)
++{
++ struct btrfs_fs_info *fs_info = trans->fs_info;
++ struct btrfs_path *dst_path = NULL;
++ int level;
++ int ret;
++
++ if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
++ return 0;
++
++ /* Wrong parameter order */
++ if (btrfs_header_generation(src_eb) > btrfs_header_generation(dst_eb)) {
++ btrfs_err_rl(fs_info,
++ "%s: bad parameter order, src_gen=%llu dst_gen=%llu", __func__,
++ btrfs_header_generation(src_eb),
++ btrfs_header_generation(dst_eb));
++ return -EUCLEAN;
++ }
++
++ if (!extent_buffer_uptodate(src_eb) || !extent_buffer_uptodate(dst_eb)) {
++ ret = -EIO;
++ goto out;
++ }
++
++ level = btrfs_header_level(dst_eb);
++ dst_path = btrfs_alloc_path();
++ if (!dst_path) {
++ ret = -ENOMEM;
++ goto out;
++ }
++ /* For dst_path */
++ extent_buffer_get(dst_eb);
++ dst_path->nodes[level] = dst_eb;
++ dst_path->slots[level] = 0;
++ dst_path->locks[level] = 0;
++
++ /* Do the generation aware breadth-first search */
++ ret = qgroup_trace_new_subtree_blocks(trans, src_eb, dst_path, level,
++ level, last_snapshot, trace_leaf);
++ if (ret < 0)
++ goto out;
++ ret = 0;
++
++out:
++ btrfs_free_path(dst_path);
++ if (ret < 0)
++ fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
++ return ret;
++}
++
+ /*
+ * Inform qgroup to trace subtree swap used in balance.
+ *
+@@ -2042,14 +2095,12 @@ int btrfs_qgroup_trace_subtree_swap(struct btrfs_trans_handle *trans,
+ u64 last_snapshot)
+ {
+ struct btrfs_fs_info *fs_info = trans->fs_info;
+- struct btrfs_path *dst_path = NULL;
+ struct btrfs_key first_key;
+ struct extent_buffer *src_eb = NULL;
+ struct extent_buffer *dst_eb = NULL;
+ bool trace_leaf = false;
+ u64 child_gen;
+ u64 child_bytenr;
+- int level;
+ int ret;
+
+ if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
+@@ -2100,22 +2151,9 @@ int btrfs_qgroup_trace_subtree_swap(struct btrfs_trans_handle *trans,
+ goto out;
+ }
+
+- level = btrfs_header_level(dst_eb);
+- dst_path = btrfs_alloc_path();
+- if (!dst_path) {
+- ret = -ENOMEM;
+- goto out;
+- }
+-
+- /* For dst_path */
+- extent_buffer_get(dst_eb);
+- dst_path->nodes[level] = dst_eb;
+- dst_path->slots[level] = 0;
+- dst_path->locks[level] = 0;
+-
+- /* Do the generation-aware breadth-first search */
+- ret = qgroup_trace_new_subtree_blocks(trans, src_eb, dst_path, level,
+- level, last_snapshot, trace_leaf);
++ /* Do the generation aware breadth-first search */
++ ret = qgroup_trace_subtree_swap(trans, src_eb, dst_eb, last_snapshot,
++ trace_leaf);
+ if (ret < 0)
+ goto out;
+ ret = 0;
+@@ -2123,9 +2161,6 @@ int btrfs_qgroup_trace_subtree_swap(struct btrfs_trans_handle *trans,
+ out:
+ free_extent_buffer(src_eb);
+ free_extent_buffer(dst_eb);
+- btrfs_free_path(dst_path);
+- if (ret < 0)
+- fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
+ return ret;
+ }
+
+--
+2.21.0
+
diff --git a/patches.suse/0003-btrfs-qgroup-Introduce-function-to-find-all-new-tree.patch b/patches.suse/0003-btrfs-qgroup-Introduce-function-to-find-all-new-tree.patch
new file mode 100644
index 0000000000..75f52dac6e
--- /dev/null
+++ b/patches.suse/0003-btrfs-qgroup-Introduce-function-to-find-all-new-tree.patch
@@ -0,0 +1,184 @@
+From ea49f3e73c4b7252c1569906c1b2cd54605af3c9 Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Thu, 27 Sep 2018 14:42:31 +0800
+Patch-mainline: v4.19
+Git-commit: ea49f3e73c4b7252c1569906c1b2cd54605af3c9
+References: bsc#1063638
+Subject: [PATCH 3/6] btrfs: qgroup: Introduce function to find all new tree
+ blocks of reloc tree
+
+Introduce a new function, qgroup_trace_new_subtree_blocks(), to iterate
+all new tree blocks in a reloc tree, so that qgroup can skip unrelated
+tree blocks during balance, which should hugely speed up balance when
+quota is enabled.
+
+The function qgroup_trace_new_subtree_blocks() itself only cares about
+new tree blocks in the reloc tree.
+
+Its main work consists of:
+
+1) Read out tree blocks according to parent pointers
+
+2) Do recursive depth-first search
+ It will call the same function on all its child tree blocks, with
+ the search level set to current level - 1.
+ It will also skip all children whose generation is smaller than
+ @last_snapshot.
+
+3) Call qgroup_trace_extent_swap() to trace tree blocks
+
+So although we have a parameter list related to the source file tree, it's
+not used at all, but only passed to qgroup_trace_extent_swap().
+Thus, apart from the tree read code, the core should be pretty short and
+all about the recursive depth-first search.
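
As a rough sketch of that core idea, here is a toy, self-contained C example of
a recursive depth-first walk that prunes every child whose generation is older
than last_snapshot. It is not the kernel code and all names are invented for the
illustration; the real function also reads each child block from disk, takes
read locks and calls qgroup_trace_extent_swap() on every block it visits.

#include <stdio.h>

struct toy_block {
        const char *name;              /* e.g. "NN(b)"                     */
        unsigned long long gen;        /* generation the block was written */
        int nr_children;
        struct toy_block *children[4];
};

/*
 * Recursively visit only the blocks that are new enough to have been
 * created by the current balance (gen >= last_snapshot); older subtrees
 * are still shared with the source tree and can be skipped entirely.
 */
static void toy_trace_new_blocks(const struct toy_block *block,
                                 unsigned long long last_snapshot)
{
        printf("trace %s (gen %llu)\n", block->name, block->gen);

        for (int i = 0; i < block->nr_children; i++) {
                const struct toy_block *child = block->children[i];

                if (child->gen < last_snapshot)
                        continue;      /* old block, untouched by balance */
                toy_trace_new_blocks(child, last_snapshot);
        }
}

int main(void)
{
        /* The NN(b) subtree from the example: one old child, one new child. */
        struct toy_block oo_c = { "OO(c)", 5, 0, { 0 } };
        struct toy_block nn_d = { "NN(d)", 10, 0, { 0 } };
        struct toy_block nn_b = { "NN(b)", 10, 2, { &oo_c, &nn_d } };

        toy_trace_new_blocks(&nn_b, 10);        /* last_snapshot == 10 */
        return 0;
}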
+
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+---
+ fs/btrfs/qgroup.c | 135 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 135 insertions(+)
+
+--- a/fs/btrfs/qgroup.c
++++ b/fs/btrfs/qgroup.c
+@@ -1792,6 +1792,141 @@ out:
+ return ret;
+ }
+
++/*
++ * Helper function to do recursive generation-aware depth-first search, to
++ * locate all new tree blocks in a subtree of reloc tree.
++ *
++ * E.g. (OO = Old tree blocks, NN = New tree blocks, whose gen == last_snapshot)
++ * reloc tree
++ * L2 NN (a)
++ * / \
++ * L1 OO NN (b)
++ * / \ / \
++ * L0 OO OO OO NN
++ * (c) (d)
++ * If we pass:
++ * @dst_path = [ nodes[1] = NN(b), nodes[0] = NULL ],
++ * @cur_level = 1
++ * @root_level = 1
++ *
++ * We will iterate through tree blocks NN(b) and NN(d) and inform qgroup to
++ * trace the above tree blocks along with their counterparts in the file tree.
++ * During the search, old tree block OO(c) will be skipped as the tree block
++ * swap won't affect OO(c).
++ */
++static int qgroup_trace_new_subtree_blocks(struct btrfs_trans_handle* trans,
++ struct extent_buffer *src_eb,
++ struct btrfs_path *dst_path,
++ int cur_level, int root_level,
++ u64 last_snapshot)
++{
++ struct btrfs_fs_info *fs_info = trans->fs_info;
++ struct extent_buffer *eb;
++ bool need_cleanup = false;
++ int ret = 0;
++ int i;
++
++ /* Level sanity check */
++ if (cur_level < 0 || cur_level >= BTRFS_MAX_LEVEL ||
++ root_level < 0 || root_level >= BTRFS_MAX_LEVEL ||
++ root_level < cur_level) {
++ btrfs_err_rl(fs_info,
++ "%s: bad levels, cur_level=%d root_level=%d",
++ __func__, cur_level, root_level);
++ return -EUCLEAN;
++ }
++
++ /* Read the tree block if needed */
++ if (dst_path->nodes[cur_level] == NULL) {
++ struct btrfs_key first_key;
++ int parent_slot;
++ u64 child_gen;
++ u64 child_bytenr;
++
++ /*
++ * dst_path->nodes[root_level] must be initialized before
++ * calling this function.
++ */
++ if (cur_level == root_level) {
++ btrfs_err_rl(fs_info,
++ "%s: dst_path->nodes[%d] not initialized, root_level=%d cur_level=%d",
++ __func__, root_level, root_level, cur_level);
++ return -EUCLEAN;
++ }
++
++ /*
++ * We need to get child blockptr/gen from parent before we can
++ * read it.
++ */
++ eb = dst_path->nodes[cur_level + 1];
++ parent_slot = dst_path->slots[cur_level + 1];
++ child_bytenr = btrfs_node_blockptr(eb, parent_slot);
++ child_gen = btrfs_node_ptr_generation(eb, parent_slot);
++ btrfs_node_key_to_cpu(eb, &first_key, parent_slot);
++
++ /* This node is old, no need to trace */
++ if (child_gen < last_snapshot)
++ goto out;
++
++ eb = read_tree_block(fs_info->tree_root, child_bytenr, child_gen,
++ cur_level, &first_key);
++ if (IS_ERR(eb)) {
++ ret = PTR_ERR(eb);
++ goto out;
++ } else if (!extent_buffer_uptodate(eb)) {
++ free_extent_buffer(eb);
++ ret = -EIO;
++ goto out;
++ }
++
++ dst_path->nodes[cur_level] = eb;
++ dst_path->slots[cur_level] = 0;
++
++ btrfs_tree_read_lock(eb);
++ btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
++ dst_path->locks[cur_level] = BTRFS_READ_LOCK_BLOCKING;
++ need_cleanup = true;
++ }
++
++ /* Now record this tree block and its counter part for qgroups */
++ ret = qgroup_trace_extent_swap(trans, src_eb, dst_path, cur_level,
++ root_level);
++ if (ret < 0)
++ goto cleanup;
++
++ eb = dst_path->nodes[cur_level];
++
++ if (cur_level > 0) {
++ /* Iterate all child tree blocks */
++ for (i = 0; i < btrfs_header_nritems(eb); i++) {
++ /* Skip old tree blocks as they won't be swapped */
++ if (btrfs_node_ptr_generation(eb, i) < last_snapshot)
++ continue;
++ dst_path->slots[cur_level] = i;
++
++ /* Recursive call (at most 7 times) */
++ ret = qgroup_trace_new_subtree_blocks(trans, src_eb,
++ dst_path, cur_level - 1, root_level,
++ last_snapshot);
++ if (ret < 0)
++ goto cleanup;
++ }
++ }
++
++cleanup:
++ if (need_cleanup) {
++ /* Clean up */
++ btrfs_tree_unlock_rw(dst_path->nodes[cur_level],
++ dst_path->locks[cur_level]);
++ free_extent_buffer(dst_path->nodes[cur_level]);
++ dst_path->nodes[cur_level] = NULL;
++ dst_path->slots[cur_level] = 0;
++ dst_path->locks[cur_level] = 0;
++ }
++out:
++ return ret;
++}
++
+ int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct extent_buffer *root_eb,
diff --git a/patches.suse/0003-btrfs-qgroup-Introduce-per-root-swapped-blocks-infra.patch b/patches.suse/0003-btrfs-qgroup-Introduce-per-root-swapped-blocks-infra.patch
new file mode 100644
index 0000000000..52ea6954f2
--- /dev/null
+++ b/patches.suse/0003-btrfs-qgroup-Introduce-per-root-swapped-blocks-infra.patch
@@ -0,0 +1,415 @@
+From 370a11b8114bcca3738fe6a5d7ed8babcc212f39 Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Wed, 23 Jan 2019 15:15:16 +0800
+Patch-mainline: v5.1
+Git-commit: 370a11b8114bcca3738fe6a5d7ed8babcc212f39
+References: bsc#1063638
+Subject: [PATCH 3/5] btrfs: qgroup: Introduce per-root swapped blocks
+ infrastructure
+
+To allow delayed subtree swap rescan, btrfs needs to record per-root
+information about which tree blocks get swapped. This patch introduces
+the required infrastructure.
+
+The designed workflow will be:
+
+1) Record the subtree root block that gets swapped.
+
+ During subtree swap:
+ O = Old tree blocks
+ N = New tree blocks
+ reloc tree subvolume tree X
+ Root Root
+ / \ / \
+ NA OB OA OB
+ / | | \ / | | \
+ NC ND OE OF OC OD OE OF
+
+ In this case, NA and OA are going to be swapped, record (NA, OA) into
+ subvolume tree X.
+
+2) After subtree swap.
+ reloc tree subvolume tree X
+ Root Root
+ / \ / \
+ OA OB NA OB
+ / | | \ / | | \
+ OC OD OE OF NC ND OE OF
+
+3a) COW happens for OB
+ If we are going to COW tree block OB, we check OB's bytenr against
+ tree X's swapped_blocks structure.
+ If it doesn't fit any, nothing will happen.
+
+3b) COW happens for NA
+ Check NA's bytenr against tree X's swapped_blocks, and get a hit.
+ Then we do subtree scan on both subtrees OA and NA.
+ This results in 6 tree blocks to be scanned (OA, OC, OD, NA, NC, ND).
+
+ Then no matter what we do to subvolume tree X, qgroup numbers will
+ still be correct.
+ Then NA's record gets removed from X's swapped_blocks.
+
+4) Transaction commit
+ Any record in X's swapped_blocks gets removed, since there is no
+ modification to swapped subtrees, no need to trigger heavy qgroup
+ subtree rescan for them.
+
+This will introduce 128 bytes of overhead for each btrfs_root even if
+qgroup is not enabled. This is to reduce memory allocations and potential
+failures.
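
To illustrate the lifecycle in steps 1) to 4), here is a toy, self-contained C
sketch. It is not the kernel code; the real implementation keeps one rb-tree per
tree level under a spinlock, while the flat array and every toy_* name below are
invented for the illustration.

#include <stdio.h>

struct toy_swapped {
        unsigned long long subvol_bytenr;   /* subtree root bytenr after the swap */
        unsigned long long reloc_bytenr;    /* its counterpart in the reloc tree  */
};

#define TOY_MAX_SWAPPED 16
static struct toy_swapped toy_swapped_blocks[TOY_MAX_SWAPPED];
static int toy_nr_swapped;

/* Step 1: at swap time, only remember the (subvolume, reloc) root pair. */
static void toy_record_swap(unsigned long long subvol, unsigned long long reloc)
{
        if (toy_nr_swapped < TOY_MAX_SWAPPED)
                toy_swapped_blocks[toy_nr_swapped++] =
                        (struct toy_swapped){ subvol, reloc };
}

/*
 * Steps 3a/3b: at COW time, look the block up by bytenr.  A hit means the
 * delayed subtree trace has to run now, and the record is dropped.
 */
static int toy_cow_check(unsigned long long bytenr)
{
        for (int i = 0; i < toy_nr_swapped; i++) {
                if (toy_swapped_blocks[i].subvol_bytenr != bytenr)
                        continue;
                printf("COW of %llu: trace subtrees %llu and %llu now\n", bytenr,
                       toy_swapped_blocks[i].subvol_bytenr,
                       toy_swapped_blocks[i].reloc_bytenr);
                toy_swapped_blocks[i] = toy_swapped_blocks[--toy_nr_swapped];
                return 1;
        }
        return 0;       /* not a swapped subtree root, nothing to do */
}

/* Step 4: whatever survives until transaction commit is simply dropped. */
static void toy_transaction_commit(void)
{
        toy_nr_swapped = 0;
}

int main(void)
{
        toy_record_swap(1000, 2000);    /* NA <-> OA from the example above */
        toy_cow_check(3000);            /* OB: no record, nothing happens   */
        toy_cow_check(1000);            /* NA: hit, delayed trace runs      */
        toy_transaction_commit();       /* leftover records are cleared     */
        return 0;
}

The point is that recording a swap stays cheap, and the expensive subtree trace
only runs for subtree roots that are actually COWed before the transaction
commits.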
+
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+---
+ fs/btrfs/ctree.h | 14 ++++
+ fs/btrfs/disk-io.c | 1
+ fs/btrfs/qgroup.c | 150 +++++++++++++++++++++++++++++++++++++++++++++++++
+ fs/btrfs/qgroup.h | 93 ++++++++++++++++++++++++++++++
+ fs/btrfs/relocation.c | 7 ++
+ fs/btrfs/transaction.c | 1
+ 6 files changed, 266 insertions(+)
+
+--- a/fs/btrfs/ctree.h
++++ b/fs/btrfs/ctree.h
+@@ -1124,6 +1124,17 @@ struct btrfs_subvolume_writers {
+ #define BTRFS_ROOT_DEAD_RELOC_TREE 11
+
+ /*
++ * Record swapped tree blocks of a subvolume tree for delayed subtree trace
++ * code. For detail check comment in fs/btrfs/qgroup.c.
++ */
++struct btrfs_qgroup_swapped_blocks {
++ spinlock_t lock;
++ /* RM_EMPTY_ROOT() of above blocks[] */
++ bool swapped;
++ struct rb_root blocks[BTRFS_MAX_LEVEL];
++};
++
++/*
+ * in ram representation of the tree. extent_root is used for all allocations
+ * and for the extent tree extent_root root.
+ */
+@@ -1183,6 +1194,9 @@ struct btrfs_root {
+
+ u64 highest_objectid;
+
++ /* Record pairs of swapped blocks for qgroup */
++ struct btrfs_qgroup_swapped_blocks swapped_blocks;
++
+ #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+ /* only used with CONFIG_BTRFS_FS_RUN_SANITY_TESTS is enabled */
+ u64 alloc_bytenr;
+--- a/fs/btrfs/disk-io.c
++++ b/fs/btrfs/disk-io.c
+@@ -1292,6 +1292,7 @@ static void __setup_root(u32 nodesize, u
+ root->anon_dev = 0;
+
+ spin_lock_init(&root->root_item_lock);
++ btrfs_qgroup_init_swapped_blocks(&root->swapped_blocks);
+ }
+
+ static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info,
+--- a/fs/btrfs/qgroup.c
++++ b/fs/btrfs/qgroup.c
+@@ -3768,3 +3768,153 @@ void btrfs_qgroup_check_reserved_leak(st
+ }
+ extent_changeset_release(&changeset);
+ }
++
++void btrfs_qgroup_init_swapped_blocks(
++ struct btrfs_qgroup_swapped_blocks *swapped_blocks)
++{
++ int i;
++
++ spin_lock_init(&swapped_blocks->lock);
++ for (i = 0; i < BTRFS_MAX_LEVEL; i++)
++ swapped_blocks->blocks[i] = RB_ROOT;
++ swapped_blocks->swapped = false;
++}
++
++/*
++ * Delete all swapped blocks record of @root.
++ * Every record here means we skipped a full subtree scan for qgroup.
++ *
++ * Gets called when committing one transaction.
++ */
++void btrfs_qgroup_clean_swapped_blocks(struct btrfs_root *root)
++{
++ struct btrfs_qgroup_swapped_blocks *swapped_blocks;
++ int i;
++
++ swapped_blocks = &root->swapped_blocks;
++
++ spin_lock(&swapped_blocks->lock);
++ if (!swapped_blocks->swapped)
++ goto out;
++ for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
++ struct rb_root *cur_root = &swapped_blocks->blocks[i];
++ struct btrfs_qgroup_swapped_block *entry;
++ struct btrfs_qgroup_swapped_block *next;
++
++ rbtree_postorder_for_each_entry_safe(entry, next, cur_root,
++ node)
++ kfree(entry);
++ swapped_blocks->blocks[i] = RB_ROOT;
++ }
++ swapped_blocks->swapped = false;
++out:
++ spin_unlock(&swapped_blocks->lock);
++}
++
++/*
++ * Add subtree roots record into @subvol_root.
++ *
++ * @subvol_root: tree root of the subvolume tree that gets swapped
++ * @bg: block group under balance
++ * @subvol_parent/slot: pointer to the subtree root in subvolume tree
++ * @reloc_parent/slot: pointer to the subtree root in reloc tree
++ * BOTH POINTERS ARE BEFORE TREE SWAP
++ * @last_snapshot: last snapshot generation of the subvolume tree
++ */
++int btrfs_qgroup_add_swapped_blocks(struct btrfs_trans_handle *trans,
++ struct btrfs_root *subvol_root,
++ struct btrfs_block_group_cache *bg,
++ struct extent_buffer *subvol_parent, int subvol_slot,
++ struct extent_buffer *reloc_parent, int reloc_slot,
++ u64 last_snapshot)
++{
++ struct btrfs_fs_info *fs_info = subvol_root->fs_info;
++ struct btrfs_qgroup_swapped_blocks *blocks = &subvol_root->swapped_blocks;
++ struct btrfs_qgroup_swapped_block *block;
++ struct rb_node **cur;
++ struct rb_node *parent = NULL;
++ int level = btrfs_header_level(subvol_parent) - 1;
++ int ret = 0;
++
++ if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
++ return 0;
++
++ if (btrfs_node_ptr_generation(subvol_parent, subvol_slot) >
++ btrfs_node_ptr_generation(reloc_parent, reloc_slot)) {
++ btrfs_err_rl(fs_info,
++ "%s: bad parameter order, subvol_gen=%llu reloc_gen=%llu",
++ __func__,
++ btrfs_node_ptr_generation(subvol_parent, subvol_slot),
++ btrfs_node_ptr_generation(reloc_parent, reloc_slot));
++ return -EUCLEAN;
++ }
++
++ block = kmalloc(sizeof(*block), GFP_NOFS);
++ if (!block) {
++ ret = -ENOMEM;
++ goto out;
++ }
++
++ /*
++ * @reloc_parent/slot is still before swap, while @block is going to
++ * record the bytenr after swap, so we do the swap here.
++ */
++ block->subvol_bytenr = btrfs_node_blockptr(reloc_parent, reloc_slot);
++ block->subvol_generation = btrfs_node_ptr_generation(reloc_parent,
++ reloc_slot);
++ block->reloc_bytenr = btrfs_node_blockptr(subvol_parent, subvol_slot);
++ block->reloc_generation = btrfs_node_ptr_generation(subvol_parent,
++ subvol_slot);
++ block->last_snapshot = last_snapshot;
++ block->level = level;
++ if (bg->flags & BTRFS_BLOCK_GROUP_DATA)
++ block->trace_leaf = true;
++ else
++ block->trace_leaf = false;
++ btrfs_node_key_to_cpu(reloc_parent, &block->first_key, reloc_slot);
++
++ /* Insert @block into @blocks */
++ spin_lock(&blocks->lock);
++ cur = &blocks->blocks[level].rb_node;
++ while (*cur) {
++ struct btrfs_qgroup_swapped_block *entry;
++
++ parent = *cur;
++ entry = rb_entry(parent, struct btrfs_qgroup_swapped_block,
++ node);
++
++ if (entry->subvol_bytenr < block->subvol_bytenr) {
++ cur = &(*cur)->rb_left;
++ } else if (entry->subvol_bytenr > block->subvol_bytenr) {
++ cur = &(*cur)->rb_right;
++ } else {
++ if (entry->subvol_generation !=
++ block->subvol_generation ||
++ entry->reloc_bytenr != block->reloc_bytenr ||
++ entry->reloc_generation !=
++ block->reloc_generation) {
++ /*
++ * Duplicated but mismatch entry found.
++ * Shouldn't happen.
++ *
++ * Marking qgroup inconsistent should be enough
++ * for end users.
++ */
++ WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
++ ret = -EEXIST;
++ }
++ kfree(block);
++ goto out_unlock;
++ }
++ }
++ rb_link_node(&block->node, parent, cur);
++ rb_insert_color(&block->node, &blocks->blocks[level]);
++ blocks->swapped = true;
++out_unlock:
++ spin_unlock(&blocks->lock);
++out:
++ if (ret < 0)
++ fs_info->qgroup_flags |=
++ BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
++ return ret;
++}
+--- a/fs/btrfs/qgroup.h
++++ b/fs/btrfs/qgroup.h
+@@ -19,6 +19,8 @@
+ #ifndef __BTRFS_QGROUP__
+ #define __BTRFS_QGROUP__
+
++#include <linux/spinlock.h>
++#include <linux/rbtree.h>
+ #include "ulist.h"
+ #include "delayed-ref.h"
+
+@@ -62,6 +64,66 @@ enum btrfs_qgroup_rsv_type {
+ };
+
+ /*
++ * Special performance optimization for balance.
++ *
++ * For balance, we need to swap subtree of subvolume and reloc trees.
++ * In theory, we need to trace all subtree blocks of both subvolume and reloc
++ * trees, since their owner has changed during such swap.
++ *
++ * However since balance has ensured that both subtrees are containing the
++ * same contents and have the same tree structures, such swap won't cause
++ * qgroup number change.
++ *
++ * But there is a race window between subtree swap and transaction commit,
++ * during that window, if we increase/decrease tree level or merge/split tree
++ * blocks, we still need to trace the original subtrees.
++ *
++ * So for balance, we use a delayed subtree tracing, whose workflow is:
++ *
++ * 1) Record the subtree root block that gets swapped.
++ *
++ * During subtree swap:
++ * O = Old tree blocks
++ * N = New tree blocks
++ * reloc tree subvolume tree X
++ * Root Root
++ * / \ / \
++ * NA OB OA OB
++ * / | | \ / | | \
++ * NC ND OE OF OC OD OE OF
++ *
++ * In this case, NA and OA are going to be swapped, record (NA, OA) into
++ * subvolume tree X.
++ *
++ * 2) After subtree swap.
++ * reloc tree subvolume tree X
++ * Root Root
++ * / \ / \
++ * OA OB NA OB
++ * / | | \ / | | \
++ * OC OD OE OF NC ND OE OF
++ *
++ * 3a) COW happens for OB
++ * If we are going to COW tree block OB, we check OB's bytenr against
++ * tree X's swapped_blocks structure.
++ * If it doesn't fit any, nothing will happen.
++ *
++ * 3b) COW happens for NA
++ * Check NA's bytenr against tree X's swapped_blocks, and get a hit.
++ * Then we do subtree scan on both subtrees OA and NA.
++ * This results in 6 tree blocks to be scanned (OA, OC, OD, NA, NC, ND).
++ *
++ * Then no matter what we do to subvolume tree X, qgroup numbers will
++ * still be correct.
++ * Then NA's record gets removed from X's swapped_blocks.
++ *
++ * 4) Transaction commit
++ * Any record in X's swapped_blocks gets removed, since there is no
++ * modification to the swapped subtrees, no need to trigger heavy qgroup
++ * subtree rescan for them.
++ */
++
++/*
+ * Represents how many bytes we have reserved for this qgroup.
+ *
+ * Each type should have different reservation behavior.
+@@ -75,6 +137,24 @@ struct btrfs_qgroup_rsv {
+ u64 values[BTRFS_QGROUP_RSV_LAST];
+ };
+
++struct btrfs_qgroup_swapped_block {
++ struct rb_node node;
++
++ int level;
++ bool trace_leaf;
++
++ /* bytenr/generation of the tree block in subvolume tree after swap */
++ u64 subvol_bytenr;
++ u64 subvol_generation;
++
++ /* bytenr/generation of the tree block in reloc tree after swap */
++ u64 reloc_bytenr;
++ u64 reloc_generation;
++
++ u64 last_snapshot;
++ struct btrfs_key first_key;
++};
++
+ /*
+ * one struct for each qgroup, organized in fs_info->qgroup_tree.
+ */
+@@ -298,4 +378,17 @@ void btrfs_qgroup_free_meta_all_pertrans
+ void btrfs_qgroup_convert_reserved_meta(struct btrfs_root *root, int num_bytes);
+
+ void btrfs_qgroup_check_reserved_leak(struct inode *inode);
++
++/* btrfs_qgroup_swapped_blocks related functions */
++void btrfs_qgroup_init_swapped_blocks(
++ struct btrfs_qgroup_swapped_blocks *swapped_blocks);
++
++void btrfs_qgroup_clean_swapped_blocks(struct btrfs_root *root);
++int btrfs_qgroup_add_swapped_blocks(struct btrfs_trans_handle *trans,
++ struct btrfs_root *subvol_root,
++ struct btrfs_block_group_cache *bg,
++ struct extent_buffer *subvol_parent, int subvol_slot,
++ struct extent_buffer *reloc_parent, int reloc_slot,
++ u64 last_snapshot);
++
+ #endif /* __BTRFS_QGROUP__ */
+--- a/fs/btrfs/relocation.c
++++ b/fs/btrfs/relocation.c
+@@ -1934,6 +1934,13 @@ again:
+ if (ret < 0)
+ break;
+
++ btrfs_node_key_to_cpu(parent, &first_key, slot);
++ ret = btrfs_qgroup_add_swapped_blocks(trans, dest,
++ rc->block_group, parent, slot,
++ path->nodes[level], path->slots[level],
++ last_snapshot);
++ if (ret < 0)
++ break;
+ /*
+ * swap blocks in fs tree and reloc tree.
+ */
+--- a/fs/btrfs/transaction.c
++++ b/fs/btrfs/transaction.c
+@@ -140,6 +140,7 @@ static noinline void switch_commit_roots
+ if (is_fstree(root->objectid))
+ btrfs_unpin_free_ino(root);
+ clear_btree_io_tree(&root->dirty_log_pages);
++ btrfs_qgroup_clean_swapped_blocks(root);
+ }
+
+ /* We can free old roots now. */
diff --git a/patches.suse/0004-btrfs-qgroup-Use-delayed-subtree-rescan-for-balance.patch b/patches.suse/0004-btrfs-qgroup-Use-delayed-subtree-rescan-for-balance.patch
new file mode 100644
index 0000000000..7d730a9677
--- /dev/null
+++ b/patches.suse/0004-btrfs-qgroup-Use-delayed-subtree-rescan-for-balance.patch
@@ -0,0 +1,226 @@
+From f616f5cd9da7fceb7d884812da380b26040cd083 Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Wed, 23 Jan 2019 15:15:17 +0800
+Patch-mainline: v5.1
+Git-commit: f616f5cd9da7fceb7d884812da380b26040cd083
+References: bsc#1063638
+Subject: [PATCH 4/5] btrfs: qgroup: Use delayed subtree rescan for balance
+
+Before this patch, qgroup code traces the whole subtree of subvolume and
+reloc trees unconditionally.
+
+This makes qgroup numbers consistent, but it could cause tons of
+unnecessary extent tracing, which causes a lot of overhead.
+
+However, for the subtree swap done by balance, we can just swap both
+subtrees because they contain the same contents and tree structure, so
+qgroup numbers won't change.
+
+It's the race window between subtree swap and transaction commit that
+could cause qgroup numbers to change.
+
+This patch will delay the qgroup subtree scan until COW happens for the
+subtree root.
+
+So if there are no other operations on the fs, balance won't cause extra
+qgroup overhead (best case scenario).
+Depending on the workload, most of the subtree scans can still be
+avoided.
+
+Only in the worst case scenario will it fall back to the old subtree swap
+overhead (scanning all swapped subtrees).
+
+[[Benchmark]]
+Hardware:
+ VM 4G vRAM, 8 vCPUs,
+ disk is using 'unsafe' cache mode,
+ backing device is SAMSUNG 850 evo SSD.
+ Host has 16G ram.
+
+Mkfs parameter:
+ --nodesize 4K (To bump up tree size)
+
+Initial subvolume contents:
+ 4G data copied from /usr and /lib.
+ (With enough regular small files)
+
+Snapshots:
+ 16 snapshots of the original subvolume.
+ each snapshot has 3 random files modified.
+
+balance parameter:
+ -m
+
+So the content should be pretty similar to a real world root fs layout.
+
+And after file system population, there is no other activity, so it
+should be the best case scenario.
+
+ | v4.20-rc1 | w/ patchset | diff
+-----------------------------------------------------------------------
+relocated extents | 22615 | 22457 | -0.1%
+qgroup dirty extents | 163457 | 121606 | -25.6%
+time (sys) | 22.884s | 18.842s | -17.6%
+time (real) | 27.724s | 22.884s | -17.5%
+
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+---
+ fs/btrfs/ctree.c | 8 ++++
+ fs/btrfs/qgroup.c | 88 ++++++++++++++++++++++++++++++++++++++++++++++++++
+ fs/btrfs/qgroup.h | 2 +
+ fs/btrfs/relocation.c | 14 ++-----
+ 4 files changed, 103 insertions(+), 9 deletions(-)
+
+--- a/fs/btrfs/ctree.c
++++ b/fs/btrfs/ctree.c
+@@ -25,6 +25,7 @@
+ #include "transaction.h"
+ #include "print-tree.h"
+ #include "locking.h"
++#include "qgroup.h"
+
+ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
+ *root, struct btrfs_path *path, int level);
+@@ -1554,6 +1555,13 @@ noinline int btrfs_cow_block(struct btrf
+ btrfs_set_lock_blocking(parent);
+ btrfs_set_lock_blocking(buf);
+
++ /*
++ * Before CoWing this block for later modification, check if it's
++ * the subtree root and do the delayed subtree trace if needed.
++ *
++ * Also we don't care about the error, as it's handled internally.
++ */
++ btrfs_qgroup_trace_subtree_after_cow(trans, root, buf);
+ ret = __btrfs_cow_block(trans, root, buf, parent,
+ parent_slot, cow_ret, search_start, 0);
+
+--- a/fs/btrfs/qgroup.c
++++ b/fs/btrfs/qgroup.c
+@@ -3918,3 +3918,91 @@ out:
+ BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
+ return ret;
+ }
++
++/*
++ * Check if the tree block is a subtree root, and if so do the needed
++ * delayed subtree trace for qgroup.
++ *
++ * This is called during btrfs_cow_block().
++ */
++int btrfs_qgroup_trace_subtree_after_cow(struct btrfs_trans_handle *trans,
++ struct btrfs_root *root,
++ struct extent_buffer *subvol_eb)
++{
++ struct btrfs_fs_info *fs_info = root->fs_info;
++ struct btrfs_qgroup_swapped_blocks *blocks = &root->swapped_blocks;
++ struct btrfs_qgroup_swapped_block *block;
++ struct extent_buffer *reloc_eb = NULL;
++ struct rb_node *node;
++ bool found = false;
++ bool swapped = false;
++ int level = btrfs_header_level(subvol_eb);
++ int ret = 0;
++ int i;
++
++ if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
++ return 0;
++ if (!is_fstree(root->root_key.objectid) || !root->reloc_root)
++ return 0;
++
++ spin_lock(&blocks->lock);
++ if (!blocks->swapped) {
++ spin_unlock(&blocks->lock);
++ return 0;
++ }
++ node = blocks->blocks[level].rb_node;
++
++ while (node) {
++ block = rb_entry(node, struct btrfs_qgroup_swapped_block, node);
++ if (block->subvol_bytenr < subvol_eb->start) {
++ node = node->rb_left;
++ } else if (block->subvol_bytenr > subvol_eb->start) {
++ node = node->rb_right;
++ } else {
++ found = true;
++ break;
++ }
++ }
++ if (!found) {
++ spin_unlock(&blocks->lock);
++ goto out;
++ }
++ /* Found one, remove it from @blocks first and update blocks->swapped */
++ rb_erase(&block->node, &blocks->blocks[level]);
++ for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
++ if (RB_EMPTY_ROOT(&blocks->blocks[i])) {
++ swapped = true;
++ break;
++ }
++ }
++ blocks->swapped = swapped;
++ spin_unlock(&blocks->lock);
++
++ /* Read out reloc subtree root */
++ reloc_eb = read_tree_block(fs_info->tree_root, block->reloc_bytenr,
++ block->reloc_generation, block->level,
++ &block->first_key);
++ if (IS_ERR(reloc_eb)) {
++ ret = PTR_ERR(reloc_eb);
++ reloc_eb = NULL;
++ goto free_out;
++ }
++ if (!extent_buffer_uptodate(reloc_eb)) {
++ ret = -EIO;
++ goto free_out;
++ }
++
++ ret = qgroup_trace_subtree_swap(trans, reloc_eb, subvol_eb,
++ block->last_snapshot, block->trace_leaf);
++free_out:
++ kfree(block);
++ free_extent_buffer(reloc_eb);
++out:
++ if (ret < 0) {
++ btrfs_err_rl(fs_info,
++ "failed to account subtree at bytenr %llu: %d",
++ subvol_eb->start, ret);
++ fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
++ }
++ return ret;
++}
+--- a/fs/btrfs/qgroup.h
++++ b/fs/btrfs/qgroup.h
+@@ -390,5 +390,7 @@ int btrfs_qgroup_add_swapped_blocks(stru
+ struct extent_buffer *subvol_parent, int subvol_slot,
+ struct extent_buffer *reloc_parent, int reloc_slot,
+ u64 last_snapshot);
++int btrfs_qgroup_trace_subtree_after_cow(struct btrfs_trans_handle *trans,
++ struct btrfs_root *root, struct extent_buffer *eb);
+
+ #endif /* __BTRFS_QGROUP__ */
+--- a/fs/btrfs/relocation.c
++++ b/fs/btrfs/relocation.c
+@@ -1925,16 +1925,12 @@ again:
+ * If not traced, we will leak data numbers
+ * 2) Fs subtree
+ * If not traced, we will double count old data
+- * and tree block numbers, if current trans doesn't free
+- * data reloc tree inode.
++ *
++ * We don't scan the subtree right now, but only record
++ * the swapped tree blocks.
++ * The real subtree rescan is delayed until we have new
++ * CoW on the subtree root node before transaction commit.
+ */
+- ret = btrfs_qgroup_trace_subtree_swap(trans, rc->block_group,
+- parent, slot, path->nodes[level],
+- path->slots[level], last_snapshot);
+- if (ret < 0)
+- break;
+-
+- btrfs_node_key_to_cpu(parent, &first_key, slot);
+ ret = btrfs_qgroup_add_swapped_blocks(trans, dest,
+ rc->block_group, parent, slot,
+ path->nodes[level], path->slots[level],
diff --git a/patches.suse/0004-btrfs-qgroup-Use-generation-aware-subtree-swap-to-ma.patch b/patches.suse/0004-btrfs-qgroup-Use-generation-aware-subtree-swap-to-ma.patch
new file mode 100644
index 0000000000..c1c7574c19
--- /dev/null
+++ b/patches.suse/0004-btrfs-qgroup-Use-generation-aware-subtree-swap-to-ma.patch
@@ -0,0 +1,213 @@
+From 5f527822be40104e9056c981ff06c7750153a10a Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Thu, 27 Sep 2018 14:42:32 +0800
+Patch-mainline: v4.19
+Git-commit: 5f527822be40104e9056c981ff06c7750153a10a
+References: bsc#1063638
+Subject: [PATCH 4/6] btrfs: qgroup: Use generation-aware subtree swap to mark
+ dirty extents
+
+Before this patch, with quota enabled during balance, we need to mark
+the whole subtree dirty for quota.
+
+E.g.
+OO = Old tree blocks (from file tree)
+NN = New tree blocks (from reloc tree)
+
+ File tree (src) Reloc tree (dst)
+ OO (a) NN (a)
+ / \ / \
+ (b) OO OO (c) (b) NN NN (c)
+ / \ / \ / \ / \
+ OO OO OO OO (d) OO OO OO NN (d)
+
+For the old balance + quota case, quota will mark the whole src and dst
+trees dirty, including all 3 old tree blocks in the reloc tree.
+
+It's doable for a small file tree, or when the new tree blocks are all
+located at a lower level.
+
+But for a large file tree, or when the new tree blocks are all located at
+a higher level, this will lead to marking the whole tree dirty, and be
+unbelievably slow.
+
+This patch will change how we handle such a balance-with-quota-enabled
+case.
+
+Now we will search from (b) and (c) for any new tree blocks whose
+generation is equal to @last_snapshot, and only mark them dirty.
+
+In the above case, we only need to trace tree blocks NN(b), NN(c) and NN(d)
+(NN(a) will be traced when COW happens for nodeptr modification), and
+likewise tree blocks OO(b), OO(c) and OO(d) (OO(a) will be traced when COW
+happens for nodeptr modification).
+
+For the above case, we could skip 3 tree blocks, but for a larger tree, we
+can skip tons of unmodified tree blocks and hugely speed up balance.
+
+This patch will introduce a new function,
+btrfs_qgroup_trace_subtree_swap(), which will do the following main
+work:
+
+1) Read out real root eb
+ And set up a basic dst_path for later calls
+2) Call qgroup_trace_new_subtree_blocks()
+ To trace all new tree blocks in the reloc tree and their
+ counterparts in the file tree.
+
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+---
+ fs/btrfs/qgroup.c | 104 ++++++++++++++++++++++++++++++++++++++++++++++++++
+ fs/btrfs/qgroup.h | 5 ++
+ fs/btrfs/relocation.c | 12 +----
+ 3 files changed, 112 insertions(+), 9 deletions(-)
+
+--- a/fs/btrfs/qgroup.c
++++ b/fs/btrfs/qgroup.c
+@@ -1927,6 +1927,110 @@ out:
+ return ret;
+ }
+
++/*
++ * Inform qgroup to trace subtree swap used in balance.
++ *
++ * Unlike btrfs_qgroup_trace_subtree(), this function will only trace
++ * new tree blocks whose generation is equal to (or larger than) @last_snapshot.
++ *
++ * Will go down the tree block pointed by @dst_eb (pointed by @dst_parent and
++ * @dst_slot), and find any tree blocks whose generation is at @last_snapshot,
++ * and then go down @src_eb (pointed by @src_parent and @src_slot) to find
++ * the conterpart of the tree block, then mark both tree blocks as qgroup dirty,
++ * and skip all tree blocks whose generation is smaller than last_snapshot.
++ *
++ * This would skip tons of tree blocks of original btrfs_qgroup_trace_subtree(),
++ * which could be the cause of very slow balance if the file tree is large.
++ *
++ * @src_parent, @src_slot: pointer to src (file tree) eb.
++ * @dst_parent, @dst_slot: pointer to dst (reloc tree) eb.
++ */
++int btrfs_qgroup_trace_subtree_swap(struct btrfs_trans_handle *trans,
++ struct extent_buffer *src_parent, int src_slot,
++ struct extent_buffer *dst_parent, int dst_slot,
++ u64 last_snapshot)
++{
++ struct btrfs_fs_info *fs_info = trans->fs_info;
++ struct btrfs_path *dst_path = NULL;
++ struct btrfs_key first_key;
++ struct extent_buffer *src_eb = NULL;
++ struct extent_buffer *dst_eb = NULL;
++ u64 child_gen;
++ u64 child_bytenr;
++ int level;
++ int ret;
++
++ if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
++ return 0;
++
++ /* Check parameter order */
++ if (btrfs_node_ptr_generation(src_parent, src_slot) >
++ btrfs_node_ptr_generation(dst_parent, dst_slot)) {
++ btrfs_err_rl(fs_info,
++ "%s: bad parameter order, src_gen=%llu dst_gen=%llu", __func__,
++ btrfs_node_ptr_generation(src_parent, src_slot),
++ btrfs_node_ptr_generation(dst_parent, dst_slot));
++ return -EUCLEAN;
++ }
++
++ /* Read out real @src_eb, pointed by @src_parent and @src_slot */
++ child_bytenr = btrfs_node_blockptr(src_parent, src_slot);
++ child_gen = btrfs_node_ptr_generation(src_parent, src_slot);
++ btrfs_node_key_to_cpu(src_parent, &first_key, src_slot);
++
++ src_eb = read_tree_block(fs_info->tree_root, child_bytenr, child_gen,
++ btrfs_header_level(src_parent) - 1, &first_key);
++ if (IS_ERR(src_eb)) {
++ ret = PTR_ERR(src_eb);
++ goto out;
++ }
++
++ /* Read out real @dst_eb, pointed by @src_parent and @src_slot */
++ child_bytenr = btrfs_node_blockptr(dst_parent, dst_slot);
++ child_gen = btrfs_node_ptr_generation(dst_parent, dst_slot);
++ btrfs_node_key_to_cpu(dst_parent, &first_key, dst_slot);
++
++ dst_eb = read_tree_block(fs_info->tree_root, child_bytenr, child_gen,
++ btrfs_header_level(dst_parent) - 1, &first_key);
++ if (IS_ERR(dst_eb)) {
++ ret = PTR_ERR(dst_eb);
++ goto out;
++ }
++
++ if (!extent_buffer_uptodate(src_eb) || !extent_buffer_uptodate(dst_eb)) {
++ ret = -EINVAL;
++ goto out;
++ }
++
++ level = btrfs_header_level(dst_eb);
++ dst_path = btrfs_alloc_path();
++ if (!dst_path) {
++ ret = -ENOMEM;
++ goto out;
++ }
++
++ /* For dst_path */
++ extent_buffer_get(dst_eb);
++ dst_path->nodes[level] = dst_eb;
++ dst_path->slots[level] = 0;
++ dst_path->locks[level] = 0;
++
++ /* Do the generation-aware breadth-first search */
++ ret = qgroup_trace_new_subtree_blocks(trans, src_eb, dst_path, level,
++ level, last_snapshot);
++ if (ret < 0)
++ goto out;
++ ret = 0;
++
++out:
++ free_extent_buffer(src_eb);
++ free_extent_buffer(dst_eb);
++ btrfs_free_path(dst_path);
++ if (ret < 0)
++ fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
++ return ret;
++}
++
+ int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct extent_buffer *root_eb,
+--- a/fs/btrfs/qgroup.h
++++ b/fs/btrfs/qgroup.h
+@@ -202,6 +202,11 @@ int btrfs_qgroup_trace_subtree(struct bt
+ struct btrfs_root *root,
+ struct extent_buffer *root_eb,
+ u64 root_gen, int root_level);
++
++int btrfs_qgroup_trace_subtree_swap(struct btrfs_trans_handle *trans,
++ struct extent_buffer *src_parent, int src_slot,
++ struct extent_buffer *dst_parent, int dst_slot,
++ u64 last_snapshot);
+ int
+ btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info,
+--- a/fs/btrfs/relocation.c
++++ b/fs/btrfs/relocation.c
+@@ -1924,15 +1924,9 @@ again:
+ * and tree block numbers, if current trans doesn't free
+ * data reloc tree inode.
+ */
+- ret = btrfs_qgroup_trace_subtree(trans, src, parent,
+- btrfs_header_generation(parent),
+- btrfs_header_level(parent));
+- if (ret < 0)
+- break;
+- ret = btrfs_qgroup_trace_subtree(trans, dest,
+- path->nodes[level],
+- btrfs_header_generation(path->nodes[level]),
+- btrfs_header_level(path->nodes[level]));
++ ret = btrfs_qgroup_trace_subtree_swap(trans, parent, slot,
++ path->nodes[level], path->slots[level],
++ last_snapshot);
+ if (ret < 0)
+ break;
+
diff --git a/patches.suse/0005-btrfs-qgroup-Cleanup-old-subtree-swap-code.patch b/patches.suse/0005-btrfs-qgroup-Cleanup-old-subtree-swap-code.patch
new file mode 100644
index 0000000000..3ba555c3b3
--- /dev/null
+++ b/patches.suse/0005-btrfs-qgroup-Cleanup-old-subtree-swap-code.patch
@@ -0,0 +1,139 @@
+From 9627736b75f612e05cef122b215a68113af9cd4d Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Wed, 23 Jan 2019 15:15:18 +0800
+Patch-mainline: v5.1
+Git-commit: 9627736b75f612e05cef122b215a68113af9cd4d
+References: bsc#1063638
+Subject: [PATCH 5/5] btrfs: qgroup: Cleanup old subtree swap code
+
+Since it's replaced by the new delayed subtree swap code, remove the
+original code.
+
+The cleanup is small since most of its core functionality is still used by
+the delayed subtree swap trace.
+
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+---
+ fs/btrfs/qgroup.c | 94 ------------------------------------------------------
+ fs/btrfs/qgroup.h | 6 ---
+ 2 files changed, 100 deletions(-)
+
+--- a/fs/btrfs/qgroup.c
++++ b/fs/btrfs/qgroup.c
+@@ -1981,100 +1981,6 @@ out:
+ return ret;
+ }
+
+-/*
+- * Inform qgroup to trace subtree swap used in balance.
+- *
+- * Unlike btrfs_qgroup_trace_subtree(), this function will only trace
+- * new tree blocks whose generation is equal to (or larger than) @last_snapshot.
+- *
+- * Will go down the tree block pointed by @dst_eb (pointed by @dst_parent and
+- * @dst_slot), and find any tree blocks whose generation is at @last_snapshot,
+- * and then go down @src_eb (pointed by @src_parent and @src_slot) to find
+- * the conterpart of the tree block, then mark both tree blocks as qgroup dirty,
+- * and skip all tree blocks whose generation is smaller than last_snapshot.
+- *
+- * This would skip tons of tree blocks of original btrfs_qgroup_trace_subtree(),
+- * which could be the cause of very slow balance if the file tree is large.
+- *
+- * @src_parent, @src_slot: pointer to src (file tree) eb.
+- * @dst_parent, @dst_slot: pointer to dst (reloc tree) eb.
+- */
+-int btrfs_qgroup_trace_subtree_swap(struct btrfs_trans_handle *trans,
+- struct btrfs_block_group_cache *bg_cache,
+- struct extent_buffer *src_parent, int src_slot,
+- struct extent_buffer *dst_parent, int dst_slot,
+- u64 last_snapshot)
+-{
+- struct btrfs_fs_info *fs_info = trans->fs_info;
+- struct btrfs_key first_key;
+- struct extent_buffer *src_eb = NULL;
+- struct extent_buffer *dst_eb = NULL;
+- bool trace_leaf = false;
+- u64 child_gen;
+- u64 child_bytenr;
+- int ret;
+-
+- if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
+- return 0;
+-
+- /* Check parameter order */
+- if (btrfs_node_ptr_generation(src_parent, src_slot) >
+- btrfs_node_ptr_generation(dst_parent, dst_slot)) {
+- btrfs_err_rl(fs_info,
+- "%s: bad parameter order, src_gen=%llu dst_gen=%llu", __func__,
+- btrfs_node_ptr_generation(src_parent, src_slot),
+- btrfs_node_ptr_generation(dst_parent, dst_slot));
+- return -EUCLEAN;
+- }
+-
+- /*
+- * Only trace leaf if we're relocating data block groups, this could
+- * reduce tons of data extents tracing for meta/sys bg relocation.
+- */
+- if (bg_cache->flags & BTRFS_BLOCK_GROUP_DATA)
+- trace_leaf = true;
+- /* Read out real @src_eb, pointed by @src_parent and @src_slot */
+- child_bytenr = btrfs_node_blockptr(src_parent, src_slot);
+- child_gen = btrfs_node_ptr_generation(src_parent, src_slot);
+- btrfs_node_key_to_cpu(src_parent, &first_key, src_slot);
+-
+- src_eb = read_tree_block(fs_info->tree_root, child_bytenr, child_gen,
+- btrfs_header_level(src_parent) - 1, &first_key);
+- if (IS_ERR(src_eb)) {
+- ret = PTR_ERR(src_eb);
+- goto out;
+- }
+-
+- /* Read out real @dst_eb, pointed by @src_parent and @src_slot */
+- child_bytenr = btrfs_node_blockptr(dst_parent, dst_slot);
+- child_gen = btrfs_node_ptr_generation(dst_parent, dst_slot);
+- btrfs_node_key_to_cpu(dst_parent, &first_key, dst_slot);
+-
+- dst_eb = read_tree_block(fs_info->tree_root, child_bytenr, child_gen,
+- btrfs_header_level(dst_parent) - 1, &first_key);
+- if (IS_ERR(dst_eb)) {
+- ret = PTR_ERR(dst_eb);
+- goto out;
+- }
+-
+- if (!extent_buffer_uptodate(src_eb) || !extent_buffer_uptodate(dst_eb)) {
+- ret = -EINVAL;
+- goto out;
+- }
+-
+- /* Do the generation aware breadth-first search */
+- ret = qgroup_trace_subtree_swap(trans, src_eb, dst_eb, last_snapshot,
+- trace_leaf);
+- if (ret < 0)
+- goto out;
+- ret = 0;
+-
+-out:
+- free_extent_buffer(src_eb);
+- free_extent_buffer(dst_eb);
+- return ret;
+-}
+-
+ int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct extent_buffer *root_eb,
+--- a/fs/btrfs/qgroup.h
++++ b/fs/btrfs/qgroup.h
+@@ -282,12 +282,6 @@ int btrfs_qgroup_trace_subtree(struct bt
+ struct btrfs_root *root,
+ struct extent_buffer *root_eb,
+ u64 root_gen, int root_level);
+-
+-int btrfs_qgroup_trace_subtree_swap(struct btrfs_trans_handle *trans,
+- struct btrfs_block_group_cache *bg_cache,
+- struct extent_buffer *src_parent, int src_slot,
+- struct extent_buffer *dst_parent, int dst_slot,
+- u64 last_snapshot);
+ int
+ btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info,
diff --git a/patches.suse/0005-btrfs-qgroup-Don-t-trace-subtree-if-we-re-dropping-r.patch b/patches.suse/0005-btrfs-qgroup-Don-t-trace-subtree-if-we-re-dropping-r.patch
new file mode 100644
index 0000000000..f51c6a24ac
--- /dev/null
+++ b/patches.suse/0005-btrfs-qgroup-Don-t-trace-subtree-if-we-re-dropping-r.patch
@@ -0,0 +1,67 @@
+From 2cd86d309bd1203241ceb833effe90787f3564a1 Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Thu, 27 Sep 2018 14:42:33 +0800
+Patch-mainline: v4.19
+Git-commit: 2cd86d309bd1203241ceb833effe90787f3564a1
+References: bsc#1063638
+Subject: [PATCH 5/6] btrfs: qgroup: Don't trace subtree if we're dropping
+ reloc tree
+
+The reloc tree doesn't contribute to qgroup numbers, as we have already
+accounted them at balance time (see replace_path()).
+
+Skipping the unneeded subtree tracing should reduce the overhead.
+
+[[Benchmark]]
+Hardware:
+ VM 4G vRAM, 8 vCPUs,
+ disk is using 'unsafe' cache mode,
+ backing device is SAMSUNG 850 evo SSD.
+ Host has 16G ram.
+
+Mkfs parameter:
+ --nodesize 4K (To bump up tree size)
+
+Initial subvolume contents:
+ 4G data copied from /usr and /lib.
+ (With enough regular small files)
+
+Snapshots:
+ 16 snapshots of the original subvolume.
+ each snapshot has 3 random files modified.
+
+balance parameter:
+ -m
+
+So the content should be pretty similar to a real world root fs layout.
+
+ | v4.19-rc1 | w/ patchset | diff (*)
+---------------------------------------------------------------
+relocated extents | 22929 | 22900 | -0.1%
+qgroup dirty extents | 227757 | 167139 | -26.6%
+time (sys) | 65.253s | 50.123s | -23.2%
+time (real) | 74.032s | 52.551s | -29.0%
+
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+---
+ fs/btrfs/extent-tree.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -8813,7 +8813,13 @@ skip:
+ parent = 0;
+ }
+
+- if (need_account) {
++ /*
++ * Reloc tree doesn't contribute to qgroup numbers, and we have
++ * already accounted them at merge time (replace_path),
++ * thus we could skip expensive subtree trace here.
++ */
++ if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
++ need_account) {
+ ret = btrfs_qgroup_trace_subtree(trans, root, next,
+ generation, level - 1);
+ if (ret) {
diff --git a/patches.suse/0006-btrfs-qgroup-Only-trace-data-extents-in-leaves-if-we.patch b/patches.suse/0006-btrfs-qgroup-Only-trace-data-extents-in-leaves-if-we.patch
new file mode 100644
index 0000000000..697f2e8acb
--- /dev/null
+++ b/patches.suse/0006-btrfs-qgroup-Only-trace-data-extents-in-leaves-if-we.patch
@@ -0,0 +1,188 @@
+From 3d0174f78e72301324a5b0ba7d67676474e36fff Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Thu, 27 Sep 2018 14:42:35 +0800
+Patch-mainline: v4.19
+Git-commit: 3d0174f78e72301324a5b0ba7d67676474e36fff
+References: bsc#1063638
+Subject: [PATCH 6/6] btrfs: qgroup: Only trace data extents in leaves if we're
+ relocating data block group
+
+For qgroup_trace_extent_swap(), if we find one leaf that needs to be
+traced, we will also iterate all file extents and trace them.
+
+This is OK if we're relocating data block groups, but if we're
+relocating metadata block groups, the balance code itself has ensured that
+both subtrees of the file tree and the reloc tree contain the same contents.
+
+That is to say, if we're relocating metadata block groups, all file
+extents in the reloc and file trees should match, so there is no need to
+trace them. This should reduce the total number of dirty extents processed
+in metadata block group balance.
+
+[[Benchmark]] (with all previous enhancement)
+Hardware:
+ VM 4G vRAM, 8 vCPUs,
+ disk is using 'unsafe' cache mode,
+ backing device is SAMSUNG 850 evo SSD.
+ Host has 16G ram.
+
+Mkfs parameter:
+ --nodesize 4K (To bump up tree size)
+
+Initial subvolume contents:
+ 4G data copied from /usr and /lib.
+ (With enough regular small files)
+
+Snapshots:
+ 16 snapshots of the original subvolume.
+ each snapshot has 3 random files modified.
+
+balance parameter:
+ -m
+
+So the content should be pretty similar to a real world root fs layout.
+
+ | v4.19-rc1 | w/ patchset | diff (*)
+---------------------------------------------------------------
+relocated extents | 22929 | 22851 | -0.3%
+qgroup dirty extents | 227757 | 140886 | -38.1%
+time (sys) | 65.253s | 37.464s | -42.6%
+time (real) | 74.032s | 44.722s | -39.6%
+
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+---
+ fs/btrfs/qgroup.c | 21 +++++++++++++++------
+ fs/btrfs/qgroup.h | 1 +
+ fs/btrfs/relocation.c | 10 +++++-----
+ 3 files changed, 21 insertions(+), 11 deletions(-)
+
+--- a/fs/btrfs/qgroup.c
++++ b/fs/btrfs/qgroup.c
+@@ -1678,7 +1678,8 @@ static int adjust_slots_upwards(struct b
+ static int qgroup_trace_extent_swap(struct btrfs_trans_handle* trans,
+ struct extent_buffer *src_eb,
+ struct btrfs_path *dst_path,
+- int dst_level, int root_level)
++ int dst_level, int root_level,
++ bool trace_leaf)
+ {
+ struct btrfs_key key;
+ struct btrfs_path *src_path;
+@@ -1779,7 +1780,7 @@ static int qgroup_trace_extent_swap(stru
+ goto out;
+
+ /* Record leaf file extents */
+- if (dst_level == 0) {
++ if (dst_level == 0 && trace_leaf) {
+ ret = btrfs_qgroup_trace_leaf_items(trans, fs_info->tree_root,
+ src_path->nodes[0]);
+ if (ret < 0)
+@@ -1818,7 +1819,7 @@ static int qgroup_trace_new_subtree_bloc
+ struct extent_buffer *src_eb,
+ struct btrfs_path *dst_path,
+ int cur_level, int root_level,
+- u64 last_snapshot)
++ u64 last_snapshot, bool trace_leaf)
+ {
+ struct btrfs_fs_info *fs_info = trans->fs_info;
+ struct extent_buffer *eb;
+@@ -1890,7 +1891,7 @@ static int qgroup_trace_new_subtree_bloc
+
+ /* Now record this tree block and its counter part for qgroups */
+ ret = qgroup_trace_extent_swap(trans, src_eb, dst_path, cur_level,
+- root_level);
++ root_level, trace_leaf);
+ if (ret < 0)
+ goto cleanup;
+
+@@ -1907,7 +1908,7 @@ static int qgroup_trace_new_subtree_bloc
+ /* Recursive call (at most 7 times) */
+ ret = qgroup_trace_new_subtree_blocks(trans, src_eb,
+ dst_path, cur_level - 1, root_level,
+- last_snapshot);
++ last_snapshot, trace_leaf);
+ if (ret < 0)
+ goto cleanup;
+ }
+@@ -1946,6 +1947,7 @@ out:
+ * @dst_parent, @dst_slot: pointer to dst (reloc tree) eb.
+ */
+ int btrfs_qgroup_trace_subtree_swap(struct btrfs_trans_handle *trans,
++ struct btrfs_block_group_cache *bg_cache,
+ struct extent_buffer *src_parent, int src_slot,
+ struct extent_buffer *dst_parent, int dst_slot,
+ u64 last_snapshot)
+@@ -1955,6 +1957,7 @@ int btrfs_qgroup_trace_subtree_swap(stru
+ struct btrfs_key first_key;
+ struct extent_buffer *src_eb = NULL;
+ struct extent_buffer *dst_eb = NULL;
++ bool trace_leaf = false;
+ u64 child_gen;
+ u64 child_bytenr;
+ int level;
+@@ -1973,6 +1976,12 @@ int btrfs_qgroup_trace_subtree_swap(stru
+ return -EUCLEAN;
+ }
+
++ /*
++ * Only trace leaf if we're relocating data block groups, this could
++ * reduce tons of data extents tracing for meta/sys bg relocation.
++ */
++ if (bg_cache->flags & BTRFS_BLOCK_GROUP_DATA)
++ trace_leaf = true;
+ /* Read out real @src_eb, pointed by @src_parent and @src_slot */
+ child_bytenr = btrfs_node_blockptr(src_parent, src_slot);
+ child_gen = btrfs_node_ptr_generation(src_parent, src_slot);
+@@ -2017,7 +2026,7 @@ int btrfs_qgroup_trace_subtree_swap(stru
+
+ /* Do the generation-aware breadth-first search */
+ ret = qgroup_trace_new_subtree_blocks(trans, src_eb, dst_path, level,
+- level, last_snapshot);
++ level, last_snapshot, trace_leaf);
+ if (ret < 0)
+ goto out;
+ ret = 0;
+--- a/fs/btrfs/qgroup.h
++++ b/fs/btrfs/qgroup.h
+@@ -204,6 +204,7 @@ int btrfs_qgroup_trace_subtree(struct bt
+ u64 root_gen, int root_level);
+
+ int btrfs_qgroup_trace_subtree_swap(struct btrfs_trans_handle *trans,
++ struct btrfs_block_group_cache *bg_cache,
+ struct extent_buffer *src_parent, int src_slot,
+ struct extent_buffer *dst_parent, int dst_slot,
+ u64 last_snapshot);
+--- a/fs/btrfs/relocation.c
++++ b/fs/btrfs/relocation.c
+@@ -1781,7 +1781,7 @@ int memcmp_node_keys(struct extent_buffe
+ * errors, a negative error number is returned.
+ */
+ static noinline_for_stack
+-int replace_path(struct btrfs_trans_handle *trans,
++int replace_path(struct btrfs_trans_handle *trans, struct reloc_control *rc,
+ struct btrfs_root *dest, struct btrfs_root *src,
+ struct btrfs_path *path, struct btrfs_key *next_key,
+ int lowest_level, int max_level)
+@@ -1924,9 +1924,9 @@ again:
+ * and tree block numbers, if current trans doesn't free
+ * data reloc tree inode.
+ */
+- ret = btrfs_qgroup_trace_subtree_swap(trans, parent, slot,
+- path->nodes[level], path->slots[level],
+- last_snapshot);
++ ret = btrfs_qgroup_trace_subtree_swap(trans, rc->block_group,
++ parent, slot, path->nodes[level],
++ path->slots[level], last_snapshot);
+ if (ret < 0)
+ break;
+
+@@ -2241,7 +2241,7 @@ static noinline_for_stack int merge_relo
+ btrfs_comp_cpu_keys(&next_key, &key) >= 0) {
+ ret = 0;
+ } else {
+- ret = replace_path(trans, root, reloc_root, path,
++ ret = replace_path(trans, rc, root, reloc_root, path,
+ &next_key, level, max_level);
+ }
+ if (ret < 0) {
diff --git a/patches.suse/btrfs-btrfs-use-the-new-VFS-super_block_dev.patch b/patches.suse/btrfs-btrfs-use-the-new-VFS-super_block_dev.patch
index 26962be77e..c5e245f102 100644
--- a/patches.suse/btrfs-btrfs-use-the-new-VFS-super_block_dev.patch
+++ b/patches.suse/btrfs-btrfs-use-the-new-VFS-super_block_dev.patch
@@ -20,7 +20,7 @@ fdmanana: fix for running qgroup sanity tests
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
-@@ -1190,11 +1190,8 @@ struct btrfs_root {
+@@ -1229,11 +1229,8 @@ struct btrfs_root {
* protected by inode_lock
*/
struct radix_tree_root delayed_nodes_tree;
@@ -36,15 +36,15 @@ fdmanana: fix for running qgroup sanity tests
atomic_t refs;
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
-@@ -1297,7 +1297,6 @@ static void __setup_root(u32 nodesize, u
+@@ -1289,7 +1289,6 @@ static void __setup_root(u32 nodesize, u
else
root->defrag_trans_start = 0;
root->root_key.objectid = objectid;
- root->anon_dev = 0;
spin_lock_init(&root->root_item_lock);
- }
-@@ -1594,7 +1593,7 @@ int btrfs_init_fs_root(struct btrfs_root
+ btrfs_qgroup_init_swapped_blocks(&root->swapped_blocks);
+@@ -1589,7 +1588,7 @@ int btrfs_init_fs_root(struct btrfs_root
spin_lock_init(&root->ino_cache_lock);
init_waitqueue_head(&root->ino_cache_wait);
@@ -53,7 +53,7 @@ fdmanana: fix for running qgroup sanity tests
if (ret)
goto fail;
-@@ -3711,8 +3710,8 @@ static void free_fs_root(struct btrfs_ro
+@@ -3724,8 +3723,8 @@ static void free_fs_root(struct btrfs_ro
WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree));
btrfs_free_block_rsv(root, root->orphan_block_rsv);
root->orphan_block_rsv = NULL;
@@ -66,7 +66,7 @@ fdmanana: fix for running qgroup sanity tests
free_extent_buffer(root->node);
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
-@@ -9411,7 +9411,7 @@ static int btrfs_getattr(struct vfsmount
+@@ -9482,7 +9482,7 @@ static int btrfs_getattr(struct vfsmount
u32 blocksize = inode->i_sb->s_blocksize;
generic_fillattr(inode, stat);
@@ -77,7 +77,7 @@ fdmanana: fix for running qgroup sanity tests
delalloc_bytes = BTRFS_I(inode)->delalloc_bytes;
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
-@@ -2223,7 +2223,7 @@ static int btrfs_show_devname(struct seq
+@@ -2276,7 +2276,7 @@ static int btrfs_show_devname(struct seq
static dev_t btrfs_get_inode_dev(const struct inode *inode)
{
diff --git a/patches.suse/btrfs-quota-Set-rescan-progress-to-u64-1-if-we-hit-l.patch b/patches.suse/btrfs-quota-Set-rescan-progress-to-u64-1-if-we-hit-l.patch
index 2f8b11bf7c..cdbc5c4dd2 100644
--- a/patches.suse/btrfs-quota-Set-rescan-progress-to-u64-1-if-we-hit-l.patch
+++ b/patches.suse/btrfs-quota-Set-rescan-progress-to-u64-1-if-we-hit-l.patch
@@ -30,9 +30,9 @@ Signed-off-by: Jiri Slaby <jslaby@suse.cz>
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
-@@ -2608,8 +2608,10 @@ out:
+@@ -2684,8 +2684,10 @@ out:
+ free_extent_buffer(scratch_leaf);
}
- btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
- if (done && !ret)
+ if (done && !ret) {
diff --git a/series.conf b/series.conf
index f60298f5fb..fc2cb9ce42 100644
--- a/series.conf
+++ b/series.conf
@@ -24238,6 +24238,7 @@
patches.drivers/nvme-fabrics-allow-internal-passthrough-command-on-d.patch
patches.fixes/restore-cond_resched-in-shrink_dcache_parent.patch
patches.fixes/rmdir-rename-do-shrink_dcache_parent-only-on-success.patch
+ patches.fixes/0001-btrfs-qgroup-Search-commit-root-for-rescan-to-avoid-.patch
patches.fixes/0001-dlm-fix-a-clerical-error-when-set-SCTP_NODELAY.patch
patches.fixes/0002-dlm-make-sctp_connect_to_sock-return-in-specified-ti.patch
patches.fixes/0003-dlm-remove-O_NONBLOCK-flag-in-sctp_connect_to_sock.patch
@@ -24716,6 +24717,12 @@
patches.fixes/0001-btrfs-defrag-use-btrfs_mod_outstanding_extents-in-cl.patch
patches.suse/btrfs-fix-error-handling-in-btrfs_dev_replace_start.patch
patches.fixes/0001-btrfs-Enhance-btrfs_trim_fs-function-to-handle-error.patch
+ patches.suse/0001-btrfs-qgroup-Introduce-trace-event-to-analyse-the-nu.patch
+ patches.suse/0002-btrfs-qgroup-Introduce-function-to-trace-two-swaped-.patch
+ patches.suse/0003-btrfs-qgroup-Introduce-function-to-find-all-new-tree.patch
+ patches.suse/0004-btrfs-qgroup-Use-generation-aware-subtree-swap-to-ma.patch
+ patches.suse/0005-btrfs-qgroup-Don-t-trace-subtree-if-we-re-dropping-r.patch
+ patches.suse/0006-btrfs-qgroup-Only-trace-data-extents-in-leaves-if-we.patch
patches.fixes/edac-raise-the-maximum-number-of-memory-controllers.patch
patches.fixes/edac-thunderx-fix-memory-leak-in-thunderx_l2c_threaded_isr.patch
patches.drivers/0001-pci-aspm-fix-link_state-teardown-on-device-removal
@@ -24804,6 +24811,7 @@
patches.arch/acpi-nfit-x86-mce-validate-a-mce-s-address-before-using-it.patch
patches.drivers/ibmvnic-fix-accelerated-VLAN-handling.patch
patches.fixes/NFSv4-Don-t-exit-the-state-manager-without-clearing-.patch
+ patches.fixes/fuse-fix-possibly-missed-wake-up-after-abort.patch
patches.fixes/0001-floppy-fix-race-condition-in-__floppy_read_block_0.patch
patches.fixes/acpi-nfit-fix-ars-overflow-continuation.patch
patches.drivers/xhci-add-quirk-to-workaround-the-errata-seen-on-cavium-thunder-x2-soc.patch
@@ -24833,6 +24841,7 @@
patches.drivers/net-ibmvnic-Fix-RTNL-deadlock-during-device-reset.patch
patches.fixes/ipv4-ipv6-netfilter-Adjust-the-frag-mem-limit-when-t.patch
patches.drivers/net-mlx4_core-Correctly-set-PFC-param-if-global-paus.patch
+ patches.fixes/fuse-continue-to-send-FUSE_RELEASEDIR-when-FUSE_OPEN-returns-ENOSYS.patch
patches.drivers/IB-hfi1-Fix-an-out-of-bounds-access-in-get_hw_stats.patch
patches.arch/ibmvnic-Convert-reset-work-item-mutex-to-spin-lock.patch
patches.arch/ibmvnic-Fix-non-atomic-memory-allocation-in-IRQ-cont.patch
@@ -24911,6 +24920,11 @@
patches.fixes/0001-device-property-Fix-the-length-used-in-PROPERTY_ENTR.patch
patches.suse/intel_th-gth-Fix-an-off-by-one-in-output-unassigning.patch
patches.fixes/0001-cdc-wdm-pass-return-value-of-recover_from_urb_loss.patch
+ patches.suse/0001-btrfs-relocation-Delay-reloc-tree-deletion-after-mer.patch
+ patches.suse/0002-btrfs-qgroup-Refactor-btrfs_qgroup_trace_subtree_swa.patch
+ patches.suse/0003-btrfs-qgroup-Introduce-per-root-swapped-blocks-infra.patch
+ patches.suse/0004-btrfs-qgroup-Use-delayed-subtree-rescan-for-balance.patch
+ patches.suse/0005-btrfs-qgroup-Cleanup-old-subtree-swap-code.patch
patches.arch/powerpc-pseries-Perform-full-re-add-of-CPU-for-topol.patch
patches.arch/powerpc-mm-hash-Handle-mmap_min_addr-correctly-in-ge.patch
patches.fixes/0001-drm-Fix-error-handling-in-drm_legacy_addctx.patch
@@ -24950,8 +24964,10 @@
patches.arch/powerpc-vdso64-Fix-CLOCK_MONOTONIC-inconsistencies-a.patch
patches.arch/powerpc-security-Fix-spectre_v2-reporting.patch
patches.drivers/ibmvscsi-Fix-empty-event-pool-access-during-host-rem.patch
+ patches.fixes/0001-btrfs-Fix-bound-checking-in-qgroup_trace_new_subtree.patch
patches.suse/btrfs-avoid-possible-qgroup_rsv_size-overflow-in-btrfs_calculate_inode_block_rsv_size.patch
patches.fixes/NFS-fix-mount-umount-race-in-nlmclnt.patch
+ patches.fixes/0001-net-sysfs-call-dev_hold-if-kobject_init_and_add-succ.patch
patches.drivers/iommu-don-t-print-warning-when-iommu-driver-only-supports-unmanaged-domains
patches.drivers/iommu-amd-reserve-exclusion-range-in-iova-domain
patches.fixes/0001-mm-debug.c-fix-__dump_page-when-mapping-host-is-not-.patch
@@ -24968,6 +24984,7 @@
patches.drivers/iommu-amd-set-exclusion-range-correctly
patches.arch/powerpc-vdso32-fix-CLOCK_MONOTONIC-on-PPC64.patch
patches.arch/svm-avic-fix-invalidate-logical-apic-id-entry
+ patches.arch/perf-x86-amd-add-event-map-for-amd-family-17h.patch
# davem/net
patches.drivers/ibmvnic-Enable-GRO.patch
@@ -24976,6 +24993,12 @@
# davem/net-next
patches.drivers/ibmvnic-Report-actual-backing-device-speed-and-duple.patch
+ # tip/tip
+ patches.arch/cpu-speculation-add-mitigations-cmdline-option.patch
+ patches.arch/x86-speculation-support-mitigations-cmdline-option.patch
+ patches.arch/powerpc-speculation-support-mitigations-cmdline-option.patch
+ patches.arch/s390-speculation-support-mitigations-cmdline-option.patch
+
########################################################
# end of sorted patches
########################################################