Home Home > GIT Browse > SLE12-SP5-AZURE
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Michal Suchanek <msuchanek@suse.de> 2019-08-20 16:24:41 +0200
committer Michal Suchanek <msuchanek@suse.de> 2019-08-20 16:24:41 +0200
commit 6cd670e51920d45eebd34064fc2701e74a0be6b6 (patch)
tree abfab1b1f31889c431e9ce4f00b940c5b7a0d745
parent 8c3e0060daae157e3f8d3b1766394d34f4c8bf4e (diff)
parent dbab7395d5a7992c5c4f49eca5707b18f437d43b (diff)
Merge remote-tracking branch 'origin/users/msuchanek/SLE15/fadump' into SLE15
-rw-r--r-- patches.arch/powerpc-fadump-Do-not-allow-hot-remove-memory-from-f.patch 91
-rw-r--r-- patches.arch/powerpc-fadump-Reservationless-firmware-assisted-dum.patch 300
-rw-r--r-- patches.arch/powerpc-fadump-Throw-proper-error-message-on-fadump-.patch 79
-rw-r--r-- series.conf 3
4 files changed, 473 insertions, 0 deletions
diff --git a/patches.arch/powerpc-fadump-Do-not-allow-hot-remove-memory-from-f.patch b/patches.arch/powerpc-fadump-Do-not-allow-hot-remove-memory-from-f.patch
new file mode 100644
index 0000000000..1fd2475023
--- /dev/null
+++ b/patches.arch/powerpc-fadump-Do-not-allow-hot-remove-memory-from-f.patch
@@ -0,0 +1,91 @@
+From 0db6896ff6332ba694f1e61b93ae3b2640317633 Mon Sep 17 00:00:00 2001
+From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
+Date: Mon, 20 Aug 2018 13:47:32 +0530
+Subject: [PATCH] powerpc/fadump: Do not allow hot-remove memory from fadump
+ reserved area.
+
+References: bsc#1120937, FATE#321840, FATE#325306
+Patch-mainline: v5.0-rc1
+Git-commit: 0db6896ff6332ba694f1e61b93ae3b2640317633
+
+For fadump to work successfully there should not be any holes in reserved
+memory ranges where kernel has asked firmware to move the content of old
+kernel memory in event of crash. Now that fadump uses CMA for reserved
+area, this memory area is now not protected from hot-remove operations
+unless it is cma allocated. Hence, fadump service can fail to re-register
+after the hot-remove operation, if hot-removed memory belongs to fadump
+reserved region. To avoid this make sure that memory from fadump reserved
+area is not hot-removable if fadump is registered.
+
+However, if user still wants to remove that memory, he can do so by
+manually stopping fadump service before hot-remove operation.
+
+Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/include/asm/fadump.h | 2 +-
+ arch/powerpc/kernel/fadump.c | 10 ++++++++--
+ arch/powerpc/platforms/pseries/hotplug-memory.c | 7 +++++--
+ 3 files changed, 14 insertions(+), 5 deletions(-)
+
+diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h
+index 0a4e37159276..188776befaf9 100644
+--- a/arch/powerpc/include/asm/fadump.h
++++ b/arch/powerpc/include/asm/fadump.h
+@@ -205,7 +205,7 @@ struct fad_crash_memory_ranges {
+ unsigned long long size;
+ };
+
+-extern int is_fadump_boot_memory_area(u64 addr, ulong size);
++extern int is_fadump_memory_area(u64 addr, ulong size);
+ extern int early_init_dt_scan_fw_dump(unsigned long node,
+ const char *uname, int depth, void *data);
+ extern int fadump_reserve_mem(void);
+diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
+index b7f83d6eb697..45a8d0be1c96 100644
+--- a/arch/powerpc/kernel/fadump.c
++++ b/arch/powerpc/kernel/fadump.c
+@@ -183,13 +183,19 @@ int __init early_init_dt_scan_fw_dump(unsigned long node,
+
+ /*
+ * If fadump is registered, check if the memory provided
+- * falls within boot memory area.
++ * falls within boot memory area and reserved memory area.
+ */
+-int is_fadump_boot_memory_area(u64 addr, ulong size)
++int is_fadump_memory_area(u64 addr, ulong size)
+ {
++ u64 d_start = fw_dump.reserve_dump_area_start;
++ u64 d_end = d_start + fw_dump.reserve_dump_area_size;
++
+ if (!fw_dump.dump_registered)
+ return 0;
+
++ if (((addr + size) > d_start) && (addr <= d_end))
++ return 1;
++
+ return (addr + size) > RMA_START && addr <= fw_dump.boot_memory_size;
+ }
+
+diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
+index 3b881ac66d9a..be8a6db3558e 100644
+--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
++++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
+@@ -353,8 +353,11 @@ static bool lmb_is_removable(struct drmem_lmb *lmb)
+ phys_addr = lmb->base_addr;
+
+ #ifdef CONFIG_FA_DUMP
+- /* Don't hot-remove memory that falls in fadump boot memory area */
+- if (is_fadump_boot_memory_area(phys_addr, block_sz))
++ /*
++ * Don't hot-remove memory that falls in fadump boot memory area
++ * and memory that is reserved for capturing old kernel memory.
++ */
++ if (is_fadump_memory_area(phys_addr, block_sz))
+ return false;
+ #endif
+
+--
+2.19.2
+
diff --git a/patches.arch/powerpc-fadump-Reservationless-firmware-assisted-dum.patch b/patches.arch/powerpc-fadump-Reservationless-firmware-assisted-dum.patch
new file mode 100644
index 0000000000..c2fb00461c
--- /dev/null
+++ b/patches.arch/powerpc-fadump-Reservationless-firmware-assisted-dum.patch
@@ -0,0 +1,300 @@
+From a4e92ce8e4c8275bacfe3529d6ac85d54a233d87 Mon Sep 17 00:00:00 2001
+From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
+Date: Mon, 20 Aug 2018 13:47:17 +0530
+Subject: [PATCH] powerpc/fadump: Reservationless firmware assisted dump
+
+References: bsc#1120937, FATE#321840, FATE#325306
+Patch-mainline: v5.0-rc1
+Git-commit: a4e92ce8e4c8275bacfe3529d6ac85d54a233d87
+
+One of the primary issues with Firmware Assisted Dump (fadump) on Power
+is that it needs a large amount of memory to be reserved. On large
+systems with TeraBytes of memory, this reservation can be quite
+significant.
+
+In some cases, fadump fails if the memory reserved is insufficient, or
+if the reserved memory was DLPAR hot-removed.
+
+In the normal case, post reboot, the preserved memory is filtered to
+extract only relevant areas of interest using the makedumpfile tool.
+While the tool provides flexibility to determine what needs to be part
+of the dump and what memory to filter out, all supported distributions
+default this to "Capture only kernel data and nothing else".
+
+We take advantage of this default and the Linux kernel's Contiguous
+Memory Allocator (CMA) to fundamentally change the memory reservation
+model for fadump.
+
+Instead of setting aside a significant chunk of memory nobody can use,
+this patch uses CMA instead, to reserve a significant chunk of memory
+that the kernel is prevented from using (due to MIGRATE_CMA), but
+applications are free to use it. With this fadump will still be able
+to capture all of the kernel memory and most of the user space memory
+except the user pages that were present in CMA region.
+
+Essentially, on a P9 LPAR with 2 cores, 8GB RAM and current upstream:
+[root@zzxx-yy10 ~]# free -m
+ total used free shared buff/cache available
+Mem: 7557 193 6822 12 541 6725
+Swap: 4095 0 4095
+
+With this patch:
+[root@zzxx-yy10 ~]# free -m
+ total used free shared buff/cache available
+Mem: 8133 194 7464 12 475 7338
+Swap: 4095 0 4095
+
+Changes made here are completely transparent to how fadump has
+traditionally worked.
+
+Thanks to Aneesh Kumar and Anshuman Khandual for helping us understand
+CMA and its usage.
+
+TODO:
+- Handle case where CMA reservation spans nodes.
+
+Signed-off-by: Ananth N Mavinakayanahalli <ananth@linux.vnet.ibm.com>
+Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
+Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ .../powerpc/firmware-assisted-dump.txt | 17 +++-
+ arch/powerpc/include/asm/fadump.h | 5 +
+ arch/powerpc/kernel/fadump.c | 97 +++++++++++++++++--
+ 3 files changed, 108 insertions(+), 11 deletions(-)
+
+diff --git a/Documentation/powerpc/firmware-assisted-dump.txt b/Documentation/powerpc/firmware-assisted-dump.txt
+index bdd344aa18d9..18c5feef2577 100644
+--- a/Documentation/powerpc/firmware-assisted-dump.txt
++++ b/Documentation/powerpc/firmware-assisted-dump.txt
+@@ -113,7 +113,15 @@ header, is usually reserved at an offset greater than boot memory
+ size (see Fig. 1). This area is *not* released: this region will
+ be kept permanently reserved, so that it can act as a receptacle
+ for a copy of the boot memory content in addition to CPU state
+-and HPTE region, in the case a crash does occur.
++and HPTE region, in the case a crash does occur. Since this reserved
++memory area is used only after the system crash, there is no point in
++blocking this significant chunk of memory from production kernel.
++Hence, the implementation uses the Linux kernel's Contiguous Memory
++Allocator (CMA) for memory reservation if CMA is configured for kernel.
++With CMA reservation this memory will be available for applications to
++use, while the kernel is prevented from using it. With this, fadump will
++still be able to capture all of the kernel memory and most of the user
++space memory except the user pages that were present in CMA region.
+
+ o Memory Reservation during first kernel
+
+@@ -162,6 +170,9 @@ How to enable firmware-assisted dump (fadump):
+
+ 1. Set config option CONFIG_FA_DUMP=y and build kernel.
+ 2. Boot into linux kernel with 'fadump=on' kernel cmdline option.
++ By default, fadump reserved memory will be initialized as CMA area.
++ Alternatively, user can boot linux kernel with 'fadump=nocma' to
++ prevent fadump from using CMA.
+ 3. Optionally, user can also set 'crashkernel=' kernel cmdline
+ to specify size of the memory to reserve for boot memory dump
+ preservation.
+@@ -172,6 +183,10 @@ NOTE: 1. 'fadump_reserve_mem=' parameter has been deprecated. Instead
+ 2. If firmware-assisted dump fails to reserve memory then it
+ will fallback to existing kdump mechanism if 'crashkernel='
+ option is set at kernel cmdline.
++ 3. If the user wants to capture all of user space memory and is ok
++ with reserved memory not being available to the production system,
++ then the 'fadump=nocma' kernel parameter can be used to fall back
++ to the old behaviour.
+
+ Sysfs/debugfs files:
+ ------------
+diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h
+index 1e7a33592e29..0a4e37159276 100644
+--- a/arch/powerpc/include/asm/fadump.h
++++ b/arch/powerpc/include/asm/fadump.h
+@@ -48,6 +48,10 @@
+
+ #define memblock_num_regions(memblock_type) (memblock.memblock_type.cnt)
+
++/* Alignment per CMA requirement. */
++#define FADUMP_CMA_ALIGNMENT (PAGE_SIZE << \
++ max_t(unsigned long, MAX_ORDER - 1, pageblock_order))
++
+ /* Firmware provided dump sections */
+ #define FADUMP_CPU_STATE_DATA 0x0001
+ #define FADUMP_HPTE_REGION 0x0002
+@@ -141,6 +145,7 @@ struct fw_dump {
+ unsigned long fadump_supported:1;
+ unsigned long dump_active:1;
+ unsigned long dump_registered:1;
++ unsigned long nocma:1;
+ };
+
+ /*
+diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
+index 7a0da83bf883..ec937c7deae8 100644
+--- a/arch/powerpc/kernel/fadump.c
++++ b/arch/powerpc/kernel/fadump.c
+@@ -35,6 +35,7 @@
+ #include <linux/kobject.h>
+ #include <linux/sysfs.h>
+ #include <linux/slab.h>
++#include <linux/cma.h>
+
+ #include <asm/debugfs.h>
+ #include <asm/page.h>
+@@ -46,6 +47,9 @@
+ static struct fw_dump fw_dump;
+ static struct fadump_mem_struct fdm;
+ static const struct fadump_mem_struct *fdm_active;
++#ifdef CONFIG_CMA
++static struct cma *fadump_cma;
++#endif
+
+ static DEFINE_MUTEX(fadump_mutex);
+ struct fad_crash_memory_ranges *crash_memory_ranges;
+@@ -53,6 +57,67 @@ int crash_memory_ranges_size;
+ int crash_mem_ranges;
+ int max_crash_mem_ranges;
+
++#ifdef CONFIG_CMA
++/*
++ * fadump_cma_init() - Initialize CMA area from a fadump reserved memory
++ *
++ * This function initializes CMA area from fadump reserved memory.
++ * The total size of fadump reserved memory covers for boot memory size
++ * + cpu data size + hpte size and metadata.
++ * Initialize only the area equivalent to boot memory size for CMA use.
++ * The remaining portion of fadump reserved memory will not be given
++ * to CMA and pages for those will stay reserved. boot memory size is
++ * aligned per CMA requirement to satisfy cma_init_reserved_mem() call.
++ * But for some reason even if it fails we still have the memory reservation
++ * with us and we can still continue doing fadump.
++ */
++int __init fadump_cma_init(void)
++{
++ unsigned long long base, size;
++ int rc;
++
++ if (!fw_dump.fadump_enabled)
++ return 0;
++
++ /*
++ * Do not use CMA if user has provided fadump=nocma kernel parameter.
++ * Return 1 to continue with fadump old behaviour.
++ */
++ if (fw_dump.nocma)
++ return 1;
++
++ base = fw_dump.reserve_dump_area_start;
++ size = fw_dump.boot_memory_size;
++
++ if (!size)
++ return 0;
++
++ rc = cma_init_reserved_mem(base, size, 0, "fadump_cma", &fadump_cma);
++ if (rc) {
++ pr_err("Failed to init cma area for firmware-assisted dump,%d\n", rc);
++ /*
++ * Though the CMA init has failed we still have memory
++ * reservation with us. The reserved memory will be
++ * blocked from production system usage. Hence return 1,
++ * so that we can continue with fadump.
++ */
++ return 1;
++ }
++
++ /*
++ * So we now have successfully initialized cma area for fadump.
++ */
++ pr_info("Initialized 0x%lx bytes cma area at %ldMB from 0x%lx "
++ "bytes of memory reserved for firmware-assisted dump\n",
++ cma_get_size(fadump_cma),
++ (unsigned long)cma_get_base(fadump_cma) >> 20,
++ fw_dump.reserve_dump_area_size);
++ return 1;
++}
++#else
++static int __init fadump_cma_init(void) { return 1; }
++#endif /* CONFIG_CMA */
++
+ /* Scan the Firmware Assisted dump configuration details. */
+ int __init early_init_dt_scan_fw_dump(unsigned long node,
+ const char *uname, int depth, void *data)
+@@ -378,8 +443,15 @@ int __init fadump_reserve_mem(void)
+ */
+ if (fdm_active)
+ fw_dump.boot_memory_size = be64_to_cpu(fdm_active->rmr_region.source_len);
+- else
++ else {
+ fw_dump.boot_memory_size = fadump_calculate_reserve_size();
++#ifdef CONFIG_CMA
++ if (!fw_dump.nocma)
++ fw_dump.boot_memory_size =
++ ALIGN(fw_dump.boot_memory_size,
++ FADUMP_CMA_ALIGNMENT);
++#endif
++ }
+
+ /*
+ * Calculate the memory boundary.
+@@ -426,8 +498,9 @@ int __init fadump_reserve_mem(void)
+ fw_dump.fadumphdr_addr =
+ be64_to_cpu(fdm_active->rmr_region.destination_address) +
+ be64_to_cpu(fdm_active->rmr_region.source_len);
+- pr_debug("fadumphdr_addr = %p\n",
+- (void *) fw_dump.fadumphdr_addr);
++ pr_debug("fadumphdr_addr = %pa\n", &fw_dump.fadumphdr_addr);
++ fw_dump.reserve_dump_area_start = base;
++ fw_dump.reserve_dump_area_size = size;
+ } else {
+ size = get_fadump_area_size();
+
+@@ -455,10 +528,11 @@ int __init fadump_reserve_mem(void)
+ (unsigned long)(size >> 20),
+ (unsigned long)(base >> 20),
+ (unsigned long)(memblock_phys_mem_size() >> 20));
+- }
+
+- fw_dump.reserve_dump_area_start = base;
+- fw_dump.reserve_dump_area_size = size;
++ fw_dump.reserve_dump_area_start = base;
++ fw_dump.reserve_dump_area_size = size;
++ return fadump_cma_init();
++ }
+ return 1;
+ }
+
+@@ -477,6 +551,10 @@ static int __init early_fadump_param(char *p)
+ fw_dump.fadump_enabled = 1;
+ else if (strncmp(p, "off", 3) == 0)
+ fw_dump.fadump_enabled = 0;
++ else if (strncmp(p, "nocma", 5) == 0) {
++ fw_dump.fadump_enabled = 1;
++ fw_dump.nocma = 1;
++ }
+
+ return 0;
+ }
+@@ -1229,7 +1307,7 @@ static int fadump_unregister_dump(struct fadump_mem_struct *fdm)
+ return 0;
+ }
+
+-static int fadump_invalidate_dump(struct fadump_mem_struct *fdm)
++static int fadump_invalidate_dump(const struct fadump_mem_struct *fdm)
+ {
+ int rc = 0;
+ unsigned int wait_time;
+@@ -1260,9 +1338,8 @@ void fadump_cleanup(void)
+ {
+ /* Invalidate the registration only if dump is active. */
+ if (fw_dump.dump_active) {
+- init_fadump_mem_struct(&fdm,
+- be64_to_cpu(fdm_active->cpu_state_data.destination_address));
+- fadump_invalidate_dump(&fdm);
++ /* pass the same memory dump structure provided by platform */
++ fadump_invalidate_dump(fdm_active);
+ } else if (fw_dump.dump_registered) {
+ /* Un-register Firmware-assisted dump if it was registered. */
+ fadump_unregister_dump(&fdm);
+--
+2.19.2
+
diff --git a/patches.arch/powerpc-fadump-Throw-proper-error-message-on-fadump-.patch b/patches.arch/powerpc-fadump-Throw-proper-error-message-on-fadump-.patch
new file mode 100644
index 0000000000..3e205274d0
--- /dev/null
+++ b/patches.arch/powerpc-fadump-Throw-proper-error-message-on-fadump-.patch
@@ -0,0 +1,79 @@
+From f86593be1e7f5405b980bb4b11640250ac81d7cb Mon Sep 17 00:00:00 2001
+From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
+Date: Mon, 20 Aug 2018 13:47:24 +0530
+Subject: [PATCH] powerpc/fadump: Throw proper error message on fadump
+ registration failure
+
+References: bsc#1120937, FATE#321840, FATE#325306
+Patch-mainline: v5.0-rc1
+Git-commit: f86593be1e7f5405b980bb4b11640250ac81d7cb
+
+fadump fails to register when there are holes in reserved memory area.
+This can happen if user has hot-removed a memory that falls in the
+fadump reserved memory area. Throw a meaningful error message to the
+user in such case.
+
+Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
+[mpe: is_reserved_memory_area_contiguous() returns bool, unsplit string]
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/kernel/fadump.c | 35 +++++++++++++++++++++++++++++++++--
+ 1 file changed, 33 insertions(+), 2 deletions(-)
+
+diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
+index ec937c7deae8..b7f83d6eb697 100644
+--- a/arch/powerpc/kernel/fadump.c
++++ b/arch/powerpc/kernel/fadump.c
+@@ -237,6 +237,35 @@ static int is_boot_memory_area_contiguous(void)
+ return ret;
+ }
+
++/*
++ * Returns true, if there are no holes in reserved memory area,
++ * false otherwise.
++ */
++static bool is_reserved_memory_area_contiguous(void)
++{
++ struct memblock_region *reg;
++ unsigned long start, end;
++ unsigned long d_start = fw_dump.reserve_dump_area_start;
++ unsigned long d_end = d_start + fw_dump.reserve_dump_area_size;
++
++ for_each_memblock(memory, reg) {
++ start = max(d_start, (unsigned long)reg->base);
++ end = min(d_end, (unsigned long)(reg->base + reg->size));
++ if (d_start < end) {
++ /* Memory hole from d_start to start */
++ if (start > d_start)
++ break;
++
++ if (end == d_end)
++ return true;
++
++ d_start = end + 1;
++ }
++ }
++
++ return false;
++}
++
+ /* Print firmware assisted dump configurations for debugging purpose. */
+ static void fadump_show_config(void)
+ {
+@@ -603,8 +632,10 @@ static int register_fw_dump(struct fadump_mem_struct *fdm)
+ break;
+ case -3:
+ if (!is_boot_memory_area_contiguous())
+- pr_err("Can't have holes in boot memory area while "
+- "registering fadump\n");
++ pr_err("Can't have holes in boot memory area while registering fadump\n");
++ else if (!is_reserved_memory_area_contiguous())
++ pr_err("Can't have holes in reserved memory area while"
++ " registering fadump\n");
+
+ printk(KERN_ERR "Failed to register firmware-assisted kernel"
+ " dump. Parameter Error(%d).\n", rc);
+--
+2.19.2
+
diff --git a/series.conf b/series.conf
index 7da51cf3cb..eb633c197c 100644
--- a/series.conf
+++ b/series.conf
@@ -20845,6 +20845,9 @@
patches.arch/powerpc-perf-Remove-l2-bus-events-from-HW-cache-even.patch
patches.arch/powerpc-fsl-Fix-spectre_v2-mitigations-reporting.patch
patches.arch/powerpc-powernv-ioda-Allocate-indirect-TCE-levels-of.patch
+ patches.arch/powerpc-fadump-Reservationless-firmware-assisted-dum.patch
+ patches.arch/powerpc-fadump-Throw-proper-error-message-on-fadump-.patch
+ patches.arch/powerpc-fadump-Do-not-allow-hot-remove-memory-from-f.patch
patches.arch/powerpc-tm-Set-MSR-TS-just-prior-to-recheckpoint.patch
patches.arch/powerpc-tm-Save-MSR-to-PACA-before-RFID.patch
patches.arch/powerpc-tm-Print-scratch-value.patch