Home Home > GIT Browse > SLE12-SP5-AZURE
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichal Suchanek <msuchanek@suse.de>2019-08-20 16:24:41 +0200
committerMichal Suchanek <msuchanek@suse.de>2019-08-20 16:24:41 +0200
commit8b66e201886b5a9f208079d2ebbe02d579b2063d (patch)
treefd2b29c159943219a1a70331ec2e2989209956a6
parent06ec988b98cc5c14417d3cddcf63765823cd938c (diff)
parent07b7cd725d9c051ae5ae0a86baa5034d124e898b (diff)
Merge remote-tracking branch 'origin/users/msuchanek/SLE15/fadump' into SLE15
-rw-r--r--Documentation/powerpc/firmware-assisted-dump.txt17
-rw-r--r--arch/powerpc/include/asm/fadump.h7
-rw-r--r--arch/powerpc/kernel/fadump.c142
-rw-r--r--arch/powerpc/platforms/pseries/hotplug-memory.c7
4 files changed, 155 insertions, 18 deletions
diff --git a/Documentation/powerpc/firmware-assisted-dump.txt b/Documentation/powerpc/firmware-assisted-dump.txt
index bdd344aa18d9..18c5feef2577 100644
--- a/Documentation/powerpc/firmware-assisted-dump.txt
+++ b/Documentation/powerpc/firmware-assisted-dump.txt
@@ -113,7 +113,15 @@ header, is usually reserved at an offset greater than boot memory
size (see Fig. 1). This area is *not* released: this region will
be kept permanently reserved, so that it can act as a receptacle
for a copy of the boot memory content in addition to CPU state
-and HPTE region, in the case a crash does occur.
+and HPTE region, in the case a crash does occur. Since this reserved
+memory area is used only after the system crash, there is no point in
+blocking this significant chunk of memory from production kernel.
+Hence, the implementation uses the Linux kernel's Contiguous Memory
+Allocator (CMA) for memory reservation if CMA is configured for kernel.
+With CMA reservation this memory will be available for applications to
+use it, while kernel is prevented from using it. With this fadump will
+still be able to capture all of the kernel memory and most of the user
+space memory except the user pages that were present in CMA region.
o Memory Reservation during first kernel
@@ -162,6 +170,9 @@ How to enable firmware-assisted dump (fadump):
1. Set config option CONFIG_FA_DUMP=y and build kernel.
2. Boot into linux kernel with 'fadump=on' kernel cmdline option.
+ By default, fadump reserved memory will be initialized as CMA area.
+ Alternatively, user can boot linux kernel with 'fadump=nocma' to
+ prevent fadump to use CMA.
3. Optionally, user can also set 'crashkernel=' kernel cmdline
to specify size of the memory to reserve for boot memory dump
preservation.
@@ -172,6 +183,10 @@ NOTE: 1. 'fadump_reserve_mem=' parameter has been deprecated. Instead
2. If firmware-assisted dump fails to reserve memory then it
will fallback to existing kdump mechanism if 'crashkernel='
option is set at kernel cmdline.
+ 3. if user wants to capture all of user space memory and ok with
+ reserved memory not available to production system, then
+ 'fadump=nocma' kernel parameter can be used to fallback to
+ old behaviour.
Sysfs/debugfs files:
------------
diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h
index 845920cd3639..70ca6791b69a 100644
--- a/arch/powerpc/include/asm/fadump.h
+++ b/arch/powerpc/include/asm/fadump.h
@@ -48,6 +48,10 @@
#define memblock_num_regions(memblock_type) (memblock.memblock_type.cnt)
+/* Alignement per CMA requirement. */
+#define FADUMP_CMA_ALIGNMENT (PAGE_SIZE << \
+ max_t(unsigned long, MAX_ORDER - 1, pageblock_order))
+
/* Firmware provided dump sections */
#define FADUMP_CPU_STATE_DATA 0x0001
#define FADUMP_HPTE_REGION 0x0002
@@ -141,6 +145,7 @@ struct fw_dump {
unsigned long fadump_supported:1;
unsigned long dump_active:1;
unsigned long dump_registered:1;
+ unsigned long nocma:1;
};
/*
@@ -200,7 +205,7 @@ struct fad_crash_memory_ranges {
unsigned long long size;
};
-extern int is_fadump_boot_memory_area(u64 addr, ulong size);
+extern int is_fadump_memory_area(u64 addr, ulong size);
extern int early_init_dt_scan_fw_dump(unsigned long node,
const char *uname, int depth, void *data);
extern int fadump_reserve_mem(void);
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index bb5ab46f40dd..25abf61d1467 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -35,6 +35,7 @@
#include <linux/kobject.h>
#include <linux/sysfs.h>
#include <linux/slab.h>
+#include <linux/cma.h>
#include <asm/debugfs.h>
#include <asm/page.h>
@@ -46,6 +47,9 @@
static struct fw_dump fw_dump;
static struct fadump_mem_struct fdm;
static const struct fadump_mem_struct *fdm_active;
+#ifdef CONFIG_CMA
+static struct cma *fadump_cma;
+#endif
static DEFINE_MUTEX(fadump_mutex);
struct fad_crash_memory_ranges *crash_memory_ranges;
@@ -53,6 +57,67 @@ int crash_memory_ranges_size;
int crash_mem_ranges;
int max_crash_mem_ranges;
+#ifdef CONFIG_CMA
+/*
+ * fadump_cma_init() - Initialize CMA area from a fadump reserved memory
+ *
+ * This function initializes CMA area from fadump reserved memory.
+ * The total size of fadump reserved memory covers for boot memory size
+ * + cpu data size + hpte size and metadata.
+ * Initialize only the area equivalent to boot memory size for CMA use.
+ * The reamining portion of fadump reserved memory will be not given
+ * to CMA and pages for thoes will stay reserved. boot memory size is
+ * aligned per CMA requirement to satisy cma_init_reserved_mem() call.
+ * But for some reason even if it fails we still have the memory reservation
+ * with us and we can still continue doing fadump.
+ */
+int __init fadump_cma_init(void)
+{
+ unsigned long long base, size;
+ int rc;
+
+ if (!fw_dump.fadump_enabled)
+ return 0;
+
+ /*
+ * Do not use CMA if user has provided fadump=nocma kernel parameter.
+ * Return 1 to continue with fadump old behaviour.
+ */
+ if (fw_dump.nocma)
+ return 1;
+
+ base = fw_dump.reserve_dump_area_start;
+ size = fw_dump.boot_memory_size;
+
+ if (!size)
+ return 0;
+
+ rc = cma_init_reserved_mem(base, size, 0, "fadump_cma", &fadump_cma);
+ if (rc) {
+ pr_err("Failed to init cma area for firmware-assisted dump,%d\n", rc);
+ /*
+ * Though the CMA init has failed we still have memory
+ * reservation with us. The reserved memory will be
+ * blocked from production system usage. Hence return 1,
+ * so that we can continue with fadump.
+ */
+ return 1;
+ }
+
+ /*
+ * So we now have successfully initialized cma area for fadump.
+ */
+ pr_info("Initialized 0x%lx bytes cma area at %ldMB from 0x%lx "
+ "bytes of memory reserved for firmware-assisted dump\n",
+ cma_get_size(fadump_cma),
+ (unsigned long)cma_get_base(fadump_cma) >> 20,
+ fw_dump.reserve_dump_area_size);
+ return 1;
+}
+#else
+static int __init fadump_cma_init(void) { return 1; }
+#endif /* CONFIG_CMA */
+
/* Scan the Firmware Assisted dump configuration details. */
int __init early_init_dt_scan_fw_dump(unsigned long node,
const char *uname, int depth, void *data)
@@ -118,13 +183,19 @@ int __init early_init_dt_scan_fw_dump(unsigned long node,
/*
* If fadump is registered, check if the memory provided
- * falls within boot memory area.
+ * falls within boot memory area and reserved memory area.
*/
-int is_fadump_boot_memory_area(u64 addr, ulong size)
+int is_fadump_memory_area(u64 addr, ulong size)
{
+ u64 d_start = fw_dump.reserve_dump_area_start;
+ u64 d_end = d_start + fw_dump.reserve_dump_area_size;
+
if (!fw_dump.dump_registered)
return 0;
+ if (((addr + size) > d_start) && (addr <= d_end))
+ return 1;
+
return (addr + size) > RMA_START && addr <= fw_dump.boot_memory_size;
}
@@ -165,6 +236,35 @@ static int is_boot_memory_area_contiguous(void)
return ret;
}
+/*
+ * Returns true, if there are no holes in reserved memory area,
+ * false otherwise.
+ */
+static bool is_reserved_memory_area_contiguous(void)
+{
+ struct memblock_region *reg;
+ unsigned long start, end;
+ unsigned long d_start = fw_dump.reserve_dump_area_start;
+ unsigned long d_end = d_start + fw_dump.reserve_dump_area_size;
+
+ for_each_memblock(memory, reg) {
+ start = max(d_start, (unsigned long)reg->base);
+ end = min(d_end, (unsigned long)(reg->base + reg->size));
+ if (d_start < end) {
+ /* Memory hole from d_start to start */
+ if (start > d_start)
+ break;
+
+ if (end == d_end)
+ return true;
+
+ d_start = end + 1;
+ }
+ }
+
+ return false;
+}
+
/* Print firmware assisted dump configurations for debugging purpose. */
static void fadump_show_config(void)
{
@@ -371,8 +471,15 @@ int __init fadump_reserve_mem(void)
*/
if (fdm_active)
fw_dump.boot_memory_size = be64_to_cpu(fdm_active->rmr_region.source_len);
- else
+ else {
fw_dump.boot_memory_size = fadump_calculate_reserve_size();
+#ifdef CONFIG_CMA
+ if (!fw_dump.nocma)
+ fw_dump.boot_memory_size =
+ ALIGN(fw_dump.boot_memory_size,
+ FADUMP_CMA_ALIGNMENT);
+#endif
+ }
/*
* Calculate the memory boundary.
@@ -419,8 +526,9 @@ int __init fadump_reserve_mem(void)
fw_dump.fadumphdr_addr =
be64_to_cpu(fdm_active->rmr_region.destination_address) +
be64_to_cpu(fdm_active->rmr_region.source_len);
- pr_debug("fadumphdr_addr = %p\n",
- (void *) fw_dump.fadumphdr_addr);
+ pr_debug("fadumphdr_addr = %pa\n", &fw_dump.fadumphdr_addr);
+ fw_dump.reserve_dump_area_start = base;
+ fw_dump.reserve_dump_area_size = size;
} else {
size = get_fadump_area_size();
@@ -448,10 +556,11 @@ int __init fadump_reserve_mem(void)
(unsigned long)(size >> 20),
(unsigned long)(base >> 20),
(unsigned long)(memblock_phys_mem_size() >> 20));
- }
- fw_dump.reserve_dump_area_start = base;
- fw_dump.reserve_dump_area_size = size;
+ fw_dump.reserve_dump_area_start = base;
+ fw_dump.reserve_dump_area_size = size;
+ return fadump_cma_init();
+ }
return 1;
}
@@ -470,6 +579,10 @@ static int __init early_fadump_param(char *p)
fw_dump.fadump_enabled = 1;
else if (strncmp(p, "off", 3) == 0)
fw_dump.fadump_enabled = 0;
+ else if (strncmp(p, "nocma", 5) == 0) {
+ fw_dump.fadump_enabled = 1;
+ fw_dump.nocma = 1;
+ }
return 0;
}
@@ -518,8 +631,10 @@ static int register_fw_dump(struct fadump_mem_struct *fdm)
break;
case -3:
if (!is_boot_memory_area_contiguous())
- pr_err("Can't have holes in boot memory area while "
- "registering fadump\n");
+ pr_err("Can't have holes in boot memory area while registering fadump\n");
+ else if (!is_reserved_memory_area_contiguous())
+ pr_err("Can't have holes in reserved memory area while"
+ " registering fadump\n");
printk(KERN_ERR "Failed to register firmware-assisted kernel"
" dump. Parameter Error(%d).\n", rc);
@@ -1222,7 +1337,7 @@ static int fadump_unregister_dump(struct fadump_mem_struct *fdm)
return 0;
}
-static int fadump_invalidate_dump(struct fadump_mem_struct *fdm)
+static int fadump_invalidate_dump(const struct fadump_mem_struct *fdm)
{
int rc = 0;
unsigned int wait_time;
@@ -1253,9 +1368,8 @@ void fadump_cleanup(void)
{
/* Invalidate the registration only if dump is active. */
if (fw_dump.dump_active) {
- init_fadump_mem_struct(&fdm,
- be64_to_cpu(fdm_active->cpu_state_data.destination_address));
- fadump_invalidate_dump(&fdm);
+ /* pass the same memory dump structure provided by platform */
+ fadump_invalidate_dump(fdm_active);
} else if (fw_dump.dump_registered) {
/* Un-register Firmware-assisted dump if it was registered. */
fadump_unregister_dump(&fdm);
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index b0f5b5d7cc2e..aa82a83e4c39 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -389,8 +389,11 @@ static bool lmb_is_removable(struct drmem_lmb *lmb)
phys_addr = lmb->base_addr;
#ifdef CONFIG_FA_DUMP
- /* Don't hot-remove memory that falls in fadump boot memory area */
- if (is_fadump_boot_memory_area(phys_addr, block_sz))
+ /*
+ * Don't hot-remove memory that falls in fadump boot memory area
+ * and memory that is reserved for capturing old kernel memory.
+ */
+ if (is_fadump_memory_area(phys_addr, block_sz))
return false;
#endif