summaryrefslogtreecommitdiff |
diff options
author | Johannes Thumshirn <jthumshirn@suse.de> | 2019-07-09 11:29:10 +0200 |
---|---|---|
committer | Johannes Thumshirn <jthumshirn@suse.de> | 2019-07-09 11:29:10 +0200 |
commit | 4a619a38304215060f4322dbbaa085d1dea51118 (patch) | |
tree | 2047672c2a05d9bbb13616bcd154b0715976d2c1 | |
parent | 020ed56f0e5a9013e6650515ba7aacb1ff217737 (diff) | |
parent | 8decc12b80f424b88c48eb451b692025d62f8329 (diff) |
Merge remote-tracking branch 'origin/SLE15-SP1' into SLE12-SP5
Conflicts:
series.conf
suse-commit: a664f8e985d3e72bf57ebe38cf02af929155ee5f
-rw-r--r-- | arch/x86/include/asm/cacheinfo.h | 1 | ||||
-rw-r--r-- | arch/x86/kernel/amd_nb.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/cacheinfo.c | 32 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/cpu.h | 3 | ||||
-rw-r--r-- | fs/hugetlbfs/inode.c | 14 | ||||
-rw-r--r-- | include/linux/hugetlb.h | 37 | ||||
-rw-r--r-- | include/linux/page-flags.h | 5 | ||||
-rw-r--r-- | include/linux/swapops.h | 19 | ||||
-rw-r--r-- | mm/hugetlb.c | 81 | ||||
-rw-r--r-- | mm/madvise.c | 13 | ||||
-rw-r--r-- | mm/memory-failure.c | 349 | ||||
-rw-r--r-- | mm/migrate.c | 2 | ||||
-rw-r--r-- | mm/page_alloc.c | 30 |
13 files changed, 318 insertions, 272 deletions
diff --git a/arch/x86/include/asm/cacheinfo.h b/arch/x86/include/asm/cacheinfo.h index 86b63c7feab7..e958e28f7ab5 100644 --- a/arch/x86/include/asm/cacheinfo.h +++ b/arch/x86/include/asm/cacheinfo.h @@ -3,6 +3,5 @@ #define _ASM_X86_CACHEINFO_H void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id); -void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id); #endif /* _ASM_X86_CACHEINFO_H */ diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 4690fe155c59..5e4c0ca33be2 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -241,7 +241,7 @@ int amd_cache_northbridges(void) return -ENODEV; root = NULL; - while ((root = next_northbridge(root, amd_root_ids)) != NULL) + while ((root = next_northbridge(root, root_ids)) != NULL) root_count++; if (root_count) { @@ -283,7 +283,7 @@ int amd_cache_northbridges(void) * correct PCI roots. */ for (j = 1; j < roots_per_misc; j++) - root = next_northbridge(root, amd_root_ids); + root = next_northbridge(root, root_ids); } if (amd_gart_present()) diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index 5d4cf899264b..8f9ebf0c3ace 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -395,6 +395,22 @@ static void amd_l3_disable_index(struct amd_northbridge *nb, int cpu, } } +void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id) +{ + /* + * We may have multiple LLCs if L3 caches exist, so check if we + * have an L3 cache by looking at the L3 cache CPUID leaf. + */ + if (!cpuid_edx(0x80000006)) + return; + + /* + * LLC is at the core complex level. + * Core complex ID is ApicId[3] for these processors. + */ + per_cpu(cpu_llc_id, cpu) = c->apicid >> 3; +} + /* * disable a L3 cache index by using a disable-slot * @@ -680,22 +696,6 @@ void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id) } } -void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id) -{ - /* - * We may have multiple LLCs if L3 caches exist, so check if we - * have an L3 cache by looking at the L3 cache CPUID leaf. - */ - if (!cpuid_edx(0x80000006)) - return; - - /* - * LLC is at the core complex level. - * Core complex ID is ApicId[3] for these processors. - */ - per_cpu(cpu_llc_id, cpu) = c->apicid >> 3; -} - void init_amd_cacheinfo(struct cpuinfo_x86 *c) { diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index f18d8c677a18..de8929a147b0 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -27,7 +27,6 @@ struct cpu_dev { } legacy_models[5]; #endif }; -extern void init_hygon_cacheinfo(struct cpuinfo_x86 *c); struct _tlb_table { unsigned char descriptor; @@ -51,5 +50,7 @@ extern void x86_spec_ctrl_setup_ap(void); extern int detect_extended_topology_early(struct cpuinfo_x86 *c); extern int detect_extended_topology(struct cpuinfo_x86 *c); extern int detect_ht_early(struct cpuinfo_x86 *c); +extern void init_hygon_cacheinfo(struct cpuinfo_x86 *c); +void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id); #endif /* ARCH_X86_CPU_H */ diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 2ea718337509..db1df19519da 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -865,6 +865,19 @@ static int hugetlbfs_migrate_page(struct address_space *mapping, return MIGRATEPAGE_SUCCESS; } +static int hugetlbfs_error_remove_page(struct address_space *mapping, + struct page *page) +{ + struct inode *inode = mapping->host; + pgoff_t index = page->index; + + remove_huge_page(page); + if (unlikely(hugetlb_unreserve_pages(inode, index, index + 1, 1))) + hugetlb_fix_reserve_counts(inode); + + return 0; +} + static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(dentry->d_sb); @@ -980,6 +993,7 @@ static const struct address_space_operations hugetlbfs_aops = { .write_end = hugetlbfs_write_end, .set_page_dirty = hugetlbfs_set_page_dirty, .migratepage = hugetlbfs_migrate_page, + .error_remove_page = hugetlbfs_error_remove_page, }; diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index e77daf6144a3..33304cc6574f 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -92,7 +92,6 @@ int hugetlb_reserve_pages(struct inode *inode, long from, long to, vm_flags_t vm_flags); long hugetlb_unreserve_pages(struct inode *inode, long start, long end, long freed); -int dequeue_hwpoisoned_huge_page(struct page *page); bool isolate_huge_page(struct page *page, struct list_head *list); void putback_active_hugepage(struct page *page); void free_huge_page(struct page *page); @@ -172,10 +171,7 @@ static inline void hugetlb_show_meminfo(void) #define hugetlb_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma, dst_addr, \ src_addr, pagep) ({ BUG(); 0; }) #define huge_pte_offset(mm, address) 0 -static inline int dequeue_hwpoisoned_huge_page(struct page *page) -{ - return 0; -} + static inline bool isolate_huge_page(struct page *page, struct list_head *list) { @@ -468,6 +464,7 @@ static inline pgoff_t basepage_index(struct page *page) return __basepage_index(page); } +extern int dissolve_free_huge_page(struct page *page); extern int dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn); static inline bool hugepage_migration_supported(struct hstate *h) @@ -530,15 +527,37 @@ static inline unsigned int pages_per_huge_page(struct hstate *h) { return 1; } -#define hstate_index_to_shift(index) 0 -#define hstate_index(h) 0 + +static inline unsigned hstate_index_to_shift(unsigned index) +{ + return 0; +} + +static inline int hstate_index(struct hstate *h) +{ + return 0; +} static inline pgoff_t basepage_index(struct page *page) { return page->index; } -#define dissolve_free_huge_pages(s, e) 0 -#define hugepage_migration_supported(h) false + +static inline int dissolve_free_huge_page(struct page *page) +{ + return 0; +} + +static inline int dissolve_free_huge_pages(unsigned long start_pfn, + unsigned long end_pfn) +{ + return 0; +} + +static inline bool hugepage_migration_supported(struct hstate *h) +{ + return false; +} static inline spinlock_t *huge_pte_lockptr(struct hstate *h, struct mm_struct *mm, pte_t *pte) diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 86b279458b67..915e691899e6 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -359,8 +359,13 @@ PAGEFLAG_FALSE(Uncached) PAGEFLAG(HWPoison, hwpoison, PF_ANY) TESTSCFLAG(HWPoison, hwpoison, PF_ANY) #define __PG_HWPOISON (1UL << PG_hwpoison) +extern bool set_hwpoison_free_buddy_page(struct page *page); #else PAGEFLAG_FALSE(HWPoison) +static inline bool set_hwpoison_free_buddy_page(struct page *page) +{ + return 0; +} #define __PG_HWPOISON 0 #endif diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 361090cef69e..370dff20c2e8 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -249,11 +249,6 @@ static inline int is_hwpoison_entry(swp_entry_t entry) return swp_type(entry) == SWP_HWPOISON; } -static inline bool test_set_page_hwpoison(struct page *page) -{ - return TestSetPageHWPoison(page); -} - static inline void num_poisoned_pages_inc(void) { atomic_long_inc(&num_poisoned_pages); @@ -264,15 +259,6 @@ static inline void num_poisoned_pages_dec(void) atomic_long_dec(&num_poisoned_pages); } -static inline void num_poisoned_pages_add(long num) -{ - atomic_long_add(num, &num_poisoned_pages); -} - -static inline void num_poisoned_pages_sub(long num) -{ - atomic_long_sub(num, &num_poisoned_pages); -} #else static inline swp_entry_t make_hwpoison_entry(struct page *page) @@ -285,11 +271,6 @@ static inline int is_hwpoison_entry(swp_entry_t swp) return 0; } -static inline bool test_set_page_hwpoison(struct page *page) -{ - return false; -} - static inline void num_poisoned_pages_inc(void) { } diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 6e21d2ff98f4..6c28aa408f5f 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -22,7 +22,6 @@ #include <linux/rmap.h> #include <linux/swap.h> #include <linux/swapops.h> -#include <linux/page-isolation.h> #include <linux/jhash.h> #include <asm/page.h> @@ -865,7 +864,7 @@ static struct page *dequeue_huge_page_node(struct hstate *h, int nid) struct page *page; list_for_each_entry(page, &h->hugepage_freelists[nid], lru) - if (!is_migrate_isolate_page(page)) + if (!PageHWPoison(page)) break; /* * if 'non-isolated free hugepage' not found on the list, @@ -1435,28 +1434,48 @@ static int free_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed, /* * Dissolve a given free hugepage into free buddy pages. This function does - * nothing for in-use (including surplus) hugepages. Returns -EBUSY if the - * number of free hugepages would be reduced below the number of reserved - * hugepages. + * nothing for in-use hugepages and non-hugepages. + * This function returns values like below: + * + * -EBUSY: failed to dissolved free hugepages or the hugepage is in-use + * (allocated or reserved.) + * 0: successfully dissolved free hugepages or the page is not a + * hugepage (considered as already dissolved) */ -static int dissolve_free_huge_page(struct page *page) +int dissolve_free_huge_page(struct page *page) { - int rc = 0; + int rc = -EBUSY; + + /* Not to disrupt normal path by vainly holding hugetlb_lock */ + if (!PageHuge(page)) + return 0; spin_lock(&hugetlb_lock); - if (PageHuge(page) && !page_count(page)) { + if (!PageHuge(page)) { + rc = 0; + goto out; + } + + if (!page_count(page)) { struct page *head = compound_head(page); struct hstate *h = page_hstate(head); int nid = page_to_nid(head); - if (h->free_huge_pages - h->resv_huge_pages == 0) { - rc = -EBUSY; + if (h->free_huge_pages - h->resv_huge_pages == 0) goto out; + /* + * Move PageHWPoison flag from head page to the raw error page, + * which makes any subpages rather than the error page reusable. + */ + if (PageHWPoison(head) && page != head) { + SetPageHWPoison(page); + ClearPageHWPoison(head); } list_del(&head->lru); h->free_huge_pages--; h->free_huge_pages_node[nid]--; h->max_huge_pages--; update_and_free_page(h, head); + rc = 0; } out: spin_unlock(&hugetlb_lock); @@ -1482,11 +1501,9 @@ int dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn) for (pfn = start_pfn; pfn < end_pfn; pfn += 1 << minimum_order) { page = pfn_to_page(pfn); - if (PageHuge(page) && !page_count(page)) { - rc = dissolve_free_huge_page(page); - if (rc) - break; - } + rc = dissolve_free_huge_page(page); + if (rc) + break; } return rc; @@ -4825,40 +4842,6 @@ follow_huge_pud(struct mm_struct *mm, unsigned long address, return pte_page(*(pte_t *)pud) + ((address & ~PUD_MASK) >> PAGE_SHIFT); } -#ifdef CONFIG_MEMORY_FAILURE - -/* - * This function is called from memory failure code. - */ -int dequeue_hwpoisoned_huge_page(struct page *hpage) -{ - struct hstate *h = page_hstate(hpage); - int nid = page_to_nid(hpage); - int ret = -EBUSY; - - spin_lock(&hugetlb_lock); - /* - * Just checking !page_huge_active is not enough, because that could be - * an isolated/hwpoisoned hugepage (which have >0 refcount). - */ - if (!page_huge_active(hpage) && !page_count(hpage)) { - /* - * Hwpoisoned hugepage isn't linked to activelist or freelist, - * but dangling hpage->lru can trigger list-debug warnings - * (this happens when we call unpoison_memory() on it), - * so let it point to itself with list_del_init(). - */ - list_del_init(&hpage->lru); - set_page_refcounted(hpage); - h->free_huge_pages--; - h->free_huge_pages_node[nid]--; - ret = 0; - } - spin_unlock(&hugetlb_lock); - return ret; -} -#endif - bool isolate_huge_page(struct page *page, struct list_head *list) { bool ret = true; diff --git a/mm/madvise.c b/mm/madvise.c index c95aa2f5a7f1..50c34c388fae 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -609,12 +609,13 @@ static int madvise_inject_error(int behavior, { struct page *page; struct zone *zone; + unsigned int order; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - for (; start < end; start += PAGE_SIZE << - compound_order(compound_head(page))) { + + for (; start < end; start += PAGE_SIZE << order) { unsigned long pfn; int ret; @@ -623,6 +624,13 @@ static int madvise_inject_error(int behavior, return ret; pfn = page_to_pfn(page); + /* + * When soft offlining hugepages, after migrating the page + * we dissolve it, therefore in the second loop "page" will + * no longer be a compound page, and order will be 0. + */ + order = compound_order(compound_head(page)); + if (PageHWPoison(page)) { put_page(page); continue; @@ -637,7 +645,6 @@ static int madvise_inject_error(int behavior, return ret; continue; } - pr_info("Injecting memory failure for pfn %#lx at process virtual address %#lx\n", pfn, start); diff --git a/mm/memory-failure.c b/mm/memory-failure.c index c919c9e263a9..a86f879e0157 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -49,7 +49,6 @@ #include <linux/swap.h> #include <linux/backing-dev.h> #include <linux/migrate.h> -#include <linux/page-isolation.h> #include <linux/suspend.h> #include <linux/slab.h> #include <linux/swapops.h> @@ -59,6 +58,7 @@ #include <linux/memremap.h> #include <linux/kfifo.h> #include <linux/ratelimit.h> +#include <linux/page-isolation.h> #include "internal.h" #include "ras/ras_event.h" @@ -596,6 +596,39 @@ static int delete_from_lru_cache(struct page *p) return -EIO; } +static int truncate_error_page(struct page *p, unsigned long pfn, + struct address_space *mapping) +{ + int ret = MF_FAILED; + + if (mapping->a_ops->error_remove_page) { + int err = mapping->a_ops->error_remove_page(mapping, p); + + if (err != 0) { + pr_info("Memory failure: %#lx: Failed to punch page: %d\n", + pfn, err); + } else if (page_has_private(p) && + !try_to_release_page(p, GFP_NOIO)) { + pr_info("Memory failure: %#lx: failed to release buffers\n", + pfn); + } else { + ret = MF_RECOVERED; + } + } else { + /* + * If the file system doesn't support it just invalidate + * This fails on dirty or anything with private pages + */ + if (invalidate_inode_page(p)) + ret = MF_RECOVERED; + else + pr_info("Memory failure: %#lx: Failed to invalidate\n", + pfn); + } + + return ret; +} + /* * Error hit kernel page. * Do nothing, try to be lucky and not touch this instead. For a few cases we @@ -620,8 +653,6 @@ static int me_unknown(struct page *p, unsigned long pfn) */ static int me_pagecache_clean(struct page *p, unsigned long pfn) { - int err; - int ret = MF_FAILED; struct address_space *mapping; delete_from_lru_cache(p); @@ -653,30 +684,7 @@ static int me_pagecache_clean(struct page *p, unsigned long pfn) * * Open: to take i_mutex or not for this? Right now we don't. */ - if (mapping->a_ops->error_remove_page) { - err = mapping->a_ops->error_remove_page(mapping, p); - if (err != 0) { - pr_info("Memory failure: %#lx: Failed to punch page: %d\n", - pfn, err); - } else if (page_has_private(p) && - !try_to_release_page(p, GFP_NOIO)) { - pr_info("Memory failure: %#lx: failed to release buffers\n", - pfn); - } else { - ret = MF_RECOVERED; - } - } else { - /* - * If the file system doesn't support it just invalidate - * This fails on dirty or anything with private pages - */ - if (invalidate_inode_page(p)) - ret = MF_RECOVERED; - else - pr_info("Memory failure: %#lx: Failed to invalidate\n", - pfn); - } - return ret; + return truncate_error_page(p, pfn, mapping); } /* @@ -782,24 +790,29 @@ static int me_huge_page(struct page *p, unsigned long pfn) { int res = 0; struct page *hpage = compound_head(p); + struct address_space *mapping; if (!PageHuge(hpage)) return MF_DELAYED; - /* - * We can safely recover from error on free or reserved (i.e. - * not in-use) hugepage by dequeuing it from freelist. - * To check whether a hugepage is in-use or not, we can't use - * page->lru because it can be used in other hugepage operations, - * such as __unmap_hugepage_range() and gather_surplus_pages(). - * So instead we use page_mapping() and PageAnon(). - */ - if (!(page_mapping(hpage) || PageAnon(hpage))) { - res = dequeue_hwpoisoned_huge_page(hpage); - if (!res) - return MF_RECOVERED; + mapping = page_mapping(hpage); + if (mapping) { + res = truncate_error_page(hpage, pfn, mapping); + } else { + unlock_page(hpage); + /* + * migration entry prevents later access on error anonymous + * hugepage, so we can free and dissolve it into buddy to + * save healthy subpages. + */ + if (PageAnon(hpage)) + put_page(hpage); + dissolve_free_huge_page(p); + res = MF_RECOVERED; + lock_page(hpage); } - return MF_DELAYED; + + return res; } /* @@ -898,7 +911,7 @@ static int page_action(struct page_state *ps, struct page *p, count = page_count(p) - 1; if (ps->action == me_swapcache_dirty && result == MF_DELAYED) count--; - if (count != 0) { + if (count > 0) { pr_err("Memory failure: %#lx: %s still referenced by %d users\n", pfn, action_page_types[ps->type], count); result = MF_FAILED; @@ -1051,20 +1064,84 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn, return unmap_success; } -static void set_page_hwpoison_huge_page(struct page *hpage) +static int identify_page_state(unsigned long pfn, struct page *p, + unsigned long page_flags) { - int i; - int nr_pages = 1 << compound_order(hpage); - for (i = 0; i < nr_pages; i++) - SetPageHWPoison(hpage + i); + struct page_state *ps; + + /* + * The first check uses the current page flags which may not have any + * relevant information. The second check with the saved page flags is + * carried out only if the first check can't determine the page status. + */ + for (ps = error_states;; ps++) + if ((p->flags & ps->mask) == ps->res) + break; + + page_flags |= (p->flags & (1UL << PG_dirty)); + + if (!ps->mask) + for (ps = error_states;; ps++) + if ((page_flags & ps->mask) == ps->res) + break; + return page_action(ps, p, pfn); } -static void clear_page_hwpoison_huge_page(struct page *hpage) +static int memory_failure_hugetlb(unsigned long pfn, int trapno, int flags) { - int i; - int nr_pages = 1 << compound_order(hpage); - for (i = 0; i < nr_pages; i++) - ClearPageHWPoison(hpage + i); + struct page *p = pfn_to_page(pfn); + struct page *head = compound_head(p); + int res; + unsigned long page_flags; + + if (TestSetPageHWPoison(head)) { + pr_err("Memory failure: %#lx: already hardware poisoned\n", + pfn); + return 0; + } + + num_poisoned_pages_inc(); + + if (!(flags & MF_COUNT_INCREASED) && !get_hwpoison_page(p)) { + /* + * Check "filter hit" and "race with other subpage." + */ + lock_page(head); + if (PageHWPoison(head)) { + if ((hwpoison_filter(p) && TestClearPageHWPoison(p)) + || (p != head && TestSetPageHWPoison(head))) { + num_poisoned_pages_dec(); + unlock_page(head); + return 0; + } + } + unlock_page(head); + dissolve_free_huge_page(p); + action_result(pfn, MF_MSG_FREE_HUGE, MF_DELAYED); + return 0; + } + + lock_page(head); + page_flags = head->flags; + + if (!PageHWPoison(head)) { + pr_err("Memory failure: %#lx: just unpoisoned\n", pfn); + num_poisoned_pages_dec(); + unlock_page(head); + put_hwpoison_page(head); + return 0; + } + + if (!hwpoison_user_mappings(p, pfn, trapno, flags, &head)) { + action_result(pfn, MF_MSG_UNMAP_FAILED, MF_IGNORED); + res = -EBUSY; + goto out; + } + + res = identify_page_state(pfn, p, page_flags); +out: + unlock_page(head); + return res; } static int memory_failure_dev_pagemap(unsigned long pfn, int trapno, int flags, @@ -1165,13 +1242,11 @@ out: */ int memory_failure(unsigned long pfn, int trapno, int flags) { - struct page_state *ps; struct page *p; struct page *hpage; struct page *orig_head; struct dev_pagemap *pgmap; int res; - unsigned int nr_pages; unsigned long page_flags; if (!sysctl_memory_failure_recovery) @@ -1188,34 +1263,22 @@ int memory_failure(unsigned long pfn, int trapno, int flags) return memory_failure_dev_pagemap(pfn, trapno, flags, pgmap); p = pfn_to_page(pfn); - orig_head = hpage = compound_head(p); + if (PageHuge(p)) + return memory_failure_hugetlb(pfn, trapno, flags); if (TestSetPageHWPoison(p)) { pr_err("Memory failure: %#lx: already hardware poisoned\n", pfn); return 0; } - /* - * Currently errors on hugetlbfs pages are measured in hugepage units, - * so nr_pages should be 1 << compound_order. OTOH when errors are on - * transparent hugepages, they are supposed to be split and error - * measurement is done in normal page units. So nr_pages should be one - * in this case. - */ - if (PageHuge(p)) - nr_pages = 1 << compound_order(hpage); - else /* normal page or thp */ - nr_pages = 1; - num_poisoned_pages_add(nr_pages); + orig_head = hpage = compound_head(p); + num_poisoned_pages_inc(); /* * We need/can do nothing about count=0 pages. * 1) it's a free page, and therefore in safe hand: * prep_new_page() will be the gate keeper. - * 2) it's a free hugepage, which is also safe: - * an affected hugepage will be dequeued from hugepage freelist, - * so there's no concern about reusing it ever after. - * 3) it's part of a non-compound high order page. + * 2) it's part of a non-compound high order page. * Implies some kernel user: cannot stop them from * R/W the page; let's pray that the page has been * used and will be freed some time later. @@ -1226,32 +1289,13 @@ int memory_failure(unsigned long pfn, int trapno, int flags) if (is_free_buddy_page(p)) { action_result(pfn, MF_MSG_BUDDY, MF_DELAYED); return 0; - } else if (PageHuge(hpage)) { - /* - * Check "filter hit" and "race with other subpage." - */ - lock_page(hpage); - if (PageHWPoison(hpage)) { - if ((hwpoison_filter(p) && TestClearPageHWPoison(p)) - || (p != hpage && TestSetPageHWPoison(hpage))) { - num_poisoned_pages_sub(nr_pages); - unlock_page(hpage); - return 0; - } - } - set_page_hwpoison_huge_page(hpage); - res = dequeue_hwpoisoned_huge_page(hpage); - action_result(pfn, MF_MSG_FREE_HUGE, - res ? MF_IGNORED : MF_DELAYED); - unlock_page(hpage); - return res; } else { action_result(pfn, MF_MSG_KERNEL_HIGH_ORDER, MF_IGNORED); return -EBUSY; } } - if (!PageHuge(p) && PageTransHuge(hpage)) { + if (PageTransHuge(hpage)) { lock_page(p); if (!PageAnon(p) || unlikely(split_huge_page(p))) { unlock_page(p); @@ -1262,7 +1306,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags) pr_err("Memory failure: %#lx: thp split failed\n", pfn); if (TestClearPageHWPoison(p)) - num_poisoned_pages_sub(nr_pages); + num_poisoned_pages_dec(); put_hwpoison_page(p); return -EBUSY; } @@ -1289,7 +1333,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags) return 0; } - lock_page(hpage); + lock_page(p); /* * The page could have changed compound pages during the locking. @@ -1318,42 +1362,23 @@ int memory_failure(unsigned long pfn, int trapno, int flags) */ if (!PageHWPoison(p)) { pr_err("Memory failure: %#lx: just unpoisoned\n", pfn); - num_poisoned_pages_sub(nr_pages); - unlock_page(hpage); - put_hwpoison_page(hpage); + num_poisoned_pages_dec(); + unlock_page(p); + put_hwpoison_page(p); return 0; } if (hwpoison_filter(p)) { if (TestClearPageHWPoison(p)) - num_poisoned_pages_sub(nr_pages); - unlock_page(hpage); - put_hwpoison_page(hpage); + num_poisoned_pages_dec(); + unlock_page(p); + put_hwpoison_page(p); return 0; } - if (!PageHuge(p) && !PageTransTail(p) && !PageLRU(p)) + if (!PageTransTail(p) && !PageLRU(p)) goto identify_page_state; /* - * For error on the tail page, we should set PG_hwpoison - * on the head page to show that the hugepage is hwpoisoned - */ - if (PageHuge(p) && PageTail(p) && TestSetPageHWPoison(hpage)) { - action_result(pfn, MF_MSG_POISONED_HUGE, MF_IGNORED); - unlock_page(hpage); - put_hwpoison_page(hpage); - return 0; - } - /* - * Set PG_hwpoison on all pages in an error hugepage, - * because containment is done in hugepage unit for now. - * Since we have done TestSetPageHWPoison() for the head page with - * page lock held, we can safely set PG_hwpoison bits on tail pages. - */ - if (PageHuge(p)) - set_page_hwpoison_huge_page(hpage); - - /* * It's very difficult to mess with pages currently under IO * and in many cases impossible, so we just avoid it here. */ @@ -1382,25 +1407,9 @@ int memory_failure(unsigned long pfn, int trapno, int flags) } identify_page_state: - res = -EBUSY; - /* - * The first check uses the current page flags which may not have any - * relevant information. The second check with the saved page flagss is - * carried out only if the first check can't determine the page status. - */ - for (ps = error_states;; ps++) - if ((p->flags & ps->mask) == ps->res) - break; - - page_flags |= (p->flags & (1UL << PG_dirty)); - - if (!ps->mask) - for (ps = error_states;; ps++) - if ((page_flags & ps->mask) == ps->res) - break; - res = page_action(ps, p, pfn); + res = identify_page_state(pfn, p, page_flags); out: - unlock_page(hpage); + unlock_page(p); return res; } EXPORT_SYMBOL_GPL(memory_failure); @@ -1522,7 +1531,6 @@ int unpoison_memory(unsigned long pfn) struct page *page; struct page *p; int freeit = 0; - unsigned int nr_pages; static DEFINE_RATELIMIT_STATE(unpoison_rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); @@ -1567,20 +1575,7 @@ int unpoison_memory(unsigned long pfn) return 0; } - nr_pages = 1 << compound_order(page); - if (!get_hwpoison_page(p)) { - /* - * Since HWPoisoned hugepage should have non-zero refcount, - * race between memory failure and unpoison seems to happen. - * In such case unpoison fails and memory failure runs - * to the end. - */ - if (PageHuge(page)) { - unpoison_pr_info("Unpoison: Memory failure is now running on free hugepage %#lx\n", - pfn, &unpoison_rs); - return 0; - } if (TestClearPageHWPoison(p)) num_poisoned_pages_dec(); unpoison_pr_info("Unpoison: Software-unpoisoned free page %#lx\n", @@ -1598,10 +1593,8 @@ int unpoison_memory(unsigned long pfn) if (TestClearPageHWPoison(page)) { unpoison_pr_info("Unpoison: Software-unpoisoned page %#lx\n", pfn, &unpoison_rs); - num_poisoned_pages_sub(nr_pages); + num_poisoned_pages_dec(); freeit = 1; - if (PageHuge(page)) - clear_page_hwpoison_huge_page(page); } unlock_page(page); @@ -1727,14 +1720,19 @@ static int soft_offline_huge_page(struct page *page, int flags) if (ret > 0) ret = -EIO; } else { - /* overcommit hugetlb page will be freed to buddy */ - if (PageHuge(page)) { - set_page_hwpoison_huge_page(hpage); - dequeue_hwpoisoned_huge_page(hpage); - num_poisoned_pages_add(1 << compound_order(hpage)); - } else { - SetPageHWPoison(page); - num_poisoned_pages_inc(); + /* + * We set PG_hwpoison only when the migration source hugepage + * was successfully dissolved, because otherwise hwpoisoned + * hugepage remains on free hugepage list, then userspace will + * find it as SIGBUS by allocation failure. That's not expected + * in soft-offlining. + */ + ret = dissolve_free_huge_page(page); + if (!ret) { + if (set_hwpoison_free_buddy_page(page)) + num_poisoned_pages_inc(); + else + ret = -EBUSY; } } return ret; @@ -1823,6 +1821,7 @@ static int __soft_offline_page(struct page *page, int flags) static int soft_offline_in_use_page(struct page *page, int flags) { int ret; + int mt; struct page *hpage = compound_head(page); if (!PageHuge(page) && PageTransHuge(hpage)) { @@ -1839,26 +1838,34 @@ static int soft_offline_in_use_page(struct page *page, int flags) unlock_page(page); } + /* + * Setting MIGRATE_ISOLATE here ensures that the page will be linked + * to free list immediately (not via pcplist) when released after + * successful page migration. Otherwise we can't guarantee that the + * page is really free after put_page() returns, so + * set_hwpoison_free_buddy_page() highly likely fails. + */ + mt = get_pageblock_migratetype(page); + set_pageblock_migratetype(page, MIGRATE_ISOLATE); if (PageHuge(page)) ret = soft_offline_huge_page(page, flags); else ret = __soft_offline_page(page, flags); - + set_pageblock_migratetype(page, mt); return ret; } -static void soft_offline_free_page(struct page *page) +static int soft_offline_free_page(struct page *page) { - if (PageHuge(page)) { - struct page *hpage = compound_head(page); + int rc = dissolve_free_huge_page(page); - set_page_hwpoison_huge_page(hpage); - if (!dequeue_hwpoisoned_huge_page(hpage)) - num_poisoned_pages_add(1 << compound_order(hpage)); - } else { - if (!TestSetPageHWPoison(page)) + if (!rc) { + if (set_hwpoison_free_buddy_page(page)) num_poisoned_pages_inc(); + else + rc = -EBUSY; } + return rc; } /** @@ -1910,7 +1917,7 @@ int soft_offline_page(struct page *page, int flags) if (ret > 0) ret = soft_offline_in_use_page(page, flags); else if (ret == 0) - soft_offline_free_page(page); + ret = soft_offline_free_page(page); return ret; } diff --git a/mm/migrate.c b/mm/migrate.c index a157d67877c5..732585afec56 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1225,7 +1225,7 @@ out: * intentionally. Although it's rather weird, * it's how HWPoison flag works at the moment. */ - if (!test_set_page_hwpoison(page)) + if (set_hwpoison_free_buddy_page(page)) num_poisoned_pages_inc(); } } else { diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 55a8fc0ce251..35437fbe799a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -8162,3 +8162,33 @@ bool is_free_buddy_page(struct page *page) return order < MAX_ORDER; } + +#ifdef CONFIG_MEMORY_FAILURE +/* + * Set PG_hwpoison flag if a given page is confirmed to be a free page. This + * test is performed under the zone lock to prevent a race against page + * allocation. + */ +bool set_hwpoison_free_buddy_page(struct page *page) +{ + struct zone *zone = page_zone(page); + unsigned long pfn = page_to_pfn(page); + unsigned long flags; + unsigned int order; + bool hwpoisoned = false; + + spin_lock_irqsave(&zone->lock, flags); + for (order = 0; order < MAX_ORDER; order++) { + struct page *page_head = page - (pfn & ((1 << order) - 1)); + + if (PageBuddy(page_head) && page_order(page_head) >= order) { + if (!TestSetPageHWPoison(page)) + hwpoisoned = true; + break; + } + } + spin_unlock_irqrestore(&zone->lock, flags); + + return hwpoisoned; +} +#endif |