Home Home > GIT Browse > SLE15-SP2
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Hannes Reinecke <hare@suse.de> 2019-08-23 14:03:14 +0200
committer: Hannes Reinecke <hare@suse.de> 2019-08-23 14:03:14 +0200
commit: b13a6ded749dc23f16dd774d034833ee5083652f (patch)
tree: c17f46b694dd0ca8cb91c9647c5be9128eea3994
parent: 234c64fb7e6e8f66330e9817f0519d5ef580c81e (diff)
Refresh patches.suse/md-display-timeout-error.patch.
-rw-r--r-- drivers/md/md.c 44
-rw-r--r-- drivers/md/md.h 3
-rw-r--r-- drivers/md/raid10.c 47
-rw-r--r-- drivers/md/raid10.h 1
-rw-r--r-- include/uapi/linux/raid/md_p.h 2
5 files changed, 83 insertions, 14 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 24638ccedce4..4c8deb542190 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -840,8 +840,10 @@ static void super_written(struct bio *bio)
if (bio->bi_status) {
pr_err("md: super_written gets error=%d\n", bio->bi_status);
md_error(mddev, rdev);
- if (!test_bit(Faulty, &rdev->flags)
- && (bio->bi_opf & MD_FAILFAST)) {
+ if (!test_bit(Faulty, &rdev->flags)) {
+ if (bio->bi_status == BLK_STS_TIMEOUT)
+ set_bit(Timeout, &rdev->flags);
+ } else if (bio->bi_opf & MD_FAILFAST) {
set_bit(MD_SB_NEED_REWRITE, &mddev->sb_flags);
set_bit(LastDev, &rdev->flags);
}
@@ -1192,6 +1194,7 @@ static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev)
rdev->raid_disk = -1;
clear_bit(Faulty, &rdev->flags);
+ clear_bit(Timeout, &rdev->flags);
clear_bit(In_sync, &rdev->flags);
clear_bit(Bitmap_sync, &rdev->flags);
clear_bit(WriteMostly, &rdev->flags);
@@ -1688,6 +1691,7 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
rdev->raid_disk = -1;
clear_bit(Faulty, &rdev->flags);
+ clear_bit(Timeout, &rdev->flags);
clear_bit(In_sync, &rdev->flags);
clear_bit(Bitmap_sync, &rdev->flags);
clear_bit(WriteMostly, &rdev->flags);
@@ -1810,6 +1814,9 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
break;
case MD_DISK_ROLE_JOURNAL: /* journal device */
if (!(le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)) {
+ /* probably legacy 'timed-out' device */
+ if (mddev->level == 10 || mddev->level == 1)
+ goto timeout;
/* journal device without journal feature */
pr_warn("md: journal device provided without journal feature, ignoring the device\n");
return -EINVAL;
@@ -1818,6 +1825,11 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
rdev->journal_tail = le64_to_cpu(sb->journal_tail);
rdev->raid_disk = 0;
break;
+ case MD_DISK_ROLE_TIMEOUT: /* faulty, timeout */
+ timeout:
+ set_bit(Faulty, &rdev->flags);
+ set_bit(Timeout, &rdev->flags);
+ break;
default:
rdev->saved_raid_disk = role;
if ((le32_to_cpu(sb->feature_map) &
@@ -1996,9 +2008,12 @@ retry:
rdev_for_each(rdev2, mddev) {
i = rdev2->desc_nr;
- if (test_bit(Faulty, &rdev2->flags))
- sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_FAULTY);
- else if (test_bit(In_sync, &rdev2->flags))
+ if (test_bit(Faulty, &rdev2->flags)) {
+ if (test_bit(Timeout, &rdev2->flags))
+ sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_TIMEOUT);
+ else
+ sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_FAULTY);
+ } else if (test_bit(In_sync, &rdev2->flags))
sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
else if (test_bit(Journal, &rdev2->flags))
sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_JOURNAL);
@@ -2755,6 +2770,8 @@ state_show(struct md_rdev *rdev, char *page)
(!test_bit(ExternalBbl, &flags) &&
rdev->badblocks.unacked_exist))
len += sprintf(page+len, "faulty%s", sep);
+ if (test_bit(Timeout, &flags))
+ len += sprintf(page+len, "timeout%s", sep);
if (test_bit(In_sync, &flags))
len += sprintf(page+len, "in_sync%s", sep);
if (test_bit(Journal, &flags))
@@ -2810,6 +2827,11 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
err = 0;
else
err = -EBUSY;
+ } else if (cmd_match(buf, "timeout") && rdev->mddev->pers) {
+ md_error(rdev->mddev, rdev);
+ if (test_bit(Faulty, &rdev->flags))
+ set_bit(Timeout, &rdev->flags);
+ err = 0;
} else if (cmd_match(buf, "remove")) {
if (rdev->mddev->pers) {
clear_bit(Blocked, &rdev->flags);
@@ -3061,6 +3083,7 @@ slot_store(struct md_rdev *rdev, const char *buf, size_t len)
rdev->raid_disk = slot;
/* assume it is working */
clear_bit(Faulty, &rdev->flags);
+ clear_bit(Timeout, &rdev->flags);
clear_bit(WriteMostly, &rdev->flags);
set_bit(In_sync, &rdev->flags);
sysfs_notify_dirent_safe(rdev->sysfs_state);
@@ -6365,9 +6388,11 @@ static int get_disk_info(struct mddev *mddev, void __user * arg)
info.minor = MINOR(rdev->bdev->bd_dev);
info.raid_disk = rdev->raid_disk;
info.state = 0;
- if (test_bit(Faulty, &rdev->flags))
+ if (test_bit(Faulty, &rdev->flags)) {
info.state |= (1<<MD_DISK_FAULTY);
- else if (test_bit(In_sync, &rdev->flags)) {
+ if (test_bit(Timeout, &rdev->flags))
+ info.state |= (1<<MD_DISK_TIMEOUT);
+ } else if (test_bit(In_sync, &rdev->flags)) {
info.state |= (1<<MD_DISK_ACTIVE);
info.state |= (1<<MD_DISK_SYNC);
}
@@ -7940,7 +7965,10 @@ static int md_seq_show(struct seq_file *seq, void *v)
if (test_bit(Journal, &rdev->flags))
seq_printf(seq, "(J)");
if (test_bit(Faulty, &rdev->flags)) {
- seq_printf(seq, "(F)");
+ if (test_bit(Timeout, &rdev->flags))
+ seq_printf(seq, "(T)");
+ else
+ seq_printf(seq, "(F)");
continue;
}
if (rdev->raid_disk < 0)
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 10f98200e2f8..4ef41558e13c 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -205,6 +205,9 @@ enum flag_bits {
* multiqueue device should check if there
* is collision between write behind bios.
*/
+ Timeout, /* Device fault due to timeout.
+ * 'Faulty' is required to be set.
+ */
};
static inline int is_badblock(struct md_rdev *rdev, sector_t s, int sectors,
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 8a1354a08a1a..c06f468f420c 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -356,6 +356,7 @@ static void raid10_end_read_request(struct bio *bio)
slot = r10_bio->read_slot;
rdev = r10_bio->devs[slot].rdev;
+ r10_bio->devs[slot].error = bio->bi_status;
/*
* this branch is our 'one mirror IO has finished' event handler:
*/
@@ -446,6 +447,7 @@ static void raid10_end_write_request(struct bio *bio)
repl = 0;
rdev = conf->mirrors[dev].rdev;
}
+ r10_bio->devs[slot].error = bio->bi_status;
/*
* this branch is our 'one mirror IO has finished' event handler:
*/
@@ -472,6 +474,8 @@ static void raid10_end_write_request(struct bio *bio)
*/
set_bit(R10BIO_WriteError, &r10_bio->state);
else {
+ if (bio->bi_status == BLK_STS_TIMEOUT)
+ set_bit(Timeout, &rdev->flags);
r10_bio->devs[slot].bio = NULL;
to_put = bio;
dec_rdev = 1;
@@ -909,7 +913,11 @@ static void flush_pending_writes(struct r10conf *conf)
bio->bi_next = NULL;
bio_set_dev(bio, rdev->bdev);
if (test_bit(Faulty, &rdev->flags)) {
- bio_io_error(bio);
+ if (test_bit(Timeout, &rdev->flags))
+ bio->bi_status = BLK_STS_TIMEOUT;
+ else
+ bio->bi_status = BLK_STS_IOERR;
+ bio_endio(bio);
} else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
!blk_queue_discard(bio->bi_disk->queue)))
/* Just ignore it */
@@ -1094,7 +1102,11 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
bio->bi_next = NULL;
bio_set_dev(bio, rdev->bdev);
if (test_bit(Faulty, &rdev->flags)) {
- bio_io_error(bio);
+ if (test_bit(Timeout, &rdev->flags))
+ bio->bi_status = BLK_STS_TIMEOUT;
+ else
+ bio->bi_status = BLK_STS_IOERR;
+ bio_endio(bio);
} else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
!blk_queue_discard(bio->bi_disk->queue)))
/* Just ignore it */
@@ -2070,6 +2082,9 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
} else if (test_bit(FailFast, &rdev->flags)) {
/* Just give up on this device */
md_error(rdev->mddev, rdev);
+ if (test_bit(Faulty, &rdev->flags) &&
+ r10_bio->devs[i].error == BLK_STS_TIMEOUT)
+ set_bit(Timeout, &rdev->flags);
continue;
}
/* Ok, we need to write this bio, either to correct an
@@ -2333,6 +2348,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
struct md_rdev *rdev;
int max_read_errors = atomic_read(&mddev->max_corr_read_errors);
int d = r10_bio->devs[r10_bio->read_slot].devnum;
+ int read_error = r10_bio->devs[r10_bio->read_slot].error;
/* still own a reference to this rdev, so it cannot
* have been cleared recently.
@@ -2356,6 +2372,9 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
pr_notice("md/raid10:%s: %s: Failing raid device\n",
mdname(mddev), b);
md_error(mddev, rdev);
+ if (test_bit(Faulty, &rdev->flags) &&
+ read_error == BLK_STS_TIMEOUT)
+ set_bit(Timeout, &rdev->flags);
r10_bio->devs[r10_bio->read_slot].bio = IO_BLOCKED;
return;
}
@@ -2597,9 +2616,12 @@ static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio)
freeze_array(conf, 1);
fix_read_error(conf, mddev, r10_bio);
unfreeze_array(conf);
- } else
+ } else {
md_error(mddev, rdev);
-
+ if (test_bit(Faulty, &rdev->flags) &&
+ r10_bio->devs[slot].error == BLK_STS_TIMEOUT)
+ set_bit(Timeout, &rdev->flags);
+ }
rdev_dec_pending(rdev, mddev);
allow_barrier(conf);
r10_bio->state = 0;
@@ -2634,8 +2656,13 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
if (!rdev_set_badblocks(
rdev,
r10_bio->devs[m].addr,
- r10_bio->sectors, 0))
+ r10_bio->sectors, 0)) {
md_error(conf->mddev, rdev);
+ if (test_bit(Faulty, &rdev->flags) &&
+ r10_bio->devs[m].error ==
+ BLK_STS_TIMEOUT)
+ set_bit(Timeout, &rdev->flags);
+ }
}
rdev = conf->mirrors[dev].replacement;
if (r10_bio->devs[m].repl_bio == NULL ||
@@ -2651,8 +2678,13 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
if (!rdev_set_badblocks(
rdev,
r10_bio->devs[m].addr,
- r10_bio->sectors, 0))
+ r10_bio->sectors, 0)) {
md_error(conf->mddev, rdev);
+ if (test_bit(Faulty, &rdev->flags) &&
+ r10_bio->devs[m].error ==
+ BLK_STS_TIMEOUT)
+ set_bit(Timeout, &rdev->flags);
+ }
}
}
put_buf(r10_bio);
@@ -4856,6 +4888,9 @@ static void end_reshape_write(struct bio *bio)
if (bio->bi_status) {
/* FIXME should record badblock */
md_error(mddev, rdev);
+ if (test_bit(Faulty, &rdev->flags) &&
+ bio->bi_status == BLK_STS_TIMEOUT)
+ set_bit(Timeout, &rdev->flags);
}
rdev_dec_pending(rdev, mddev);
diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h
index d3eaaf3eb1bc..1b6dd7a38f80 100644
--- a/drivers/md/raid10.h
+++ b/drivers/md/raid10.h
@@ -153,6 +153,7 @@ struct r10bio {
};
sector_t addr;
int devnum;
+ int error;
} devs[0];
};
diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h
index b0d15c73f6d7..531131495349 100644
--- a/include/uapi/linux/raid/md_p.h
+++ b/include/uapi/linux/raid/md_p.h
@@ -89,6 +89,7 @@
* devices available - and don't try to
* correct read errors.
*/
+#define MD_DISK_TIMEOUT 11 /* disk is faulty due to timeout */
#define MD_DISK_WRITEMOSTLY 9 /* disk is "write-mostly" is RAID1 config.
* read requests will only be sent here in
@@ -99,6 +100,7 @@
#define MD_DISK_ROLE_SPARE 0xffff
#define MD_DISK_ROLE_FAULTY 0xfffe
#define MD_DISK_ROLE_JOURNAL 0xfffd
+#define MD_DISK_ROLE_TIMEOUT 0xfff0 /* SUSE-only timed-out */
#define MD_DISK_ROLE_MAX 0xff00 /* max value of regular disk role */
typedef struct mdp_device_descriptor_s {