Skip to content

Commit bd431e7

Browse files
committed
md: allow removing faulty rdev during resync
JIRA: https://issues.redhat.com/browse/RHEL-94433

commit c0ffeb6
Author: Zheng Qixing <zhengqixing@huawei.com>
Date: Mon Jul 7 15:54:12 2025 +0800

    md: allow removing faulty rdev during resync

    During RAID resync, faulty rdev cannot be removed and will result in
    "Device or resource busy" error when attempting hot removal.

    Reproduction steps:
      mdadm -Cv /dev/md0 -l1 -n3 -e1.2 /dev/sd{b..d}
      mdadm /dev/md0 -f /dev/sdb
      mdadm /dev/md0 -r /dev/sdb
      -> mdadm: hot remove failed for /dev/sdb: Device or resource busy

    After commit 4b10a3b ("md: ensure resync is prioritized over
    recovery"), when a device becomes faulty during resync, the
    md_choose_sync_action() function returns early without calling
    remove_and_add_spares(), preventing faulty device removal.

    This patch extracts a helper function remove_spares() to support
    removing faulty devices during RAID resync operations.

    Fixes: 4b10a3b ("md: ensure resync is prioritized over recovery")
    Signed-off-by: Zheng Qixing <zhengqixing@huawei.com>
    Reviewed-by: Li Nan <linan122@huawei.com>
    Link: https://lore.kernel.org/linux-raid/20250707075412.150301-1-zhengqixing@huaweicloud.com
    Signed-off-by: Yu Kuai <yukuai3@huawei.com>

Signed-off-by: Nigel Croxon <ncroxon@redhat.com>
1 parent af73b3b commit bd431e7

File tree

1 file changed

+17
-7
lines changed

1 file changed

+17
-7
lines changed

drivers/md/md.c

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9485,17 +9485,11 @@ static bool md_spares_need_change(struct mddev *mddev)
94859485
return false;
94869486
}
94879487

9488-
static int remove_and_add_spares(struct mddev *mddev,
9489-
struct md_rdev *this)
9488+
static int remove_spares(struct mddev *mddev, struct md_rdev *this)
94909489
{
94919490
struct md_rdev *rdev;
9492-
int spares = 0;
94939491
int removed = 0;
94949492

9495-
if (this && test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
9496-
/* Mustn't remove devices when resync thread is running */
9497-
return 0;
9498-
94999493
rdev_for_each(rdev, mddev) {
95009494
if ((this == NULL || rdev == this) && rdev_removeable(rdev) &&
95019495
!mddev->pers->hot_remove_disk(mddev, rdev)) {
@@ -9509,6 +9503,21 @@ static int remove_and_add_spares(struct mddev *mddev,
95099503
if (removed && mddev->kobj.sd)
95109504
sysfs_notify_dirent_safe(mddev->sysfs_degraded);
95119505

9506+
return removed;
9507+
}
9508+
9509+
static int remove_and_add_spares(struct mddev *mddev,
9510+
struct md_rdev *this)
9511+
{
9512+
struct md_rdev *rdev;
9513+
int spares = 0;
9514+
int removed = 0;
9515+
9516+
if (this && test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
9517+
/* Mustn't remove devices when resync thread is running */
9518+
return 0;
9519+
9520+
removed = remove_spares(mddev, this);
95129521
if (this && removed)
95139522
goto no_add;
95149523

@@ -9551,6 +9560,7 @@ static bool md_choose_sync_action(struct mddev *mddev, int *spares)
95519560

95529561
/* Check if resync is in progress. */
95539562
if (mddev->recovery_cp < MaxSector) {
9563+
remove_spares(mddev, NULL);
95549564
set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
95559565
clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
95569566
return true;

0 commit comments

Comments
 (0)