block: add configurable error injection

Add a new block error injection interface that allows injecting specific
status codes for specific sector ranges.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Hannes Reinecke <hare@kernel.org>
Reviewed-by: Md Haris Iqbal <haris.iqbal@linux.dev>
Link: https://patch.msgid.link/20260611140703.2401204-5-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
Christoph Hellwig
2026-06-11 16:06:47 +02:00
committed by Jens Axboe
parent d39a63ead3
commit e8dcf2d142
10 changed files with 424 additions and 0 deletions
+59
View File
@@ -0,0 +1,59 @@
.. SPDX-License-Identifier: GPL-2.0
============================
Configurable Error Injection
============================
Overview
--------
Configurable error injection allows injecting specific block layer status codes
for sector ranges of a block device. Errors can be injected unconditionally, or
with a given probability.
To use configurable error injection, CONFIG_BLK_ERROR_INJECTION must be enabled.
The only interface is the error_injection debugfs file, which is created for
each registered gendisk. Writes to this file are used to create or delete rules
and reads return a list of the current error injection sites.
Options
-------
The following options specify the operations:
=================== =======================================================
add add a new rule
removeall remove all existing rules
=================== =======================================================
The following options specify the details of the rule for the add operation:
=================== =======================================================
op=<string> block layer operation this rule applies to. This uses
the XYZ for each REQ_OP_XYZ operation, e.g. READ, WRITE
or DISCARD. Mandatory.
status=<string> Status to return. This uses XYZ for each BLK_STS_XYZ
code, e.g. IOERR or MEDIUM. Mandatory.
start=<number> First block layer sector the rule applies to.
Optional, defaults to 0.
nr_sectors=<number> Number of sectors this rule applies to.
Optional, defaults to the remainder of the device.
chance=<number> Only return a failure with a likelihood of 1/chance.
Optional, defaults to 1 (always).
=================== =======================================================
Example
-------
Return BLK_STS_IOERR for one in 10 reads of sector 0 of /dev/nvme0n1:
$ echo 'add,op=READ,start=0,nr_sectors=1,status=IOERR,chance=10' > /sys/kernel/debug/block/nvme0n1/error_injection
Return BLK_STS_MEDIUM for every write to /dev/nvme0n1:
$ echo 'add,op=WRITE,start=0,status=MEDIUM' > /sys/kernel/debug/block/nvme0n1/error_injection
Remove all rules for /dev/nvme0n1:
$ echo 'removeall' > /sys/kernel/debug/block/nvme0n1/error_injection
+1
View File
@@ -22,3 +22,4 @@ Block
switching-sched
writeback_cache_control
ublk
error-injection
+8
View File
@@ -221,6 +221,14 @@ config BLOCK_HOLDER_DEPRECATED
config BLK_MQ_STACKING
bool
# The only user interface is the per-disk error_injection debugfs file, so
# the option is only offered when debugfs is available.
config BLK_ERROR_INJECTION
	bool "Enable block layer error injection"
	depends on DEBUG_FS
	select JUMP_LABEL if HAVE_ARCH_JUMP_LABEL
	help
	  Enable inserting arbitrary block errors through a debugfs interface.
	  See Documentation/block/error-injection.rst for details.
source "block/Kconfig.iosched"
endif # BLOCK
+1
View File
@@ -13,6 +13,7 @@ obj-y := bdev.o fops.o bio.o elevator.o blk-core.o blk-sysfs.o \
genhd.o ioprio.o badblocks.o partitions/ blk-rq-qos.o \
disk-events.o blk-ia-ranges.o early-lookup.o
obj-$(CONFIG_BLK_ERROR_INJECTION) += error-injection.o
obj-$(CONFIG_BLK_DEV_BSG_COMMON) += bsg.o
obj-$(CONFIG_BLK_DEV_BSGLIB) += bsg-lib.o
obj-$(CONFIG_BLK_CGROUP) += blk-cgroup.o
+4
View File
@@ -50,6 +50,7 @@
#include "blk-cgroup.h"
#include "blk-throttle.h"
#include "blk-ioprio.h"
#include "error-injection.h"
struct dentry *blk_debugfs_root;
@@ -767,6 +768,9 @@ static void __submit_bio_noacct_mq(struct bio *bio)
void submit_bio_noacct_nocheck(struct bio *bio, bool split)
{
if (unlikely(blk_error_inject(bio)))
return;
blk_cgroup_bio_start(bio);
if (!bio_flagged(bio, BIO_TRACE_COMPLETION)) {
+5
View File
@@ -19,6 +19,7 @@
#include "blk-wbt.h"
#include "blk-cgroup.h"
#include "blk-throttle.h"
#include "error-injection.h"
struct queue_sysfs_entry {
struct attribute attr;
@@ -933,6 +934,8 @@ static void blk_debugfs_remove(struct gendisk *disk)
blk_debugfs_lock_nomemsave(q);
blk_trace_shutdown(q);
if (IS_ENABLED(CONFIG_BLK_ERROR_INJECTION))
blk_error_injection_exit(disk);
debugfs_remove_recursive(q->debugfs_dir);
q->debugfs_dir = NULL;
q->sched_debugfs_dir = NULL;
@@ -963,6 +966,8 @@ int blk_register_queue(struct gendisk *disk)
memflags = blk_debugfs_lock(q);
q->debugfs_dir = debugfs_create_dir(disk->disk_name, blk_debugfs_root);
if (IS_ENABLED(CONFIG_BLK_ERROR_INJECTION))
blk_error_injection_init(disk);
if (queue_is_mq(q))
blk_mq_debugfs_register(q);
blk_debugfs_unlock(q, memflags);
+315
View File
@@ -0,0 +1,315 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2026 Christoph Hellwig.
*/
#include <linux/debugfs.h>
#include <linux/blkdev.h>
#include <linux/parser.h>
#include <linux/seq_file.h>
#include "blk.h"
#include "error-injection.h"
/*
 * A single error injection rule.  Rules are linked into the per-disk
 * error_injection_list and are matched against bios in __blk_error_inject().
 */
struct blk_error_inject {
/* link in gendisk->error_injection_list */
struct list_head entry;
/* first sector the rule covers */
sector_t start;
/* last sector the rule covers (inclusive), U64_MAX if no length was given */
sector_t end;
/* bio operation the rule applies to */
enum req_op op;
/* status a matching bio is completed with */
blk_status_t status;
/* only inject every 1 / chance times */
unsigned int chance;
};
/*
 * Global static key tested by blk_error_inject() before looking at the disk.
 * error_inject_add() increments it when a disk gets its first rule.
 */
DEFINE_STATIC_KEY_FALSE(blk_error_injection_enabled);
/*
 * Slow path of blk_error_inject(): walk the rules of the disk @bio is
 * submitted to and fail the bio with the status of the first rule that
 * matches its operation and sector range.
 *
 * Returns true if @bio was completed with an injected error, in which case
 * the caller must not touch it any more, or false if no rule fired.
 */
bool __blk_error_inject(struct bio *bio)
{
	struct gendisk *disk = bio->bi_bdev->bd_disk;
	struct blk_error_inject *rule;
	bool injected = false;

	rcu_read_lock();
	list_for_each_entry_rcu(rule, &disk->error_injection_list, entry) {
		if (rule->op != bio_op(bio))
			continue;

		/*
		 * Plain overlap test between the bio and the inclusive
		 * [start, end] range of the rule.
		 *
		 * Zero-sized bios like empty WRITEs with REQ_PREFLUSH or
		 * ZONE_RESET_ALL are deliberately not special-cased.  Doing so
		 * would be trivial, but then any WRITE rule would trigger for
		 * flushes.  To make that work properly, pure flushes first
		 * need to use REQ_OP_FLUSH at the bio level like blk-mq
		 * already does.
		 */
		if (bio_end_sector(bio) <= rule->start ||
		    bio->bi_iter.bi_sector > rule->end)
			continue;

		/* Roll the dice last so only matching rules consume randomness. */
		if (rule->chance > 1 && (get_random_u32() % rule->chance) != 0)
			continue;

		pr_info_ratelimited("%pg: injecting %s error for %s at sector %llu:%u\n",
				disk->part0, blk_status_to_str(rule->status),
				blk_op_str(rule->op), bio->bi_iter.bi_sector,
				bio_sectors(bio));
		bio->bi_status = rule->status;
		injected = true;
		break;
	}
	rcu_read_unlock();

	/* Complete the bio only after leaving the RCU read-side section. */
	if (injected)
		bio_endio(bio);
	return injected;
}
/*
 * Create a new rule from already parsed parameters and add it to @disk.
 *
 * @op and @status must have been set by the caller; REQ_OP_LAST and
 * BLK_STS_OK are rejected.  A @nr_sectors of 0 means "up to the highest
 * possible sector".
 *
 * Returns 0 on success, -EINVAL for invalid parameters, -ENOMEM if the rule
 * cannot be allocated or -ENODEV if the disk is not live.
 */
static int error_inject_add(struct gendisk *disk, enum req_op op,
		sector_t start, u64 nr_sectors, blk_status_t status,
		unsigned int chance)
{
	struct blk_error_inject *rule;
	int ret;

	if (op == REQ_OP_LAST || status == BLK_STS_OK)
		return -EINVAL;

	rule = kzalloc_obj(*rule);
	if (!rule)
		return -ENOMEM;

	if (!nr_sectors) {
		rule->end = U64_MAX;
	} else if (U64_MAX - nr_sectors < start) {
		/* start + nr_sectors would not fit into 64 bits */
		ret = -EINVAL;
		goto free_rule;
	} else {
		rule->end = start + nr_sectors - 1;
	}
	rule->start = start;
	rule->op = op;
	rule->status = status;
	rule->chance = chance;

	pr_debug_ratelimited("%pg: adding %s injection for %s at sector %llu:%llu\n",
			disk->part0, blk_status_to_str(status),
			blk_op_str(op),
			start, nr_sectors);

	mutex_lock(&disk->error_injection_lock);
	if (!disk_live(disk)) {
		ret = -ENODEV;
		goto unlock;
	}

	/* The first rule of a disk takes a reference on the global key. */
	if (list_empty(&disk->error_injection_list))
		static_branch_inc(&blk_error_injection_enabled);

	/*
	 * New rules go to the head of the list so that they can partially
	 * override older ones.  Duplicates are intentionally allowed as
	 * there is no real reason to reject them.
	 */
	list_add_rcu(&rule->entry, &disk->error_injection_list);
	set_bit(GD_ERROR_INJECT, &disk->state);
	mutex_unlock(&disk->error_injection_lock);
	return 0;

unlock:
	mutex_unlock(&disk->error_injection_lock);
free_rule:
	kfree(rule);
	return ret;
}
static void error_inject_removeall(struct gendisk *disk)
{
struct blk_error_inject *inj;
mutex_lock(&disk->error_injection_lock);
clear_bit(GD_ERROR_INJECT, &disk->state);
while ((inj = list_first_entry_or_null(&disk->error_injection_list,
struct blk_error_inject, entry))) {
list_del_rcu(&inj->entry);
kfree_rcu_mightsleep(inj);
}
static_branch_dec(&blk_error_injection_enabled);
mutex_unlock(&disk->error_injection_lock);
}
/*
 * Tokens returned by match_token() for writes to the error_injection file.
 * Every valid token is a separate bit so that
 * blk_error_injection_parse_options() can OR the tokens it has seen into a
 * mask.
 */
enum options {
/* operations */
Opt_add = (1u << 0),
Opt_removeall = (1u << 1),
/* details of the rule for the add operation */
Opt_op = (1u << 16),
Opt_start = (1u << 17),
Opt_nr_sectors = (1u << 18),
Opt_status = (1u << 19),
Opt_chance = (1u << 20),
/*
 * No explicit value, so this is Opt_chance + 1 and not a bit of its own.
 * It is the token of the opt_tokens terminator and ends up in the default
 * case of the parser.
 */
Opt_invalid,
};
/*
 * Pattern table handed to match_token().  The entry with the NULL pattern
 * terminates the table and supplies the token for unrecognized input.
 */
static const match_table_t opt_tokens = {
{ Opt_add, "add", },
{ Opt_removeall, "removeall", },
{ Opt_op, "op=%s", },
/* start and nr_sectors are parsed with match_u64() by the caller */
{ Opt_start, "start=%u" },
{ Opt_nr_sectors, "nr_sectors=%u" },
{ Opt_status, "status=%s" },
{ Opt_chance, "chance=%u" },
{ Opt_invalid, NULL, },
};
/*
 * Parse the value of an "op=" option into a request operation.
 *
 * Returns 0 and stores the operation in @op on success, -ENOMEM if the
 * value cannot be duplicated, or -EINVAL if the value does not name an
 * operation.  Failing right away keeps a later valid "op=" in the same
 * write from silently masking a mistyped one, and is consistent with how
 * an invalid "chance=" is handled by the caller.
 */
static int match_op(substring_t *args, enum req_op *op)
{
	const char *tag;
	int error = 0;

	tag = match_strdup(args);
	if (!tag)
		return -ENOMEM;

	*op = str_to_blk_op(tag);
	if (*op == REQ_OP_LAST) {
		pr_warn("invalid op '%s'\n", tag);
		error = -EINVAL;
	}
	kfree(tag);
	return error;
}
/*
 * Parse the value of a "status=" option into a block layer status code.
 *
 * Returns 0 and stores the status in @status on success, -ENOMEM if the
 * value cannot be duplicated, or -EINVAL if the value does not translate to
 * an error status.  Failing right away keeps a later valid "status=" in the
 * same write from silently masking a mistyped one.
 */
static int match_status(substring_t *args, blk_status_t *status)
{
	const char *tag;
	int error = 0;

	tag = match_strdup(args);
	if (!tag)
		return -ENOMEM;

	*status = tag_to_blk_status(tag);
	if (*status == BLK_STS_OK) {
		pr_warn("invalid status '%s'\n", tag);
		error = -EINVAL;
	}
	kfree(tag);
	return error;
}
/*
 * Parse one write to the error_injection file and execute it.
 *
 * @options is a writable, NUL-terminated string of options separated by
 * commas or newlines; it is modified while parsing.  Exactly one operation
 * ("add" or "removeall") must be present.  "removeall" does not accept any
 * other option.
 *
 * Returns 0 on success or a negative errno.
 */
static ssize_t blk_error_injection_parse_options(struct gendisk *disk,
		char *options)
{
	enum { Unset, Add, Removeall } action = Unset;
	blk_status_t status = BLK_STS_OK;
	enum req_op op = REQ_OP_LAST;
	unsigned int chance = 1, seen = 0;
	u64 start = 0, nr_sectors = 0;
	substring_t args[MAX_OPT_ARGS];
	char *opt;

	for (;;) {
		ssize_t token;
		int error = 0;

		opt = strsep(&options, ",\n");
		if (!opt)
			break;
		/* skip empty fields, e.g. the one after a trailing newline */
		if (*opt == '\0')
			continue;

		token = match_token(opt, opt_tokens, args);
		seen |= token;

		switch (token) {
		case Opt_add:
		case Opt_removeall:
			/* only a single operation per write */
			if (action != Unset)
				return -EINVAL;
			action = (token == Opt_add) ? Add : Removeall;
			break;
		case Opt_op:
			error = match_op(args, &op);
			break;
		case Opt_start:
			error = match_u64(args, &start);
			break;
		case Opt_nr_sectors:
			error = match_u64(args, &nr_sectors);
			break;
		case Opt_status:
			error = match_status(args, &status);
			break;
		case Opt_chance:
			error = match_uint(args, &chance);
			if (error)
				break;
			/* a likelihood of 1/0 makes no sense */
			if (!chance)
				error = -EINVAL;
			break;
		default:
			pr_warn("unknown parameter or missing value '%s'\n", opt);
			return -EINVAL;
		}

		if (error)
			return error;
	}

	if (action == Add)
		return error_inject_add(disk, op, start, nr_sectors, status,
				chance);
	if (action != Removeall)
		return -EINVAL;

	/* removeall takes no further options */
	if (seen & ~Opt_removeall)
		return -EINVAL;
	error_inject_removeall(disk);
	return 0;
}
/*
 * ->write handler of the error_injection file: copy the user buffer into a
 * NUL-terminated kernel string and hand it to the option parser.
 *
 * Returns @count if the command was executed, or a negative errno.
 */
static ssize_t blk_error_injection_write(struct file *file,
		const char __user *ubuf, size_t count, loff_t *pos)
{
	struct gendisk *disk = file_inode(file)->i_private;
	char *buf;
	int ret;

	buf = memdup_user_nul(ubuf, count);
	if (IS_ERR(buf))
		return PTR_ERR(buf);

	ret = blk_error_injection_parse_options(disk, buf);
	kfree(buf);

	if (!ret)
		return count;
	return ret;
}
/*
 * seq_file show handler: print one line per rule in the form
 * "<start>:<end> status=<tag>,chance=<n>", in list order.
 *
 * NOTE(review): the operation of a rule is not part of the output, so rules
 * for different operations on the same range look identical - confirm
 * whether that is intended.
 */
static int blk_error_injection_show(struct seq_file *s, void *private)
{
	struct gendisk *disk = s->private;
	struct blk_error_inject *rule;

	rcu_read_lock();
	list_for_each_entry_rcu(rule, &disk->error_injection_list, entry) {
		seq_printf(s, "%llu:%llu status=%s,chance=%u",
			   rule->start, rule->end,
			   blk_status_to_tag(rule->status), rule->chance);
		seq_putc(s, '\n');
	}
	rcu_read_unlock();

	return 0;
}
/*
 * ->open handler: set up a single-record seq_file that prints the rules
 * through blk_error_injection_show().  The inode's i_private pointer is
 * passed on as the seq_file private data.
 */
static int blk_error_injection_open(struct inode *inode, struct file *file)
{
return single_open(file, blk_error_injection_show, inode->i_private);
}
/* ->release handler: undo the single_open() done in the open handler. */
static int blk_error_injection_release(struct inode *inode, struct file *file)
{
return single_release(inode, file);
}
/*
 * File operations of the error_injection debugfs file: writes add or remove
 * rules, reads list the current rules through the seq_file interface.
 */
static const struct file_operations blk_error_injection_fops = {
.owner = THIS_MODULE,
.write = blk_error_injection_write,
.read = seq_read,
.open = blk_error_injection_open,
.release = blk_error_injection_release,
};
/*
 * Create the error_injection file in the debugfs directory of @disk's queue.
 * The file is only accessible by its owner (mode 0600) and carries @disk as
 * its private data.
 */
void blk_error_injection_init(struct gendisk *disk)
{
debugfs_create_file("error_injection", 0600, disk->queue->debugfs_dir,
disk, &blk_error_injection_fops);
}
/*
 * Tear down error injection for @disk by removing all of its rules.  The
 * debugfs file itself is not removed here.
 */
void blk_error_injection_exit(struct gendisk *disk)
{
error_inject_removeall(disk);
}
+21
View File
@@ -0,0 +1,21 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BLK_ERROR_INJECTION_H
#define _BLK_ERROR_INJECTION_H 1
#include <linux/jump_label.h>
/* Static key tested by blk_error_inject() before anything else is looked at. */
DECLARE_STATIC_KEY_FALSE(blk_error_injection_enabled);
/* Create the per-disk error_injection debugfs file. */
void blk_error_injection_init(struct gendisk *disk);
/* Remove all error injection rules of a disk. */
void blk_error_injection_exit(struct gendisk *disk);
/* Slow path of blk_error_inject(), do not call directly. */
bool __blk_error_inject(struct bio *bio);
/*
 * Check whether an error should be injected for @bio.
 *
 * The rule list is only walked if the feature is compiled in, the global
 * static key is enabled and the disk @bio is submitted to has the
 * GD_ERROR_INJECT state bit set.
 *
 * Returns true if @bio was completed with an injected error and must not be
 * submitted, false otherwise.
 */
static inline bool blk_error_inject(struct bio *bio)
{
	if (!IS_ENABLED(CONFIG_BLK_ERROR_INJECTION))
		return false;
	if (!static_branch_unlikely(&blk_error_injection_enabled))
		return false;
	if (!test_bit(GD_ERROR_INJECT, &bio->bi_bdev->bd_disk->state))
		return false;
	return __blk_error_inject(bio);
}
#endif /* _BLK_ERROR_INJECTION_H */
+4
View File
@@ -1485,6 +1485,10 @@ struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id,
lockdep_init_map(&disk->lockdep_map, "(bio completion)", lkclass, 0);
#ifdef CONFIG_BLOCK_HOLDER_DEPRECATED
INIT_LIST_HEAD(&disk->slave_bdevs);
#endif
#ifdef CONFIG_BLK_ERROR_INJECTION
mutex_init(&disk->error_injection_lock);
INIT_LIST_HEAD(&disk->error_injection_list);
#endif
mutex_init(&disk->rqos_state_mutex);
kobject_init(&disk->queue_kobj, &blk_queue_ktype);
+6
View File
@@ -176,6 +176,7 @@ struct gendisk {
#define GD_SUPPRESS_PART_SCAN 5
#define GD_OWNS_QUEUE 6
#define GD_ZONE_APPEND_USED 7
#define GD_ERROR_INJECT 8
struct mutex open_mutex; /* open/close mutex */
unsigned open_partitions; /* number of open partitions */
@@ -227,6 +228,11 @@ struct gendisk {
*/
struct blk_independent_access_ranges *ia_ranges;
#ifdef CONFIG_BLK_ERROR_INJECTION
struct mutex error_injection_lock;
struct list_head error_injection_list;
#endif
struct mutex rqos_state_mutex; /* rqos state change mutex */
};