ext4: hold s_fc_lock while during fast commit

Leaving s_fc_lock in between during commit in ext4_fc_perform_commit()
function leaves room for subtle concurrency bugs where ext4_fc_del() may
delete an inode from the fast commit list, leaving list in an inconsistent
state.

Signed-off-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://patch.msgid.link/20250508175908.1004880-10-harshadshirwadkar@gmail.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
This commit is contained in:
Harshad Shirwadkar
2025-05-08 17:59:08 +00:00
committed by Theodore Ts'o
parent 12e64e7f85
commit 6593714d67

View File

@@ -424,6 +424,7 @@ static int __track_dentry_update(handle_t *handle, struct inode *inode,
node->fcd_ino = inode->i_ino;
take_dentry_name_snapshot(&node->fcd_name, dentry);
INIT_LIST_HEAD(&node->fcd_dilist);
INIT_LIST_HEAD(&node->fcd_list);
mutex_lock(&sbi->s_fc_lock);
if (sbi->s_journal->j_flags & JBD2_FULL_COMMIT_ONGOING ||
sbi->s_journal->j_flags & JBD2_FAST_COMMIT_ONGOING)
@@ -985,8 +986,6 @@ static int ext4_fc_flush_data(journal_t *journal)
/* Commit all the directory entry updates */
static int ext4_fc_commit_dentry_updates(journal_t *journal, u32 *crc)
__acquires(&sbi->s_fc_lock)
__releases(&sbi->s_fc_lock)
{
struct super_block *sb = journal->j_private;
struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -1000,26 +999,22 @@ __releases(&sbi->s_fc_lock)
list_for_each_entry_safe(fc_dentry, fc_dentry_n,
&sbi->s_fc_dentry_q[FC_Q_MAIN], fcd_list) {
if (fc_dentry->fcd_op != EXT4_FC_TAG_CREAT) {
mutex_unlock(&sbi->s_fc_lock);
if (!ext4_fc_add_dentry_tlv(sb, crc, fc_dentry)) {
ret = -ENOSPC;
goto lock_and_exit;
}
mutex_lock(&sbi->s_fc_lock);
if (!ext4_fc_add_dentry_tlv(sb, crc, fc_dentry))
return -ENOSPC;
continue;
}
/*
* With fcd_dilist we need not loop in sbi->s_fc_q to get the
* corresponding inode pointer
* corresponding inode. Also, the corresponding inode could have been
* deleted, in which case, we don't need to do anything.
*/
WARN_ON(list_empty(&fc_dentry->fcd_dilist));
if (list_empty(&fc_dentry->fcd_dilist))
continue;
ei = list_first_entry(&fc_dentry->fcd_dilist,
struct ext4_inode_info, i_fc_dilist);
inode = &ei->vfs_inode;
WARN_ON(inode->i_ino != fc_dentry->fcd_ino);
mutex_unlock(&sbi->s_fc_lock);
/*
* We first write the inode and then the create dirent. This
* allows the recovery code to create an unnamed inode first
@@ -1029,23 +1024,14 @@ __releases(&sbi->s_fc_lock)
*/
ret = ext4_fc_write_inode(inode, crc);
if (ret)
goto lock_and_exit;
return ret;
ret = ext4_fc_write_inode_data(inode, crc);
if (ret)
goto lock_and_exit;
if (!ext4_fc_add_dentry_tlv(sb, crc, fc_dentry)) {
ret = -ENOSPC;
goto lock_and_exit;
}
mutex_lock(&sbi->s_fc_lock);
return ret;
if (!ext4_fc_add_dentry_tlv(sb, crc, fc_dentry))
return -ENOSPC;
}
return 0;
lock_and_exit:
mutex_lock(&sbi->s_fc_lock);
return ret;
}
static int ext4_fc_perform_commit(journal_t *journal)
@@ -1148,10 +1134,8 @@ static int ext4_fc_perform_commit(journal_t *journal)
/* Step 6.2: Now write all the dentry updates. */
mutex_lock(&sbi->s_fc_lock);
ret = ext4_fc_commit_dentry_updates(journal, &crc);
if (ret) {
mutex_unlock(&sbi->s_fc_lock);
if (ret)
goto out;
}
/* Step 6.3: Now write all the changed inodes to disk. */
list_for_each_entry(iter, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
@@ -1159,7 +1143,6 @@ static int ext4_fc_perform_commit(journal_t *journal)
if (!ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING))
continue;
mutex_unlock(&sbi->s_fc_lock);
ret = ext4_fc_write_inode_data(inode, &crc);
if (ret)
goto out;
@@ -1171,6 +1154,7 @@ static int ext4_fc_perform_commit(journal_t *journal)
ret = ext4_fc_write_tail(sb, crc);
out:
mutex_unlock(&sbi->s_fc_lock);
blk_finish_plug(&plug);
return ret;
}
@@ -1353,11 +1337,9 @@ static void ext4_fc_cleanup(journal_t *journal, int full, tid_t tid)
fcd_list);
list_del_init(&fc_dentry->fcd_list);
list_del_init(&fc_dentry->fcd_dilist);
mutex_unlock(&sbi->s_fc_lock);
release_dentry_name_snapshot(&fc_dentry->fcd_name);
kmem_cache_free(ext4_fc_dentry_cachep, fc_dentry);
mutex_lock(&sbi->s_fc_lock);
}
list_splice_init(&sbi->s_fc_dentry_q[FC_Q_STAGING],