btrfs: propagate last_unlink_trans earlier when doing a rmdir

[ Upstream commit c466e33e729a0ee017d10d919cba18f503853c60 ] In case the removed directory had a snapshot that was deleted, we are propagating its inode's last_unlink_trans to the parent directory after we removed the entry from the parent directory. This leaves a small race window where someone can log the parent directory after we removed the entry and before we updated last_unlink_trans, and as a result if we ever try to replay such a log tree, we will fail since we will attempt to remove a snapshot during log replay, which is currently not possible and results in the log replay (and mount) to fail. This is the type of failure described in commit 1ec9a1ae1e ("Btrfs: fix unreplayable log after snapshot delete + parent dir fsync"). So fix this by propagating the last_unlink_trans to the parent directory before we remove the entry from it. Fixes: 44f714dae5 ("Btrfs: improve performance on fsync against new inode after rename/unlink") Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com> Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: David Sterba <dsterba@suse.com> Signed-off-by: Sasha Levin <sashal@kernel.org> (cherry picked from commit d77a16802896bcc0cbe66facf87d7af57262ea1a)
2025-06-20 15:54:05 +01:00 · 2025-06-20 15:54:05 +01:00 · 2dbd663f91
parent b0f4ec74dc
commit 2dbd663f91
1 changed files with 18 additions and 18 deletions
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@ -4617,7 +4617,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
 	struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
 	int ret = 0;
 	struct btrfs_trans_handle *trans;
-	u64 last_unlink_trans;
 	struct fscrypt_name fname;

 	if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
@ -4643,6 +4642,23 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
 		goto out_notrans;
 	}

+	/*
+	 * Propagate the last_unlink_trans value of the deleted dir to its
+	 * parent directory. This is to prevent an unrecoverable log tree in the
+	 * case we do something like this:
+	 * 1) create dir foo
+	 * 2) create snapshot under dir foo
+	 * 3) delete the snapshot
+	 * 4) rmdir foo
+	 * 5) mkdir foo
+	 * 6) fsync foo or some file inside foo
+	 *
+	 * This is because we can't unlink other roots when replaying the dir
+	 * deletes for directory foo.
+	 */
+	if (BTRFS_I(inode)->last_unlink_trans >= trans->transid)
+		BTRFS_I(dir)->last_unlink_trans = BTRFS_I(inode)->last_unlink_trans;
+
 	if (unlikely(btrfs_ino(BTRFS_I(inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
 		ret = btrfs_unlink_subvol(trans, BTRFS_I(dir), dentry);
 		goto out;
@ -4652,27 +4668,11 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
 	if (ret)
 		goto out;

-	last_unlink_trans = BTRFS_I(inode)->last_unlink_trans;
-
 	/* now the directory is empty */
 	ret = btrfs_unlink_inode(trans, BTRFS_I(dir), BTRFS_I(d_inode(dentry)),
 				 &fname.disk_name);
-	if (!ret) {
+	if (!ret)
 		btrfs_i_size_write(BTRFS_I(inode), 0);
-		/*
-		 * Propagate the last_unlink_trans value of the deleted dir to
-		 * its parent directory. This is to prevent an unrecoverable
-		 * log tree in the case we do something like this:
-		 * 1) create dir foo
-		 * 2) create snapshot under dir foo
-		 * 3) delete the snapshot
-		 * 4) rmdir foo
-		 * 5) mkdir foo
-		 * 6) fsync foo or some file inside foo
-		 */
-		if (last_unlink_trans >= trans->transid)
-			BTRFS_I(dir)->last_unlink_trans = last_unlink_trans;
-	}
 out:
 	btrfs_end_transaction(trans);
 out_notrans: