smb: client: fix potential deadlock when reconnecting channels

[ Upstream commit 711741f94ac3cf9f4e3aa73aa171e76d188c0819 ]

Fix cifs_signal_cifsd_for_reconnect() to take the correct lock order
and prevent the following deadlock from happening

======================================================
WARNING: possible circular locking dependency detected
6.16.0-rc3-build2+ #1301 Tainted: G S      W
------------------------------------------------------
cifsd/6055 is trying to acquire lock:
ffff88810ad56038 (&tcp_ses->srv_lock){+.+.}-{3:3}, at: cifs_signal_cifsd_for_reconnect+0x134/0x200

but task is already holding lock:
ffff888119c64330 (&ret_buf->chan_lock){+.+.}-{3:3}, at: cifs_signal_cifsd_for_reconnect+0xcf/0x200

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #2 (&ret_buf->chan_lock){+.+.}-{3:3}:
       validate_chain+0x1cf/0x270
       __lock_acquire+0x60e/0x780
       lock_acquire.part.0+0xb4/0x1f0
       _raw_spin_lock+0x2f/0x40
       cifs_setup_session+0x81/0x4b0
       cifs_get_smb_ses+0x771/0x900
       cifs_mount_get_session+0x7e/0x170
       cifs_mount+0x92/0x2d0
       cifs_smb3_do_mount+0x161/0x460
       smb3_get_tree+0x55/0x90
       vfs_get_tree+0x46/0x180
       do_new_mount+0x1b0/0x2e0
       path_mount+0x6ee/0x740
       do_mount+0x98/0xe0
       __do_sys_mount+0x148/0x180
       do_syscall_64+0xa4/0x260
       entry_SYSCALL_64_after_hwframe+0x76/0x7e

-> #1 (&ret_buf->ses_lock){+.+.}-{3:3}:
       validate_chain+0x1cf/0x270
       __lock_acquire+0x60e/0x780
       lock_acquire.part.0+0xb4/0x1f0
       _raw_spin_lock+0x2f/0x40
       cifs_match_super+0x101/0x320
       sget+0xab/0x270
       cifs_smb3_do_mount+0x1e0/0x460
       smb3_get_tree+0x55/0x90
       vfs_get_tree+0x46/0x180
       do_new_mount+0x1b0/0x2e0
       path_mount+0x6ee/0x740
       do_mount+0x98/0xe0
       __do_sys_mount+0x148/0x180
       do_syscall_64+0xa4/0x260
       entry_SYSCALL_64_after_hwframe+0x76/0x7e

-> #0 (&tcp_ses->srv_lock){+.+.}-{3:3}:
       check_noncircular+0x95/0xc0
       check_prev_add+0x115/0x2f0
       validate_chain+0x1cf/0x270
       __lock_acquire+0x60e/0x780
       lock_acquire.part.0+0xb4/0x1f0
       _raw_spin_lock+0x2f/0x40
       cifs_signal_cifsd_for_reconnect+0x134/0x200
       __cifs_reconnect+0x8f/0x500
       cifs_handle_standard+0x112/0x280
       cifs_demultiplex_thread+0x64d/0xbc0
       kthread+0x2f7/0x310
       ret_from_fork+0x2a/0x230
       ret_from_fork_asm+0x1a/0x30

other info that might help us debug this:

Chain exists of:
  &tcp_ses->srv_lock --> &ret_buf->ses_lock --> &ret_buf->chan_lock

 Possible unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(&ret_buf->chan_lock);
                               lock(&ret_buf->ses_lock);
                               lock(&ret_buf->chan_lock);
  lock(&tcp_ses->srv_lock);

 *** DEADLOCK ***

3 locks held by cifsd/6055:
 #0: ffffffff857de398 (&cifs_tcp_ses_lock){+.+.}-{3:3}, at: cifs_signal_cifsd_for_reconnect+0x7b/0x200
 #1: ffff888119c64060 (&ret_buf->ses_lock){+.+.}-{3:3}, at: cifs_signal_cifsd_for_reconnect+0x9c/0x200
 #2: ffff888119c64330 (&ret_buf->chan_lock){+.+.}-{3:3}, at: cifs_signal_cifsd_for_reconnect+0xcf/0x200

Cc: linux-cifs@vger.kernel.org
Reported-by: David Howells <dhowells@redhat.com>
Fixes: d7d7a66aac ("cifs: avoid use of global locks for high contention data")
Reviewed-by: David Howells <dhowells@redhat.com>
Tested-by: David Howells <dhowells@redhat.com>
Signed-off-by: Paulo Alcantara (Red Hat) <pc@manguebit.org>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
(cherry picked from commit c82c7041258d96e3286f6790ab700e4edd3cc9e3)
This commit is contained in:
Paulo Alcantara 2025-06-25 12:22:38 -03:00 committed by Wentao Guan
parent 5c6757a8dd
commit 0fd466beef
2 changed files with 37 additions and 22 deletions

View File

@ -677,6 +677,7 @@ inc_rfc1001_len(void *buf, int count)
struct TCP_Server_Info {
struct list_head tcp_ses_list;
struct list_head smb_ses_list;
struct list_head rlist; /* reconnect list */
spinlock_t srv_lock; /* protect anything here that is not protected */
__u64 conn_id; /* connection identifier (useful for debugging) */
int srv_count; /* reference counter */

View File

@ -140,6 +140,14 @@ static void smb2_query_server_interfaces(struct work_struct *work)
(SMB_INTERFACE_POLL_INTERVAL * HZ));
}
#define set_need_reco(server) \
do { \
spin_lock(&server->srv_lock); \
if (server->tcpStatus != CifsExiting) \
server->tcpStatus = CifsNeedReconnect; \
spin_unlock(&server->srv_lock); \
} while (0)
/*
* Update the tcpStatus for the server.
* This is used to signal the cifsd thread to call cifs_reconnect
@ -153,39 +161,45 @@ void
cifs_signal_cifsd_for_reconnect(struct TCP_Server_Info *server,
bool all_channels)
{
struct TCP_Server_Info *pserver;
struct TCP_Server_Info *nserver;
struct cifs_ses *ses;
LIST_HEAD(reco);
int i;
/* If server is a channel, select the primary channel */
pserver = SERVER_IS_CHAN(server) ? server->primary_server : server;
/* if we need to signal just this channel */
if (!all_channels) {
spin_lock(&server->srv_lock);
if (server->tcpStatus != CifsExiting)
server->tcpStatus = CifsNeedReconnect;
spin_unlock(&server->srv_lock);
set_need_reco(server);
return;
}
spin_lock(&cifs_tcp_ses_lock);
list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
if (cifs_ses_exiting(ses))
continue;
spin_lock(&ses->chan_lock);
for (i = 0; i < ses->chan_count; i++) {
if (!ses->chans[i].server)
if (SERVER_IS_CHAN(server))
server = server->primary_server;
scoped_guard(spinlock, &cifs_tcp_ses_lock) {
set_need_reco(server);
list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
spin_lock(&ses->ses_lock);
if (ses->ses_status == SES_EXITING) {
spin_unlock(&ses->ses_lock);
continue;
spin_lock(&ses->chans[i].server->srv_lock);
if (ses->chans[i].server->tcpStatus != CifsExiting)
ses->chans[i].server->tcpStatus = CifsNeedReconnect;
spin_unlock(&ses->chans[i].server->srv_lock);
}
spin_lock(&ses->chan_lock);
for (i = 1; i < ses->chan_count; i++) {
nserver = ses->chans[i].server;
if (!nserver)
continue;
nserver->srv_count++;
list_add(&nserver->rlist, &reco);
}
spin_unlock(&ses->chan_lock);
spin_unlock(&ses->ses_lock);
}
spin_unlock(&ses->chan_lock);
}
spin_unlock(&cifs_tcp_ses_lock);
list_for_each_entry_safe(server, nserver, &reco, rlist) {
list_del_init(&server->rlist);
set_need_reco(server);
cifs_put_tcp_session(server, 0);
}
}
/*