NFSv4/flexfiles: Fix handling of NFS level errors in I/O

[ Upstream commit 38074de35b015df5623f524d6f2b49a0cd395c40 ]

Allow the flexfiles error handling to recognise NFS level errors (as
opposed to RPC level errors) and handle them separately. The main
motivator is the NFSERR_PERM errors that get returned if the NFS client
connects to the data server through a port number that is lower than
1024. In that case, the client should disconnect and retry a READ on a
different data server, or it should retry a WRITE after reconnecting.

Reviewed-by: Tigran Mkrtchyan <tigran.mkrtchyan@desy.de>
Fixes: d67ae825a5 ("pnfs/flexfiles: Add the FlexFile Layout Driver")
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Signed-off-by: Anna Schumaker <anna.schumaker@oracle.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
(cherry picked from commit 204bdc7a8b7b7b7fcfc500f1402762a1f99e00bc)
This commit is contained in:
Trond Myklebust 2025-06-19 15:16:11 -04:00 committed by Wentao Guan
parent 3819e35dc9
commit 64bc605c55
1 changed files with 87 additions and 34 deletions

View File

@ -1105,6 +1105,7 @@ static void ff_layout_reset_read(struct nfs_pgio_header *hdr)
}
static int ff_layout_async_handle_error_v4(struct rpc_task *task,
u32 op_status,
struct nfs4_state *state,
struct nfs_client *clp,
struct pnfs_layout_segment *lseg,
@ -1115,32 +1116,42 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table;
switch (task->tk_status) {
case -NFS4ERR_BADSESSION:
case -NFS4ERR_BADSLOT:
case -NFS4ERR_BAD_HIGH_SLOT:
case -NFS4ERR_DEADSESSION:
case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
case -NFS4ERR_SEQ_FALSE_RETRY:
case -NFS4ERR_SEQ_MISORDERED:
switch (op_status) {
case NFS4_OK:
case NFS4ERR_NXIO:
break;
case NFSERR_PERM:
if (!task->tk_xprt)
break;
xprt_force_disconnect(task->tk_xprt);
goto out_retry;
case NFS4ERR_BADSESSION:
case NFS4ERR_BADSLOT:
case NFS4ERR_BAD_HIGH_SLOT:
case NFS4ERR_DEADSESSION:
case NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
case NFS4ERR_SEQ_FALSE_RETRY:
case NFS4ERR_SEQ_MISORDERED:
dprintk("%s ERROR %d, Reset session. Exchangeid "
"flags 0x%x\n", __func__, task->tk_status,
clp->cl_exchange_flags);
nfs4_schedule_session_recovery(clp->cl_session, task->tk_status);
break;
case -NFS4ERR_DELAY:
case -NFS4ERR_GRACE:
goto out_retry;
case NFS4ERR_DELAY:
nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY);
fallthrough;
case NFS4ERR_GRACE:
rpc_delay(task, FF_LAYOUT_POLL_RETRY_MAX);
break;
case -NFS4ERR_RETRY_UNCACHED_REP:
break;
goto out_retry;
case NFS4ERR_RETRY_UNCACHED_REP:
goto out_retry;
/* Invalidate Layout errors */
case -NFS4ERR_PNFS_NO_LAYOUT:
case -ESTALE: /* mapped NFS4ERR_STALE */
case -EBADHANDLE: /* mapped NFS4ERR_BADHANDLE */
case -EISDIR: /* mapped NFS4ERR_ISDIR */
case -NFS4ERR_FHEXPIRED:
case -NFS4ERR_WRONG_TYPE:
case NFS4ERR_PNFS_NO_LAYOUT:
case NFS4ERR_STALE:
case NFS4ERR_BADHANDLE:
case NFS4ERR_ISDIR:
case NFS4ERR_FHEXPIRED:
case NFS4ERR_WRONG_TYPE:
dprintk("%s Invalid layout error %d\n", __func__,
task->tk_status);
/*
@ -1153,6 +1164,11 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
pnfs_destroy_layout(NFS_I(inode));
rpc_wake_up(&tbl->slot_tbl_waitq);
goto reset;
default:
break;
}
switch (task->tk_status) {
/* RPC connection errors */
case -ECONNREFUSED:
case -EHOSTDOWN:
@ -1168,26 +1184,56 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
nfs4_delete_deviceid(devid->ld, devid->nfs_client,
&devid->deviceid);
rpc_wake_up(&tbl->slot_tbl_waitq);
fallthrough;
break;
default:
if (ff_layout_avoid_mds_available_ds(lseg))
return -NFS4ERR_RESET_TO_PNFS;
reset:
dprintk("%s Retry through MDS. Error %d\n", __func__,
task->tk_status);
return -NFS4ERR_RESET_TO_MDS;
break;
}
if (ff_layout_avoid_mds_available_ds(lseg))
return -NFS4ERR_RESET_TO_PNFS;
reset:
dprintk("%s Retry through MDS. Error %d\n", __func__,
task->tk_status);
return -NFS4ERR_RESET_TO_MDS;
out_retry:
task->tk_status = 0;
return -EAGAIN;
}
/* Retry all errors through either pNFS or MDS except for -EJUKEBOX */
static int ff_layout_async_handle_error_v3(struct rpc_task *task,
u32 op_status,
struct nfs_client *clp,
struct pnfs_layout_segment *lseg,
u32 idx)
{
struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
switch (op_status) {
case NFS_OK:
case NFSERR_NXIO:
break;
case NFSERR_PERM:
if (!task->tk_xprt)
break;
xprt_force_disconnect(task->tk_xprt);
goto out_retry;
case NFSERR_ACCES:
case NFSERR_BADHANDLE:
case NFSERR_FBIG:
case NFSERR_IO:
case NFSERR_NOSPC:
case NFSERR_ROFS:
case NFSERR_STALE:
goto out_reset_to_pnfs;
case NFSERR_JUKEBOX:
nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY);
goto out_retry;
default:
break;
}
switch (task->tk_status) {
/* File access problems. Don't mark the device as unavailable */
case -EACCES:
@ -1206,6 +1252,7 @@ static int ff_layout_async_handle_error_v3(struct rpc_task *task,
nfs4_delete_deviceid(devid->ld, devid->nfs_client,
&devid->deviceid);
}
out_reset_to_pnfs:
/* FIXME: Need to prevent infinite looping here. */
return -NFS4ERR_RESET_TO_PNFS;
out_retry:
@ -1216,6 +1263,7 @@ out_retry:
}
static int ff_layout_async_handle_error(struct rpc_task *task,
u32 op_status,
struct nfs4_state *state,
struct nfs_client *clp,
struct pnfs_layout_segment *lseg,
@ -1234,10 +1282,11 @@ static int ff_layout_async_handle_error(struct rpc_task *task,
switch (vers) {
case 3:
return ff_layout_async_handle_error_v3(task, lseg, idx);
case 4:
return ff_layout_async_handle_error_v4(task, state, clp,
return ff_layout_async_handle_error_v3(task, op_status, clp,
lseg, idx);
case 4:
return ff_layout_async_handle_error_v4(task, op_status, state,
clp, lseg, idx);
default:
/* should never happen */
WARN_ON_ONCE(1);
@ -1290,6 +1339,7 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg,
switch (status) {
case NFS4ERR_DELAY:
case NFS4ERR_GRACE:
case NFS4ERR_PERM:
break;
case NFS4ERR_NXIO:
ff_layout_mark_ds_unreachable(lseg, idx);
@ -1322,7 +1372,8 @@ static int ff_layout_read_done_cb(struct rpc_task *task,
trace_ff_layout_read_error(hdr);
}
err = ff_layout_async_handle_error(task, hdr->args.context->state,
err = ff_layout_async_handle_error(task, hdr->res.op_status,
hdr->args.context->state,
hdr->ds_clp, hdr->lseg,
hdr->pgio_mirror_idx);
@ -1492,7 +1543,8 @@ static int ff_layout_write_done_cb(struct rpc_task *task,
trace_ff_layout_write_error(hdr);
}
err = ff_layout_async_handle_error(task, hdr->args.context->state,
err = ff_layout_async_handle_error(task, hdr->res.op_status,
hdr->args.context->state,
hdr->ds_clp, hdr->lseg,
hdr->pgio_mirror_idx);
@ -1538,8 +1590,9 @@ static int ff_layout_commit_done_cb(struct rpc_task *task,
trace_ff_layout_commit_error(data);
}
err = ff_layout_async_handle_error(task, NULL, data->ds_clp,
data->lseg, data->ds_commit_index);
err = ff_layout_async_handle_error(task, data->res.op_status,
NULL, data->ds_clp, data->lseg,
data->ds_commit_index);
trace_nfs4_pnfs_commit_ds(data, err);
switch (err) {