NFSv4/flexfiles: Fix handling of NFS level errors in I/O
[ Upstream commit 38074de35b015df5623f524d6f2b49a0cd395c40 ]
Allow the flexfiles error handling to recognise NFS level errors (as
opposed to RPC level errors) and handle them separately. The main
motivator is the NFSERR_PERM errors that get returned if the NFS client
connects to the data server through a port number that is lower than
1024. In that case, the client should disconnect and retry a READ on a
different data server, or it should retry a WRITE after reconnecting.
Reviewed-by: Tigran Mkrtchyan <tigran.mkrtchyan@desy.de>
Fixes: d67ae825a5
("pnfs/flexfiles: Add the FlexFile Layout Driver")
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Signed-off-by: Anna Schumaker <anna.schumaker@oracle.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
(cherry picked from commit 204bdc7a8b7b7b7fcfc500f1402762a1f99e00bc)
This commit is contained in:
parent
3819e35dc9
commit
64bc605c55
|
@ -1105,6 +1105,7 @@ static void ff_layout_reset_read(struct nfs_pgio_header *hdr)
|
|||
}
|
||||
|
||||
static int ff_layout_async_handle_error_v4(struct rpc_task *task,
|
||||
u32 op_status,
|
||||
struct nfs4_state *state,
|
||||
struct nfs_client *clp,
|
||||
struct pnfs_layout_segment *lseg,
|
||||
|
@ -1115,32 +1116,42 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
|
|||
struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
|
||||
struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table;
|
||||
|
||||
switch (task->tk_status) {
|
||||
case -NFS4ERR_BADSESSION:
|
||||
case -NFS4ERR_BADSLOT:
|
||||
case -NFS4ERR_BAD_HIGH_SLOT:
|
||||
case -NFS4ERR_DEADSESSION:
|
||||
case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
|
||||
case -NFS4ERR_SEQ_FALSE_RETRY:
|
||||
case -NFS4ERR_SEQ_MISORDERED:
|
||||
switch (op_status) {
|
||||
case NFS4_OK:
|
||||
case NFS4ERR_NXIO:
|
||||
break;
|
||||
case NFSERR_PERM:
|
||||
if (!task->tk_xprt)
|
||||
break;
|
||||
xprt_force_disconnect(task->tk_xprt);
|
||||
goto out_retry;
|
||||
case NFS4ERR_BADSESSION:
|
||||
case NFS4ERR_BADSLOT:
|
||||
case NFS4ERR_BAD_HIGH_SLOT:
|
||||
case NFS4ERR_DEADSESSION:
|
||||
case NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
|
||||
case NFS4ERR_SEQ_FALSE_RETRY:
|
||||
case NFS4ERR_SEQ_MISORDERED:
|
||||
dprintk("%s ERROR %d, Reset session. Exchangeid "
|
||||
"flags 0x%x\n", __func__, task->tk_status,
|
||||
clp->cl_exchange_flags);
|
||||
nfs4_schedule_session_recovery(clp->cl_session, task->tk_status);
|
||||
break;
|
||||
case -NFS4ERR_DELAY:
|
||||
case -NFS4ERR_GRACE:
|
||||
goto out_retry;
|
||||
case NFS4ERR_DELAY:
|
||||
nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY);
|
||||
fallthrough;
|
||||
case NFS4ERR_GRACE:
|
||||
rpc_delay(task, FF_LAYOUT_POLL_RETRY_MAX);
|
||||
break;
|
||||
case -NFS4ERR_RETRY_UNCACHED_REP:
|
||||
break;
|
||||
goto out_retry;
|
||||
case NFS4ERR_RETRY_UNCACHED_REP:
|
||||
goto out_retry;
|
||||
/* Invalidate Layout errors */
|
||||
case -NFS4ERR_PNFS_NO_LAYOUT:
|
||||
case -ESTALE: /* mapped NFS4ERR_STALE */
|
||||
case -EBADHANDLE: /* mapped NFS4ERR_BADHANDLE */
|
||||
case -EISDIR: /* mapped NFS4ERR_ISDIR */
|
||||
case -NFS4ERR_FHEXPIRED:
|
||||
case -NFS4ERR_WRONG_TYPE:
|
||||
case NFS4ERR_PNFS_NO_LAYOUT:
|
||||
case NFS4ERR_STALE:
|
||||
case NFS4ERR_BADHANDLE:
|
||||
case NFS4ERR_ISDIR:
|
||||
case NFS4ERR_FHEXPIRED:
|
||||
case NFS4ERR_WRONG_TYPE:
|
||||
dprintk("%s Invalid layout error %d\n", __func__,
|
||||
task->tk_status);
|
||||
/*
|
||||
|
@ -1153,6 +1164,11 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
|
|||
pnfs_destroy_layout(NFS_I(inode));
|
||||
rpc_wake_up(&tbl->slot_tbl_waitq);
|
||||
goto reset;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
switch (task->tk_status) {
|
||||
/* RPC connection errors */
|
||||
case -ECONNREFUSED:
|
||||
case -EHOSTDOWN:
|
||||
|
@ -1168,26 +1184,56 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
|
|||
nfs4_delete_deviceid(devid->ld, devid->nfs_client,
|
||||
&devid->deviceid);
|
||||
rpc_wake_up(&tbl->slot_tbl_waitq);
|
||||
fallthrough;
|
||||
break;
|
||||
default:
|
||||
if (ff_layout_avoid_mds_available_ds(lseg))
|
||||
return -NFS4ERR_RESET_TO_PNFS;
|
||||
reset:
|
||||
dprintk("%s Retry through MDS. Error %d\n", __func__,
|
||||
task->tk_status);
|
||||
return -NFS4ERR_RESET_TO_MDS;
|
||||
break;
|
||||
}
|
||||
|
||||
if (ff_layout_avoid_mds_available_ds(lseg))
|
||||
return -NFS4ERR_RESET_TO_PNFS;
|
||||
reset:
|
||||
dprintk("%s Retry through MDS. Error %d\n", __func__,
|
||||
task->tk_status);
|
||||
return -NFS4ERR_RESET_TO_MDS;
|
||||
|
||||
out_retry:
|
||||
task->tk_status = 0;
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
/* Retry all errors through either pNFS or MDS except for -EJUKEBOX */
|
||||
static int ff_layout_async_handle_error_v3(struct rpc_task *task,
|
||||
u32 op_status,
|
||||
struct nfs_client *clp,
|
||||
struct pnfs_layout_segment *lseg,
|
||||
u32 idx)
|
||||
{
|
||||
struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
|
||||
|
||||
switch (op_status) {
|
||||
case NFS_OK:
|
||||
case NFSERR_NXIO:
|
||||
break;
|
||||
case NFSERR_PERM:
|
||||
if (!task->tk_xprt)
|
||||
break;
|
||||
xprt_force_disconnect(task->tk_xprt);
|
||||
goto out_retry;
|
||||
case NFSERR_ACCES:
|
||||
case NFSERR_BADHANDLE:
|
||||
case NFSERR_FBIG:
|
||||
case NFSERR_IO:
|
||||
case NFSERR_NOSPC:
|
||||
case NFSERR_ROFS:
|
||||
case NFSERR_STALE:
|
||||
goto out_reset_to_pnfs;
|
||||
case NFSERR_JUKEBOX:
|
||||
nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY);
|
||||
goto out_retry;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
switch (task->tk_status) {
|
||||
/* File access problems. Don't mark the device as unavailable */
|
||||
case -EACCES:
|
||||
|
@ -1206,6 +1252,7 @@ static int ff_layout_async_handle_error_v3(struct rpc_task *task,
|
|||
nfs4_delete_deviceid(devid->ld, devid->nfs_client,
|
||||
&devid->deviceid);
|
||||
}
|
||||
out_reset_to_pnfs:
|
||||
/* FIXME: Need to prevent infinite looping here. */
|
||||
return -NFS4ERR_RESET_TO_PNFS;
|
||||
out_retry:
|
||||
|
@ -1216,6 +1263,7 @@ out_retry:
|
|||
}
|
||||
|
||||
static int ff_layout_async_handle_error(struct rpc_task *task,
|
||||
u32 op_status,
|
||||
struct nfs4_state *state,
|
||||
struct nfs_client *clp,
|
||||
struct pnfs_layout_segment *lseg,
|
||||
|
@ -1234,10 +1282,11 @@ static int ff_layout_async_handle_error(struct rpc_task *task,
|
|||
|
||||
switch (vers) {
|
||||
case 3:
|
||||
return ff_layout_async_handle_error_v3(task, lseg, idx);
|
||||
case 4:
|
||||
return ff_layout_async_handle_error_v4(task, state, clp,
|
||||
return ff_layout_async_handle_error_v3(task, op_status, clp,
|
||||
lseg, idx);
|
||||
case 4:
|
||||
return ff_layout_async_handle_error_v4(task, op_status, state,
|
||||
clp, lseg, idx);
|
||||
default:
|
||||
/* should never happen */
|
||||
WARN_ON_ONCE(1);
|
||||
|
@ -1290,6 +1339,7 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg,
|
|||
switch (status) {
|
||||
case NFS4ERR_DELAY:
|
||||
case NFS4ERR_GRACE:
|
||||
case NFS4ERR_PERM:
|
||||
break;
|
||||
case NFS4ERR_NXIO:
|
||||
ff_layout_mark_ds_unreachable(lseg, idx);
|
||||
|
@ -1322,7 +1372,8 @@ static int ff_layout_read_done_cb(struct rpc_task *task,
|
|||
trace_ff_layout_read_error(hdr);
|
||||
}
|
||||
|
||||
err = ff_layout_async_handle_error(task, hdr->args.context->state,
|
||||
err = ff_layout_async_handle_error(task, hdr->res.op_status,
|
||||
hdr->args.context->state,
|
||||
hdr->ds_clp, hdr->lseg,
|
||||
hdr->pgio_mirror_idx);
|
||||
|
||||
|
@ -1492,7 +1543,8 @@ static int ff_layout_write_done_cb(struct rpc_task *task,
|
|||
trace_ff_layout_write_error(hdr);
|
||||
}
|
||||
|
||||
err = ff_layout_async_handle_error(task, hdr->args.context->state,
|
||||
err = ff_layout_async_handle_error(task, hdr->res.op_status,
|
||||
hdr->args.context->state,
|
||||
hdr->ds_clp, hdr->lseg,
|
||||
hdr->pgio_mirror_idx);
|
||||
|
||||
|
@ -1538,8 +1590,9 @@ static int ff_layout_commit_done_cb(struct rpc_task *task,
|
|||
trace_ff_layout_commit_error(data);
|
||||
}
|
||||
|
||||
err = ff_layout_async_handle_error(task, NULL, data->ds_clp,
|
||||
data->lseg, data->ds_commit_index);
|
||||
err = ff_layout_async_handle_error(task, data->res.op_status,
|
||||
NULL, data->ds_clp, data->lseg,
|
||||
data->ds_commit_index);
|
||||
|
||||
trace_nfs4_pnfs_commit_ds(data, err);
|
||||
switch (err) {
|
||||
|
|
Loading…
Reference in New Issue