[OpenMP] Use primary context in CUDA plugin

Summary:
Retaining the per-device primary context is preferred over creating a separate context owned by the plugin.

From the CUDA documentation:
1. "Note that the use of multiple CUcontexts per device within a single process will substantially degrade performance and is strongly discouraged. Instead, it is highly recommended that the implicit one-to-one device-to-context mapping for the process provided by the CUDA Runtime API be used." From https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__DRIVER.html
2. Right under the description of cuCtxCreate: "In most cases it is recommended to use cuDevicePrimaryCtxRetain." From https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__CTX.html#group__CUDA__CTX_1g65dc0012348bc84810e2103a40d8e2cf
3. "The primary context is unique per device and shared with the CUDA runtime API. These functions allow integration with other libraries using CUDA." From https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__PRIMARY__CTX.html#group__CUDA__PRIMARY__CTX

Two issues are addressed by this patch:
1. Not using the primary context caused interoperability issues with libraries like cuBLAS and cuSOLVER, which surfaced as CUBLAS_STATUS_EXECUTION_FAILED and cudaErrorInvalidResourceHandle errors.
2. On OLCF Summit, context creation failed with "Error returned from cuCtxCreate" and "CUDA error is: invalid device ordinal".

Regarding the flags of the primary context: if it is inactive, we set CU_CTX_SCHED_BLOCKING_SYNC; if it is already active, we respect its current flags.

Reviewers: grokos, ABataev, jdoerfert, protze.joachim, AndreyChurbanov, Hahnfeld

Reviewed By: jdoerfert

Subscribers: openmp-commits, yaxunl, guansong, sstefan1, tianshilei1992

Tags: #openmp

Differential Revision: https://reviews.llvm.org/D82718
This commit is contained in:
Ye Luo 2020-07-07 10:13:37 -04:00 committed by Shilei Tian
parent c9fb7f8171
commit c5348aecd7
1 changed files with 35 additions and 7 deletions

View File

@ -385,9 +385,15 @@ public:
for (DeviceDataTy &D : DeviceData) {
// Destroy context
if (D.Context)
checkResult(cuCtxDestroy(D.Context),
"Error returned from cuCtxDestroy\n");
if (D.Context) {
checkResult(cuCtxSetCurrent(D.Context),
"Error returned from cuCtxSetCurrent\n");
CUdevice Device;
checkResult(cuCtxGetDevice(&Device),
"Error returned from cuCtxGetDevice\n");
checkResult(cuDevicePrimaryCtxRelease(Device),
"Error returned from cuDevicePrimaryCtxRelease\n");
}
}
}
@ -408,10 +414,32 @@ public:
if (!checkResult(Err, "Error returned from cuDeviceGet\n"))
return OFFLOAD_FAIL;
// Create the context and save it to use whenever this device is selected.
Err = cuCtxCreate(&DeviceData[DeviceId].Context, CU_CTX_SCHED_BLOCKING_SYNC,
Device);
if (!checkResult(Err, "Error returned from cuCtxCreate\n"))
// Query the current flags of the primary context and set its flags if
// it is inactive
unsigned int FormerPrimaryCtxFlags = 0;
int FormerPrimaryCtxIsActive = 0;
Err = cuDevicePrimaryCtxGetState(Device, &FormerPrimaryCtxFlags,
&FormerPrimaryCtxIsActive);
if (!checkResult(Err, "Error returned from cuDevicePrimaryCtxGetState\n"))
return OFFLOAD_FAIL;
if (FormerPrimaryCtxIsActive) {
DP("The primary context is active, no change to its flags\n");
if ((FormerPrimaryCtxFlags & CU_CTX_SCHED_MASK) !=
CU_CTX_SCHED_BLOCKING_SYNC)
DP("Warning the current flags are not CU_CTX_SCHED_BLOCKING_SYNC\n");
} else {
DP("The primary context is inactive, set its flags to "
"CU_CTX_SCHED_BLOCKING_SYNC\n");
Err = cuDevicePrimaryCtxSetFlags(Device, CU_CTX_SCHED_BLOCKING_SYNC);
if (!checkResult(Err, "Error returned from cuDevicePrimaryCtxSetFlags\n"))
return OFFLOAD_FAIL;
}
// Retain the per device primary context and save it to use whenever this
// device is selected.
Err = cuDevicePrimaryCtxRetain(&DeviceData[DeviceId].Context, Device);
if (!checkResult(Err, "Error returned from cuDevicePrimaryCtxRetain\n"))
return OFFLOAD_FAIL;
Err = cuCtxSetCurrent(DeviceData[DeviceId].Context);