mirror of https://github.com/Jittor/Jittor

Commit c9c02508d4 ("Debug nan.")
Parent: da6acc6cc3
@@ -9,7 +9,7 @@
 # file 'LICENSE.txt', which is part of this source code package.
 # ***************************************************************
-__version__ = '1.3.2412.29'
+__version__ = '1.3.2415.29'
 from jittor_utils import lock
 with lock.lock_scope():
     ori_int = int
@@ -328,6 +328,7 @@ namespace jittor
             op.add(bop->z, false);
             op.run();
             bop->x->shape = xshape_bk;
+            // aclrtSynchronizeStream(aclstream);
         }
         else if (op->name() == string("fuse_transpose"))
         {
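Review note: the commented-out aclrtSynchronizeStream(aclstream) added above reads like a debugging toggle. Forcing a synchronize after each op serializes the ACL stream, so the first op whose output contains a NaN can be pinpointed; checking the return code also makes silent runtime failures visible. A minimal sketch, assuming only the standard CANN runtime header; the helper name and error path are illustrative, not Jittor's actual code:

#include <acl/acl.h>
#include <cstdio>

// Block until all work queued on `stream` has finished, reporting failures.
// ACL_SUCCESS is the acl.h success code (older releases spell it ACL_ERROR_NONE).
static void debug_sync(aclrtStream stream, const char* where) {
    aclError ret = aclrtSynchronizeStream(stream);
    if (ret != ACL_SUCCESS)
        fprintf(stderr, "stream sync failed at %s: %d\n", where, (int)ret);
}

Called as debug_sync(aclstream, "bop") right after op.run(), this bisects the op sequence to the first NaN producer, at the cost of throughput.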
@@ -626,14 +626,14 @@ namespace jittor

     // 6. Release the aclTensor and aclScalar handles; adjust to match each API's interface definition
     // destroy tensor
-    for (int idx = 0; idx < input_num; idx++)
-    {
-        aclDestroyTensor(inputTensors[idx]);
-    }
-    for (int idx = 0; idx < output_num; idx++)
-    {
-        aclDestroyTensor(outputTensors[idx]);
-    }
+    // for (int idx = 0; idx < input_num; idx++)
+    // {
+    //     aclDestroyTensor(inputTensors[idx]);
+    // }
+    // for (int idx = 0; idx < output_num; idx++)
+    // {
+    //     aclDestroyTensor(outputTensors[idx]);
+    // }
     // destroy scalar
     // aclDestroyScalar(start);
     // aclDestroyScalar(end);
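Review note: with the aclDestroyTensor loops commented out, every invocation of this op now leaks one aclTensor handle per input and output, which is acceptable while bisecting a NaN but not for training runs. A hedged sketch of an RAII guard that keeps cleanup on by default yet can be switched off while chasing lifetime bugs; the guard type is hypothetical and assumes the aclnn headers declaring aclTensor and aclDestroyTensor are already included:

#include <vector>

// Destroys the collected aclTensor handles on scope exit unless disabled.
struct AclTensorGuard {
    std::vector<aclTensor*> handles;
    bool enabled = true;  // set false to reproduce the leak-everything debug mode
    ~AclTensorGuard() {
        if (!enabled) return;
        for (auto* t : handles) aclDestroyTensor(t);
    }
};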
@@ -312,6 +312,7 @@ class SGD(Optimizer):

         # optimize main body
         for p, g, v in zip(pg["params"], pg["grads"], pg["values"]):
+            # print(p.shape, g.shape, v.shape)
             if p.is_stop_grad(): continue
             dp = p * weight_decay + g
             v.update(momentum * v + dp * (1 - dampening))
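Review note: the added (commented) shape print is the debugging hook here; the surrounding update is plain momentum SGD with weight decay and dampening: dp = weight_decay * p + g, then v <- momentum * v + (1 - dampening) * dp, with the parameter step p <- p - lr * v falling outside the hunk. A minimal C++ sketch of the same step over flat float buffers; the function name and signature are illustrative:

#include <cstddef>

// One momentum-SGD step, mirroring the dp / v.update lines in the Python above.
void sgd_step(float* p, const float* g, float* v, size_t n,
              float lr, float momentum, float dampening, float weight_decay) {
    for (size_t i = 0; i < n; i++) {
        float dp = p[i] * weight_decay + g[i];           // decayed gradient
        v[i] = momentum * v[i] + dp * (1.0f - dampening); // velocity update
        p[i] -= lr * v[i];                                // parameter update
    }
}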
@@ -27,6 +27,7 @@ namespace array_local {
 cudaStream_t stream;
+cudaEvent_t event;


 struct Init {
     Init() {
         if (!get_device_count()) return;
@@ -103,8 +104,11 @@ void ArrayOp::run() {
     auto host_ptr = cuda_dual_allocator.get_dual_allocation(allocation.allocation).host_ptr;
     checkCudaErrors(cudaMemcpyAsync(
         allocation.ptr, host_ptr, allocation.size, cudaMemcpyHostToDevice, stream));
+    checkCudaErrors(cudaEventRecord(event, stream));
+    checkCudaErrors(cudaStreamWaitEvent(0, event, 0));
     // checkCudaErrors(aclrtMemcpyAsync(
     //     allocation.ptr, allocation.size, host_ptr, allocation.size, cudaMemcpyHostToDevice, aclstream));
     // checkCudaErrors(cudaEventRecord(event, stream));
     // checkCudaErrors(cudaStreamWaitEvent(0, event, 0));
+    // checkCudaErrors(aclrtSynchronizeStream(aclstream));
     // delay free this allocation
     allocation.allocator = &delay_free;
 }
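Review note: this is the cross-stream ordering pattern the new cudaEvent_t enables. The host-to-device copy is queued on the side stream; recording the event there and making the default stream (0) wait on it guarantees that kernels later launched on stream 0 observe the fully copied array, without a device-wide synchronize. Missing ordering of this kind is a classic source of intermittent NaNs, since a kernel on another stream can race ahead and read half-written memory. A standalone sketch using only standard CUDA runtime calls; buffer names are illustrative:

#include <cuda_runtime.h>

// Upload on a side stream, then fence the default stream behind the copy.
void upload_then_fence(void* dev, const void* host, size_t n,
                       cudaStream_t copy_stream, cudaEvent_t ev) {
    cudaMemcpyAsync(dev, host, n, cudaMemcpyHostToDevice, copy_stream);
    cudaEventRecord(ev, copy_stream);  // completion marker for the copy
    cudaStreamWaitEvent(0, ev, 0);     // default stream waits; the host does not block
    // kernels launched on stream 0 from here on see the uploaded data
}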
@@ -17,6 +17,8 @@

 namespace jittor {

+EXTERN_LIB aclrtStream aclstream;
+
 CopyOp::CopyOp(Var* x) {
     flags.set(NodeFlags::_cpu);
     flags.set(NodeFlags::_cuda);
@@ -42,6 +44,8 @@ void CopyOp::run() {
 #ifdef HAS_CUDA
     if (flags.get(NodeFlags::_cuda)) {
         checkCudaErrors(cudaMemcpyAsync(y_ptr, x_ptr, size, cudaMemcpyDeviceToDevice, 0));
+        // checkCudaErrors(aclrtMemcpyAsync(y_ptr, size, x_ptr, size, cudaMemcpyDeviceToDevice, aclstream));
+        // checkCudaErrors(aclrtSynchronizeStream(aclstream));
     } else
 #endif
     {
@@ -123,8 +123,11 @@ void FetchOp::run() {
         new (&allocation) Allocation(&cuda_dual_allocator, v->size);
         // mostly device to device
 #if IS_CUDA
-        checkCudaErrors(cudaMemcpyAsync(
-            allocation.ptr, v->mem_ptr, v->size, cudaMemcpyDefault, stream));
+        // checkCudaErrors(cudaMemcpyAsync(
+        //     allocation.ptr, v->mem_ptr, v->size, cudaMemcpyDefault, stream));
+        checkCudaErrors(aclrtMemcpyAsync(
+            allocation.ptr, v->size, v->mem_ptr, v->size, cudaMemcpyDefault, aclstream));
+        checkCudaErrors(aclrtSynchronizeStream(aclstream));
 #else
         checkCudaErrors(cudaMemcpyAsync(
             allocation.ptr, v->mem_ptr, v->size, cudaMemcpyDeviceToDevice, stream));
@@ -132,8 +135,11 @@ void FetchOp::run() {
         auto host_ptr = cuda_dual_allocator.get_dual_allocation(
             allocation.allocation).host_ptr;
         // device to host
-        checkCudaErrors(cudaMemcpyAsync(
-            host_ptr, allocation.ptr, v->size, cudaMemcpyDeviceToHost, stream));
+        // checkCudaErrors(cudaMemcpyAsync(
+        //     host_ptr, allocation.ptr, v->size, cudaMemcpyDeviceToHost, stream));
+        checkCudaErrors(aclrtMemcpyAsync(
+            host_ptr, v->size, allocation.ptr, v->size, cudaMemcpyDeviceToHost, aclstream));
+        checkCudaErrors(aclrtSynchronizeStream(aclstream));
         allocation.ptr = host_ptr;
         has_cuda_memcpy = true;
     } else
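Review note: the doubled v->size is not a typo. CANN's aclrtMemcpyAsync takes (dst, destMax, src, count, kind, stream), where destMax is a separate destination-capacity argument, unlike cudaMemcpyAsync's (dst, src, count, kind, stream). Passing cudaMemcpyDeviceToHost where an aclrtMemcpyKind is expected presumably relies on Jittor's ACL compatibility layer mapping the enum; that mapping is not shown in this diff. A hedged shim that makes the argument adaptation explicit; the wrapper name and the destMax == count assumption are illustrative:

#include <acl/acl.h>

// Adapt a cuda-style async memcpy call to the ACL signature.
aclError memcpy_async_compat(void* dst, const void* src, size_t count,
                             aclrtMemcpyKind kind, aclrtStream stream) {
    // ACL validates count against destMax; passing count assumes dst holds >= count bytes.
    return aclrtMemcpyAsync(dst, count, src, count, kind, stream);
}

Note also that every copy here is immediately followed by aclrtSynchronizeStream, which makes the transfers effectively synchronous; that is consistent with a NaN-debugging commit, but it sacrifices pipeline overlap.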
@@ -330,6 +330,8 @@ void SetitemOp::jit_run() {
 #else
     if (op != ip)
         checkCudaErrors(cudaMemcpyAsync(op, ip, out->size, cudaMemcpyDeviceToDevice, 0));
+    // checkCudaErrors(aclrtMemcpyAsync(op, out->size, ip, out->size, cudaMemcpyDeviceToDevice, aclstream));
+    // checkCudaErrors(aclrtSynchronizeStream(aclstream));
 #endif

     if (ns.get(GetitemOp::_inplace) &&