mirror of https://github.com/Jittor/Jittor
add nan checker
This commit is contained in:
parent
b4a01c9b57
commit
d2c5d04ecf
|
@ -74,10 +74,17 @@ def compile(compiler, flags, inputs, output, combind_build=False):
|
|||
for input, obj_file in zip(inputs, obj_files):
|
||||
cc = compiler
|
||||
nflags = oflags
|
||||
if has_cuda and input.endswith(".cu"):
|
||||
nflags = convert_nvcc_flags(oflags)
|
||||
cc = nvcc_path
|
||||
if input.endswith(".cu"):
|
||||
if has_cuda:
|
||||
nflags = convert_nvcc_flags(oflags)
|
||||
cc = nvcc_path
|
||||
else:
|
||||
continue
|
||||
cmd = f"{cc} {input} {nflags} -c {lto_flags} -o {obj_file}"
|
||||
if "nan_checker" in input:
|
||||
# nan checker needs to disable fast_math
|
||||
cmd = cmd.replace("--use_fast_math", "")
|
||||
cmd = cmd.replace("-Ofast", "-O2")
|
||||
cmds.append(cmd)
|
||||
jit_utils.run_cmds(cmds, cache_path, jittor_path, "Compiling "+base_output)
|
||||
cmd = f"{compiler} {' '.join(obj_files)} {flags} {lto_flags} {link} -o {output}"
|
||||
|
@ -945,7 +952,7 @@ pyjt_gen_src = pyjt_compiler.compile(cache_path, jittor_path)
|
|||
# 3. op_utils
|
||||
# 4. other
|
||||
files2 = pyjt_gen_src
|
||||
files4 = run_cmd('find -L src | grep "cc$"', jittor_path).splitlines()
|
||||
files4 = run_cmd('find -L src | grep "c[cu]$"', jittor_path).splitlines()
|
||||
at_beginning = [
|
||||
"src/ops/op_utils.cc",
|
||||
"src/event_queue.cc",
|
||||
|
|
|
@ -627,12 +627,11 @@ def gather(x,dim,index):
|
|||
return x.reindex(ins)
|
||||
jt.Var.gather = gather
|
||||
|
||||
def prod(x,dim=0):
|
||||
def _prod(x,dim=0):
    # prod(x) == exp(sum(log(x))): the product is computed in log space.
    # NOTE(review): assumes entries are positive — log of zero/negative
    # values yields -inf/NaN; confirm callers guarantee positivity.
    return jt.exp(jt.log(x).sum(dim=dim))
|
||||
|
||||
jt.Var.prod = prod
|
||||
|
||||
def cumsum_forward(np, data):
|
||||
a = data['inputs'][0]
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
#include "fuser.h"
|
||||
#include "profiler/profiler_guard.h"
|
||||
#include "parallel_compiler.h"
|
||||
#include "misc/nan_checker.h"
|
||||
|
||||
namespace jittor {
|
||||
|
||||
|
@ -46,7 +47,10 @@ void load_fused_op(FusedOp& fused_op, vector<int>& fuse_ops, vector<Op*>& ops, i
|
|||
for (Op* op : fused_op.ops) {
|
||||
uint fid1 = op->custom_data;
|
||||
int iid = 0;
|
||||
for (Var* v : op->inputs()) {
|
||||
for (auto ve : op->_inputs) {
|
||||
// this is a control dependency edge, don't use it
|
||||
if (ve.back->index<0) continue;
|
||||
auto v = ve.node->var();
|
||||
iid++;
|
||||
int iop_id;
|
||||
int iv_id;
|
||||
|
@ -450,6 +454,8 @@ void Executor::run_sync(vector<Var*> vars, bool device_sync) {
|
|||
if (use_cuda)
|
||||
checkCudaErrors(cudaDeviceSynchronize());
|
||||
#endif
|
||||
for (Var* var : op->outputs())
|
||||
check_nan(var);
|
||||
}
|
||||
LOGvvv << "Finished Op(" >> op->name() << rid >>
|
||||
"/" >> queue.size() >> ") output:" << op->outputs();
|
||||
|
|
|
@ -22,6 +22,7 @@ static auto make_number = get_op_info("number")
|
|||
|
||||
VarPtr make_grad(Op* op, Var* out, Var* dout, Var* x, int x_index) {
|
||||
if (dout == nullptr) return nullptr;
|
||||
if (x_index<0) return nullptr;
|
||||
LOGvvvv << "Make grad op:" >> op->name() << "inputs:" >> op->inputs()
|
||||
<< "out:" >> out << "dout:" >> dout << "x:" >> x << "xid:" >> x_index;
|
||||
auto dx = op->grad(out, dout, x, x_index);
|
||||
|
|
|
@ -0,0 +1,71 @@
|
|||
// ***************************************************************
|
||||
// Copyright (c) 2020 Jittor. All Rights Reserved.
|
||||
// This file is subject to the terms and conditions defined in
|
||||
// file 'LICENSE.txt', which is part of this source code package.
|
||||
// ***************************************************************
|
||||
#include <cfloat>
|
||||
#include <cmath>
|
||||
#include "misc/nan_checker.h"
|
||||
#include "misc/cuda_flags.h"
|
||||
#include <cuda_runtime.h>
|
||||
#include "helper_cuda.h"
|
||||
#include "mem/allocator.h"
|
||||
#include "op.h"
|
||||
|
||||
namespace jittor {
|
||||
|
||||
|
||||
#ifdef HAS_CUDA
|
||||
extern void check_nan_float32(float32* ptr, int64 num);
|
||||
extern void check_nan_float64(float64* ptr, int64 num);
|
||||
#endif
|
||||
|
||||
bool check_nan(Var* v) {
|
||||
if (!v->dtype().is_float()) return true;
|
||||
if (v->input() && (
|
||||
v->input()->name() == string("empty") ||
|
||||
v->input()->name() == string("setitem")))
|
||||
return true;
|
||||
#ifdef HAS_CUDA
|
||||
if (v->allocator->is_cuda()) {
|
||||
if (v->dtype() == ns_float32) {
|
||||
check_nan_float32((float32*)v->mem_ptr, v->num);
|
||||
} else
|
||||
if (v->dtype() == ns_float64) {
|
||||
check_nan_float64((float64*)v->mem_ptr, v->num);
|
||||
}
|
||||
ASSERT(cudaDeviceSynchronize()==0) << "detect nan or inf at" << v;
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
if (v->dtype() == ns_float32) {
|
||||
auto* __restrict__ ptr = v->ptr<float32>();
|
||||
auto num = v->num;
|
||||
bool ok = true;
|
||||
int64 i=0;
|
||||
for (; i<num; i++) {
|
||||
if (std::isinf(ptr[i]) || std::isnan(ptr[i])) {
|
||||
ok = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
ASSERT(ok) << "detect nan at index" << i << v;
|
||||
}
|
||||
if (v->dtype() == ns_float64) {
|
||||
auto* __restrict__ ptr = v->ptr<float64>();
|
||||
auto num = v->num;
|
||||
bool ok = true;
|
||||
int64 i=0;
|
||||
for (; i<num; i++) {
|
||||
if (std::isinf(ptr[i]) || std::isnan(ptr[i])) {
|
||||
ok = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
ASSERT(ok) << "detect nan at index" << i << v;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,47 @@
|
|||
// ***************************************************************
|
||||
// Copyright (c) 2020 Jittor. All Rights Reserved.
|
||||
// This file is subject to the terms and conditions defined in
|
||||
// file 'LICENSE.txt', which is part of this source code package.
|
||||
// ***************************************************************
|
||||
#include "misc/nan_checker.h"
|
||||
#include "misc/cuda_flags.h"
|
||||
#include <cuda_runtime.h>
|
||||
#include "helper_cuda.h"
|
||||
#include <cassert>
|
||||
|
||||
namespace jittor {
|
||||
|
||||
|
||||
#ifdef HAS_CUDA
|
||||
// CUDA kernel: one thread per element; calls __trap() (aborting the kernel)
// when an element is NaN or Inf. The trap surfaces as an error on the next
// cudaDeviceSynchronize, which the host-side caller asserts on.
__global__ void _check_nan_float32(float32* __restrict__ ptr, int64 num) {
    // global element index; blockDim.x is widened to int64 so the
    // multiply cannot overflow for large launches
    int64 i = threadIdx.x + blockIdx.x * (int64)blockDim.x;
    if (i<num) {
        if (::isnan(ptr[i]) || ::isinf(ptr[i]))
            __trap();
    }
}
|
||||
|
||||
|
||||
// CUDA kernel: float64 counterpart of _check_nan_float32 — one thread per
// element, __trap() on NaN/Inf so the host detects it at the next sync.
__global__ void _check_nan_float64(float64* __restrict__ ptr, int64 num) {
    // global element index; widened to int64 to avoid overflow
    int64 i = threadIdx.x + blockIdx.x * (int64)blockDim.x;
    if (i<num) {
        if (::isnan(ptr[i]) || ::isinf(ptr[i]))
            __trap();
    }
}
|
||||
|
||||
// Launch the float64 NaN/Inf-checking kernel over `num` elements,
// one thread per element with up to 1024 threads per block.
void check_nan_float64(float64* ptr, int64 num) {
    // Empty tensors: min(1024, 0) would give a 0-thread block, which is
    // an invalid launch configuration — nothing to check, so return.
    if (num <= 0) return;
    int block_num = std::max((int64)1, (num-1)/1024+1);
    int thread_num = std::min((int64)1024, num);
    _check_nan_float64<<<block_num, thread_num>>>(ptr, num);
}
|
||||
|
||||
// Launch the float32 NaN/Inf-checking kernel over `num` elements,
// one thread per element with up to 1024 threads per block.
void check_nan_float32(float32* ptr, int64 num) {
    // Empty tensors: min(1024, 0) would give a 0-thread block, which is
    // an invalid launch configuration — nothing to check, so return.
    if (num <= 0) return;
    int block_num = std::max((int64)1, (num-1)/1024+1);
    int thread_num = std::min((int64)1024, num);
    _check_nan_float32<<<block_num, thread_num>>>(ptr, num);
}
|
||||
|
||||
#endif
|
||||
|
||||
}
|
|
@ -0,0 +1,13 @@
|
|||
// ***************************************************************
|
||||
// Copyright (c) 2020 Jittor. All Rights Reserved.
|
||||
// This file is subject to the terms and conditions defined in
|
||||
// file 'LICENSE.txt', which is part of this source code package.
|
||||
// ***************************************************************
|
||||
#pragma once
|
||||
#include "var.h"
|
||||
|
||||
namespace jittor {
|
||||
|
||||
bool check_nan(Var* v);
|
||||
|
||||
}
|
|
@ -16,6 +16,17 @@ inline static bool fast_strcmp(const char* a, const char* b) {
|
|||
return !*b;
|
||||
}
|
||||
|
||||
// add dependency b -> a
|
||||
// Add dependency b -> a: make every node in b an input of a, but tag the
// new edges as control dependencies (index = -1) so data-flow passes that
// inspect input indices skip them.
static inline void add_dependency(Node* a, const vector<Node*>& b) {
    a->add_inputs(b);
    // The new edges were appended at the end of a->_inputs; walk backwards
    // over exactly b.size() of them. (size_t avoids the signed/unsigned
    // comparison warning the old `int i` loop produced.)
    auto edge = a->_inputs.end();
    for (size_t i=0; i<b.size(); i++) {
        edge = std::prev(edge);
        // index == -1 marks this as a control dependency edge
        edge->back->index = -1;
    }
}
|
||||
|
||||
static void setitem_inplace(SetitemOp* op) {
|
||||
// LOGir << "in setitem_inplace";
|
||||
auto input = op->inputs().front();
|
||||
|
@ -37,7 +48,7 @@ static void setitem_inplace(SetitemOp* op) {
|
|||
}
|
||||
auto output = op->outputs().front();
|
||||
output->share_with(input);
|
||||
return;
|
||||
// return;
|
||||
|
||||
// LOGir << "pass setitem optim one";
|
||||
|
||||
|
@ -52,7 +63,12 @@ static void setitem_inplace(SetitemOp* op) {
|
|||
}
|
||||
|
||||
VarSlices vs = op->vs;
|
||||
if (!(data->is_finished() == 0 && (data->outputs().size() == 1 || (!input_op || input_op->inputs().size() == 0))))
|
||||
if (!(data->is_finished() == 0 &&
|
||||
(data->outputs().size() == 1 ||
|
||||
(!input_op
|
||||
|| input_op->inputs().size() == 0))))
|
||||
return;
|
||||
if (data->allocator)
|
||||
return;
|
||||
|
||||
auto in_shape = input->shape;
|
||||
|
@ -73,7 +89,7 @@ static void setitem_inplace(SetitemOp* op) {
|
|||
else if (s.is_slice())
|
||||
size = s.slice.start * input->size / in_shape[0];
|
||||
|
||||
data->input()->add_inputs(vector<Var*>{input});
|
||||
add_dependency(data->input(), {input->node()});
|
||||
data->share_with(input, size);
|
||||
// LOGir << "pass setitem optim two";
|
||||
}
|
||||
|
@ -176,6 +192,7 @@ static void getitem_inplace(GetitemOp* op) {
|
|||
// Graph-level optimization hook for setitem: try to perform the setitem
// in place on its input.
void SetitemOp::graph_optimize() {
    // LOGir << "hello graph_optimize";
    setitem_inplace(this);
    // (the old `(void)setitem_inplace;` unused-function suppressor is dead
    // now that the function is actually invoked above)
}
|
||||
|
||||
void GetitemOp::graph_optimize() {
|
||||
|
@ -185,6 +202,7 @@ void GetitemOp::graph_optimize() {
|
|||
(void)setitem_grad_opt;
|
||||
// (void)getitem_inplace;
|
||||
getitem_inplace(this);
|
||||
(void)getitem_inplace;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue