mirror of https://github.com/Jittor/Jittor
add nan checker
This commit is contained in:
parent
b4a01c9b57
commit
d2c5d04ecf
|
@ -74,10 +74,17 @@ def compile(compiler, flags, inputs, output, combind_build=False):
|
|||
for input, obj_file in zip(inputs, obj_files):
|
||||
cc = compiler
|
||||
nflags = oflags
|
||||
if has_cuda and input.endswith(".cu"):
|
||||
nflags = convert_nvcc_flags(oflags)
|
||||
cc = nvcc_path
|
||||
if input.endswith(".cu"):
|
||||
if has_cuda:
|
||||
nflags = convert_nvcc_flags(oflags)
|
||||
cc = nvcc_path
|
||||
else:
|
||||
continue
|
||||
cmd = f"{cc} {input} {nflags} -c {lto_flags} -o {obj_file}"
|
||||
if "nan_checker" in input:
|
||||
# nan checker needs to disable fast_math
|
||||
cmd = cmd.replace("--use_fast_math", "")
|
||||
cmd = cmd.replace("-Ofast", "-O2")
|
||||
cmds.append(cmd)
|
||||
jit_utils.run_cmds(cmds, cache_path, jittor_path, "Compiling "+base_output)
|
||||
cmd = f"{compiler} {' '.join(obj_files)} {flags} {lto_flags} {link} -o {output}"
|
||||
|
@ -945,7 +952,7 @@ pyjt_gen_src = pyjt_compiler.compile(cache_path, jittor_path)
|
|||
# 3. op_utils
|
||||
# 4. other
|
||||
files2 = pyjt_gen_src
|
||||
files4 = run_cmd('find -L src | grep "cc$"', jittor_path).splitlines()
|
||||
files4 = run_cmd('find -L src | grep "c[cu]$"', jittor_path).splitlines()
|
||||
at_beginning = [
|
||||
"src/ops/op_utils.cc",
|
||||
"src/event_queue.cc",
|
||||
|
|
|
@ -627,12 +627,11 @@ def gather(x,dim,index):
|
|||
return x.reindex(ins)
|
||||
jt.Var.gather = gather
|
||||
|
||||
def prod(x,dim=0):
|
||||
def _prod(x,dim=0):
    # prod(x) == exp(sum(log(x))): the product is computed in log space.
    # NOTE(review): assumes entries are positive — log of zero/negative
    # values yields -inf/NaN; confirm callers guarantee positivity.
    return jt.exp(jt.log(x).sum(dim=dim))
|
||||
|
||||
jt.Var.prod = prod
|
||||
|
||||
def cumsum_forward(np, data):
|
||||
a = data['inputs'][0]
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
#include "fuser.h"
|
||||
#include "profiler/profiler_guard.h"
|
||||
#include "parallel_compiler.h"
|
||||
#include "misc/nan_checker.h"
|
||||
|
||||
namespace jittor {
|
||||
|
||||
|
@ -46,7 +47,10 @@ void load_fused_op(FusedOp& fused_op, vector<int>& fuse_ops, vector<Op*>& ops, i
|
|||
for (Op* op : fused_op.ops) {
|
||||
uint fid1 = op->custom_data;
|
||||
int iid = 0;
|
||||
for (Var* v : op->inputs()) {
|
||||
for (auto ve : op->_inputs) {
|
||||
// this is a control dependency edge, don't use it
|
||||
if (ve.back->index<0) continue;
|
||||
auto v = ve.node->var();
|
||||
iid++;
|
||||
int iop_id;
|
||||
int iv_id;
|
||||
|
@ -450,6 +454,8 @@ void Executor::run_sync(vector<Var*> vars, bool device_sync) {
|
|||
if (use_cuda)
|
||||
checkCudaErrors(cudaDeviceSynchronize());
|
||||
#endif
|
||||
for (Var* var : op->outputs())
|
||||
check_nan(var);
|
||||
}
|
||||
LOGvvv << "Finished Op(" >> op->name() << rid >>
|
||||
"/" >> queue.size() >> ") output:" << op->outputs();
|
||||
|
|
|
@ -22,6 +22,7 @@ static auto make_number = get_op_info("number")
|
|||
|
||||
VarPtr make_grad(Op* op, Var* out, Var* dout, Var* x, int x_index) {
|
||||
if (dout == nullptr) return nullptr;
|
||||
if (x_index<0) return nullptr;
|
||||
LOGvvvv << "Make grad op:" >> op->name() << "inputs:" >> op->inputs()
|
||||
<< "out:" >> out << "dout:" >> dout << "x:" >> x << "xid:" >> x_index;
|
||||
auto dx = op->grad(out, dout, x, x_index);
|
||||
|
|
|
@ -0,0 +1,71 @@
|
|||
// ***************************************************************
|
||||
// Copyright (c) 2020 Jittor. All Rights Reserved.
|
||||
// This file is subject to the terms and conditions defined in
|
||||
// file 'LICENSE.txt', which is part of this source code package.
|
||||
// ***************************************************************
|
||||
#include <cfloat>
|
||||
#include <cmath>
|
||||
#include "misc/nan_checker.h"
|
||||
#include "misc/cuda_flags.h"
|
||||
#include <cuda_runtime.h>
|
||||
#include "helper_cuda.h"
|
||||
#include "mem/allocator.h"
|
||||
#include "op.h"
|
||||
|
||||
namespace jittor {
|
||||
|
||||
|
||||
#ifdef HAS_CUDA
|
||||
extern void check_nan_float32(float32* ptr, int64 num);
|
||||
extern void check_nan_float64(float64* ptr, int64 num);
|
||||
#endif
|
||||
|
||||
bool check_nan(Var* v) {
|
||||
if (!v->dtype().is_float()) return true;
|
||||
if (v->input() && (
|
||||
v->input()->name() == string("empty") ||
|
||||
v->input()->name() == string("setitem")))
|
||||
return true;
|
||||
#ifdef HAS_CUDA
|
||||
if (v->allocator->is_cuda()) {
|
||||
if (v->dtype() == ns_float32) {
|
||||
check_nan_float32((float32*)v->mem_ptr, v->num);
|
||||
} else
|
||||
if (v->dtype() == ns_float64) {
|
||||
check_nan_float64((float64*)v->mem_ptr, v->num);
|
||||
}
|
||||
ASSERT(cudaDeviceSynchronize()==0) << "detect nan or inf at" << v;
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
if (v->dtype() == ns_float32) {
|
||||
auto* __restrict__ ptr = v->ptr<float32>();
|
||||
auto num = v->num;
|
||||
bool ok = true;
|
||||
int64 i=0;
|
||||
for (; i<num; i++) {
|
||||
if (std::isinf(ptr[i]) || std::isnan(ptr[i])) {
|
||||
ok = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
ASSERT(ok) << "detect nan at index" << i << v;
|
||||
}
|
||||
if (v->dtype() == ns_float64) {
|
||||
auto* __restrict__ ptr = v->ptr<float64>();
|
||||
auto num = v->num;
|
||||
bool ok = true;
|
||||
int64 i=0;
|
||||
for (; i<num; i++) {
|
||||
if (std::isinf(ptr[i]) || std::isnan(ptr[i])) {
|
||||
ok = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
ASSERT(ok) << "detect nan at index" << i << v;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,47 @@
|
|||
// ***************************************************************
|
||||
// Copyright (c) 2020 Jittor. All Rights Reserved.
|
||||
// This file is subject to the terms and conditions defined in
|
||||
// file 'LICENSE.txt', which is part of this source code package.
|
||||
// ***************************************************************
|
||||
#include "misc/nan_checker.h"
|
||||
#include "misc/cuda_flags.h"
|
||||
#include <cuda_runtime.h>
|
||||
#include "helper_cuda.h"
|
||||
#include <cassert>
|
||||
|
||||
namespace jittor {
|
||||
|
||||
|
||||
#ifdef HAS_CUDA
|
||||
// CUDA kernel: one thread per element; calls __trap() (aborting the kernel)
// when an element is NaN or Inf. The trap surfaces as an error on the next
// cudaDeviceSynchronize, which the host-side caller asserts on.
__global__ void _check_nan_float32(float32* __restrict__ ptr, int64 num) {
    // global element index; blockDim.x is widened to int64 so the
    // multiply cannot overflow for large launches
    int64 i = threadIdx.x + blockIdx.x * (int64)blockDim.x;
    if (i<num) {
        if (::isnan(ptr[i]) || ::isinf(ptr[i]))
            __trap();
    }
}
|
||||
|
||||
|
||||
// CUDA kernel: float64 counterpart of _check_nan_float32 — one thread per
// element, __trap() on NaN/Inf so the host detects it at the next sync.
__global__ void _check_nan_float64(float64* __restrict__ ptr, int64 num) {
    // global element index; widened to int64 to avoid overflow
    int64 i = threadIdx.x + blockIdx.x * (int64)blockDim.x;
    if (i<num) {
        if (::isnan(ptr[i]) || ::isinf(ptr[i]))
            __trap();
    }
}
|
||||
|
||||
// Launch the float64 NaN/Inf-checking kernel over `num` elements,
// one thread per element with up to 1024 threads per block.
void check_nan_float64(float64* ptr, int64 num) {
    // Empty tensors: min(1024, 0) would give a 0-thread block, which is
    // an invalid launch configuration — nothing to check, so return.
    if (num <= 0) return;
    int block_num = std::max((int64)1, (num-1)/1024+1);
    int thread_num = std::min((int64)1024, num);
    _check_nan_float64<<<block_num, thread_num>>>(ptr, num);
}
|
||||
|
||||
// Launch the float32 NaN/Inf-checking kernel over `num` elements,
// one thread per element with up to 1024 threads per block.
void check_nan_float32(float32* ptr, int64 num) {
    // Empty tensors: min(1024, 0) would give a 0-thread block, which is
    // an invalid launch configuration — nothing to check, so return.
    if (num <= 0) return;
    int block_num = std::max((int64)1, (num-1)/1024+1);
    int thread_num = std::min((int64)1024, num);
    _check_nan_float32<<<block_num, thread_num>>>(ptr, num);
}
|
||||
|
||||
#endif
|
||||
|
||||
}
|
|
@ -0,0 +1,13 @@
|
|||
// ***************************************************************
|
||||
// Copyright (c) 2020 Jittor. All Rights Reserved.
|
||||
// This file is subject to the terms and conditions defined in
|
||||
// file 'LICENSE.txt', which is part of this source code package.
|
||||
// ***************************************************************
|
||||
#pragma once
|
||||
#include "var.h"
|
||||
|
||||
namespace jittor {
|
||||
|
||||
bool check_nan(Var* v);
|
||||
|
||||
}
|
|
@ -16,6 +16,17 @@ inline static bool fast_strcmp(const char* a, const char* b) {
|
|||
return !*b;
|
||||
}
|
||||
|
||||
// add dependency b -> a
|
||||
// Add dependency b -> a: make every node in b an input of a, but tag the
// new edges as control dependencies (index = -1) so data-flow passes that
// inspect input indices skip them.
static inline void add_dependency(Node* a, const vector<Node*>& b) {
    a->add_inputs(b);
    // The new edges were appended at the end of a->_inputs; walk backwards
    // over exactly b.size() of them. (size_t avoids the signed/unsigned
    // comparison warning the old `int i` loop produced.)
    auto edge = a->_inputs.end();
    for (size_t i=0; i<b.size(); i++) {
        edge = std::prev(edge);
        // index == -1 marks this as a control dependency edge
        edge->back->index = -1;
    }
}
|
||||
|
||||
static void setitem_inplace(SetitemOp* op) {
|
||||
// LOGir << "in setitem_inplace";
|
||||
auto input = op->inputs().front();
|
||||
|
@ -37,7 +48,7 @@ static void setitem_inplace(SetitemOp* op) {
|
|||
}
|
||||
auto output = op->outputs().front();
|
||||
output->share_with(input);
|
||||
return;
|
||||
// return;
|
||||
|
||||
// LOGir << "pass setitem optim one";
|
||||
|
||||
|
@ -52,7 +63,12 @@ static void setitem_inplace(SetitemOp* op) {
|
|||
}
|
||||
|
||||
VarSlices vs = op->vs;
|
||||
if (!(data->is_finished() == 0 && (data->outputs().size() == 1 || (!input_op || input_op->inputs().size() == 0))))
|
||||
if (!(data->is_finished() == 0 &&
|
||||
(data->outputs().size() == 1 ||
|
||||
(!input_op
|
||||
|| input_op->inputs().size() == 0))))
|
||||
return;
|
||||
if (data->allocator)
|
||||
return;
|
||||
|
||||
auto in_shape = input->shape;
|
||||
|
@ -73,7 +89,7 @@ static void setitem_inplace(SetitemOp* op) {
|
|||
else if (s.is_slice())
|
||||
size = s.slice.start * input->size / in_shape[0];
|
||||
|
||||
data->input()->add_inputs(vector<Var*>{input});
|
||||
add_dependency(data->input(), {input->node()});
|
||||
data->share_with(input, size);
|
||||
// LOGir << "pass setitem optim two";
|
||||
}
|
||||
|
@ -176,6 +192,7 @@ static void getitem_inplace(GetitemOp* op) {
|
|||
// Graph-level optimization hook for setitem: try to perform the setitem
// in place on its input.
void SetitemOp::graph_optimize() {
    // LOGir << "hello graph_optimize";
    setitem_inplace(this);
    // (the old `(void)setitem_inplace;` unused-function suppressor is dead
    // now that the function is actually invoked above)
}
|
||||
|
||||
void GetitemOp::graph_optimize() {
|
||||
|
@ -185,6 +202,7 @@ void GetitemOp::graph_optimize() {
|
|||
(void)setitem_grad_opt;
|
||||
// (void)getitem_inplace;
|
||||
getitem_inplace(this);
|
||||
(void)getitem_inplace;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue