From 34a9dab5fb31a892fa4e2ee46e9a7ab734ea6184 Mon Sep 17 00:00:00 2001 From: Dun Liang Date: Mon, 18 Oct 2021 15:37:02 +0800 Subject: [PATCH] polish error msg --- python/jittor/__init__.py | 2 +- python/jittor/pyjt_compiler.py | 21 ++++++--- python/jittor/src/executor.cc | 63 ++++++++++++++++++++++++--- python/jittor/src/pyjt/py_converter.h | 1 + python/jittor/src/utils/jit_utils.cc | 28 ++++++++++++ python/jittor/test/test_error_msg.py | 39 ++++++++++++++++- 6 files changed, 138 insertions(+), 16 deletions(-) diff --git a/python/jittor/__init__.py b/python/jittor/__init__.py index 9140d50d..935d7763 100644 --- a/python/jittor/__init__.py +++ b/python/jittor/__init__.py @@ -9,7 +9,7 @@ # file 'LICENSE.txt', which is part of this source code package. # *************************************************************** -__version__ = '1.3.1.5' +__version__ = '1.3.1.6' from jittor_utils import lock with lock.lock_scope(): ori_int = int diff --git a/python/jittor/pyjt_compiler.py b/python/jittor/pyjt_compiler.py index ced8cb03..1a7789e0 100644 --- a/python/jittor/pyjt_compiler.py +++ b/python/jittor/pyjt_compiler.py @@ -692,13 +692,20 @@ def compile_src(src, h, basename): }} catch (const std::exception& e) {{ if (!PyErr_Occurred()) {{ std::stringstream ss; - ss {error_log_code}; - PyErr_Format(PyExc_RuntimeError, - "%s\\n%s\\nFailed reason:%s", - ss.str().c_str(), - R""({decs})"", - e.what() - ); + if (check_async_executor_error(e, ss)) {{ + PyErr_Format(PyExc_RuntimeError, + "%s", + ss.str().c_str() + ); + }} else {{ + ss {error_log_code}; + PyErr_Format(PyExc_RuntimeError, + "%s\\n%s\\nFailed reason:%s", + ss.str().c_str(), + R""({decs})"", + e.what() + ); + }} }} }} {func_return_failed}; diff --git a/python/jittor/src/executor.cc b/python/jittor/src/executor.cc index 8cdfa60f..c80ec0ca 100644 --- a/python/jittor/src/executor.cc +++ b/python/jittor/src/executor.cc @@ -29,6 +29,7 @@ #include "misc/nan_checker.h" #include "memory_profiler.h" #include "utils/seh.h" +#include "utils/cache_compile.h" namespace jittor { @@ -101,6 +102,55 @@ void load_fused_op(FusedOp& fused_op, vector& fuse_ops, vector& ops, i } } +void check_op_async_error(Op* op, bool is_fused_op, const std::exception& e, jittor::Log& logf) { + vector stack; + if (is_fused_op) { + FusedOp& fused_op = *((FusedOp*)op); + logf >> "[OP TYPE]:" << "fused_op:("; + for (auto& op : fused_op.ops) + logf << op->name_ex() >> ","; + logf >> ")\n"; + logf >> "[Input]:"; + for (auto& vi : fused_op.vars) + if (vi.type == 0) logf << vi.var->dtype() >> vi.var->shape >> vi.var->name >> ","; + logf << "\n[Output]:"; + Var* ov = nullptr; + for (auto& vi : fused_op.vars) + if (vi.type == 2) { + logf << vi.var->dtype() >> vi.var->shape >> vi.var->name >> ","; + ov = vi.var; + } + if (ov) + stack = get_node_trace(ov); + } else { + logf >> "[OP TYPE]:" << op->name_ex(); + logf << "\n[Input]:"; + for (auto v : op->inputs()) + logf << v->dtype() >> v->shape >> v->name >> ","; + logf << "\n[Output]:"; + Var* ov = nullptr; + for (auto v : op->outputs()) { + logf << v->dtype() >> v->shape >> v->name >> ","; + ov = v; + } + if (ov) + stack = get_node_trace(ov); + } + logf << "\n[Async Backtrace]:"; + if (stack.size()) { + logf << "---"; + for (auto& s : stack) { + logf << "\n " << s.file_path >> ":" >> s.lineno; + if (s.module_type.size()) logf << '<' >> s.module_type >> '>'; + if (s.module_name.size() && s.module_name.find(":") == string::npos) + logf << '[' >> s.module_name >> ']'; + } + } else + logf << "not found, please set env JT_SYNC=1, trace_py_var=3"; + logf << "\n[Reason]:" << e.what(); + jittor::LogFatalVoidify() && logf; +} + void Executor::run_sync(vector vars, bool device_sync) { auto allocator = get_allocator(); auto temp_allocator = get_allocator(true); @@ -531,13 +581,12 @@ void Executor::run_sync(vector vars, bool device_sync) { // log jit_key and file location op->do_prepare(jkl); string jit_src_path = Op::get_filename_from_jit_key(jkl.to_cstring(), ".cc"); - LOGe << "[Error] source file location:" << jit_src_path; - if (is_fused_op) { - LOGf << "Execute fused operator(" >> rid >> '/' >> queue.size() >> ")" - << "failed:" << fused_op.ops << "\n\nReason: " >> e.what(); - } else - LOGf << "Execute operator(" >> rid >> '/' >> queue.size() >> ")" - << "failed:" << op << "\n\nReason: " >> e.what(); + jittor::Log logf(__FILELINE__, 'f', 0); + logf << "\nExecute fused operator(" >> rid >> '/' >> queue.size() >> ")" + << "failed."; + if (jit_compiler::file_exist(jit_src_path)) + logf << "\n[JIT Source]:" << jit_src_path << "\n"; + check_op_async_error(op, is_fused_op, e, logf); } } LOGvv << "All" << op_num << "ops finished, return vars:" << vars; diff --git a/python/jittor/src/pyjt/py_converter.h b/python/jittor/src/pyjt/py_converter.h index a703a53c..b304f76c 100644 --- a/python/jittor/src/pyjt/py_converter.h +++ b/python/jittor/src/pyjt/py_converter.h @@ -786,5 +786,6 @@ DEF_IS(VarSlices, T) from_py_object(PyObject* obj, vector> } } +EXTERN_LIB bool check_async_executor_error(const std::exception& e, std::ostream& os); } // jittor diff --git a/python/jittor/src/utils/jit_utils.cc b/python/jittor/src/utils/jit_utils.cc index 7db727af..be789e9c 100644 --- a/python/jittor/src/utils/jit_utils.cc +++ b/python/jittor/src/utils/jit_utils.cc @@ -23,6 +23,34 @@ namespace jittor { +bool check_async_executor_error(const std::exception& e, std::ostream& os) { + if (!e.what()) return false; + auto s = string(e.what()); + if (s.find("executor.cc:") == string::npos) + return false; + os << s; + if (getenv("JT_SYNC") && getenv("trace_py_var")) + return true; + if (s.find("[Async Backtrace]: ---") != string::npos) + return true; + os << "\n**********\nAsync error was detected. " + "To locate the async backtrace, please rerun your code with " + "two enviroment variables set:\n" + #ifdef _WIN32 + "cmd: \n" + ">>> set JT_SYNC=1\n" + ">>> set trace_py_var=3\n" + "powershell: \n" + ">>> $env:JT_SYNC=1\n" + ">>> $env:trace_py_var=3\n" + #else + ">>> export JT_SYNC=1\n" + ">>> export trace_py_var=3\n" + #endif + ; + return true; +} + SEH_HOOK; void init_subprocess() { diff --git a/python/jittor/test/test_error_msg.py b/python/jittor/test/test_error_msg.py index ac1d8717..a2771206 100644 --- a/python/jittor/test/test_error_msg.py +++ b/python/jittor/test/test_error_msg.py @@ -27,7 +27,44 @@ class TestErrorMsg(unittest.TestCase): throw std::runtime_error("???"); """ ) - print(b) + msg = "" + try: + print(b) + except Exception as e: + msg = str(e) + assert "[Reason]: ???" in msg + assert "[Input]: int32[3,]" in msg + assert "[OP TYPE]: code" in msg + assert "[Async Backtrace]:" in msg + + @jt.flag_scope(trace_py_var=3) + def test_error_msg_trace_py_var(self): + a = jt.array([3,2,1]) + b = jt.code(a.shape, a.dtype, [a], + cpu_header=""" + #include + @alias(a, in0) + @alias(b, out) + """, + cpu_src=""" + for (int i=0; i