polish error msg

This commit is contained in:
Dun Liang 2021-10-18 15:37:02 +08:00
parent 6c64a20493
commit 34a9dab5fb
6 changed files with 138 additions and 16 deletions

View File

@ -9,7 +9,7 @@
# file 'LICENSE.txt', which is part of this source code package.
# ***************************************************************
__version__ = '1.3.1.5'
__version__ = '1.3.1.6'
from jittor_utils import lock
with lock.lock_scope():
ori_int = int

View File

@ -692,13 +692,20 @@ def compile_src(src, h, basename):
}} catch (const std::exception& e) {{
if (!PyErr_Occurred()) {{
std::stringstream ss;
ss {error_log_code};
PyErr_Format(PyExc_RuntimeError,
"%s\\n%s\\nFailed reason:%s",
ss.str().c_str(),
R""({decs})"",
e.what()
);
if (check_async_executor_error(e, ss)) {{
PyErr_Format(PyExc_RuntimeError,
"%s",
ss.str().c_str()
);
}} else {{
ss {error_log_code};
PyErr_Format(PyExc_RuntimeError,
"%s\\n%s\\nFailed reason:%s",
ss.str().c_str(),
R""({decs})"",
e.what()
);
}}
}}
}}
{func_return_failed};

View File

@ -29,6 +29,7 @@
#include "misc/nan_checker.h"
#include "memory_profiler.h"
#include "utils/seh.h"
#include "utils/cache_compile.h"
namespace jittor {
@ -101,6 +102,55 @@ void load_fused_op(FusedOp& fused_op, vector<int>& fuse_ops, vector<Op*>& ops, i
}
}
void check_op_async_error(Op* op, bool is_fused_op, const std::exception& e, jittor::Log& logf) {
vector<Stack> stack;
if (is_fused_op) {
FusedOp& fused_op = *((FusedOp*)op);
logf >> "[OP TYPE]:" << "fused_op:(";
for (auto& op : fused_op.ops)
logf << op->name_ex() >> ",";
logf >> ")\n";
logf >> "[Input]:";
for (auto& vi : fused_op.vars)
if (vi.type == 0) logf << vi.var->dtype() >> vi.var->shape >> vi.var->name >> ",";
logf << "\n[Output]:";
Var* ov = nullptr;
for (auto& vi : fused_op.vars)
if (vi.type == 2) {
logf << vi.var->dtype() >> vi.var->shape >> vi.var->name >> ",";
ov = vi.var;
}
if (ov)
stack = get_node_trace(ov);
} else {
logf >> "[OP TYPE]:" << op->name_ex();
logf << "\n[Input]:";
for (auto v : op->inputs())
logf << v->dtype() >> v->shape >> v->name >> ",";
logf << "\n[Output]:";
Var* ov = nullptr;
for (auto v : op->outputs()) {
logf << v->dtype() >> v->shape >> v->name >> ",";
ov = v;
}
if (ov)
stack = get_node_trace(ov);
}
logf << "\n[Async Backtrace]:";
if (stack.size()) {
logf << "---";
for (auto& s : stack) {
logf << "\n " << s.file_path >> ":" >> s.lineno;
if (s.module_type.size()) logf << '<' >> s.module_type >> '>';
if (s.module_name.size() && s.module_name.find(":") == string::npos)
logf << '[' >> s.module_name >> ']';
}
} else
logf << "not found, please set env JT_SYNC=1, trace_py_var=3";
logf << "\n[Reason]:" << e.what();
jittor::LogFatalVoidify() && logf;
}
void Executor::run_sync(vector<Var*> vars, bool device_sync) {
auto allocator = get_allocator();
auto temp_allocator = get_allocator(true);
@ -531,13 +581,12 @@ void Executor::run_sync(vector<Var*> vars, bool device_sync) {
// log jit_key and file location
op->do_prepare(jkl);
string jit_src_path = Op::get_filename_from_jit_key(jkl.to_cstring(), ".cc");
LOGe << "[Error] source file location:" << jit_src_path;
if (is_fused_op) {
LOGf << "Execute fused operator(" >> rid >> '/' >> queue.size() >> ")"
<< "failed:" << fused_op.ops << "\n\nReason: " >> e.what();
} else
LOGf << "Execute operator(" >> rid >> '/' >> queue.size() >> ")"
<< "failed:" << op << "\n\nReason: " >> e.what();
jittor::Log logf(__FILELINE__, 'f', 0);
logf << "\nExecute fused operator(" >> rid >> '/' >> queue.size() >> ")"
<< "failed.";
if (jit_compiler::file_exist(jit_src_path))
logf << "\n[JIT Source]:" << jit_src_path << "\n";
check_op_async_error(op, is_fused_op, e, logf);
}
}
LOGvv << "All" << op_num << "ops finished, return vars:" << vars;

View File

@ -786,5 +786,6 @@ DEF_IS(VarSlices, T) from_py_object(PyObject* obj, vector<unique_ptr<VarHolder>>
}
}
EXTERN_LIB bool check_async_executor_error(const std::exception& e, std::ostream& os);
} // jittor

View File

@ -23,6 +23,34 @@
namespace jittor {
bool check_async_executor_error(const std::exception& e, std::ostream& os) {
if (!e.what()) return false;
auto s = string(e.what());
if (s.find("executor.cc:") == string::npos)
return false;
os << s;
if (getenv("JT_SYNC") && getenv("trace_py_var"))
return true;
if (s.find("[Async Backtrace]: ---") != string::npos)
return true;
os << "\n**********\nAsync error was detected. "
"To locate the async backtrace, please rerun your code with "
"two enviroment variables set:\n"
#ifdef _WIN32
"cmd: \n"
">>> set JT_SYNC=1\n"
">>> set trace_py_var=3\n"
"powershell: \n"
">>> $env:JT_SYNC=1\n"
">>> $env:trace_py_var=3\n"
#else
">>> export JT_SYNC=1\n"
">>> export trace_py_var=3\n"
#endif
;
return true;
}
SEH_HOOK;
void init_subprocess() {

View File

@ -27,7 +27,44 @@ class TestErrorMsg(unittest.TestCase):
throw std::runtime_error("???");
"""
)
print(b)
msg = ""
try:
print(b)
except Exception as e:
msg = str(e)
assert "[Reason]: ???" in msg
assert "[Input]: int32[3,]" in msg
assert "[OP TYPE]: code" in msg
assert "[Async Backtrace]:" in msg
@jt.flag_scope(trace_py_var=3)
def test_error_msg_trace_py_var(self):
a = jt.array([3,2,1])
b = jt.code(a.shape, a.dtype, [a],
cpu_header="""
#include <algorithm>
@alias(a, in0)
@alias(b, out)
""",
cpu_src="""
for (int i=0; i<a_shape0; i++)
@b(i) = @a(i);
std::sort(&@b(0), &@b(in0_shape0));
throw std::runtime_error("???");
"""
)
msg = ""
try:
print(b)
except Exception as e:
msg = str(e)
print(msg)
assert "[Reason]: ???" in msg
assert "[Input]: int32[3,]" in msg
assert "[OP TYPE]: code" in msg
assert "[Async Backtrace]:" in msg
assert "test_error_msg.py:" in msg
if __name__ == "__main__":