mirror of https://github.com/Jittor/Jittor
better error control && fix doc && fix free buffer
commit 1ad00d4580
parent f9e290160b
@@ -26,7 +26,9 @@ copyright = '2020, Jittor'
 author = 'Jittor'
 
 # The full version, including alpha/beta/rc tags
-release = '1.1.3.1'
+release = jittor.__version__
+# fix AttributeError for "typing.get_type_hints(jt.Var)"
+jittor.Var.__module__ = "jittor_core"
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
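Why the `__module__` assignment helps: typing.get_type_hints resolves a class's string annotations in sys.modules[cls.__module__], so pointing jt.Var at a real, importable module avoids the AttributeError. A minimal standalone sketch with a stand-in class and hypothetical module names (not the real jt.Var):

    import sys, types, typing

    class Var:
        other: "Var"   # string annotation, resolved lazily by get_type_hints

    Var.__module__ = "missing_core"        # hypothetical: names a module that does not exist
    try:
        typing.get_type_hints(Var)
    except Exception as e:                 # NameError/KeyError depending on the Python version
        print("resolution failed:", e)

    mod = types.ModuleType("demo_core")    # hypothetical stand-in for jittor_core
    mod.Var = Var
    sys.modules["demo_core"] = mod
    Var.__module__ = "demo_core"
    print(typing.get_type_hints(Var))      # {'other': <class '__main__.Var'>}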
@@ -7,7 +7,7 @@
 # This file is subject to the terms and conditions defined in
 # file 'LICENSE.txt', which is part of this source code package.
 # ***************************************************************
-__version__ = '1.1.7.5'
+__version__ = '1.1.7.6'
 from . import lock
 with lock.lock_scope():
     from . import compiler
@@ -798,7 +798,7 @@ def check_debug_flags():
         global cc_flags
         cc_flags += " -g -DNODE_MEMCHECK "
 
-cc_flags = " " + os.environ.get("cc_flags", "")
+cc_flags = " "
 # os.RTLD_NOW | os.RTLD_GLOBAL cause segfault when import torch first
 import_flags = os.RTLD_NOW | os.RTLD_GLOBAL | os.RTLD_DEEPBIND
 # if cc_type=="icc":
@@ -841,6 +841,8 @@ has_pybt = check_pybt(gdb_path, python_path)
 
 cc_flags += " -Wall -Werror -Wno-unknown-pragmas -std=c++14 -fPIC -march=native "
 cc_flags += " -fdiagnostics-color=always "
+if "cc_flags" in os.environ:
+    cc_flags += os.environ["cc_flags"] + ' '
 link_flags = " -lstdc++ -ldl -shared "
 core_link_flags = ""
 opt_flags = ""
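The point of moving the environment append below the defaults: gcc and clang honor the last occurrence of a conflicting flag, so user flags must come after -march=native to win. A toy illustration (hypothetical flag values):

    defaults = "-march=native -std=c++14"
    user = "-march=core2"                  # e.g. exported as cc_flags='-march=core2'
    old_order = f" {user} {defaults} "     # old code: env flags first, -march=native wins
    new_order = f" {defaults} {user} "     # new code: env flags last, -march=core2 wins
    print(old_order, "|", new_order)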
@@ -246,10 +246,11 @@ def unbind(x, dim=0):
 
     Example:
 
-        jt.random((3,3))
+        a = jt.random((3,3))
+        b = jt.unbind(a, 0)
 
     '''
-    if dim < 0: dim += len(input.shape)
+    if dim < 0: dim += len(x.shape)
     return [x[(slice(None),)*dim+(i,)] for i in range(x.shape[dim])]
 
 def make_grid(x, nrow=8, padding=2, normalize=False, range=None, scale_each=False, pad_value=0):
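The fixed line also shows how unbind works: it slices index i along dim by building an indexing tuple. A standalone sketch with numpy standing in for jittor (hypothetical data; the indexing semantics match for this case):

    import numpy as np

    def unbind(x, dim=0):
        if dim < 0: dim += len(x.shape)    # the fix above: normalize against x, not input
        return [x[(slice(None),)*dim + (i,)] for i in range(x.shape[dim])]

    a = np.arange(9).reshape(3, 3)
    rows = unbind(a, 0)                    # three arrays of shape (3,)
    cols = unbind(a, -1)                   # negative dim is normalized to 1
    assert all((rows[i] == a[i]).all() for i in range(3))
    assert all((cols[j] == a[:, j]).all() for j in range(3))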
@@ -261,4 +262,4 @@ def make_grid(x, nrow=8, padding=2, normalize=False, range=None, scale_each=Fals
     ncol = math.ceil(b / nrow)
     return x.reindex([c, h*ncol+(ncol+1)*padding, w*nrow+(nrow+1)*padding],
                      [f"i1/{padding+h}*{nrow}+i2/{padding+w}", "i0",
-                      f"i1-i1/{padding+h}*{padding+h}-{padding}", f"i2-i2/{padding+w}*{padding+w}-{padding}"], overflow_value=pad_value)
+                      f"i1-i1/{padding+h}*{padding+h}-{padding}", f"i2-i2/{padding+w}*{padding+w}-{padding}"], overflow_value=pad_value)
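For reference, the reindex strings implement ordinary tile arithmetic: output pixel (i1, i2) maps to source image i1/(padding+h)*nrow + i2/(padding+w) and to the pixel offset inside that tile; indices that land in the padding come out negative and are filled with overflow_value. A worked example in plain Python (hypothetical sizes):

    h = w = 4; padding = 2; nrow = 3

    def src_index(i1, i2):
        b = i1 // (padding + h) * nrow + i2 // (padding + w)    # which source image
        y = i1 - i1 // (padding + h) * (padding + h) - padding  # row inside the tile
        x = i2 - i2 // (padding + w) * (padding + w) - padding  # col inside the tile
        return b, y, x

    print(src_index(8, 8))   # (4, 0, 0): second tile row/col, image 4, pixel (0, 0)
    print(src_index(0, 0))   # (0, -2, -2): padding region, filled with overflow_value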
@@ -734,13 +734,11 @@ def grid_sample(input, grid, mode='bilinear', padding_mode='zeros'):
     Example:
 
         >>> x = jt.array([[[[1,2],[3,4]]]])
-
         >>> print(x)
         [[[[1 2]
           [3 4]]]]
 
         >>> grid = jt.array([[[[0.5, 0.5]]]])
-
         >>> print(x.shape, grid.shape)
         [1,1,2,2,], [1,1,2,2,]
 
@@ -0,0 +1,48 @@
+# ***************************************************************
+# Copyright (c) 2020 Jittor. Authors:
+#     Meng-Hao Guo <guomenghao1997@gmail.com>
+#     Dun Liang <randonlang@gmail.com>.
+#
+# All Rights Reserved.
+# This file is subject to the terms and conditions defined in
+# file 'LICENSE.txt', which is part of this source code package.
+# ***************************************************************
+import jittor as jt
+import unittest
+import sys, os
+from subprocess import getoutput
+
+class TestLazyExecution(unittest.TestCase):
+    @unittest.skipIf(not jt.has_cuda, "No cuda found")
+    def test_lazy_execution(self):
+        code = """
+import jittor as jt
+jt.flags.use_cuda = 1
+
+a = jt.zeros(1)
+b = jt.code([1], a.dtype, [a],
+    cuda_header='''
+    #include <assert.h>
+    ''',
+    cuda_src='''
+    __global__ void kernel(float32* a, float32* b) {
+        b[0] = a[0];
+        assert(a[0] == 1);
+    }
+    kernel<<<1,1>>>(in0_p, out0_p);
+    ''')
+c = a+b
+print(c)
+"""
+        fpath = os.path.join(jt.flags.cache_path, "lazy_error.py")
+        with open(fpath, 'w') as f:
+            f.write(code)
+        res = getoutput(f"{sys.executable} {fpath}")
+        assert 'print(c)' in res
+        res = getoutput(f"lazy_execution=0 {sys.executable} {fpath}")
+        assert "''')" in res
+
+
+if __name__ == "__main__":
+    unittest.main()
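What the two asserts pin down: with lazy execution (the default), the device-side assert(a[0] == 1) only fires when c is actually synchronized, so the reported location is the print(c) line; with lazy_execution=0 the ops run eagerly and the error points back into the jt.code(...) source, whose last line is "''')".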
@@ -10,10 +10,14 @@ import unittest
 from jittor import Function
 import jittor as jt
 import numpy
-import cupy
 import ctypes
 import sys
 
+try:
+    import cupy
+except:
+    pass
+
 class TestCodeOp(unittest.TestCase):
     def test_func(self):
         class Func(Function):
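The try/except turns cupy into an optional dependency: the test module still imports on machines without cupy installed, and only the tests that actually use cupy are affected.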
@@ -20,6 +20,7 @@ import jittor as jt
 from jittor import LOG
 from jittor.compiler import run_cmd
 from jittor_utils import translator
+import sys
 
 jittor_path = os.path.realpath(os.path.join(jt.flags.jittor_path, "..", ".."))
 
@@ -68,12 +69,14 @@ for cc_type in ["g++", "clang"]:
         env = f"cache_name=build/{cc_type}/{device} cc_path="
         cname = "g++" if cc_type=="g++" else "clang-8"
         env += cname
-        env += " "
+        # use core2 arch, avoid using avx instructions
+        # TODO: support more archs, such as arm, or use ir(GIMPLE or LLVM)
+        env += " cc_flags='-march=core2' "
         if device == "cpu":
             env += "nvcc_path='' "
         elif jt.flags.nvcc_path == "":
             env = "unset nvcc_path && " + env
-        cmd = f"{env} python3.7 -c 'import jittor'"
+        cmd = f"{env} {sys.executable} -c 'import jittor'"
+        LOG.i("run cmd:", cmd)
         os.system(cmd)
-        LOG.i("run cmd:", cmd)
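Two small behavior changes ride along in this hunk: sys.executable replaces the hard-coded python3.7, so the child process runs under the same interpreter as the script itself (matching the import sys added above), and the command is now logged before it runs rather than after.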
@@ -1 +1 @@
-a62b45d6caf9c1c18a9118630ec8a591c576e635
+f9e290160bead0d5892754da56b9ad63bc316320
@@ -25,10 +25,16 @@ void EventQueue::Worker::start() {
 }
 
 void EventQueue::worker_caller() {
-    event_queue.func();
+    int status = OK;
+    try {
+        event_queue.func();
+    } catch (const std::exception& e) {
+        LOGe << "Catch error:\n" >> e.what();
+        status = ERROR;
+    }
     {
         std::lock_guard<std::mutex> l(event_queue.mtx);
-        event_queue.run_sync_done = true;
+        event_queue.run_sync_done = status;
     }
 }
@@ -12,6 +12,9 @@
 namespace jittor {
 
 struct EventQueue {
+    static constexpr int RUNNING = 0;
+    static constexpr int OK = 1;
+    static constexpr int ERROR = 2;
     typedef void(*Func)();
     struct Worker {
         Func todo;
@@ -39,7 +42,7 @@ struct EventQueue {
     std::condition_variable cv;
     std::mutex mtx;
     Func func;
-    volatile bool run_sync_done;
+    volatile int run_sync_done;
 
     inline void flush() {
         list<Func> ts;
@@ -53,11 +56,11 @@ struct EventQueue {
 
     static void worker_caller();
 
-    void run_sync(Func func) {
+    int run_sync(Func func) {
         // send work to worker and do something by self
         std::unique_lock<std::mutex> l(mtx);
         this->func = func;
-        run_sync_done = false;
+        run_sync_done = RUNNING;
         // send func to worker
         worker.run(worker_caller);
         while (1) {
@@ -70,8 +73,8 @@ struct EventQueue {
             func();
             l.lock();
             // worker is finished
-            if (run_sync_done)
-                return;
+            if (int ret = run_sync_done)
+                return ret;
         }
     }
 
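Taken together, the EventQueue changes replace a boolean done flag with a three-state handshake, so a worker-side exception is reported to the caller instead of being lost. A minimal standalone Python sketch of the same protocol (assumed simplification; the real code is the C++ above, where the caller also drains its own queue while waiting):

    import threading

    RUNNING, OK, ERROR = 0, 1, 2

    def run_sync(func):
        state = {"done": RUNNING}
        cv = threading.Condition()
        def worker_caller():                 # mirrors EventQueue::worker_caller
            status = OK
            try:
                func()
            except Exception as e:
                print("Catch error:", e)
                status = ERROR
            with cv:
                state["done"] = status       # publish a verdict, never a bare "true"
                cv.notify()
        threading.Thread(target=worker_caller).start()
        with cv:
            cv.wait_for(lambda: state["done"] != RUNNING)
        return state["done"]

    assert run_sync(lambda: None) == OK
    assert run_sync(lambda: 1 / 0) == ERROR  # executor.cc's CHECK would fire on this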
@@ -318,6 +318,7 @@ void Executor::run_sync(vector<Var*> vars, bool device_sync) {
     }
 
     // running
+    SetupFreeBuffer setup_free_buffer;
     FusedOp fused_op;
     vector<Var*> outputs_bk;
 #ifdef HAS_CUDA
@@ -446,9 +447,9 @@ void Executor::run_sync(vector<Var*> vars, bool device_sync) {
     if (device_sync && use_cuda) {
         last_is_cuda = false;
         sync_times++;
-        event_queue.run_sync([]() {
+        CHECK(EventQueue::OK == event_queue.run_sync([]() {
             checkCudaErrors(cudaDeviceSynchronize());
-        });
+        }));
     }
     LOGvv << "cudaDeviceSynchronize times:" << sync_times << "/" <<queue.size();
 #endif
src/node.h
@@ -13,6 +13,8 @@ namespace jittor {
 
 extern unordered_map<void*, int64> lived_nodes;
 extern int64 total_node;
+extern int64 nt;
+extern vector<Node*> free_buffer;
 
 struct NodeFlags {
     typedef uint16 nf_t;
@@ -186,6 +188,27 @@ struct Node {
     void set_stop_grad();
 };
 
+struct SetupFreeBuffer {
+
+    bool outside;
+    inline SetupFreeBuffer() {
+        outside = !nt;
+        if (outside) {
+            nt = ++Node::tflag_count;
+        }
+    }
+
+    inline ~SetupFreeBuffer() {
+        if (outside) {
+            for (int i=0; i<free_buffer.size(); i++)
+                delete free_buffer[i];
+            free_buffer.clear();
+            nt = 0;
+        }
+    }
+
+};
+
 std::ostream& operator<<(std::ostream& os, const Node* node);
 
 } // jittor
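SetupFreeBuffer is a scope guard: the outermost instance claims ownership (via the nt flag), nested instances do nothing, and the buffered nodes are deleted exactly once when the outermost scope exits. A Python analogue of that ownership rule (assumed simplification of the C++ above):

    nt = 0                       # nonzero while some scope owns the buffer
    tflag_count = 0
    free_buffer = []

    class SetupFreeBuffer:
        def __enter__(self):
            global nt, tflag_count
            self.outside = not nt           # am I the outermost scope?
            if self.outside:
                tflag_count += 1
                nt = tflag_count
        def __exit__(self, *exc):
            global nt
            if self.outside:
                free_buffer.clear()         # stands in for delete free_buffer[i]
                nt = 0

    with SetupFreeBuffer():                 # outer scope owns the cleanup
        free_buffer.append("node-a")
        with SetupFreeBuffer():             # inner scope: outside is False, a no-op
            free_buffer.append("node-b")
        assert len(free_buffer) == 2        # nothing freed by the inner exit
    assert free_buffer == []                # freed once, at the outermost exit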
@@ -30,13 +30,6 @@ Var::Var(NanoVector shape, NanoString dtype)
     number_of_lived_vars++;
     numel();
 }
-Var::~Var() {
-    if (mem_ptr != nullptr)
-        allocator->free(mem_ptr, size, allocation);
-    number_of_lived_vars--;
-    if (flags.get(NodeFlags::_in_update_queue))
-        update_queue.pop(this);
-}
 
 string Var::to_string() {
     string s = dtype().to_cstring();