add cuda version 9.0 support

2020-03-29 23:09:51 +08:00 · 2020-03-29 23:09:51 +08:00 · 16c7d3e58c
parent 843534e52a
commit 16c7d3e58c
6 changed files with 26 additions and 30 deletions
--- a/python/jittor/compiler.py
+++ b/python/jittor/compiler.py
@@ -724,8 +724,9 @@ def check_cache_compile():
 def env_or_try_find(name, bname):
    if name in os.environ:
        path = os.environ[name]
-        version = jit_utils.get_version(path)
-        LOG.i(f"Found {bname}{version} at {path}")
+        if path != "":
+            version = jit_utils.get_version(path)
+            LOG.i(f"Found {bname}{version} at {path}")
        return path
    return try_find_exe(bname)

--- a/python/jittor_utils/__init__.py
+++ b/python/jittor_utils/__init__.py
@@ -193,8 +193,9 @@ def find_exe(name, check_version=True):
 def env_or_find(name, bname):
    if name in os.environ:
        path = os.environ[name]
-        version = get_version(path)
-        LOG.i(f"Found {bname}{version} at {path}")
+        if path != "":
+            version = get_version(path)
+            LOG.i(f"Found {bname}{version} at {path}")
        return path
    return find_exe(bname)

--- a/src/fetcher.cc
+++ b/src/fetcher.cc
@@ -7,6 +7,7 @@
 #include <cuda_runtime.h>
 #include <helper_cuda.h>
 #include <mutex>
+#include "misc/cuda_flags.h"
 #include "mem/allocator/sfrl_allocator.h"
 #include "mem/allocator/cuda_dual_allocator.h"
 #include "event_queue.h"
@@ -33,12 +34,7 @@ static void fetch_caller() {
    fetch_tasks.pop_front();
 }

-
-#if CUDA_VERSION < 10000
-static void to_fetch(cudaStream_t stream, cudaError_t status, void*) {
-#else
-static void to_fetch(void*) {
-#endif
+static void to_fetch(CUDA_HOST_FUNC_ARGS) {
    event_queue.push(fetch_caller);
 }

@@ -101,11 +97,7 @@ void fetch(const vector<VarHolder*>& vh, FetchFunc&& func) {
    #ifdef HAS_CUDA
    if (has_cuda_memcpy) {
        fetch_tasks.push_back({move(func), move(allocations), move(arrays)});
-#if CUDA_VERSION < 10000
-        checkCudaErrors(cudaStreamAddCallback(stream, &to_fetch, 0, 0));
-#else
-        checkCudaErrors(cudaLaunchHostFunc(stream, &to_fetch, 0));
-#endif
+        checkCudaErrors(_cudaLaunchHostFunc(stream, &to_fetch, 0));
    } else
    #endif
    {
--- a/src/mem/allocator/cuda_dual_allocator.cc
+++ b/src/mem/allocator/cuda_dual_allocator.cc
@@ -4,6 +4,7 @@
 // file 'LICENSE.txt', which is part of this source code package.
 // ***************************************************************
 #ifdef HAS_CUDA
+#include "misc/cuda_flags.h"
 #include "mem/allocator/cuda_dual_allocator.h"
 #include "mem/allocator/cuda_host_allocator.h"
 #include "mem/allocator/cuda_device_allocator.h"
@@ -26,11 +27,7 @@ static void free_caller() {

 }

-#if CUDA_VERSION < 10000
-void to_free_allocation(cudaStream_t stream, cudaError_t status, void*) {
-#else
-void to_free_allocation(void*) {
-#endif
+void to_free_allocation(CUDA_HOST_FUNC_ARGS) {
    using namespace cuda_dual_local;
    event_queue.push(free_caller);
 }
--- a/src/mem/allocator/cuda_dual_allocator.h
+++ b/src/mem/allocator/cuda_dual_allocator.h
@@ -10,6 +10,7 @@
 #include <cstring>
 #include <cuda_runtime.h>
 #include <helper_cuda.h>
+#include "misc/cuda_flags.h"
 #include "var.h"
 #include "mem/allocator.h"
 #include "mem/allocator/sfrl_allocator.h"
@@ -79,11 +80,7 @@ extern list<Allocation> allocations;

 }

-#if CUDA_VERSION < 10000
-void to_free_allocation(cudaStream_t stream, cudaError_t status, void*);
-#else
-void to_free_allocation(void*);
-#endif
+void to_free_allocation(CUDA_HOST_FUNC_ARGS);

 struct DelayFree final : Allocator {
    inline uint64 flags() const override { return _cuda; };
@@ -98,11 +95,7 @@ struct DelayFree final : Allocator {
    void free(void* mem_ptr, size_t size, const size_t& allocation) override {
        using namespace cuda_dual_local;
        allocations.emplace_back(mem_ptr, allocation, size, &cuda_dual_allocator);
-#if CUDA_VERSION < 10000
-        checkCudaErrors(cudaStreamAddCallback(0, &to_free_allocation, 0, 0));
-#else
-        checkCudaErrors(cudaLaunchHostFunc(0, &to_free_allocation, 0));
-#endif
+        checkCudaErrors(_cudaLaunchHostFunc(0, &to_free_allocation, 0));
    }

    void migrate_to_cpu(void*& mem_ptr, size_t& allocation, size_t size, Allocator* allocator) {
--- a/src/misc/cuda_flags.h
+++ b/src/misc/cuda_flags.h
@@ -10,6 +10,18 @@ namespace jittor {

 #ifdef HAS_CUDA
 DECLARE_FLAG(int, use_cuda);
+
+// #if CUDA_VERSION < 10000
+#if 1
+    #define _cudaLaunchHostFunc(a,b,c) \
+        cudaStreamAddCallback(a,b,c,0)
+    #define CUDA_HOST_FUNC_ARGS cudaStream_t stream, cudaError_t status, void*
+#else
+    #define _cudaLaunchHostFunc(a,b,c) \
+        cudaLaunchHostFunc(a,b,c)
+    #define CUDA_HOST_FUNC_ARGS void*
+#endif
+
 #else
 constexpr int use_cuda = 0;
 #endif