Merge branch 'master' of https://github.com/toddlt/jittor into cjld

2020-03-29 22:49:18 +08:00 · 2020-03-29 22:49:18 +08:00 · 843534e52a
parent c429786538 9bb59d2d81
commit 843534e52a
3 changed files with 22 additions and 1 deletions
--- a/src/fetcher.cc
+++ b/src/fetcher.cc
@@ -33,7 +33,12 @@ static void fetch_caller() {
    fetch_tasks.pop_front();
 }

+
+#if CUDA_VERSION < 10000
+static void to_fetch(cudaStream_t stream, cudaError_t status, void*) {
+#else
 static void to_fetch(void*) {
+#endif
    event_queue.push(fetch_caller);
 }

@@ -96,7 +101,11 @@ void fetch(const vector<VarHolder*>& vh, FetchFunc&& func) {
    #ifdef HAS_CUDA
    if (has_cuda_memcpy) {
        fetch_tasks.push_back({move(func), move(allocations), move(arrays)});
+#if CUDA_VERSION < 10000
+        checkCudaErrors(cudaStreamAddCallback(stream, &to_fetch, 0, 0));
+#else
        checkCudaErrors(cudaLaunchHostFunc(stream, &to_fetch, 0));
+#endif
    } else
    #endif
    {
@@ -105,4 +114,4 @@ void fetch(const vector<VarHolder*>& vh, FetchFunc&& func) {
    }
 }

-} // jittor
+} // jittor
--- a/src/mem/allocator/cuda_dual_allocator.cc
+++ b/src/mem/allocator/cuda_dual_allocator.cc
@@ -26,7 +26,11 @@ static void free_caller() {

 }

+#if CUDA_VERSION < 10000
+void to_free_allocation(cudaStream_t stream, cudaError_t status, void*) {
+#else
 void to_free_allocation(void*) {
+#endif
    using namespace cuda_dual_local;
    event_queue.push(free_caller);
 }
--- a/src/mem/allocator/cuda_dual_allocator.h
+++ b/src/mem/allocator/cuda_dual_allocator.h
@@ -79,7 +79,11 @@ extern list<Allocation> allocations;

 }

+#if CUDA_VERSION < 10000
+void to_free_allocation(cudaStream_t stream, cudaError_t status, void*);
+#else
 void to_free_allocation(void*);
+#endif

 struct DelayFree final : Allocator {
    inline uint64 flags() const override { return _cuda; };
@@ -94,7 +98,11 @@ struct DelayFree final : Allocator {
    void free(void* mem_ptr, size_t size, const size_t& allocation) override {
        using namespace cuda_dual_local;
        allocations.emplace_back(mem_ptr, allocation, size, &cuda_dual_allocator);
+#if CUDA_VERSION < 10000
+        checkCudaErrors(cudaStreamAddCallback(0, &to_free_allocation, 0, 0));
+#else
        checkCudaErrors(cudaLaunchHostFunc(0, &to_free_allocation, 0));
+#endif
    }

    void migrate_to_cpu(void*& mem_ptr, size_t& allocation, size_t size, Allocator* allocator) {