mirror of https://github.com/Jittor/Jittor
support disable cuda managed memory
This commit is contained in:
parent
aefe719770
commit
bebd218bdd
|
@ -8,7 +8,7 @@
|
|||
# This file is subject to the terms and conditions defined in
|
||||
# file 'LICENSE.txt', which is part of this source code package.
|
||||
# ***************************************************************
|
||||
__version__ = '1.2.2.32'
|
||||
__version__ = '1.2.2.33'
|
||||
from . import lock
|
||||
with lock.lock_scope():
|
||||
ori_int = int
|
||||
|
|
|
@ -23,5 +23,16 @@ class TestAllocator(unittest.TestCase):
|
|||
assert jt.flags.stat_allocator_total_free_call == 2
|
||||
assert jt.flags.stat_allocator_total_free_byte == 800
|
||||
|
||||
@unittest.skipIf(not jt.has_cuda, "Cuda not found")
|
||||
@jt.flag_scope(use_cuda=1, use_cuda_managed_allocator=0)
|
||||
def test_device_allocator(self):
|
||||
a = jt.array([1,2,3,4,5])
|
||||
b = a + 1
|
||||
c = jt.code(a.shape, a.dtype, [b], cpu_src="""
|
||||
for (int i=0; i<in0_shape0; i++)
|
||||
@out(i) = @in0(i)*@in0(i)*2;
|
||||
""")
|
||||
assert (c.data == [8,18,32,50,72]).all()
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
|
|
@ -37,6 +37,10 @@ DECLARE_FLAG(int, profile_memory_enable);
|
|||
|
||||
// from fetch_op.cc
|
||||
extern list<VarPtr> fetcher_to_free;
|
||||
// from cuda_managed_allocator
|
||||
#ifdef HAS_CUDA
|
||||
DECLARE_FLAG(int, use_cuda_managed_allocator);
|
||||
#endif
|
||||
|
||||
void load_fused_op(FusedOp& fused_op, vector<int>& fuse_ops, vector<Op*>& ops, int ll, int rr, int64 tt) {
|
||||
fused_op.ops.clear();
|
||||
|
@ -445,6 +449,14 @@ void Executor::run_sync(vector<Var*> vars, bool device_sync) {
|
|||
for (Var* v : op->inputs()) {
|
||||
migrate_to_cpu(v, allocator);
|
||||
}
|
||||
if (!use_cuda_managed_allocator) {
|
||||
for (auto* var : op->outputs()) {
|
||||
var->allocator->free(var->mem_ptr, var->size, var->allocation);
|
||||
var->mem_ptr = var->allocator = nullptr;
|
||||
var->allocation = 0;
|
||||
var->alloc(cpu_allocator);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#ifdef NODE_MEMCHECK
|
||||
|
@ -459,6 +471,14 @@ void Executor::run_sync(vector<Var*> vars, bool device_sync) {
|
|||
#endif
|
||||
last_is_cuda = is_cuda;
|
||||
op->do_run_after_prepare(jkl);
|
||||
#ifdef HAS_CUDA
|
||||
// migrate to gpu
|
||||
if (PREDICT_BRANCH_NOT_TAKEN((!is_cuda && use_cuda && !use_cuda_managed_allocator))) {
|
||||
for (Var* v : op->outputs()) {
|
||||
migrate_to_gpu(v, allocator);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
// record trace data
|
||||
if (PREDICT_BRANCH_NOT_TAKEN(trace_py_var>=2)) {
|
||||
trace_data.record_execution(op, is_fused_op, jkl);
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
#ifdef HAS_CUDA
|
||||
#include "mem/allocator/cuda_managed_allocator.h"
|
||||
#include "mem/allocator/cuda_device_allocator.h"
|
||||
#include "mem/allocator/cuda_dual_allocator.h"
|
||||
#endif
|
||||
#include "mem/allocator/stat_allocator.h"
|
||||
#include "mem/allocator/sfrl_allocator.h"
|
||||
|
@ -91,4 +92,42 @@ void gc_all() {
|
|||
for (auto& kv : allocators) kv.second->gc();
|
||||
}
|
||||
|
||||
|
||||
#ifdef HAS_CUDA
|
||||
|
||||
void migrate_to_cpu(Var* var, Allocator* allocator) {
|
||||
if (!use_cuda_managed_allocator)
|
||||
allocator = cpu_allocator;
|
||||
if (var->allocator == &delay_free) {
|
||||
var->allocator = allocator;
|
||||
delay_free.migrate_to_cpu(
|
||||
var->mem_ptr, var->allocation, var->size, var->allocator
|
||||
);
|
||||
} else
|
||||
if (!use_cuda_managed_allocator) {
|
||||
// must be a device allocator
|
||||
Allocation a(allocator, var->size);
|
||||
checkCudaErrors(cudaMemcpy(a.ptr, var->mem_ptr, var->size, cudaMemcpyDeviceToHost));
|
||||
var->allocator->free(var->mem_ptr, var->size, var->allocation);
|
||||
var->mem_ptr = a.ptr;
|
||||
var->allocation = a.allocation;
|
||||
var->allocator = a.allocator;
|
||||
a.ptr = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void migrate_to_gpu(Var* var, Allocator* allocator) {
|
||||
// only happend when not using use_cuda_managed_allocator
|
||||
Allocation a(allocator, var->size);
|
||||
checkCudaErrors(cudaMemcpy(a.ptr, var->mem_ptr, var->size, cudaMemcpyHostToDevice));
|
||||
var->allocator->free(var->mem_ptr, var->size, var->allocation);
|
||||
var->mem_ptr = a.ptr;
|
||||
var->allocation = a.allocation;
|
||||
var->allocator = a.allocator;
|
||||
a.ptr = nullptr;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
} // jittor
|
|
@ -53,4 +53,9 @@ Allocator* get_allocator(bool temp_allocator=false);
|
|||
// @pyjt(gc)
|
||||
void gc_all();
|
||||
|
||||
#ifdef HAS_CUDA
|
||||
void migrate_to_cpu(Var* var, Allocator* allocator);
|
||||
void migrate_to_gpu(Var* var, Allocator* allocator);
|
||||
#endif
|
||||
|
||||
} // jittor
|
|
@ -115,16 +115,6 @@ struct DelayFree final : Allocator {
|
|||
|
||||
extern DelayFree delay_free;
|
||||
|
||||
inline void migrate_to_cpu(Var* var, Allocator* allocator) {
|
||||
if (var->allocator == &delay_free) {
|
||||
var->allocator = allocator;
|
||||
delay_free.migrate_to_cpu(
|
||||
var->mem_ptr, var->allocation, var->size, var->allocator
|
||||
);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
Loading…
Reference in New Issue