mirror of https://github.com/Jittor/Jittor
commit
daf04e9fb5
|
@ -1191,15 +1191,17 @@ ascend_toolkit_home = os.getenv('ASCEND_TOOLKIT_HOME')
|
||||||
|
|
||||||
# build cache_compile
|
# build cache_compile
|
||||||
cc_flags += f" -I\"{os.path.join(jittor_path, 'src')}\" "
|
cc_flags += f" -I\"{os.path.join(jittor_path, 'src')}\" "
|
||||||
cc_flags += f" -I\"{os.path.join(jittor_path, 'extern')}\" "
|
|
||||||
cc_flags += f" -I\"{os.path.join(ascend_toolkit_home, 'include')}\" "
|
if ascend_toolkit_home:
|
||||||
cc_flags += f" -I\"{os.path.join(ascend_toolkit_home, 'include/acl')}\" "
|
cc_flags += f" -I\"{os.path.join(jittor_path, 'extern')}\" "
|
||||||
cc_flags += f" -I\"{os.path.join(ascend_toolkit_home, 'include/aclnn')}\" "
|
cc_flags += f" -I\"{os.path.join(ascend_toolkit_home, 'include')}\" "
|
||||||
cc_flags += f" -I\"{os.path.join(ascend_toolkit_home, 'include/aclnnop')}\" "
|
cc_flags += f" -I\"{os.path.join(ascend_toolkit_home, 'include/acl')}\" "
|
||||||
cc_flags += f" -L\"{os.path.join(ascend_toolkit_home, 'lib64')}\" "
|
cc_flags += f" -I\"{os.path.join(ascend_toolkit_home, 'include/aclnn')}\" "
|
||||||
cc_flags += " -llibascendcl "
|
cc_flags += f" -I\"{os.path.join(ascend_toolkit_home, 'include/aclnnop')}\" "
|
||||||
cc_flags += " -llibnnopbase "
|
cc_flags += f" -L\"{os.path.join(ascend_toolkit_home, 'lib64')}\" "
|
||||||
cc_flags += " -llibopapi "
|
cc_flags += " -llibascendcl "
|
||||||
|
cc_flags += " -llibnnopbase "
|
||||||
|
cc_flags += " -llibopapi "
|
||||||
|
|
||||||
cc_flags += py_include
|
cc_flags += py_include
|
||||||
|
|
||||||
|
|
|
@ -7,6 +7,7 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include <acl/acl.h>
|
#include <acl/acl.h>
|
||||||
|
#include "aclnn.h"
|
||||||
|
|
||||||
std::string acl_error_to_string(aclError error);
|
std::string acl_error_to_string(aclError error);
|
||||||
|
|
||||||
|
|
|
@ -5,6 +5,7 @@
|
||||||
#include <Python.h>
|
#include <Python.h>
|
||||||
#include <pystate.h>
|
#include <pystate.h>
|
||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
|
#include "aclnn.h"
|
||||||
|
|
||||||
namespace jittor
|
namespace jittor
|
||||||
{
|
{
|
||||||
|
|
|
@ -6,6 +6,7 @@
|
||||||
#include <Python.h>
|
#include <Python.h>
|
||||||
#include <pystate.h>
|
#include <pystate.h>
|
||||||
#include "misc/nano_string.h"
|
#include "misc/nano_string.h"
|
||||||
|
#include "aclnn.h"
|
||||||
|
|
||||||
namespace jittor
|
namespace jittor
|
||||||
{
|
{
|
||||||
|
|
|
@ -49,10 +49,13 @@ MpiReduceOp::MpiReduceOp(Var* x, NanoString op, int root) : x(x), op(op), root(r
|
||||||
forward(var);
|
forward(var);
|
||||||
return;
|
return;
|
||||||
} else if (hccl_reduce) {
|
} else if (hccl_reduce) {
|
||||||
|
auto var = hccl_reduce(x, "sum", root);
|
||||||
|
//exe.run_sync({var}, true);
|
||||||
forward(var);
|
forward(var);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
y = create_output(nullptr, x->dtype());
|
y = create_output(nullptr, x->dtype());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -8,7 +8,7 @@
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <functional>
|
#include <functional>
|
||||||
#include "utils/log.h"
|
#include "utils/log.h"
|
||||||
#include "../extern/acl/aclnn/aclnn.h"
|
// #include "../extern/acl/aclnn/aclnn.h"
|
||||||
|
|
||||||
#define JIT_TEST(name) extern void jit_test_ ## name ()
|
#define JIT_TEST(name) extern void jit_test_ ## name ()
|
||||||
void expect_error(std::function<void()> func);
|
void expect_error(std::function<void()> func);
|
||||||
|
|
|
@ -31,9 +31,9 @@ cudaEvent_t event;
|
||||||
struct Init {
|
struct Init {
|
||||||
Init() {
|
Init() {
|
||||||
if (!get_device_count()) return;
|
if (!get_device_count()) return;
|
||||||
//checkCudaErrors(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
|
checkCudaErrors(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
|
||||||
//checkCudaErrors(cudaEventCreate(&event, cudaEventDisableTiming));
|
checkCudaErrors(cudaEventCreate(&event, cudaEventDisableTiming));
|
||||||
stream = aclstream;
|
// stream = aclstream;
|
||||||
}
|
}
|
||||||
~Init() {
|
~Init() {
|
||||||
if (!get_device_count()) return;
|
if (!get_device_count()) return;
|
||||||
|
|
|
@ -17,7 +17,7 @@
|
||||||
|
|
||||||
namespace jittor {
|
namespace jittor {
|
||||||
|
|
||||||
EXTERN_LIB aclrtStream aclstream;
|
// EXTERN_LIB aclrtStream aclstream;
|
||||||
|
|
||||||
CopyOp::CopyOp(Var* x) {
|
CopyOp::CopyOp(Var* x) {
|
||||||
flags.set(NodeFlags::_cpu);
|
flags.set(NodeFlags::_cpu);
|
||||||
|
|
|
@ -47,7 +47,7 @@ Init() {
|
||||||
if (!get_device_count()) return;
|
if (!get_device_count()) return;
|
||||||
checkCudaErrors(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
|
checkCudaErrors(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
|
||||||
checkCudaErrors(cudaEventCreate(&event, cudaEventDisableTiming));
|
checkCudaErrors(cudaEventCreate(&event, cudaEventDisableTiming));
|
||||||
stream = aclstream;
|
// stream = aclstream;
|
||||||
}
|
}
|
||||||
~Init() {
|
~Init() {
|
||||||
if (!get_device_count()) return;
|
if (!get_device_count()) return;
|
||||||
|
@ -123,11 +123,11 @@ void FetchOp::run() {
|
||||||
new (&allocation) Allocation(&cuda_dual_allocator, v->size);
|
new (&allocation) Allocation(&cuda_dual_allocator, v->size);
|
||||||
// mostly device to device
|
// mostly device to device
|
||||||
#if IS_CUDA
|
#if IS_CUDA
|
||||||
// checkCudaErrors(cudaMemcpyAsync(
|
|
||||||
// allocation.ptr, v->mem_ptr, v->size, cudaMemcpyDefault, stream));
|
|
||||||
checkCudaErrors(cudaMemcpyAsync(
|
checkCudaErrors(cudaMemcpyAsync(
|
||||||
allocation.ptr, v->size, v->mem_ptr, v->size, cudaMemcpyDefault, aclstream));
|
allocation.ptr, v->mem_ptr, v->size, cudaMemcpyDefault, stream));
|
||||||
checkCudaErrors(aclrtSynchronizeStream(aclstream));
|
// checkCudaErrors(cudaMemcpyAsync(
|
||||||
|
// allocation.ptr, v->size, v->mem_ptr, v->size, cudaMemcpyDefault, aclstream));
|
||||||
|
// checkCudaErrors(aclrtSynchronizeStream(aclstream));
|
||||||
#else
|
#else
|
||||||
checkCudaErrors(cudaMemcpyAsync(
|
checkCudaErrors(cudaMemcpyAsync(
|
||||||
allocation.ptr, v->mem_ptr, v->size, cudaMemcpyDeviceToDevice, stream));
|
allocation.ptr, v->mem_ptr, v->size, cudaMemcpyDeviceToDevice, stream));
|
||||||
|
@ -135,11 +135,11 @@ void FetchOp::run() {
|
||||||
auto host_ptr = cuda_dual_allocator.get_dual_allocation(
|
auto host_ptr = cuda_dual_allocator.get_dual_allocation(
|
||||||
allocation.allocation).host_ptr;
|
allocation.allocation).host_ptr;
|
||||||
// device to host
|
// device to host
|
||||||
// checkCudaErrors(cudaMemcpyAsync(
|
checkCudaErrors(cudaMemcpyAsync(
|
||||||
// host_ptr, allocation.ptr, v->size, cudaMemcpyDeviceToHost, stream));
|
host_ptr, allocation.ptr, v->size, cudaMemcpyDeviceToHost, stream));
|
||||||
checkCudaErrors(aclrtMemcpyAsync(
|
// checkCudaErrors(aclrtMemcpyAsync(
|
||||||
host_ptr, v->size, allocation.ptr, v->size, cudaMemcpyDeviceToHost, aclstream));
|
// host_ptr, v->size, allocation.ptr, v->size, cudaMemcpyDeviceToHost, aclstream));
|
||||||
checkCudaErrors(aclrtSynchronizeStream(aclstream));
|
// checkCudaErrors(aclrtSynchronizeStream(aclstream));
|
||||||
allocation.ptr = host_ptr;
|
allocation.ptr = host_ptr;
|
||||||
has_cuda_memcpy = true;
|
has_cuda_memcpy = true;
|
||||||
} else
|
} else
|
||||||
|
|
Loading…
Reference in New Issue