Merge pull request #646 from Jittor/fixHW

adjust aclnn.h reference
This commit is contained in:
Yuxuan Han 2025-06-17 13:34:28 +08:00 committed by GitHub
commit daf04e9fb5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 32 additions and 24 deletions

View File

@ -1191,15 +1191,17 @@ ascend_toolkit_home = os.getenv('ASCEND_TOOLKIT_HOME')
# build cache_compile # build cache_compile
cc_flags += f" -I\"{os.path.join(jittor_path, 'src')}\" " cc_flags += f" -I\"{os.path.join(jittor_path, 'src')}\" "
cc_flags += f" -I\"{os.path.join(jittor_path, 'extern')}\" "
cc_flags += f" -I\"{os.path.join(ascend_toolkit_home, 'include')}\" " if ascend_toolkit_home:
cc_flags += f" -I\"{os.path.join(ascend_toolkit_home, 'include/acl')}\" " cc_flags += f" -I\"{os.path.join(jittor_path, 'extern')}\" "
cc_flags += f" -I\"{os.path.join(ascend_toolkit_home, 'include/aclnn')}\" " cc_flags += f" -I\"{os.path.join(ascend_toolkit_home, 'include')}\" "
cc_flags += f" -I\"{os.path.join(ascend_toolkit_home, 'include/aclnnop')}\" " cc_flags += f" -I\"{os.path.join(ascend_toolkit_home, 'include/acl')}\" "
cc_flags += f" -L\"{os.path.join(ascend_toolkit_home, 'lib64')}\" " cc_flags += f" -I\"{os.path.join(ascend_toolkit_home, 'include/aclnn')}\" "
cc_flags += " -llibascendcl " cc_flags += f" -I\"{os.path.join(ascend_toolkit_home, 'include/aclnnop')}\" "
cc_flags += " -llibnnopbase " cc_flags += f" -L\"{os.path.join(ascend_toolkit_home, 'lib64')}\" "
cc_flags += " -llibopapi " cc_flags += " -llibascendcl "
cc_flags += " -llibnnopbase "
cc_flags += " -llibopapi "
cc_flags += py_include cc_flags += py_include

View File

@ -7,6 +7,7 @@
#pragma once #pragma once
#include "common.h" #include "common.h"
#include <acl/acl.h> #include <acl/acl.h>
#include "aclnn.h"
std::string acl_error_to_string(aclError error); std::string acl_error_to_string(aclError error);

View File

@ -5,6 +5,7 @@
#include <Python.h> #include <Python.h>
#include <pystate.h> #include <pystate.h>
#include "utils.h" #include "utils.h"
#include "aclnn.h"
namespace jittor namespace jittor
{ {

View File

@ -6,6 +6,7 @@
#include <Python.h> #include <Python.h>
#include <pystate.h> #include <pystate.h>
#include "misc/nano_string.h" #include "misc/nano_string.h"
#include "aclnn.h"
namespace jittor namespace jittor
{ {

View File

@ -49,10 +49,13 @@ MpiReduceOp::MpiReduceOp(Var* x, NanoString op, int root) : x(x), op(op), root(r
forward(var); forward(var);
return; return;
} else if (hccl_reduce) { } else if (hccl_reduce) {
auto var = hccl_reduce(x, "sum", root);
//exe.run_sync({var}, true);
forward(var); forward(var);
return; return;
} }
} }
#endif
y = create_output(nullptr, x->dtype()); y = create_output(nullptr, x->dtype());
} }

View File

@ -8,7 +8,7 @@
#include <memory> #include <memory>
#include <functional> #include <functional>
#include "utils/log.h" #include "utils/log.h"
#include "../extern/acl/aclnn/aclnn.h" // #include "../extern/acl/aclnn/aclnn.h"
#define JIT_TEST(name) extern void jit_test_ ## name () #define JIT_TEST(name) extern void jit_test_ ## name ()
void expect_error(std::function<void()> func); void expect_error(std::function<void()> func);

View File

@ -31,9 +31,9 @@ cudaEvent_t event;
struct Init { struct Init {
Init() { Init() {
if (!get_device_count()) return; if (!get_device_count()) return;
//checkCudaErrors(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking)); checkCudaErrors(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
//checkCudaErrors(cudaEventCreate(&event, cudaEventDisableTiming)); checkCudaErrors(cudaEventCreate(&event, cudaEventDisableTiming));
stream = aclstream; // stream = aclstream;
} }
~Init() { ~Init() {
if (!get_device_count()) return; if (!get_device_count()) return;

View File

@ -17,7 +17,7 @@
namespace jittor { namespace jittor {
EXTERN_LIB aclrtStream aclstream; // EXTERN_LIB aclrtStream aclstream;
CopyOp::CopyOp(Var* x) { CopyOp::CopyOp(Var* x) {
flags.set(NodeFlags::_cpu); flags.set(NodeFlags::_cpu);

View File

@ -47,7 +47,7 @@ Init() {
if (!get_device_count()) return; if (!get_device_count()) return;
checkCudaErrors(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking)); checkCudaErrors(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
checkCudaErrors(cudaEventCreate(&event, cudaEventDisableTiming)); checkCudaErrors(cudaEventCreate(&event, cudaEventDisableTiming));
stream = aclstream; // stream = aclstream;
} }
~Init() { ~Init() {
if (!get_device_count()) return; if (!get_device_count()) return;
@ -123,11 +123,11 @@ void FetchOp::run() {
new (&allocation) Allocation(&cuda_dual_allocator, v->size); new (&allocation) Allocation(&cuda_dual_allocator, v->size);
// mostly device to device // mostly device to device
#if IS_CUDA #if IS_CUDA
// checkCudaErrors(cudaMemcpyAsync(
// allocation.ptr, v->mem_ptr, v->size, cudaMemcpyDefault, stream));
checkCudaErrors(cudaMemcpyAsync( checkCudaErrors(cudaMemcpyAsync(
allocation.ptr, v->size, v->mem_ptr, v->size, cudaMemcpyDefault, aclstream)); allocation.ptr, v->mem_ptr, v->size, cudaMemcpyDefault, stream));
checkCudaErrors(aclrtSynchronizeStream(aclstream)); // checkCudaErrors(cudaMemcpyAsync(
// allocation.ptr, v->size, v->mem_ptr, v->size, cudaMemcpyDefault, aclstream));
// checkCudaErrors(aclrtSynchronizeStream(aclstream));
#else #else
checkCudaErrors(cudaMemcpyAsync( checkCudaErrors(cudaMemcpyAsync(
allocation.ptr, v->mem_ptr, v->size, cudaMemcpyDeviceToDevice, stream)); allocation.ptr, v->mem_ptr, v->size, cudaMemcpyDeviceToDevice, stream));
@ -135,11 +135,11 @@ void FetchOp::run() {
auto host_ptr = cuda_dual_allocator.get_dual_allocation( auto host_ptr = cuda_dual_allocator.get_dual_allocation(
allocation.allocation).host_ptr; allocation.allocation).host_ptr;
// device to host // device to host
// checkCudaErrors(cudaMemcpyAsync( checkCudaErrors(cudaMemcpyAsync(
// host_ptr, allocation.ptr, v->size, cudaMemcpyDeviceToHost, stream)); host_ptr, allocation.ptr, v->size, cudaMemcpyDeviceToHost, stream));
checkCudaErrors(aclrtMemcpyAsync( // checkCudaErrors(aclrtMemcpyAsync(
host_ptr, v->size, allocation.ptr, v->size, cudaMemcpyDeviceToHost, aclstream)); // host_ptr, v->size, allocation.ptr, v->size, cudaMemcpyDeviceToHost, aclstream));
checkCudaErrors(aclrtSynchronizeStream(aclstream)); // checkCudaErrors(aclrtSynchronizeStream(aclstream));
allocation.ptr = host_ptr; allocation.ptr = host_ptr;
has_cuda_memcpy = true; has_cuda_memcpy = true;
} else } else