mirror of https://github.com/Jittor/Jittor

enable cuda and acl
commit c78db2a794 (parent f8e44de79d)

@@ -457,8 +457,7 @@ def setup_cutt():
 def install_cutlass(root_folder):
     # Modified from: https://github.com/ap-hynninen/cutlass
-    # url = "https://cloud.tsinghua.edu.cn/f/171e49e5825549548bc4/?dl=1"
-    # url = "https://cg.cs.tsinghua.edu.cn/jittor/assets/cutlass.zip"
+    url = "https://cloud.tsinghua.edu.cn/f/171e49e5825549548bc4/?dl=1"
     url = "https://cg.cs.tsinghua.edu.cn/jittor/assets/cutlass.zip"
 
     filename = "cutlass.zip"
     fullname = os.path.join(root_folder, filename)

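Note the two live assignments this leaves behind: the second one wins, so the archive is actually fetched from the cg.cs.tsinghua.edu.cn mirror, with the cloud.tsinghua.edu.cn link sitting above it as a ready-made alternative.
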
@@ -1186,20 +1186,22 @@ make_cache_dir(os.path.join(cache_path, "tmp"))
 ck_path = os.path.join(cache_path, "checkpoints")
 make_cache_dir(ck_path)
 
-ascend_toolkit_home = os.getenv('ASCEND_TOOLKIT_HOME')
-
 # build cache_compile
 cc_flags += f" -I\"{os.path.join(jittor_path, 'src')}\" "
 cc_flags += f" -I\"{os.path.join(jittor_path, 'extern')}\" "
-cc_flags += f" -I\"{os.path.join(ascend_toolkit_home, 'include')}\" "
-cc_flags += f" -I\"{os.path.join(ascend_toolkit_home, 'include/acl')}\" "
-cc_flags += f" -I\"{os.path.join(ascend_toolkit_home, 'include/aclnn')}\" "
-cc_flags += f" -I\"{os.path.join(ascend_toolkit_home, 'include/aclnnop')}\" "
-cc_flags += f" -L\"{os.path.join(ascend_toolkit_home, 'lib64')}\" "
-cc_flags += " -llibascendcl "
-cc_flags += " -llibnnopbase "
-cc_flags += " -llibopapi "
+
+ascend_toolkit_home = os.getenv('ASCEND_TOOLKIT_HOME')
+
+if ascend_toolkit_home:
+    cc_flags += f" -I\"{os.path.join(ascend_toolkit_home, 'include')}\" "
+    cc_flags += f" -I\"{os.path.join(ascend_toolkit_home, 'include/acl')}\" "
+    cc_flags += f" -I\"{os.path.join(ascend_toolkit_home, 'include/aclnn')}\" "
+    cc_flags += f" -I\"{os.path.join(ascend_toolkit_home, 'include/aclnnop')}\" "
+    cc_flags += f" -L\"{os.path.join(ascend_toolkit_home, 'lib64')}\" "
+    cc_flags += " -llibascendcl "
+    cc_flags += " -llibnnopbase "
+    cc_flags += " -llibopapi "
 
 cc_flags += py_include
 
 check_cache_compile()

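The point of the new guard: os.getenv returns None when ASCEND_TOOLKIT_HOME is unset, so the old unconditional os.path.join(ascend_toolkit_home, 'include') raised a TypeError on any machine without a CANN install. With the if, CUDA-only and CPU-only builds skip the ACL flags entirely; the variable is typically exported by the CANN toolkit's set_env.sh. One caveat worth flagging: ld prepends lib to -l arguments, so -llibascendcl asks the linker for liblibascendcl.so. Unless lib-prefixed names exist on the target system, the CANN runtime libraries (libascendcl.so, libnnopbase.so, libopapi.so) would normally be linked as -lascendcl, -lnnopbase, -lopapi.
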
@@ -6,6 +6,7 @@
 // ***************************************************************
 #pragma once
 #include "common.h"
+#include "aclnn/aclnn.h"
 #include <acl/acl.h>
 
 std::string acl_error_to_string(aclError error);

@@ -5,6 +5,7 @@
 #include <Python.h>
 #include <pystate.h>
 #include "utils.h"
+#include "aclnn/aclnn.h"
 
 namespace jittor
 {

@@ -6,6 +6,7 @@
 #include <Python.h>
 #include <pystate.h>
 #include "misc/nano_string.h"
+#include "aclnn/aclnn.h"
 
 namespace jittor
 {

@@ -8,7 +8,6 @@
 #include <memory>
 #include <functional>
 #include "utils/log.h"
-#include "../extern/acl/aclnn/aclnn.h"
 
 #define JIT_TEST(name) extern void jit_test_ ## name ()
 void expect_error(std::function<void()> func);

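Taken together, the four header hunks move the aclnn include to where it is used: the ACL-facing headers now pull in aclnn/aclnn.h themselves, and the fragile relative path ../extern/acl/aclnn/aclnn.h drops out of the test header, so any translation unit that includes these headers sees the aclnn declarations without knowing where the extern tree lives.
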
@@ -47,7 +47,7 @@ Init() {
     if (!get_device_count()) return;
     checkCudaErrors(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
     checkCudaErrors(cudaEventCreate(&event, cudaEventDisableTiming));
-    stream = aclstream;
+    // stream = aclstream;
 }
 ~Init() {
     if (!get_device_count()) return;

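Commenting the assignment out keeps the CUDA path on its own non-blocking stream rather than aliasing it to the ACL stream; overwriting stream immediately after creating it also dropped the handle to the freshly created stream. For orientation, a minimal sketch of the RAII pattern this constructor follows (struct and member names here are illustrative, not the commit's code):

    #include <cuda_runtime.h>

    // Create the stream and event once at startup, release them at shutdown.
    struct StreamInit {
        cudaStream_t stream = nullptr;
        cudaEvent_t  event  = nullptr;
        StreamInit() {
            // Non-blocking: work queued here never implicitly syncs with stream 0.
            cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking);
            // Timing disabled: the event is used only for ordering, which is cheaper.
            cudaEventCreateWithFlags(&event, cudaEventDisableTiming);
        }
        ~StreamInit() {
            cudaEventDestroy(event);
            cudaStreamDestroy(stream);
        }
    };
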
@@ -123,11 +123,11 @@ void FetchOp::run() {
         new (&allocation) Allocation(&cuda_dual_allocator, v->size);
         // mostly device to device
         #if IS_CUDA
-        // checkCudaErrors(cudaMemcpyAsync(
-        //     allocation.ptr, v->mem_ptr, v->size, cudaMemcpyDefault, stream));
-        checkCudaErrors(cudaMemcpyAsync(
-            allocation.ptr, v->size, v->mem_ptr, v->size, cudaMemcpyDefault, aclstream));
-        checkCudaErrors(aclrtSynchronizeStream(aclstream));
+        checkCudaErrors(cudaMemcpyAsync(
+            allocation.ptr, v->mem_ptr, v->size, cudaMemcpyDefault, stream));
+        // checkCudaErrors(cudaMemcpyAsync(
+        //     allocation.ptr, v->size, v->mem_ptr, v->size, cudaMemcpyDefault, aclstream));
+        // checkCudaErrors(aclrtSynchronizeStream(aclstream));
         #else
         checkCudaErrors(cudaMemcpyAsync(
             allocation.ptr, v->mem_ptr, v->size, cudaMemcpyDeviceToDevice, stream));

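This restores the real five-argument cudaMemcpyAsync(dst, src, count, kind, stream) under IS_CUDA. The six-argument form being commented out follows ACL's convention, where aclrtMemcpyAsync takes an extra destMax capacity argument after dst; presumably cudaMemcpyAsync was remapped onto the ACL runtime in the ACL build, which is how the six-argument call ever compiled. A hedged sketch of a wrapper that hides the difference (the IS_ACL macro and dev_memcpy_async are hypothetical names, not part of this commit):

    #ifdef IS_ACL
    #include <acl/acl.h>
    // ACL: aclrtMemcpyAsync(dst, destMax, src, count, kind, stream)
    static inline aclError dev_memcpy_async(void* dst, const void* src,
                                            size_t size, aclrtStream stream) {
        return aclrtMemcpyAsync(dst, size, src, size,
                                ACL_MEMCPY_DEVICE_TO_DEVICE, stream);
    }
    #else
    #include <cuda_runtime.h>
    // CUDA: cudaMemcpyAsync(dst, src, count, kind, stream)
    static inline cudaError_t dev_memcpy_async(void* dst, const void* src,
                                               size_t size, cudaStream_t stream) {
        return cudaMemcpyAsync(dst, src, size, cudaMemcpyDefault, stream);
    }
    #endif
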
@@ -135,11 +135,11 @@ void FetchOp::run() {
         auto host_ptr = cuda_dual_allocator.get_dual_allocation(
             allocation.allocation).host_ptr;
         // device to host
-        // checkCudaErrors(cudaMemcpyAsync(
-        //     host_ptr, allocation.ptr, v->size, cudaMemcpyDeviceToHost, stream));
-        checkCudaErrors(aclrtMemcpyAsync(
-            host_ptr, v->size, allocation.ptr, v->size, cudaMemcpyDeviceToHost, aclstream));
-        checkCudaErrors(aclrtSynchronizeStream(aclstream));
+        checkCudaErrors(cudaMemcpyAsync(
+            host_ptr, allocation.ptr, v->size, cudaMemcpyDeviceToHost, stream));
+        // checkCudaErrors(aclrtMemcpyAsync(
+        //     host_ptr, v->size, allocation.ptr, v->size, cudaMemcpyDeviceToHost, aclstream));
+        // checkCudaErrors(aclrtSynchronizeStream(aclstream));
         allocation.ptr = host_ptr;
         has_cuda_memcpy = true;
     } else

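A behavioral note on this last hunk: the ACL path called aclrtSynchronizeStream right after the copy, so host_ptr was guaranteed ready by the time the call returned. The restored CUDA path leaves the device-to-host copy in flight; judging by the has_cuda_memcpy flag set just below, completion is presumably enforced later by an event or stream sync elsewhere in the fetcher, which is safe as long as nothing reads host_ptr before that sync.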