Revert "[libomptarget] Build a minimal deviceRTL for amdgcn"
This reverts commit 877ffa716f because it
breaks the build.
This commit is contained in:
parent
abe8de29c4
commit
02b9c5d963
|
|
@ -6,9 +6,8 @@
|
|||
#
|
||||
# ##===----------------------------------------------------------------------===##
|
||||
#
|
||||
# Build a device RTL for each available machine.
|
||||
# Build a device RTL for each available machine available.
|
||||
#
|
||||
##===----------------------------------------------------------------------===##
|
||||
|
||||
add_subdirectory(amdgcn)
|
||||
add_subdirectory(nvptx)
|
||||
|
|
|
|||
|
|
@ -1,136 +0,0 @@
|
|||
##===----------------------------------------------------------------------===##
|
||||
#
|
||||
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
# See https://llvm.org/LICENSE.txt for license information.
|
||||
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
#
|
||||
##===----------------------------------------------------------------------===##
|
||||
#
|
||||
# Build the AMDGCN Device RTL if the ROCM tools are available
|
||||
#
|
||||
##===----------------------------------------------------------------------===##
|
||||
|
||||
find_package(LLVM QUIET CONFIG
|
||||
PATHS
|
||||
$ENV{AOMP}
|
||||
$ENV{HOME}/rocm/aomp
|
||||
/opt/rocm/aomp
|
||||
/usr/lib/rocm/aomp
|
||||
${LIBOMPTARGET_NVPTX_CUDA_COMPILER_DIR}
|
||||
${LIBOMPTARGET_NVPTX_CUDA_LINKER_DIR}
|
||||
${CMAKE_CXX_COMPILER_DIR}
|
||||
NO_DEFAULT_PATH)
|
||||
|
||||
if (LLVM_DIR)
|
||||
libomptarget_say("Found LLVM ${LLVM_PACKAGE_VERSION}. Configure: ${LLVM_DIR}/LLVMConfig.cmake")
|
||||
else()
|
||||
libomptarget_say("Not building AMDGCN device RTL: AOMP not found")
|
||||
return()
|
||||
endif()
|
||||
|
||||
set(AOMP_INSTALL_PREFIX ${LLVM_INSTALL_PREFIX})
|
||||
|
||||
if (AOMP_INSTALL_PREFIX)
|
||||
set(AOMP_BINDIR ${AOMP_INSTALL_PREFIX}/bin)
|
||||
else()
|
||||
set(AOMP_BINDIR ${LLVM_BUILD_BINARY_DIR}/bin)
|
||||
endif()
|
||||
|
||||
libomptarget_say("Building AMDGCN device RTL. LLVM_COMPILER_PATH=${AOMP_BINDIR}")
|
||||
|
||||
project(omptarget-amdgcn)
|
||||
|
||||
add_custom_target(omptarget-amdgcn ALL)
|
||||
|
||||
#optimization level
|
||||
set(optimization_level 2)
|
||||
|
||||
# Activate RTL message dumps if requested by the user.
|
||||
if(LIBOMPTARGET_NVPTX_DEBUG)
|
||||
set(CUDA_DEBUG -DOMPTARGET_NVPTX_DEBUG=-1)
|
||||
endif()
|
||||
|
||||
get_filename_component(devicertl_base_directory
|
||||
${CMAKE_CURRENT_SOURCE_DIR}
|
||||
DIRECTORY)
|
||||
|
||||
set(cuda_sources
|
||||
${devicertl_base_directory}/common/src/cancel.cu
|
||||
${devicertl_base_directory}/common/src/critical.cu)
|
||||
|
||||
set(h_files
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src/amdgcn_interface.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src/device_environment.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src/target_impl.h
|
||||
${devicertl_base_directory}/common/debug.h
|
||||
${devicertl_base_directory}/common/state-queue.h
|
||||
${devicertl_base_directory}/common/state-queuei.h
|
||||
${devicertl_base_directory}/common/support.h)
|
||||
|
||||
# for both in-tree and out-of-tree build
|
||||
if (NOT CMAKE_ARCHIVE_OUTPUT_DIRECTORY)
|
||||
set(OUTPUTDIR ${CMAKE_CURRENT_BINARY_DIR})
|
||||
else()
|
||||
set(OUTPUTDIR ${CMAKE_ARCHIVE_OUTPUT_DIRECTORY})
|
||||
endif()
|
||||
|
||||
# create libraries
|
||||
set(mcpus gfx700 gfx701 gfx801 gfx803 gfx900)
|
||||
if (DEFINED LIBOMPTARGET_AMDGCN_GFXLIST)
|
||||
set(mcpus ${LIBOMPTARGET_AMDGCN_GFXLIST})
|
||||
endif()
|
||||
|
||||
macro(add_cuda_bc_library)
|
||||
set(cu_cmd ${AOMP_BINDIR}/clang++
|
||||
-std=c++11
|
||||
-fcuda-rdc
|
||||
-fvisibility=default
|
||||
--cuda-device-only
|
||||
-Wno-unused-value
|
||||
-x hip
|
||||
-O${optimization_level}
|
||||
--cuda-gpu-arch=${mcpu}
|
||||
${CUDA_DEBUG}
|
||||
-I${CMAKE_CURRENT_SOURCE_DIR}/src
|
||||
-I${devicertl_base_directory})
|
||||
|
||||
set(bc1_files)
|
||||
|
||||
foreach(file ${ARGN})
|
||||
get_filename_component(fname ${file} NAME_WE)
|
||||
set(bc1_filename ${fname}.${mcpu}.bc)
|
||||
|
||||
add_custom_command(
|
||||
OUTPUT ${bc1_filename}
|
||||
COMMAND ${cu_cmd} ${file} -o ${bc1_filename}
|
||||
DEPENDS ${file} ${h_files})
|
||||
|
||||
list(APPEND bc1_files ${bc1_filename})
|
||||
endforeach()
|
||||
|
||||
add_custom_command(
|
||||
OUTPUT linkout.cuda.${mcpu}.bc
|
||||
COMMAND ${AOMP_BINDIR}/llvm-link ${bc1_files} -o linkout.cuda.${mcpu}.bc
|
||||
DEPENDS ${bc1_files})
|
||||
|
||||
list(APPEND bc_files linkout.cuda.${mcpu}.bc)
|
||||
endmacro()
|
||||
|
||||
set(libname "omptarget-amdgcn")
|
||||
|
||||
foreach(mcpu ${mcpus})
|
||||
set(bc_files)
|
||||
add_cuda_bc_library(${cuda_sources})
|
||||
|
||||
set(bc_libname lib${libname}-${mcpu}.bc)
|
||||
add_custom_command(
|
||||
OUTPUT ${bc_libname}
|
||||
COMMAND ${AOMP_BINDIR}/llvm-link ${bc_files} | ${AOMP_BINDIR}/opt --always-inline -o ${OUTPUTDIR}/${bc_libname}
|
||||
DEPENDS ${bc_files})
|
||||
|
||||
add_custom_target(lib${libname}-${mcpu} ALL DEPENDS ${bc_libname})
|
||||
|
||||
install(FILES ${OUTPUTDIR}/${bc_libname}
|
||||
DESTINATION "${OPENMP_INSTALL_LIBDIR}/libdevice"
|
||||
)
|
||||
endforeach()
|
||||
|
|
@ -1,27 +0,0 @@
|
|||
//===---- device_environment.h - OpenMP GPU device environment --- CUDA -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Global device environment
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef _OMPTARGET_DEVICE_ENVIRONMENT_H_
|
||||
#define _OMPTARGET_DEVICE_ENVIRONMENT_H_
|
||||
|
||||
#include "target_impl.h"
|
||||
|
||||
struct omptarget_device_environmentTy {
|
||||
int32_t debug_level; // gets value of envvar LIBOMPTARGET_DEVICE_RTL_DEBUG
|
||||
// only useful for Debug build of deviceRTLs
|
||||
int32_t num_devices; // gets number of active offload devices
|
||||
int32_t device_num; // gets a value 0 to num_devices-1
|
||||
};
|
||||
|
||||
extern DEVICE omptarget_device_environmentTy omptarget_device_environment;
|
||||
|
||||
#endif
|
||||
|
|
@ -72,6 +72,8 @@ EXTERN uint64_t __lanemask_lt();
|
|||
// thread's lane number in the warp
|
||||
EXTERN uint64_t __lanemask_gt();
|
||||
|
||||
EXTERN void llvm_amdgcn_s_barrier();
|
||||
|
||||
// CU id
|
||||
EXTERN unsigned __smid();
|
||||
|
||||
|
|
@ -99,21 +101,25 @@ INLINE uint32_t __kmpc_impl_smid() {
|
|||
return __smid();
|
||||
}
|
||||
|
||||
INLINE uint64_t __kmpc_impl_ffs(uint64_t x) { return __builtin_ffsl(x); }
|
||||
INLINE uint64_t __kmpc_impl_ffs(uint64_t x) { return __ffsll(x); }
|
||||
|
||||
INLINE uint64_t __kmpc_impl_popc(uint64_t x) { return __builtin_popcountl(x); }
|
||||
INLINE uint64_t __kmpc_impl_popc(uint64_t x) { return __popcll(x); }
|
||||
|
||||
INLINE __kmpc_impl_lanemask_t __kmpc_impl_activemask() {
|
||||
return __ballot64(1);
|
||||
}
|
||||
|
||||
EXTERN int32_t __kmpc_impl_shfl_sync(__kmpc_impl_lanemask_t, int32_t Var,
|
||||
int32_t SrcLane);
|
||||
INLINE int32_t __kmpc_impl_shfl_sync(__kmpc_impl_lanemask_t, int32_t Var,
|
||||
int32_t SrcLane) {
|
||||
return __shfl(Var, SrcLane, WARPSIZE);
|
||||
}
|
||||
|
||||
EXTERN int32_t __kmpc_impl_shfl_down_sync(__kmpc_impl_lanemask_t, int32_t Var,
|
||||
uint32_t Delta, int32_t Width);
|
||||
INLINE int32_t __kmpc_impl_shfl_down_sync(__kmpc_impl_lanemask_t, int32_t Var,
|
||||
uint32_t Delta, int32_t Width) {
|
||||
return __shfl_down(Var, Delta, Width);
|
||||
}
|
||||
|
||||
INLINE void __kmpc_impl_syncthreads() { __builtin_amdgcn_s_barrier(); }
|
||||
INLINE void __kmpc_impl_syncthreads() { llvm_amdgcn_s_barrier(); }
|
||||
|
||||
INLINE void __kmpc_impl_named_sync(int barrier, uint32_t num_threads) {
|
||||
// we have protected the master warp from releasing from its barrier
|
||||
|
|
@ -122,15 +128,4 @@ INLINE void __kmpc_impl_named_sync(int barrier, uint32_t num_threads) {
|
|||
__builtin_amdgcn_s_barrier();
|
||||
}
|
||||
|
||||
// DEVICE versions of part of libc
|
||||
extern "C" {
|
||||
DEVICE __attribute__((noreturn)) void
|
||||
__assertfail(const char *, const char *, unsigned, const char *, size_t);
|
||||
INLINE static void __assert_fail(const char *__message, const char *__file,
|
||||
unsigned int __line, const char *__function) {
|
||||
__assertfail(__message, __file, __line, __function, sizeof(char));
|
||||
}
|
||||
DEVICE int printf(const char *, ...);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
|||
Loading…
Reference in New Issue