forked from OSchip/llvm-project
[AMDGPU][HIP] Improve opt-level handling
Summary: The HIP toolchain invokes `llc` without an explicit opt-level, meaning it always uses the default (-O2). This makes it impossible to use -O1, for example. The HIP toolchain also coerces -Os/-Oz to -O2 even when invoking opt, and it coerces -Og to -O2 rather than -O1. Forward the opt-level to `llc` as well as `opt`, and only coerce levels where it is required. Subscribers: cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D70987
This commit is contained in:
parent
3c6b5d3674
commit
d96ea47c75
|
|
@ -62,6 +62,34 @@ static const char *getOutputFileName(Compilation &C, StringRef Base,
|
|||
}
|
||||
return OutputFileName;
|
||||
}
|
||||
|
||||
// Forwards the optimization level selected on the clang command line to a
// downstream tool by appending a single "-O<level>" argument to CmdArgs.
//
// Args    - the parsed driver arguments; only the last option in
//           options::OPT_O_Group is consulted.
// CmdArgs - the argument list being assembled for the tool.
// IsLlc   - true when the tool is llc, in which case the size levels
//           (-Os/-Oz) are lowered to -O2; otherwise they pass through as-is.
//
// Nothing is appended when no option from the -O group is present.
static void addOptLevelArgs(const llvm::opt::ArgList &Args,
                            llvm::opt::ArgStringList &CmdArgs,
                            bool IsLlc = false) {
  Arg *OptFlag = Args.getLastArg(options::OPT_O_Group);
  if (!OptFlag)
    return;

  const auto &Opt = OptFlag->getOption();
  StringRef Level;
  if (Opt.matches(options::OPT_O4) || Opt.matches(options::OPT_Ofast)) {
    Level = "3";
  } else if (Opt.matches(options::OPT_O0)) {
    Level = "0";
  } else if (Opt.matches(options::OPT_O)) {
    // -Os/-Oz are understood by clang and opt, whereas llc only accepts
    // -O0 through -O3, so for llc they become -O2.
    const char *SizeLevel = IsLlc ? "2" : "s";
    const char *MinSizeLevel = IsLlc ? "2" : "z";
    // -Og exists only in clang, where it means -O1, so it is forwarded as -O1.
    // Any value not listed here falls back to -O2.
    Level = llvm::StringSwitch<const char *>(OptFlag->getValue())
                .Case("1", "1")
                .Case("2", "2")
                .Case("3", "3")
                .Case("s", SizeLevel)
                .Case("z", MinSizeLevel)
                .Case("g", "1")
                .Default("2");
  } else {
    // Any other member of the -O group is forwarded as -O3.
    Level = "3";
  }

  CmdArgs.push_back(Args.MakeArgString("-O" + Level));
}
|
||||
} // namespace
|
||||
|
||||
const char *AMDGCN::Linker::constructLLVMLinkCommand(
|
||||
|
|
@ -93,25 +121,7 @@ const char *AMDGCN::Linker::constructOptCommand(
|
|||
// The input to opt is the output from llvm-link.
|
||||
OptArgs.push_back(InputFileName);
|
||||
// Pass optimization arg to opt.
|
||||
if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
|
||||
StringRef OOpt = "3";
|
||||
if (A->getOption().matches(options::OPT_O4) ||
|
||||
A->getOption().matches(options::OPT_Ofast))
|
||||
OOpt = "3";
|
||||
else if (A->getOption().matches(options::OPT_O0))
|
||||
OOpt = "0";
|
||||
else if (A->getOption().matches(options::OPT_O)) {
|
||||
// -Os, -Oz, and -O(anything else) map to -O2
|
||||
OOpt = llvm::StringSwitch<const char *>(A->getValue())
|
||||
.Case("1", "1")
|
||||
.Case("2", "2")
|
||||
.Case("3", "3")
|
||||
.Case("s", "2")
|
||||
.Case("z", "2")
|
||||
.Default("2");
|
||||
}
|
||||
OptArgs.push_back(Args.MakeArgString("-O" + OOpt));
|
||||
}
|
||||
addOptLevelArgs(Args, OptArgs);
|
||||
OptArgs.push_back("-mtriple=amdgcn-amd-amdhsa");
|
||||
OptArgs.push_back(Args.MakeArgString("-mcpu=" + SubArchName));
|
||||
|
||||
|
|
@ -136,10 +146,15 @@ const char *AMDGCN::Linker::constructLlcCommand(
|
|||
llvm::StringRef OutputFilePrefix, const char *InputFileName,
|
||||
bool OutputIsAsm) const {
|
||||
// Construct llc command.
|
||||
ArgStringList LlcArgs{
|
||||
InputFileName, "-mtriple=amdgcn-amd-amdhsa",
|
||||
Args.MakeArgString(Twine("-filetype=") + (OutputIsAsm ? "asm" : "obj")),
|
||||
Args.MakeArgString("-mcpu=" + SubArchName)};
|
||||
ArgStringList LlcArgs;
|
||||
// The input to llc is the output from opt.
|
||||
LlcArgs.push_back(InputFileName);
|
||||
// Pass optimization arg to llc.
|
||||
addOptLevelArgs(Args, LlcArgs, /*IsLlc=*/true);
|
||||
LlcArgs.push_back("-mtriple=amdgcn-amd-amdhsa");
|
||||
LlcArgs.push_back(Args.MakeArgString("-mcpu=" + SubArchName));
|
||||
LlcArgs.push_back(
|
||||
Args.MakeArgString(Twine("-filetype=") + (OutputIsAsm ? "asm" : "obj")));
|
||||
|
||||
// Extract all the -m options
|
||||
std::vector<llvm::StringRef> Features;
|
||||
|
|
|
|||
|
|
@ -18,8 +18,9 @@
|
|||
// CHECK-SAME: "-o" [[OPT_803_BC:".*-gfx803-optimized.*bc"]]
|
||||
|
||||
// CHECK: [[LLC: ".*llc"]] [[OPT_803_BC]]
|
||||
// CHECK-SAME: "-mtriple=amdgcn-amd-amdhsa" "-filetype=obj"
|
||||
// CHECK-SAME: "-mtriple=amdgcn-amd-amdhsa"
|
||||
// CHECK-SAME: {{.*}} "-mcpu=gfx803"
|
||||
// CHECK-SAME: "-filetype=obj"
|
||||
// CHECK-SAME: "-amdgpu-function-calls=0" "-o" {{".*-gfx803-.*o"}}
|
||||
|
||||
// CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
|
||||
|
|
@ -33,6 +34,7 @@
|
|||
// CHECK-SAME: "-o" [[OPT_900_BC:".*-gfx900-optimized.*bc"]]
|
||||
|
||||
// CHECK: [[LLC]] [[OPT_900_BC]]
|
||||
// CHECK-SAME: "-mtriple=amdgcn-amd-amdhsa" "-filetype=obj"
|
||||
// CHECK-SAME: "-mtriple=amdgcn-amd-amdhsa"
|
||||
// CHECK-SAME: {{.*}} "-mcpu=gfx900"
|
||||
// CHECK-SAME: "-filetype=obj"
|
||||
// CHECK-SAME: "-amdgpu-function-calls=0" "-o" {{".*-gfx900-.*o"}}
|
||||
|
|
|
|||
|
|
@ -34,8 +34,9 @@
|
|||
// CHECK-SAME: "-o" [[OPT_BC_DEV_A_803:".*-gfx803-optimized.*bc"]]
|
||||
|
||||
// CHECK: [[LLC: ".*llc"]] [[OPT_BC_DEV_A_803]] "-mtriple=amdgcn-amd-amdhsa"
|
||||
// CHECK-SAME: "-mcpu=gfx803"
|
||||
// CHECK-SAME: "-filetype=obj"
|
||||
// CHECK-SAME: "-mcpu=gfx803" "-o" [[OBJ_DEV_A_803:".*-gfx803-.*o"]]
|
||||
// CHECK-SAME: "-o" [[OBJ_DEV_A_803:".*-gfx803-.*o"]]
|
||||
|
||||
// CHECK: [[LLD: ".*lld"]] "-flavor" "gnu" "-shared"
|
||||
// CHECK-SAME: "-o" "[[IMG_DEV_A_803:.*out]]" [[OBJ_DEV_A_803]]
|
||||
|
|
@ -62,8 +63,9 @@
|
|||
// CHECK-SAME: "-o" [[OPT_BC_DEV_A_900:".*-gfx900-optimized.*bc"]]
|
||||
|
||||
// CHECK: [[LLC: ".*llc"]] [[OPT_BC_DEV_A_900]] "-mtriple=amdgcn-amd-amdhsa"
|
||||
// CHECK-SAME: "-mcpu=gfx900"
|
||||
// CHECK-SAME: "-filetype=obj"
|
||||
// CHECK-SAME: "-mcpu=gfx900" "-o" [[OBJ_DEV_A_900:".*-gfx900-.*o"]]
|
||||
// CHECK-SAME: "-o" [[OBJ_DEV_A_900:".*-gfx900-.*o"]]
|
||||
|
||||
// CHECK: [[LLD: ".*lld"]] "-flavor" "gnu" "-shared"
|
||||
// CHECK-SAME: "-o" "[[IMG_DEV_A_900:.*out]]" [[OBJ_DEV_A_900]]
|
||||
|
|
@ -106,8 +108,9 @@
|
|||
// CHECK-SAME: "-o" [[OPT_BC_DEV_B_803:".*-gfx803-optimized.*bc"]]
|
||||
|
||||
// CHECK: [[LLC: ".*llc"]] [[OPT_BC_DEV_B_803]] "-mtriple=amdgcn-amd-amdhsa"
|
||||
// CHECK-SAME: "-mcpu=gfx803"
|
||||
// CHECK-SAME: "-filetype=obj"
|
||||
// CHECK-SAME: "-mcpu=gfx803" "-o" [[OBJ_DEV_B_803:".*-gfx803-.*o"]]
|
||||
// CHECK-SAME: "-o" [[OBJ_DEV_B_803:".*-gfx803-.*o"]]
|
||||
|
||||
// CHECK: [[LLD: ".*lld"]] "-flavor" "gnu" "-shared"
|
||||
// CHECK-SAME: "-o" "[[IMG_DEV_B_803:.*out]]" [[OBJ_DEV_B_803]]
|
||||
|
|
@ -134,8 +137,9 @@
|
|||
// CHECK-SAME: "-o" [[OPT_BC_DEV_B_900:".*-gfx900-optimized.*bc"]]
|
||||
|
||||
// CHECK: [[LLC: ".*llc"]] [[OPT_BC_DEV_B_900]] "-mtriple=amdgcn-amd-amdhsa"
|
||||
// CHECK-SAME: "-mcpu=gfx900"
|
||||
// CHECK-SAME: "-filetype=obj"
|
||||
// CHECK-SAME: "-mcpu=gfx900" "-o" [[OBJ_DEV_B_900:".*-gfx900-.*o"]]
|
||||
// CHECK-SAME: "-o" [[OBJ_DEV_B_900:".*-gfx900-.*o"]]
|
||||
|
||||
// CHECK: [[LLD: ".*lld"]] "-flavor" "gnu" "-shared"
|
||||
// CHECK-SAME: "-o" "[[IMG_DEV_B_900:.*out]]" [[OBJ_DEV_B_900]]
|
||||
|
|
|
|||
|
|
@ -0,0 +1,101 @@
|
|||
// REQUIRES: clang-driver
|
||||
// REQUIRES: x86-registered-target
|
||||
// REQUIRES: amdgpu-registered-target
|
||||
|
||||
// RUN: %clang -### \
|
||||
// RUN: -target x86_64-unknown-linux-gnu \
|
||||
// RUN: -x hip --cuda-gpu-arch=gfx900 \
|
||||
// RUN: -c -nogpulib \
|
||||
// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
|
||||
// RUN: 2>&1 | FileCheck --check-prefixes=ALL,DEFAULT %s
|
||||
|
||||
// RUN: %clang -### -O0 \
|
||||
// RUN: -target x86_64-unknown-linux-gnu \
|
||||
// RUN: -x hip --cuda-gpu-arch=gfx900 \
|
||||
// RUN: -c -nogpulib \
|
||||
// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
|
||||
// RUN: 2>&1 | FileCheck --check-prefixes=ALL,O0 %s
|
||||
|
||||
// RUN: %clang -### -O1 \
|
||||
// RUN: -target x86_64-unknown-linux-gnu \
|
||||
// RUN: -x hip --cuda-gpu-arch=gfx900 \
|
||||
// RUN: -c -nogpulib \
|
||||
// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
|
||||
// RUN: 2>&1 | FileCheck --check-prefixes=ALL,O1 %s
|
||||
|
||||
// RUN: %clang -### -O2 \
|
||||
// RUN: -target x86_64-unknown-linux-gnu \
|
||||
// RUN: -x hip --cuda-gpu-arch=gfx900 \
|
||||
// RUN: -c -nogpulib \
|
||||
// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
|
||||
// RUN: 2>&1 | FileCheck --check-prefixes=ALL,O2 %s
|
||||
|
||||
// RUN: %clang -### -O3 \
|
||||
// RUN: -target x86_64-unknown-linux-gnu \
|
||||
// RUN: -x hip --cuda-gpu-arch=gfx900 \
|
||||
// RUN: -c -nogpulib \
|
||||
// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
|
||||
// RUN: 2>&1 | FileCheck --check-prefixes=ALL,O3 %s
|
||||
|
||||
// RUN: %clang -### -Os \
|
||||
// RUN: -target x86_64-unknown-linux-gnu \
|
||||
// RUN: -x hip --cuda-gpu-arch=gfx900 \
|
||||
// RUN: -c -nogpulib \
|
||||
// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
|
||||
// RUN: 2>&1 | FileCheck --check-prefixes=ALL,Os %s
|
||||
|
||||
// RUN: %clang -### -Oz \
|
||||
// RUN: -target x86_64-unknown-linux-gnu \
|
||||
// RUN: -x hip --cuda-gpu-arch=gfx900 \
|
||||
// RUN: -c -nogpulib \
|
||||
// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
|
||||
// RUN: 2>&1 | FileCheck --check-prefixes=ALL,Oz %s
|
||||
|
||||
// RUN: %clang -### -Og \
|
||||
// RUN: -target x86_64-unknown-linux-gnu \
|
||||
// RUN: -x hip --cuda-gpu-arch=gfx900 \
|
||||
// RUN: -c -nogpulib \
|
||||
// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
|
||||
// RUN: 2>&1 | FileCheck --check-prefixes=ALL,Og %s
|
||||
|
||||
// ALL: "{{.*}}clang{{.*}}" "-cc1" "-triple" "amdgcn-amd-amdhsa"
|
||||
// DEFAULT-NOT: "-O{{.}}"
|
||||
// O0-SAME: "-O0"
|
||||
// O1-SAME: "-O1"
|
||||
// O2-SAME: "-O2"
|
||||
// O3-SAME: "-O3"
|
||||
// Os-SAME: "-Os"
|
||||
// Oz-SAME: "-Oz"
|
||||
// Og-SAME: "-Og"
|
||||
|
||||
// ALL: "{{.*}}opt"
|
||||
// DEFAULT-NOT: "-O{{.}}"
|
||||
// O0-SAME: "-O0"
|
||||
// O1-SAME: "-O1"
|
||||
// O2-SAME: "-O2"
|
||||
// O3-SAME: "-O3"
|
||||
// Os-SAME: "-Os"
|
||||
// Oz-SAME: "-Oz"
|
||||
// Og-SAME: "-O1"
|
||||
// ALL-SAME: "-mtriple=amdgcn-amd-amdhsa"
|
||||
|
||||
// ALL: "{{.*}}llc"
|
||||
// DEFAULT-NOT: "-O{{.}}"
|
||||
// O0-SAME: "-O0"
|
||||
// O1-SAME: "-O1"
|
||||
// O2-SAME: "-O2"
|
||||
// O3-SAME: "-O3"
|
||||
// Os-SAME: "-O2"
|
||||
// Oz-SAME: "-O2"
|
||||
// Og-SAME: "-O1"
|
||||
// ALL-SAME: "-mtriple=amdgcn-amd-amdhsa"
|
||||
|
||||
// ALL: "{{.*}}clang{{.*}}" "-cc1" "-triple" "x86_64-unknown-linux-gnu"
|
||||
// DEFAULT-NOT: "-O{{.}}"
|
||||
// O0-SAME: "-O0"
|
||||
// O1-SAME: "-O1"
|
||||
// O2-SAME: "-O2"
|
||||
// O3-SAME: "-O3"
|
||||
// Os-SAME: "-Os"
|
||||
// Oz-SAME: "-Oz"
|
||||
// Og-SAME: "-Og"
|
||||
|
|
@ -40,13 +40,14 @@
|
|||
// CHECK-SAME: "-o" [[OPT_BC_DEV1:".*-gfx803-optimized.*bc"]]
|
||||
|
||||
// CHECK: [[LLC: ".*llc"]] [[OPT_BC_DEV1]] "-mtriple=amdgcn-amd-amdhsa"
|
||||
// CHECK-SAME: "-mcpu=gfx803"
|
||||
// CHECK-SAME: "-filetype=obj"
|
||||
// CHECK-SAME: "-mcpu=gfx803" "-o" [[OBJ_DEV1:".*-gfx803-.*o"]]
|
||||
// CHECK-SAME: "-o" [[OBJ_DEV1:".*-gfx803-.*o"]]
|
||||
|
||||
// CHECK: [[LLD: ".*lld"]] "-flavor" "gnu" "-shared"
|
||||
// CHECK-SAME: "-o" "[[IMG_DEV1:.*out]]" [[OBJ_DEV1]]
|
||||
|
||||
// CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
|
||||
// CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
|
||||
// CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
|
||||
// CHECK-SAME: "-emit-llvm-bc"
|
||||
// CHECK-SAME: {{.*}} "-main-file-name" "a.cu" {{.*}} "-target-cpu" "gfx900"
|
||||
|
|
@ -72,8 +73,9 @@
|
|||
// CHECK-SAME: "-o" [[OPT_BC_DEV2:".*-gfx900-optimized.*bc"]]
|
||||
|
||||
// CHECK: [[LLC]] [[OPT_BC_DEV2]] "-mtriple=amdgcn-amd-amdhsa"
|
||||
// CHECK-SAME: "-mcpu=gfx900"
|
||||
// CHECK-SAME: "-filetype=obj"
|
||||
// CHECK-SAME: "-mcpu=gfx900" "-o" [[OBJ_DEV2:".*-gfx900-.*o"]]
|
||||
// CHECK-SAME: "-o" [[OBJ_DEV2:".*-gfx900-.*o"]]
|
||||
|
||||
// CHECK: [[LLD]] "-flavor" "gnu" "-shared"
|
||||
// CHECK-SAME: "-o" "[[IMG_DEV2:.*out]]" [[OBJ_DEV2]]
|
||||
|
|
|
|||
Loading…
Reference in New Issue