From 75818bc8f76b317ae79fdf69428ee6a2e1630fb2 Mon Sep 17 00:00:00 2001 From: Joel Jones Date: Wed, 30 Nov 2016 22:25:24 +0000 Subject: [PATCH] [AArch64] Refactor LSE support as feature separate from V8.1a support. Summary: This is preparation for ThunderX processors that have Large System Extension (LSE) atomic instructions, but not the other instructions introduced by V8.1a. This will mimic changes to GCC as described here: https://gcc.gnu.org/ml/gcc-patches/2015-06/msg00388.html LSE instructions are: the LD*/ST* atomic memory operations (e.g. LDADD/STADD), CAS*, SWP Reviewers: t.p.northover, echristo, jmolloy, rengolin Subscribers: aemerson, mehdi_amini Differential Revision: https://reviews.llvm.org/D26621 llvm-svn: 288279 --- llvm/include/llvm/Support/AArch64TargetParser.def | 7 ++++--- llvm/include/llvm/Support/TargetParser.h | 3 ++- llvm/lib/Target/AArch64/AArch64.td | 5 ++++- llvm/lib/Target/AArch64/AArch64InstrFormats.td | 11 +++++++---- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 2 ++ llvm/lib/Target/AArch64/AArch64Subtarget.h | 2 ++ .../lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp | 2 +- llvm/test/MC/AArch64/arm64v8.1-diagno-predicate.s | 8 ++++++++ llvm/test/MC/AArch64/directive-arch-negative.s | 6 ++++++ llvm/test/MC/AArch64/directive-cpu.s | 11 +++++++++++ 10 files changed, 47 insertions(+), 10 deletions(-) create mode 100644 llvm/test/MC/AArch64/arm64v8.1-diagno-predicate.s diff --git a/llvm/include/llvm/Support/AArch64TargetParser.def b/llvm/include/llvm/Support/AArch64TargetParser.def index 1171bb29f110..52fc39d40bea 100644 --- a/llvm/include/llvm/Support/AArch64TargetParser.def +++ b/llvm/include/llvm/Support/AArch64TargetParser.def @@ -21,15 +21,15 @@ AARCH64_ARCH("invalid", AK_INVALID, nullptr, nullptr, AARCH64_ARCH("armv8-a", AK_ARMV8A, "8-A", "v8", ARMBuildAttrs::CPUArch::v8_A, FK_CRYPTO_NEON_FP_ARMV8, (AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP | - AArch64::AEK_SIMD)) + AArch64::AEK_SIMD | AArch64::AEK_LSE)) AARCH64_ARCH("armv8.1-a", AK_ARMV8_1A, "8.1-A", "v8.1a", 
ARMBuildAttrs::CPUArch::v8_A, FK_CRYPTO_NEON_FP_ARMV8, (AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP | - AArch64::AEK_SIMD)) + AArch64::AEK_SIMD | AArch64::AEK_LSE)) AARCH64_ARCH("armv8.2-a", AK_ARMV8_2A, "8.2-A", "v8.2a", ARMBuildAttrs::CPUArch::v8_A, FK_CRYPTO_NEON_FP_ARMV8, (AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP | - AArch64::AEK_SIMD | AArch64::AEK_RAS)) + AArch64::AEK_SIMD | AArch64::AEK_RAS | AArch64::AEK_LSE)) #undef AARCH64_ARCH #ifndef AARCH64_ARCH_EXT_NAME @@ -39,6 +39,7 @@ AARCH64_ARCH("armv8.2-a", AK_ARMV8_2A, "8.2-A", "v8.2a", AARCH64_ARCH_EXT_NAME("invalid", AArch64::AEK_INVALID, nullptr, nullptr) AARCH64_ARCH_EXT_NAME("none", AArch64::AEK_NONE, nullptr, nullptr) AARCH64_ARCH_EXT_NAME("crc", AArch64::AEK_CRC, "+crc", "-crc") +AARCH64_ARCH_EXT_NAME("lse", AArch64::AEK_LSE, "+lse", "-lse") AARCH64_ARCH_EXT_NAME("crypto", AArch64::AEK_CRYPTO, "+crypto","-crypto") AARCH64_ARCH_EXT_NAME("fp", AArch64::AEK_FP, "+fp-armv8", "-fp-armv8") AARCH64_ARCH_EXT_NAME("simd", AArch64::AEK_SIMD, "+neon", "-neon") diff --git a/llvm/include/llvm/Support/TargetParser.h b/llvm/include/llvm/Support/TargetParser.h index ebe276a4d1a8..63aeca7f4e1e 100644 --- a/llvm/include/llvm/Support/TargetParser.h +++ b/llvm/include/llvm/Support/TargetParser.h @@ -162,7 +162,8 @@ enum ArchExtKind : unsigned { AEK_SIMD = 0x10, AEK_FP16 = 0x20, AEK_PROFILE = 0x40, - AEK_RAS = 0x80 + AEK_RAS = 0x80, + AEK_LSE = 0x100 }; StringRef getCanonicalArchName(StringRef Arch); diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td index c6afa552ecf9..65d5e8fe1f26 100644 --- a/llvm/lib/Target/AArch64/AArch64.td +++ b/llvm/lib/Target/AArch64/AArch64.td @@ -35,6 +35,9 @@ def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true", def FeatureRAS : SubtargetFeature<"ras", "HasRAS", "true", "Enable ARMv8 Reliability, Availability and Serviceability Extensions">; +def FeatureLSE : SubtargetFeature<"lse", "HasLSE", "true", + "Enable ARMv8.1 Large 
System Extension (LSE) atomic instructions">; + def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true", "Enable ARMv8 PMUv3 Performance Monitors extension">; @@ -111,7 +114,7 @@ def FeatureUseRSqrt : SubtargetFeature< // def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true", - "Support ARM v8.1a instructions", [FeatureCRC]>; + "Support ARM v8.1a instructions", [FeatureCRC, FeatureLSE]>; def HasV8_2aOps : SubtargetFeature<"v8.2a", "HasV8_2aOps", "true", "Support ARM v8.2a instructions", [HasV8_1aOps, FeatureRAS]>; diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index d6617617381e..cefdf51b50d2 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -9348,7 +9348,7 @@ class SHAInstSS opc, string asm, Intrinsic OpNode> // ST{}[] , [] // ST{} , [] -let Predicates = [HasV8_1a], mayLoad = 1, mayStore = 1, hasSideEffects = 1 in +let Predicates = [HasLSE], mayLoad = 1, mayStore = 1, hasSideEffects = 1 in class BaseCASEncoding pattern> : I { @@ -9369,6 +9369,7 @@ class BaseCASEncoding @@ -9401,7 +9402,7 @@ multiclass CompareAndSwapPair Acq, bits<1> Rel, string order> { def d : BaseCASP; } -let Predicates = [HasV8_1a] in +let Predicates = [HasLSE] in class BaseSWP : I<(outs RC:$Rt),(ins RC:$Rs, GPR64sp:$Rn), "swp" # order # size, "\t$Rs, $Rt, [$Rn]","",[]>, @@ -9424,6 +9425,7 @@ class BaseSWP let Inst{11-10} = 0b00; let Inst{9-5} = Rn; let Inst{4-0} = Rt; + let Predicates = [HasLSE]; } multiclass Swap Acq, bits<1> Rel, string order> { @@ -9433,7 +9435,7 @@ multiclass Swap Acq, bits<1> Rel, string order> { let Sz = 0b11, Acq = Acq, Rel = Rel in def d : BaseSWP; } -let Predicates = [HasV8_1a], mayLoad = 1, mayStore = 1, hasSideEffects = 1 in +let Predicates = [HasLSE], mayLoad = 1, mayStore = 1, hasSideEffects = 1 in class BaseLDOPregister : I<(outs RC:$Rt),(ins RC:$Rs, GPR64sp:$Rn), "ld" # op # order # size, "\t$Rs, $Rt, 
[$Rn]","",[]>, @@ -9456,6 +9458,7 @@ class BaseLDOPregister let Inst{11-10} = 0b00; let Inst{9-5} = Rn; let Inst{4-0} = Rt; + let Predicates = [HasLSE]; } multiclass LDOPregister opc, string op, bits<1> Acq, bits<1> Rel, @@ -9470,7 +9473,7 @@ multiclass LDOPregister opc, string op, bits<1> Acq, bits<1> Rel, def d : BaseLDOPregister; } -let Predicates = [HasV8_1a] in +let Predicates = [HasLSE] in class BaseSTOPregister : InstAlias; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 3bed50016b40..c5b95f282ea8 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -26,6 +26,8 @@ def HasCrypto : Predicate<"Subtarget->hasCrypto()">, AssemblerPredicate<"FeatureCrypto", "crypto">; def HasCRC : Predicate<"Subtarget->hasCRC()">, AssemblerPredicate<"FeatureCRC", "crc">; +def HasLSE : Predicate<"Subtarget->hasLSE()">, + AssemblerPredicate<"FeatureLSE", "lse">; def HasRAS : Predicate<"Subtarget->hasRAS()">, AssemblerPredicate<"FeatureRAS", "ras">; def HasPerfMon : Predicate<"Subtarget->hasPerfMon()">; diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h index 5428c453d581..73f63b8b9f67 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.h +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -59,6 +59,7 @@ protected: bool HasNEON = false; bool HasCrypto = false; bool HasCRC = false; + bool HasLSE = false; bool HasRAS = false; bool HasPerfMon = false; bool HasFullFP16 = false; @@ -180,6 +181,7 @@ public: bool hasNEON() const { return HasNEON; } bool hasCrypto() const { return HasCrypto; } bool hasCRC() const { return HasCRC; } + bool hasLSE() const { return HasLSE; } bool hasRAS() const { return HasRAS; } bool balanceFPOps() const { return BalanceFPOps; } bool predictableSelectIsExpensive() const { diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp 
b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 36d3abbd44d6..402b1e3e2236 100644 --- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -4137,9 +4137,9 @@ static const struct { { "fp", {AArch64::FeatureFPARMv8} }, { "simd", {AArch64::FeatureNEON} }, { "ras", {AArch64::FeatureRAS} }, + { "lse", {AArch64::FeatureLSE} }, // FIXME: Unsupported extensions - { "lse", {} }, { "pan", {} }, { "lor", {} }, { "rdma", {} }, diff --git a/llvm/test/MC/AArch64/arm64v8.1-diagno-predicate.s b/llvm/test/MC/AArch64/arm64v8.1-diagno-predicate.s new file mode 100644 index 000000000000..9540d295c8fb --- /dev/null +++ b/llvm/test/MC/AArch64/arm64v8.1-diagno-predicate.s @@ -0,0 +1,8 @@ +// RUN: not llvm-mc -triple=arm64-linux-gnu -mattr=armv8.1a -mattr=-lse < %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-ERROR < %t %s + + casa w5, w7, [x20] +// CHECK-ERROR: error: instruction requires: lse +// CHECK-ERROR-NEXT: casa w5, w7, [x20] +// CHECK-ERROR-NEXT: ^ + diff --git a/llvm/test/MC/AArch64/directive-arch-negative.s b/llvm/test/MC/AArch64/directive-arch-negative.s index 327389de0249..43ccd7927252 100644 --- a/llvm/test/MC/AArch64/directive-arch-negative.s +++ b/llvm/test/MC/AArch64/directive-arch-negative.s @@ -35,3 +35,9 @@ # CHECK: error: instruction requires: ras # CHECK: esb + + .arch armv8.1-a+nolse + casa w5, w7, [x20] + +# CHECK: error: instruction requires: lse +# CHECK: casa w5, w7, [x20] diff --git a/llvm/test/MC/AArch64/directive-cpu.s b/llvm/test/MC/AArch64/directive-cpu.s index 8e7d45337445..d645e54e470e 100644 --- a/llvm/test/MC/AArch64/directive-cpu.s +++ b/llvm/test/MC/AArch64/directive-cpu.s @@ -36,6 +36,12 @@ aesd v0.16b, v2.16b + .cpu generic+v8.1a+nolse + casa w5, w7, [x20] + + .cpu generic+v8.1a+lse + casa w5, w7, [x20] + // NOTE: the errors precede the actual output! 
The errors appear in order // though, so validate by hoisting them to the top and preservering relative // ordering @@ -56,8 +62,13 @@ // CHECK: aesd v0.16b, v2.16b // CHECK: ^ +// CHECK: error: instruction requires: lse +// CHECK: casa w5, w7, [x20] +// CHECK: ^ + // CHECK: fminnm d0, d0, d1 // CHECK: fminnm d0, d0, d1 // CHECK: addp v0.4s, v0.4s, v0.4s // CHECK: crc32cx w0, w1, x3 // CHECK: aesd v0.16b, v2.16b +// CHECK: casa w5, w7, [x20]