Add PPC FP rounding instructions fri[mnpz]

These instructions are available on the P5x (and later) and on the A2. They implement the standard floating-point rounding operations (floor, ceil, trunc, etc.). One caveat: frin (round to nearest) does not implement "ties to even", so it is used to lower nearbyint only in fast-math mode (unsafe FP math enabled). llvm-svn: 178337
2013-03-29 08:57:48 +00:00 · 2013-03-29 08:57:48 +00:00 · c20a08d25b
parent 703a9870a2
commit c20a08d25b
7 changed files with 190 additions and 30 deletions
--- a/llvm/lib/Target/PowerPC/PPC.td
+++ b/llvm/lib/Target/PowerPC/PPC.td
@ -59,6 +59,8 @@ def FeatureFSqrt     : SubtargetFeature<"fsqrt","HasFSQRT", "true",
                                        "Enable the fsqrt instruction">;
 def FeatureSTFIWX    : SubtargetFeature<"stfiwx","HasSTFIWX", "true",
                                        "Enable the stfiwx instruction">;
 // Subtarget feature "fprnd": tied to the HasFPRND subtarget flag and
 // described as enabling the frim/frin/frip/friz instructions.
 def FeatureFPRND     : SubtargetFeature<"fprnd", "HasFPRND", "true",
                                        "Enable the fri[mnpz] instructions">;
 def FeatureISEL      : SubtargetFeature<"isel","HasISEL", "true",
                                        "Enable the isel instruction">;
 def FeaturePOPCNTD   : SubtargetFeature<"popcntd","HasPOPCNTD", "true",
@ -76,7 +78,6 @@ def FeatureQPX       : SubtargetFeature<"qpx","HasQPX", "true",
 // CMPB         p6, p6x, p7        cmpb
 // DFP          p6, p6x, p7        decimal floating-point instructions
 // FLT_CVT      p7                 fcfids, fcfidu, fcfidus, fcfiduz, fctiwuz
 // FPRND        p5x, p6, p6x, p7   frim, frin, frip, friz
 // FRE          p5 through p7      fre (vs. fres, available since p3)
 // FRSQRTES     p5 through p7      frsqrtes (vs. frsqrte, available since p3)
 // LFIWAX       p6, p6x, p7        lfiwax
@ -132,14 +133,14 @@ def : ProcessorModel<"e5500", PPCE5500Model,
                   FeatureSTFIWX, FeatureBookE, FeatureISEL]>;
 def : Processor<"a2", PPCA2Itineraries,
                  [DirectiveA2, FeatureBookE, FeatureMFOCRF,
-                   FeatureFSqrt, FeatureSTFIWX, FeatureISEL,
+                   FeatureFSqrt, FeatureSTFIWX, FeatureFPRND,
-                   FeaturePOPCNTD, FeatureLDBRX, Feature64Bit
+                   FeatureISEL, FeaturePOPCNTD, FeatureLDBRX,
-               /*, Feature64BitRegs */]>;
+                   Feature64Bit /*, Feature64BitRegs */]>;
 def : Processor<"a2q", PPCA2Itineraries,
                  [DirectiveA2, FeatureBookE, FeatureMFOCRF,
-                   FeatureFSqrt, FeatureSTFIWX, FeatureISEL,
+                   FeatureFSqrt, FeatureSTFIWX, FeatureFPRND,
-                   FeaturePOPCNTD, FeatureLDBRX, Feature64Bit
+                   FeatureISEL, FeaturePOPCNTD, FeatureLDBRX,
-               /*, Feature64BitRegs */, FeatureQPX]>;
+                   Feature64Bit /*, Feature64BitRegs */, FeatureQPX]>;
 def : Processor<"pwr3", G5Itineraries,
                  [DirectivePwr3, FeatureAltivec, FeatureMFOCRF,
                   FeatureSTFIWX, Feature64Bit]>;
@ -151,19 +152,21 @@ def : Processor<"pwr5", G5Itineraries,
                   FeatureFSqrt, FeatureSTFIWX, Feature64Bit]>;
 def : Processor<"pwr5x", G5Itineraries,
                  [DirectivePwr5x, FeatureAltivec, FeatureMFOCRF,
-                   FeatureFSqrt, FeatureSTFIWX, Feature64Bit]>;
+                   FeatureFSqrt, FeatureSTFIWX, FeatureFPRND,
                   Feature64Bit]>;
 def : Processor<"pwr6", G5Itineraries,
                  [DirectivePwr6, FeatureAltivec,
                   FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
-                   Feature64Bit /*, Feature64BitRegs */]>;
+                   FeatureFPRND, Feature64Bit /*, Feature64BitRegs */]>;
 def : Processor<"pwr6x", G5Itineraries,
                  [DirectivePwr5x, FeatureAltivec, FeatureMFOCRF,
-                   FeatureFSqrt, FeatureSTFIWX, Feature64Bit]>;
+                   FeatureFSqrt, FeatureSTFIWX, FeatureFPRND,
                   Feature64Bit]>;
 def : Processor<"pwr7", G5Itineraries,
                  [DirectivePwr7, FeatureAltivec,
                   FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
-                   FeatureISEL, FeaturePOPCNTD, FeatureLDBRX,
+                   FeatureFPRND, FeatureISEL, FeaturePOPCNTD,
-                   Feature64Bit /*, Feature64BitRegs */]>;
+                   FeatureLDBRX, Feature64Bit /*, Feature64BitRegs */]>;
 def : Processor<"ppc", G3Itineraries, [Directive32]>;
 def : Processor<"ppc64", G5Itineraries,
                  [Directive64, FeatureAltivec,
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@ -158,6 +158,23 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
  // When the subtarget reports FPRND support, mark scalar floor, ceil and
  // trunc as Legal for both f64 and f32.
  if (Subtarget->hasFPRND()) {
    setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
    setOperationAction(ISD::FCEIL,  MVT::f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
    setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
    setOperationAction(ISD::FCEIL,  MVT::f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
    // frin does not implement "ties to even." Thus, this is safe only in
    // fast-math mode.
    // FNEARBYINT is made Legal only when UnsafeFPMath is set; otherwise its
    // action is left as configured before this point.
    if (TM.Options.UnsafeFPMath) {
      setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
      setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
    }
  }
  // PowerPC does not have BSWAP, CTPOP or CTTZ
  setOperationAction(ISD::BSWAP, MVT::i32  , Expand);
  setOperationAction(ISD::CTTZ , MVT::i32  , Expand);
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@ -1128,9 +1128,38 @@ let Uses = [RM] in {
  def FCTIWZ : XForm_26<63, 15, (outs F8RC:$frD), (ins F8RC:$frB),
                        "fctiwz $frD, $frB", FPGeneral,
                        [(set f64:$frD, (PPCfctiwz f64:$frB))]>;
  def FRSP   : XForm_26<63, 12, (outs F4RC:$frD), (ins F8RC:$frB),
                        "frsp $frD, $frB", FPGeneral,
                        [(set f32:$frD, (fround f64:$frB))]>;
  // The frin -> nearbyint mapping is valid only in fast-math mode.
  // That restriction is applied in PPCISelLowering.cpp, which marks
  // FNEARBYINT Legal only when UnsafeFPMath is set on an FPRND subtarget.
  // FRIND is the F8RC/f64 form and FRINS the F4RC/f32 form; both print the
  // same "frin" mnemonic and pass the same XForm_26 arguments (63, 392).
  def FRIND  : XForm_26<63, 392, (outs F8RC:$frD), (ins F8RC:$frB),
                        "frin $frD, $frB", FPGeneral,
                        [(set f64:$frD, (fnearbyint f64:$frB))]>;
  def FRINS  : XForm_26<63, 392, (outs F4RC:$frD), (ins F4RC:$frB),
                        "frin $frD, $frB", FPGeneral,
                        [(set f32:$frD, (fnearbyint f32:$frB))]>;
  // frip selects fceil. FRIPD is the F8RC/f64 form and FRIPS the F4RC/f32
  // form; both print "frip" and pass the same XForm_26 arguments (63, 456).
  def FRIPD  : XForm_26<63, 456, (outs F8RC:$frD), (ins F8RC:$frB),
                        "frip $frD, $frB", FPGeneral,
                        [(set f64:$frD, (fceil f64:$frB))]>;
  def FRIPS  : XForm_26<63, 456, (outs F4RC:$frD), (ins F4RC:$frB),
                        "frip $frD, $frB", FPGeneral,
                        [(set f32:$frD, (fceil f32:$frB))]>;
  // friz selects ftrunc. FRIZD is the F8RC/f64 form and FRIZS the F4RC/f32
  // form; both print "friz" and pass the same XForm_26 arguments (63, 424).
  def FRIZD  : XForm_26<63, 424, (outs F8RC:$frD), (ins F8RC:$frB),
                        "friz $frD, $frB", FPGeneral,
                        [(set f64:$frD, (ftrunc f64:$frB))]>;
  def FRIZS  : XForm_26<63, 424, (outs F4RC:$frD), (ins F4RC:$frB),
                        "friz $frD, $frB", FPGeneral,
                        [(set f32:$frD, (ftrunc f32:$frB))]>;
  // frim selects ffloor. FRIMD is the F8RC/f64 form and FRIMS the F4RC/f32
  // form; both print "frim" and pass the same XForm_26 arguments (63, 488).
  def FRIMD  : XForm_26<63, 488, (outs F8RC:$frD), (ins F8RC:$frB),
                        "frim $frD, $frB", FPGeneral,
                        [(set f64:$frD, (ffloor f64:$frB))]>;
  def FRIMS  : XForm_26<63, 488, (outs F4RC:$frD), (ins F4RC:$frB),
                        "frim $frD, $frB", FPGeneral,
                        [(set f32:$frD, (ffloor f32:$frB))]>;
  def FSQRT  : XForm_26<63, 22, (outs F8RC:$frD), (ins F8RC:$frB),
                        "fsqrt $frD, $frB", FPSqrt,
                        [(set f64:$frD, (fsqrt f64:$frB))]>;
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
@ -39,6 +39,7 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU,
  , HasQPX(false)
  , HasFSQRT(false)
  , HasSTFIWX(false)
  , HasFPRND(false)
  , HasISEL(false)
  , HasPOPCNTD(false)
  , HasLDBRX(false)
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.h
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h
@ -78,6 +78,7 @@ protected:
  bool HasQPX;
  bool HasFSQRT;
  bool HasSTFIWX;
  bool HasFPRND;
  bool HasISEL;
  bool HasPOPCNTD;
  bool HasLDBRX;
@ -157,6 +158,7 @@ public:
  // Specific obvious features.
  // Each accessor below returns the corresponding Has* member unchanged.
  // fsqrt instruction (PPC.td feature "fsqrt").
  bool hasFSQRT() const { return HasFSQRT; }
  // stfiwx instruction (PPC.td feature "stfiwx").
  bool hasSTFIWX() const { return HasSTFIWX; }
  // fri[mnpz] rounding instructions (PPC.td feature "fprnd").
  bool hasFPRND() const { return HasFPRND; }
  bool hasAltivec() const { return HasAltivec; }
  bool hasQPX() const { return HasQPX; }
  bool hasMFOCRF() const { return HasMFOCRF; }
--- a/llvm/test/CodeGen/PowerPC/rounding-ops.ll
+++ b/llvm/test/CodeGen/PowerPC/rounding-ops.ll
@ -0,0 +1,108 @@
 ; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s
 ; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -enable-unsafe-fp-math | FileCheck -check-prefix=CHECK-FM %s
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
 ; floorf on float: a frim instruction is expected under both RUN lines
 ; (with and without -enable-unsafe-fp-math).
 define float @test1(float %x) nounwind  {
  %call = tail call float @floorf(float %x) nounwind readnone
  ret float %call
 ; CHECK: test1:
 ; CHECK: frim 1, 1
 ; CHECK-FM: test1:
 ; CHECK-FM: frim 1, 1
 }
 declare float @floorf(float) nounwind readnone
 ; floor on double: a frim instruction is expected under both RUN lines
 ; (with and without -enable-unsafe-fp-math).
 define double @test2(double %x) nounwind  {
  %call = tail call double @floor(double %x) nounwind readnone
  ret double %call
 ; CHECK: test2:
 ; CHECK: frim 1, 1
 ; CHECK-FM: test2:
 ; CHECK-FM: frim 1, 1
 }
 declare double @floor(double) nounwind readnone
 ; nearbyintf on float: frin is expected only in the -enable-unsafe-fp-math
 ; run. In the default run no frin may appear after the test3 label (up to
 ; the next matched check).
 define float @test3(float %x) nounwind  {
  %call = tail call float @nearbyintf(float %x) nounwind readnone
  ret float %call
 ; CHECK: test3:
 ; CHECK-NOT: frin
 ; CHECK-FM: test3:
 ; CHECK-FM: frin 1, 1
 }
 declare float @nearbyintf(float) nounwind readnone
 ; nearbyint on double: frin is expected only in the -enable-unsafe-fp-math
 ; run. In the default run no frin may appear after the test4 label (up to
 ; the next matched check).
 define double @test4(double %x) nounwind  {
  %call = tail call double @nearbyint(double %x) nounwind readnone
  ret double %call
 ; CHECK: test4:
 ; CHECK-NOT: frin
 ; CHECK-FM: test4:
 ; CHECK-FM: frin 1, 1
 }
 declare double @nearbyint(double) nounwind readnone
 ; ceilf on float: a frip instruction is expected under both RUN lines
 ; (with and without -enable-unsafe-fp-math).
 define float @test5(float %x) nounwind  {
  %call = tail call float @ceilf(float %x) nounwind readnone
  ret float %call
 ; CHECK: test5:
 ; CHECK: frip 1, 1
 ; CHECK-FM: test5:
 ; CHECK-FM: frip 1, 1
 }
 declare float @ceilf(float) nounwind readnone
 ; ceil on double: a frip instruction is expected under both RUN lines
 ; (with and without -enable-unsafe-fp-math).
 define double @test6(double %x) nounwind  {
  %call = tail call double @ceil(double %x) nounwind readnone
  ret double %call
 ; CHECK: test6:
 ; CHECK: frip 1, 1
 ; CHECK-FM: test6:
 ; CHECK-FM: frip 1, 1
 }
 declare double @ceil(double) nounwind readnone
 ; truncf on float: a friz instruction is expected under both RUN lines
 ; (with and without -enable-unsafe-fp-math).
 define float @test9(float %x) nounwind  {
  %call = tail call float @truncf(float %x) nounwind readnone
  ret float %call
 ; CHECK: test9:
 ; CHECK: friz 1, 1
 ; CHECK-FM: test9:
 ; CHECK-FM: friz 1, 1
 }
 declare float @truncf(float) nounwind readnone
 ; trunc on double: a friz instruction is expected under both RUN lines
 ; (with and without -enable-unsafe-fp-math).
 define double @test10(double %x) nounwind  {
  %call = tail call double @trunc(double %x) nounwind readnone
  ret double %call
 ; CHECK: test10:
 ; CHECK: friz 1, 1
 ; CHECK-FM: test10:
 ; CHECK-FM: friz 1, 1
 }
 declare double @trunc(double) nounwind readnone
--- a/llvm/test/CodeGen/PowerPC/vec_rounding.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_rounding.ll
@ -13,8 +13,8 @@ define <2 x double> @floor_v2f64(<2 x double> %p)
  ret <2 x double> %t
 }
 ; CHECK: floor_v2f64:
-; CHECK: bl floor
+; CHECK: frim
-; CHECK: bl floor
+; CHECK: frim
 declare <4 x double> @llvm.floor.v4f64(<4 x double> %p)
 define <4 x double> @floor_v4f64(<4 x double> %p)
@ -23,10 +23,10 @@ define <4 x double> @floor_v4f64(<4 x double> %p)
  ret <4 x double> %t
 }
 ; CHECK: floor_v4f64:
-; CHECK: bl floor
+; CHECK: frim
-; CHECK: bl floor
+; CHECK: frim
-; CHECK: bl floor
+; CHECK: frim
-; CHECK: bl floor
+; CHECK: frim
 declare <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
 define <2 x double> @ceil_v2f64(<2 x double> %p)
@ -35,8 +35,8 @@ define <2 x double> @ceil_v2f64(<2 x double> %p)
  ret <2 x double> %t
 }
 ; CHECK: ceil_v2f64:
-; CHECK: bl ceil
+; CHECK: frip
-; CHECK: bl ceil
+; CHECK: frip
 declare <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
 define <4 x double> @ceil_v4f64(<4 x double> %p)
@ -45,10 +45,10 @@ define <4 x double> @ceil_v4f64(<4 x double> %p)
  ret <4 x double> %t
 }
 ; CHECK: ceil_v4f64:
-; CHECK: bl ceil
+; CHECK: frip
-; CHECK: bl ceil
+; CHECK: frip
-; CHECK: bl ceil
+; CHECK: frip
-; CHECK: bl ceil
+; CHECK: frip
 declare <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
 define <2 x double> @trunc_v2f64(<2 x double> %p)
@ -57,8 +57,8 @@ define <2 x double> @trunc_v2f64(<2 x double> %p)
  ret <2 x double> %t
 }
 ; CHECK: trunc_v2f64:
-; CHECK: bl trunc
+; CHECK: friz
-; CHECK: bl trunc
+; CHECK: friz
 declare <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
 define <4 x double> @trunc_v4f64(<4 x double> %p)
@ -67,10 +67,10 @@ define <4 x double> @trunc_v4f64(<4 x double> %p)
  ret <4 x double> %t
 }
 ; CHECK: trunc_v4f64:
-; CHECK: bl trunc
+; CHECK: friz
-; CHECK: bl trunc
+; CHECK: friz
-; CHECK: bl trunc
+; CHECK: friz
-; CHECK: bl trunc
+; CHECK: friz
 declare <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)
 define <2 x double> @nearbyint_v2f64(<2 x double> %p)