forked from OSchip/llvm-project
				
			Add some missing isel predicates on def : Pat patterns to avoid generating VFP vmla / vmls (they cause stalls). Disabling them in isel is probably not the right solution; I'll look into a proper solution next.
llvm-svn: 118922
This commit is contained in:
		
							parent
							
								
									6bb1ae9d45
								
							
						
					
					
						commit
						2d59ee34f1
					
				| 
						 | 
					@ -168,7 +168,8 @@ def : Processor<"cortex-a8",        CortexA8Itineraries,
 | 
				
			||||||
                                    [ArchV7A, ProcA8,
 | 
					                                    [ArchV7A, ProcA8,
 | 
				
			||||||
                                     FeatureHasSlowVMLx, FeatureT2XtPk]>;
 | 
					                                     FeatureHasSlowVMLx, FeatureT2XtPk]>;
 | 
				
			||||||
def : Processor<"cortex-a9",        CortexA9Itineraries,
 | 
					def : Processor<"cortex-a9",        CortexA9Itineraries,
 | 
				
			||||||
                                    [ArchV7A, ProcA9, FeatureT2XtPk]>;
 | 
					                                    [ArchV7A, ProcA9,
 | 
				
			||||||
 | 
					                                     FeatureHasSlowVMLx, FeatureT2XtPk]>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// V7M Processors.
 | 
					// V7M Processors.
 | 
				
			||||||
def : ProcNoItin<"cortex-m3",       [ArchV7M]>;
 | 
					def : ProcNoItin<"cortex-m3",       [ArchV7M]>;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1577,33 +1577,6 @@ class ADbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
 | 
				
			||||||
  let Inst{4}     = op4;
 | 
					  let Inst{4}     = op4;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// Double precision, binary, VML[AS] (for additional predicate)
 | 
					 | 
				
			||||||
class ADbI_vmlX<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
 | 
					 | 
				
			||||||
           dag iops, InstrItinClass itin, string opc, string asm,
 | 
					 | 
				
			||||||
           list<dag> pattern>
 | 
					 | 
				
			||||||
  : VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, pattern> {
 | 
					 | 
				
			||||||
  // Instruction operands.
 | 
					 | 
				
			||||||
  bits<5> Dd;
 | 
					 | 
				
			||||||
  bits<5> Dn;
 | 
					 | 
				
			||||||
  bits<5> Dm;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  // Encode instruction operands.
 | 
					 | 
				
			||||||
  let Inst{19-16} = Dn{3-0};
 | 
					 | 
				
			||||||
  let Inst{7}     = Dn{4};
 | 
					 | 
				
			||||||
  let Inst{15-12} = Dd{3-0};
 | 
					 | 
				
			||||||
  let Inst{22}    = Dd{4};
 | 
					 | 
				
			||||||
  let Inst{3-0}   = Dm{3-0};
 | 
					 | 
				
			||||||
  let Inst{5}     = Dm{4};
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  let Inst{27-23} = opcod1;
 | 
					 | 
				
			||||||
  let Inst{21-20} = opcod2;
 | 
					 | 
				
			||||||
  let Inst{11-9}  = 0b101;
 | 
					 | 
				
			||||||
  let Inst{8}     = 1;          // Double precision
 | 
					 | 
				
			||||||
  let Inst{6}     = op6;
 | 
					 | 
				
			||||||
  let Inst{4}     = op4;
 | 
					 | 
				
			||||||
  list<Predicate> Predicates = [HasVFP2, UseVMLx];
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
// Single precision, unary
 | 
					// Single precision, unary
 | 
				
			||||||
class ASuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
 | 
					class ASuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
 | 
				
			||||||
           bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc,
 | 
					           bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc,
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -738,80 +738,96 @@ def VULTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 1,
 | 
				
			||||||
// FP FMA Operations.
 | 
					// FP FMA Operations.
 | 
				
			||||||
//
 | 
					//
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def VMLAD : ADbI_vmlX<0b11100, 0b00, 0, 0,
 | 
					def VMLAD : ADbI<0b11100, 0b00, 0, 0,
 | 
				
			||||||
                 (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
 | 
					                 (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
 | 
				
			||||||
                 IIC_fpMAC64, "vmla", ".f64\t$Dd, $Dn, $Dm",
 | 
					                 IIC_fpMAC64, "vmla", ".f64\t$Dd, $Dn, $Dm",
 | 
				
			||||||
                 [(set DPR:$Dd, (fadd (fmul DPR:$Dn, DPR:$Dm),
 | 
					                 [(set DPR:$Dd, (fadd (fmul DPR:$Dn, DPR:$Dm),
 | 
				
			||||||
                                      (f64 DPR:$Ddin)))]>,
 | 
					                                      (f64 DPR:$Ddin)))]>,
 | 
				
			||||||
              RegConstraint<"$Ddin = $Dd">;
 | 
					              RegConstraint<"$Ddin = $Dd">,
 | 
				
			||||||
 | 
					              Requires<[HasVFP2,UseVMLx]>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
 | 
					def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
 | 
				
			||||||
                  (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
 | 
					                  (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
 | 
				
			||||||
                  IIC_fpMAC32, "vmla", ".f32\t$Sd, $Sn, $Sm",
 | 
					                  IIC_fpMAC32, "vmla", ".f32\t$Sd, $Sn, $Sm",
 | 
				
			||||||
                  [(set SPR:$Sd, (fadd (fmul SPR:$Sn, SPR:$Sm),
 | 
					                  [(set SPR:$Sd, (fadd (fmul SPR:$Sn, SPR:$Sm),
 | 
				
			||||||
                                       SPR:$Sdin))]>,
 | 
					                                       SPR:$Sdin))]>,
 | 
				
			||||||
              RegConstraint<"$Sdin = $Sd">;
 | 
					              RegConstraint<"$Sdin = $Sd">,
 | 
				
			||||||
 | 
					              Requires<[HasVFP2,DontUseNEONForFP,UseVMLx]>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def : Pat<(fadd DPR:$dstin, (fmul DPR:$a, (f64 DPR:$b))),
 | 
					def : Pat<(fadd DPR:$dstin, (fmul DPR:$a, (f64 DPR:$b))),
 | 
				
			||||||
          (VMLAD DPR:$dstin, DPR:$a, DPR:$b)>, Requires<[DontUseNEONForFP]>;
 | 
					          (VMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
 | 
				
			||||||
 | 
					          Requires<[HasVFP2,UseVMLx]>;
 | 
				
			||||||
def : Pat<(fadd SPR:$dstin, (fmul SPR:$a, SPR:$b)),
 | 
					def : Pat<(fadd SPR:$dstin, (fmul SPR:$a, SPR:$b)),
 | 
				
			||||||
          (VMLAS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[DontUseNEONForFP]>;
 | 
					          (VMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
 | 
				
			||||||
 | 
					          Requires<[HasVFP2,DontUseNEONForFP, UseVMLx]>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def VMLSD : ADbI_vmlX<0b11100, 0b00, 1, 0,
 | 
					def VMLSD : ADbI<0b11100, 0b00, 1, 0,
 | 
				
			||||||
                 (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
 | 
					                 (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
 | 
				
			||||||
                 IIC_fpMAC64, "vmls", ".f64\t$Dd, $Dn, $Dm",
 | 
					                 IIC_fpMAC64, "vmls", ".f64\t$Dd, $Dn, $Dm",
 | 
				
			||||||
                 [(set DPR:$Dd, (fadd (fneg (fmul DPR:$Dn,DPR:$Dm)),
 | 
					                 [(set DPR:$Dd, (fadd (fneg (fmul DPR:$Dn,DPR:$Dm)),
 | 
				
			||||||
                                            (f64 DPR:$Ddin)))]>,
 | 
					                                            (f64 DPR:$Ddin)))]>,
 | 
				
			||||||
              RegConstraint<"$Ddin = $Dd">;
 | 
					              RegConstraint<"$Ddin = $Dd">,
 | 
				
			||||||
 | 
					              Requires<[HasVFP2,UseVMLx]>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
 | 
					def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
 | 
				
			||||||
                  (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
 | 
					                  (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
 | 
				
			||||||
                  IIC_fpMAC32, "vmls", ".f32\t$Sd, $Sn, $Sm",
 | 
					                  IIC_fpMAC32, "vmls", ".f32\t$Sd, $Sn, $Sm",
 | 
				
			||||||
                  [(set SPR:$Sd, (fadd (fneg (fmul SPR:$Sn, SPR:$Sm)),
 | 
					                  [(set SPR:$Sd, (fadd (fneg (fmul SPR:$Sn, SPR:$Sm)),
 | 
				
			||||||
                                       SPR:$Sdin))]>,
 | 
					                                       SPR:$Sdin))]>,
 | 
				
			||||||
              RegConstraint<"$Sdin = $Sd">;
 | 
					              RegConstraint<"$Sdin = $Sd">,
 | 
				
			||||||
 | 
					              Requires<[HasVFP2,DontUseNEONForFP,UseVMLx]>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def : Pat<(fsub DPR:$dstin, (fmul DPR:$a, (f64 DPR:$b))),
 | 
					def : Pat<(fsub DPR:$dstin, (fmul DPR:$a, (f64 DPR:$b))),
 | 
				
			||||||
          (VMLSD DPR:$dstin, DPR:$a, DPR:$b)>, Requires<[DontUseNEONForFP]>;
 | 
					          (VMLSD DPR:$dstin, DPR:$a, DPR:$b)>,
 | 
				
			||||||
 | 
					          Requires<[HasVFP2,UseVMLx]>;
 | 
				
			||||||
def : Pat<(fsub SPR:$dstin, (fmul SPR:$a, SPR:$b)),
 | 
					def : Pat<(fsub SPR:$dstin, (fmul SPR:$a, SPR:$b)),
 | 
				
			||||||
          (VMLSS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[DontUseNEONForFP]>;
 | 
					          (VMLSS SPR:$dstin, SPR:$a, SPR:$b)>,
 | 
				
			||||||
 | 
					          Requires<[HasVFP2,DontUseNEONForFP,UseVMLx]>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def VNMLAD : ADbI_vmlX<0b11100, 0b01, 1, 0,
 | 
					def VNMLAD : ADbI<0b11100, 0b01, 1, 0,
 | 
				
			||||||
                  (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
 | 
					                  (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
 | 
				
			||||||
                  IIC_fpMAC64, "vnmla", ".f64\t$Dd, $Dn, $Dm",
 | 
					                  IIC_fpMAC64, "vnmla", ".f64\t$Dd, $Dn, $Dm",
 | 
				
			||||||
                  [(set DPR:$Dd,(fsub (fneg (fmul DPR:$Dn,DPR:$Dm)),
 | 
					                  [(set DPR:$Dd,(fsub (fneg (fmul DPR:$Dn,DPR:$Dm)),
 | 
				
			||||||
                                      (f64 DPR:$Ddin)))]>,
 | 
					                                      (f64 DPR:$Ddin)))]>,
 | 
				
			||||||
                RegConstraint<"$Ddin = $Dd">;
 | 
					                RegConstraint<"$Ddin = $Dd">,
 | 
				
			||||||
 | 
					                Requires<[HasVFP2,UseVMLx]>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
 | 
					def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
 | 
				
			||||||
                  (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
 | 
					                  (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
 | 
				
			||||||
                  IIC_fpMAC32, "vnmla", ".f32\t$Sd, $Sn, $Sm",
 | 
					                  IIC_fpMAC32, "vnmla", ".f32\t$Sd, $Sn, $Sm",
 | 
				
			||||||
                  [(set SPR:$Sd, (fsub (fneg (fmul SPR:$Sn, SPR:$Sm)),
 | 
					                  [(set SPR:$Sd, (fsub (fneg (fmul SPR:$Sn, SPR:$Sm)),
 | 
				
			||||||
                                       SPR:$Sdin))]>,
 | 
					                                       SPR:$Sdin))]>,
 | 
				
			||||||
                RegConstraint<"$Sdin = $Sd">;
 | 
					                RegConstraint<"$Sdin = $Sd">,
 | 
				
			||||||
 | 
					                Requires<[HasVFP2,DontUseNEONForFP,UseVMLx]>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def : Pat<(fsub (fneg (fmul DPR:$a, (f64 DPR:$b))), DPR:$dstin),
 | 
					def : Pat<(fsub (fneg (fmul DPR:$a, (f64 DPR:$b))), DPR:$dstin),
 | 
				
			||||||
          (VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>, Requires<[DontUseNEONForFP]>;
 | 
					          (VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
 | 
				
			||||||
 | 
					          Requires<[HasVFP2,UseVMLx]>;
 | 
				
			||||||
def : Pat<(fsub (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin),
 | 
					def : Pat<(fsub (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin),
 | 
				
			||||||
          (VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[DontUseNEONForFP]>;
 | 
					          (VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
 | 
				
			||||||
 | 
					          Requires<[HasVFP2,DontUseNEONForFP,UseVMLx]>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def VNMLSD : ADbI_vmlX<0b11100, 0b01, 0, 0,
 | 
					def VNMLSD : ADbI<0b11100, 0b01, 0, 0,
 | 
				
			||||||
                  (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
 | 
					                  (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
 | 
				
			||||||
                  IIC_fpMAC64, "vnmls", ".f64\t$Dd, $Dn, $Dm",
 | 
					                  IIC_fpMAC64, "vnmls", ".f64\t$Dd, $Dn, $Dm",
 | 
				
			||||||
                  [(set DPR:$Dd, (fsub (fmul DPR:$Dn, DPR:$Dm),
 | 
					                  [(set DPR:$Dd, (fsub (fmul DPR:$Dn, DPR:$Dm),
 | 
				
			||||||
                                       (f64 DPR:$Ddin)))]>,
 | 
					                                       (f64 DPR:$Ddin)))]>,
 | 
				
			||||||
               RegConstraint<"$Ddin = $Dd">;
 | 
					               RegConstraint<"$Ddin = $Dd">,
 | 
				
			||||||
 | 
					               Requires<[HasVFP2,UseVMLx]>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def VNMLSS : ASbI<0b11100, 0b01, 0, 0,
 | 
					def VNMLSS : ASbI<0b11100, 0b01, 0, 0,
 | 
				
			||||||
                  (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
 | 
					                  (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
 | 
				
			||||||
                  IIC_fpMAC32, "vnmls", ".f32\t$Sd, $Sn, $Sm",
 | 
					                  IIC_fpMAC32, "vnmls", ".f32\t$Sd, $Sn, $Sm",
 | 
				
			||||||
                  [(set SPR:$Sd, (fsub (fmul SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>,
 | 
					                  [(set SPR:$Sd, (fsub (fmul SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>,
 | 
				
			||||||
                         RegConstraint<"$Sdin = $Sd">;
 | 
					                         RegConstraint<"$Sdin = $Sd">,
 | 
				
			||||||
 | 
					                  Requires<[HasVFP2,DontUseNEONForFP,UseVMLx]>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def : Pat<(fsub (fmul DPR:$a, (f64 DPR:$b)), DPR:$dstin),
 | 
					def : Pat<(fsub (fmul DPR:$a, (f64 DPR:$b)), DPR:$dstin),
 | 
				
			||||||
          (VNMLSD DPR:$dstin, DPR:$a, DPR:$b)>, Requires<[DontUseNEONForFP]>;
 | 
					          (VNMLSD DPR:$dstin, DPR:$a, DPR:$b)>,
 | 
				
			||||||
 | 
					          Requires<[HasVFP2,UseVMLx]>;
 | 
				
			||||||
def : Pat<(fsub (fmul SPR:$a, SPR:$b), SPR:$dstin),
 | 
					def : Pat<(fsub (fmul SPR:$a, SPR:$b), SPR:$dstin),
 | 
				
			||||||
          (VNMLSS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[DontUseNEONForFP]>;
 | 
					          (VNMLSS SPR:$dstin, SPR:$a, SPR:$b)>,
 | 
				
			||||||
 | 
					          Requires<[HasVFP2,DontUseNEONForFP,UseVMLx]>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
//===----------------------------------------------------------------------===//
 | 
					//===----------------------------------------------------------------------===//
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1,24 +1,51 @@
 | 
				
			||||||
; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
 | 
					; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
 | 
				
			||||||
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0
 | 
					; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NEON
 | 
				
			||||||
; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8
 | 
					; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8
 | 
				
			||||||
; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
define float @test(float %acc, float %a, float %b) {
 | 
					define float @t1(float %acc, float %a, float %b) {
 | 
				
			||||||
entry:
 | 
					entry:
 | 
				
			||||||
 | 
					; VFP2: t1:
 | 
				
			||||||
 | 
					; VFP2: vmla.f32
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					; NEON: t1:
 | 
				
			||||||
 | 
					; NEON: vmla.f32
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					; A8: t1:
 | 
				
			||||||
 | 
					; A8: vmul.f32
 | 
				
			||||||
 | 
					; A8: vadd.f32
 | 
				
			||||||
	%0 = fmul float %a, %b
 | 
						%0 = fmul float %a, %b
 | 
				
			||||||
        %1 = fadd float %acc, %0
 | 
					        %1 = fadd float %acc, %0
 | 
				
			||||||
	ret float %1
 | 
						ret float %1
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; VFP2: test:
 | 
					define double @t2(double %acc, double %a, double %b) {
 | 
				
			||||||
; VFP2: 	vmla.f32	s2, s1, s0
 | 
					entry:
 | 
				
			||||||
 | 
					; VFP2: t2:
 | 
				
			||||||
 | 
					; VFP2: vmla.f64
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; NFP1: test:
 | 
					; NEON: t2:
 | 
				
			||||||
; NFP1: 	vmul.f32	d0, d1, d0
 | 
					; NEON: vmla.f64
 | 
				
			||||||
; NFP0: test:
 | 
					 | 
				
			||||||
; NFP0: 	vmla.f32	s2, s1, s0
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
; CORTEXA8: test:
 | 
					; A8: t2:
 | 
				
			||||||
; CORTEXA8: 	vmul.f32	d0, d1, d0
 | 
					; A8: vmul.f64
 | 
				
			||||||
; CORTEXA9: test:
 | 
					; A8: vadd.f64
 | 
				
			||||||
; CORTEXA9: 	vmla.f32	s2, s1, s0
 | 
						%0 = fmul double %a, %b
 | 
				
			||||||
 | 
					        %1 = fadd double %acc, %0
 | 
				
			||||||
 | 
						ret double %1
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					define float @t3(float %acc, float %a, float %b) {
 | 
				
			||||||
 | 
					entry:
 | 
				
			||||||
 | 
					; VFP2: t3:
 | 
				
			||||||
 | 
					; VFP2: vmla.f32
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					; NEON: t3:
 | 
				
			||||||
 | 
					; NEON: vmla.f32
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					; A8: t3:
 | 
				
			||||||
 | 
					; A8: vmul.f32
 | 
				
			||||||
 | 
					; A8: vadd.f32
 | 
				
			||||||
 | 
						%0 = fmul float %a, %b
 | 
				
			||||||
 | 
					        %1 = fadd float %0, %acc
 | 
				
			||||||
 | 
						ret float %1
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1,24 +1,35 @@
 | 
				
			||||||
; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
 | 
					; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
 | 
				
			||||||
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0
 | 
					; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NEON
 | 
				
			||||||
; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8
 | 
					; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8
 | 
				
			||||||
; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
define float @test(float %acc, float %a, float %b) {
 | 
					define float @t1(float %acc, float %a, float %b) {
 | 
				
			||||||
entry:
 | 
					entry:
 | 
				
			||||||
 | 
					; VFP2: t1:
 | 
				
			||||||
 | 
					; VFP2: vnmls.f32
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					; NEON: t1:
 | 
				
			||||||
 | 
					; NEON: vnmls.f32
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					; A8: t1:
 | 
				
			||||||
 | 
					; A8: vmul.f32
 | 
				
			||||||
 | 
					; A8: vsub.f32
 | 
				
			||||||
	%0 = fmul float %a, %b
 | 
						%0 = fmul float %a, %b
 | 
				
			||||||
        %1 = fsub float %0, %acc
 | 
					        %1 = fsub float %0, %acc
 | 
				
			||||||
	ret float %1
 | 
						ret float %1
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; VFP2: test:
 | 
					define double @t2(double %acc, double %a, double %b) {
 | 
				
			||||||
; VFP2: 	vnmls.f32	s2, s1, s0
 | 
					entry:
 | 
				
			||||||
 | 
					; VFP2: t2:
 | 
				
			||||||
 | 
					; VFP2: vnmls.f64
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; NFP1: test:
 | 
					; NEON: t2:
 | 
				
			||||||
; NFP1: 	vnmls.f32	s2, s1, s0
 | 
					; NEON: vnmls.f64
 | 
				
			||||||
; NFP0: test:
 | 
					 | 
				
			||||||
; NFP0: 	vnmls.f32	s2, s1, s0
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
; CORTEXA8: test:
 | 
					; A8: t2:
 | 
				
			||||||
; CORTEXA8: 	vnmls.f32	s2, s1, s0
 | 
					; A8: vmul.f64
 | 
				
			||||||
; CORTEXA9: test:
 | 
					; A8: vsub.f64
 | 
				
			||||||
; CORTEXA9: 	vnmls.f32	s2, s1, s0
 | 
						%0 = fmul double %a, %b
 | 
				
			||||||
 | 
					        %1 = fsub double %0, %acc
 | 
				
			||||||
 | 
						ret double %1
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1,20 +1,35 @@
 | 
				
			||||||
; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
 | 
					; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
 | 
				
			||||||
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NEON
 | 
					; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NEON
 | 
				
			||||||
; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=NEONFP
 | 
					; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8
 | 
				
			||||||
 | 
					
 | 
				
			||||||
define float @test(float %acc, float %a, float %b) {
 | 
					define float @t1(float %acc, float %a, float %b) {
 | 
				
			||||||
entry:
 | 
					entry:
 | 
				
			||||||
 | 
					; VFP2: t1:
 | 
				
			||||||
; VFP2: vmls.f32
 | 
					; VFP2: vmls.f32
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					; NEON: t1:
 | 
				
			||||||
; NEON: vmls.f32
 | 
					; NEON: vmls.f32
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; NEONFP-NOT: vmls
 | 
					; A8: t1:
 | 
				
			||||||
; NEONFP-NOT: vmov.f32
 | 
					; A8: vmul.f32
 | 
				
			||||||
; NEONFP:     vmul.f32
 | 
					; A8: vsub.f32
 | 
				
			||||||
; NEONFP:     vsub.f32
 | 
					 | 
				
			||||||
; NEONFP:     vmov
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	%0 = fmul float %a, %b
 | 
						%0 = fmul float %a, %b
 | 
				
			||||||
        %1 = fsub float %acc, %0
 | 
					        %1 = fsub float %acc, %0
 | 
				
			||||||
	ret float %1
 | 
						ret float %1
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					define double @t2(double %acc, double %a, double %b) {
 | 
				
			||||||
 | 
					entry:
 | 
				
			||||||
 | 
					; VFP2: t2:
 | 
				
			||||||
 | 
					; VFP2: vmls.f64
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					; NEON: t2:
 | 
				
			||||||
 | 
					; NEON: vmls.f64
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					; A8: t2:
 | 
				
			||||||
 | 
					; A8: vmul.f64
 | 
				
			||||||
 | 
					; A8: vsub.f64
 | 
				
			||||||
 | 
						%0 = fmul double %a, %b
 | 
				
			||||||
 | 
					        %1 = fsub double %acc, %0
 | 
				
			||||||
 | 
						ret double %1
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1,23 +1,71 @@
 | 
				
			||||||
; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s
 | 
					; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
 | 
				
			||||||
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 | 
					; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NEON
 | 
				
			||||||
; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
 | 
					; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8
 | 
				
			||||||
; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
define float @test1(float %acc, float %a, float %b) nounwind {
 | 
					define float @t1(float %acc, float %a, float %b) nounwind {
 | 
				
			||||||
; CHECK: vnmla.f32 s{{.*}}, s{{.*}}, s{{.*}}
 | 
					 | 
				
			||||||
entry:
 | 
					entry:
 | 
				
			||||||
 | 
					; VFP2: t1:
 | 
				
			||||||
 | 
					; VFP2: vnmla.f32
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					; NEON: t1:
 | 
				
			||||||
 | 
					; NEON: vnmla.f32
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					; A8: t1:
 | 
				
			||||||
 | 
					; A8: vnmul.f32 s0, s1, s0
 | 
				
			||||||
 | 
					; A8: vsub.f32 d0, d0, d1
 | 
				
			||||||
	%0 = fmul float %a, %b
 | 
						%0 = fmul float %a, %b
 | 
				
			||||||
	%1 = fsub float -0.0, %0
 | 
						%1 = fsub float -0.0, %0
 | 
				
			||||||
        %2 = fsub float %1, %acc
 | 
					        %2 = fsub float %1, %acc
 | 
				
			||||||
	ret float %2
 | 
						ret float %2
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
define float @test2(float %acc, float %a, float %b) nounwind {
 | 
					define float @t2(float %acc, float %a, float %b) nounwind {
 | 
				
			||||||
; CHECK: vnmla.f32 s{{.*}}, s{{.*}}, s{{.*}}
 | 
					 | 
				
			||||||
entry:
 | 
					entry:
 | 
				
			||||||
 | 
					; VFP2: t2:
 | 
				
			||||||
 | 
					; VFP2: vnmla.f32
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					; NEON: t2:
 | 
				
			||||||
 | 
					; NEON: vnmla.f32
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					; A8: t2:
 | 
				
			||||||
 | 
					; A8: vnmul.f32 s0, s1, s0
 | 
				
			||||||
 | 
					; A8: vsub.f32 d0, d0, d1
 | 
				
			||||||
	%0 = fmul float %a, %b
 | 
						%0 = fmul float %a, %b
 | 
				
			||||||
	%1 = fmul float -1.0, %0
 | 
						%1 = fmul float -1.0, %0
 | 
				
			||||||
        %2 = fsub float %1, %acc
 | 
					        %2 = fsub float %1, %acc
 | 
				
			||||||
	ret float %2
 | 
						ret float %2
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					define double @t3(double %acc, double %a, double %b) nounwind {
 | 
				
			||||||
 | 
					entry:
 | 
				
			||||||
 | 
					; VFP2: t3:
 | 
				
			||||||
 | 
					; VFP2: vnmla.f64
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					; NEON: t3:
 | 
				
			||||||
 | 
					; NEON: vnmla.f64
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					; A8: t3:
 | 
				
			||||||
 | 
					; A8: vnmul.f64 d16, d16, d17
 | 
				
			||||||
 | 
					; A8: vsub.f64 d16, d16, d17
 | 
				
			||||||
 | 
						%0 = fmul double %a, %b
 | 
				
			||||||
 | 
						%1 = fsub double -0.0, %0
 | 
				
			||||||
 | 
					        %2 = fsub double %1, %acc
 | 
				
			||||||
 | 
						ret double %2
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					define double @t4(double %acc, double %a, double %b) nounwind {
 | 
				
			||||||
 | 
					entry:
 | 
				
			||||||
 | 
					; VFP2: t4:
 | 
				
			||||||
 | 
					; VFP2: vnmla.f64
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					; NEON: t4:
 | 
				
			||||||
 | 
					; NEON: vnmla.f64
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					; A8: t4:
 | 
				
			||||||
 | 
					; A8: vnmul.f64 d16, d16, d17
 | 
				
			||||||
 | 
					; A8: vsub.f64 d16, d16, d17
 | 
				
			||||||
 | 
						%0 = fmul double %a, %b
 | 
				
			||||||
 | 
						%1 = fmul double -1.0, %0
 | 
				
			||||||
 | 
					        %2 = fsub double %1, %acc
 | 
				
			||||||
 | 
						ret double %2
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -4,14 +4,14 @@
 | 
				
			||||||
; constant offset addressing, so that each of the following stores
 | 
					; constant offset addressing, so that each of the following stores
 | 
				
			||||||
; uses the same register.
 | 
					; uses the same register.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; CHECK: vstr.32 s{{.*}}, [lr, #-128]
 | 
					; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #-128]
 | 
				
			||||||
; CHECK: vstr.32 s{{.*}}, [lr, #-96]
 | 
					; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #-96]
 | 
				
			||||||
; CHECK: vstr.32 s{{.*}}, [lr, #-64]
 | 
					; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #-64]
 | 
				
			||||||
; CHECK: vstr.32 s{{.*}}, [lr, #-32]
 | 
					; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #-32]
 | 
				
			||||||
; CHECK: vstr.32 s{{.*}}, [lr]
 | 
					; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}]
 | 
				
			||||||
; CHECK: vstr.32 s{{.*}}, [lr, #32]
 | 
					; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #32]
 | 
				
			||||||
; CHECK: vstr.32 s{{.*}}, [lr, #64]
 | 
					; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #64]
 | 
				
			||||||
; CHECK: vstr.32 s{{.*}}, [lr, #96]
 | 
					; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #96]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
 | 
					target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -627,7 +627,7 @@ bb24:                                             ; preds = %bb23
 | 
				
			||||||
; in a register.
 | 
					; in a register.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
;      CHECK: @ %bb24
 | 
					;      CHECK: @ %bb24
 | 
				
			||||||
; CHECK: subs{{.*}} [[REGISTER:(r[0-9]+)|(lr)]], #1
 | 
					; CHECK: subs{{.*}} {{(r[0-9]+)|(lr)}}, #1
 | 
				
			||||||
; CHECK: bne.w
 | 
					; CHECK: bne.w
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  %92 = icmp eq i32 %tmp81, %indvar78             ; <i1> [#uses=1]
 | 
					  %92 = icmp eq i32 %tmp81, %indvar78             ; <i1> [#uses=1]
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue