forked from OSchip/llvm-project
				
			When using NEON for single-precision FP, the NEON result must be placed in D0-D15, as these are the only D registers with S subregisters. Introduce a new register class (DPR_VFP2) to represent D0-D15 and use it in the NEON single-precision FP patterns.
llvm-svn: 78244
This commit is contained in:
		
							parent
							
								
									fdd2519eb5
								
							
						
					
					
						commit
						e5b5d8fbb3
					
				| 
						 | 
				
			
			@ -610,23 +610,29 @@ ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
 | 
			
		|||
  if (I != MBB.end()) DL = I->getDebugLoc();
 | 
			
		||||
 | 
			
		||||
  if (DestRC != SrcRC) {
 | 
			
		||||
    // Cross-class copies are not yet supported, apart from DPR <-> DPR_VFP2.
 | 
			
		||||
    if (((DestRC == ARM::DPRRegisterClass) && (SrcRC == ARM::DPR_VFP2RegisterClass)) ||
 | 
			
		||||
        ((SrcRC == ARM::DPRRegisterClass) && (DestRC == ARM::DPR_VFP2RegisterClass))) {
 | 
			
		||||
      // Allow copy between DPR and DPR_VFP2.
 | 
			
		||||
    } else {
 | 
			
		||||
      return false;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (DestRC == ARM::GPRRegisterClass)
 | 
			
		||||
  if (DestRC == ARM::GPRRegisterClass) {
 | 
			
		||||
    AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr),
 | 
			
		||||
                                        DestReg).addReg(SrcReg)));
 | 
			
		||||
  else if (DestRC == ARM::SPRRegisterClass)
 | 
			
		||||
  } else if (DestRC == ARM::SPRRegisterClass) {
 | 
			
		||||
    AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FCPYS), DestReg)
 | 
			
		||||
                   .addReg(SrcReg));
 | 
			
		||||
  else if (DestRC == ARM::DPRRegisterClass)
 | 
			
		||||
  } else if ((DestRC == ARM::DPRRegisterClass) ||
 | 
			
		||||
             (DestRC == ARM::DPR_VFP2RegisterClass)) {
 | 
			
		||||
    AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FCPYD), DestReg)
 | 
			
		||||
                   .addReg(SrcReg));
 | 
			
		||||
  else if (DestRC == ARM::QPRRegisterClass)
 | 
			
		||||
  } else if (DestRC == ARM::QPRRegisterClass) {
 | 
			
		||||
    BuildMI(MBB, I, DL, get(ARM::VMOVQ), DestReg).addReg(SrcReg);
 | 
			
		||||
  else
 | 
			
		||||
  } else {
 | 
			
		||||
    return false;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  return true;
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -285,8 +285,10 @@ class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
 | 
			
		|||
// Basic 2-register operations, scalar single-precision
 | 
			
		||||
class N2VDInts<SDNode OpNode, NeonI Inst>
 | 
			
		||||
  : NEONFPPat<(f32 (OpNode SPR:$a)),
 | 
			
		||||
              (EXTRACT_SUBREG (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
 | 
			
		||||
              (EXTRACT_SUBREG (COPY_TO_REGCLASS 
 | 
			
		||||
                  (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), 
 | 
			
		||||
                                        SPR:$a, arm_ssubreg_0)),
 | 
			
		||||
                               DPR_VFP2),
 | 
			
		||||
               arm_ssubreg_0)>;
 | 
			
		||||
 | 
			
		||||
// Narrow 2-register intrinsics.
 | 
			
		||||
| 
						 | 
				
			
			@ -329,10 +331,12 @@ class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
 | 
			
		|||
// Basic 3-register operations, scalar single-precision
 | 
			
		||||
class N3VDs<SDNode OpNode, NeonI Inst>
 | 
			
		||||
  : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
 | 
			
		||||
              (EXTRACT_SUBREG (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
 | 
			
		||||
              (EXTRACT_SUBREG (COPY_TO_REGCLASS
 | 
			
		||||
                  (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
 | 
			
		||||
                                        SPR:$a, arm_ssubreg_0),
 | 
			
		||||
                        (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
 | 
			
		||||
                                        SPR:$b, arm_ssubreg_0)),
 | 
			
		||||
                               DPR_VFP2),
 | 
			
		||||
               arm_ssubreg_0)>;
 | 
			
		||||
 | 
			
		||||
// Basic 3-register intrinsics, both double- and quad-register.
 | 
			
		||||
| 
						 | 
				
			
			@ -375,12 +379,14 @@ class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
 | 
			
		|||
class N3VDMulOps<SDNode MulNode, SDNode OpNode, NeonI Inst>
 | 
			
		||||
  : NEONFPPat<(f32 (OpNode SPR:$acc, 
 | 
			
		||||
                       (f32 (MulNode SPR:$a, SPR:$b)))),
 | 
			
		||||
              (EXTRACT_SUBREG (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
 | 
			
		||||
              (EXTRACT_SUBREG (COPY_TO_REGCLASS
 | 
			
		||||
                  (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
 | 
			
		||||
                                        SPR:$acc, arm_ssubreg_0),
 | 
			
		||||
                        (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
 | 
			
		||||
                                        SPR:$a, arm_ssubreg_0),
 | 
			
		||||
                        (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
 | 
			
		||||
                                        SPR:$b, arm_ssubreg_0)),
 | 
			
		||||
                               DPR_VFP2),
 | 
			
		||||
               arm_ssubreg_0)>;
 | 
			
		||||
 | 
			
		||||
// Neon 3-argument intrinsics, both double- and quad-register.
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -305,6 +305,14 @@ def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
 | 
			
		|||
  }];
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Subset of DPR registers that are accessible with VFP2 (and so also have
 | 
			
		||||
// 32-bit SPR subregs).
 | 
			
		||||
def DPR_VFP2 : RegisterClass<"ARM", [f64, v2f32], 64,
 | 
			
		||||
                             [D0,  D1,  D2,  D3,  D4,  D5,  D6,  D7,
 | 
			
		||||
                              D8,  D9,  D10, D11, D12, D13, D14, D15]> {
 | 
			
		||||
  let SubRegClassList = [SPR, SPR];
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Generic 128-bit vector register class.
 | 
			
		||||
def QPR : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128,
 | 
			
		||||
                        [Q0,  Q1,  Q2,  Q3,  Q4,  Q5,  Q6,  Q7,
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue