When using NEON for single-precision FP, the NEON result must be placed in D0-D15, as these are the only D registers with S subregs. Introduce a new register class to represent D0-D15 and use it in the NEON single-precision FP patterns.

llvm-svn: 78244
This commit is contained in:
David Goodwin 2009-08-05 21:02:22 +00:00
parent fdd2519eb5
commit e5b5d8fbb3
3 changed files with 41 additions and 21 deletions

View File

@ -610,23 +610,29 @@ ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
if (I != MBB.end()) DL = I->getDebugLoc(); if (I != MBB.end()) DL = I->getDebugLoc();
if (DestRC != SrcRC) { if (DestRC != SrcRC) {
// Not yet supported! if (((DestRC == ARM::DPRRegisterClass) && (SrcRC == ARM::DPR_VFP2RegisterClass)) ||
((SrcRC == ARM::DPRRegisterClass) && (DestRC == ARM::DPR_VFP2RegisterClass))) {
// Allow copy between DPR and DPR_VFP2.
} else {
return false; return false;
} }
}
if (DestRC == ARM::GPRRegisterClass) if (DestRC == ARM::GPRRegisterClass) {
AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr),
DestReg).addReg(SrcReg))); DestReg).addReg(SrcReg)));
else if (DestRC == ARM::SPRRegisterClass) } else if (DestRC == ARM::SPRRegisterClass) {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FCPYS), DestReg) AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FCPYS), DestReg)
.addReg(SrcReg)); .addReg(SrcReg));
else if (DestRC == ARM::DPRRegisterClass) } else if ((DestRC == ARM::DPRRegisterClass) ||
(DestRC == ARM::DPR_VFP2RegisterClass)) {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FCPYD), DestReg) AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FCPYD), DestReg)
.addReg(SrcReg)); .addReg(SrcReg));
else if (DestRC == ARM::QPRRegisterClass) } else if (DestRC == ARM::QPRRegisterClass) {
BuildMI(MBB, I, DL, get(ARM::VMOVQ), DestReg).addReg(SrcReg); BuildMI(MBB, I, DL, get(ARM::VMOVQ), DestReg).addReg(SrcReg);
else } else {
return false; return false;
}
return true; return true;
} }

View File

@ -285,8 +285,10 @@ class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
// Basic 2-register operations, scalar single-precision // Basic 2-register operations, scalar single-precision
class N2VDInts<SDNode OpNode, NeonI Inst> class N2VDInts<SDNode OpNode, NeonI Inst>
: NEONFPPat<(f32 (OpNode SPR:$a)), : NEONFPPat<(f32 (OpNode SPR:$a)),
(EXTRACT_SUBREG (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), (EXTRACT_SUBREG (COPY_TO_REGCLASS
(Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
SPR:$a, arm_ssubreg_0)), SPR:$a, arm_ssubreg_0)),
DPR_VFP2),
arm_ssubreg_0)>; arm_ssubreg_0)>;
// Narrow 2-register intrinsics. // Narrow 2-register intrinsics.
@ -329,10 +331,12 @@ class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
// Basic 3-register operations, scalar single-precision // Basic 3-register operations, scalar single-precision
class N3VDs<SDNode OpNode, NeonI Inst> class N3VDs<SDNode OpNode, NeonI Inst>
: NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)), : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
(EXTRACT_SUBREG (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), (EXTRACT_SUBREG (COPY_TO_REGCLASS
(Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
SPR:$a, arm_ssubreg_0), SPR:$a, arm_ssubreg_0),
(INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
SPR:$b, arm_ssubreg_0)), SPR:$b, arm_ssubreg_0)),
DPR_VFP2),
arm_ssubreg_0)>; arm_ssubreg_0)>;
// Basic 3-register intrinsics, both double- and quad-register. // Basic 3-register intrinsics, both double- and quad-register.
@ -375,12 +379,14 @@ class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
class N3VDMulOps<SDNode MulNode, SDNode OpNode, NeonI Inst> class N3VDMulOps<SDNode MulNode, SDNode OpNode, NeonI Inst>
: NEONFPPat<(f32 (OpNode SPR:$acc, : NEONFPPat<(f32 (OpNode SPR:$acc,
(f32 (MulNode SPR:$a, SPR:$b)))), (f32 (MulNode SPR:$a, SPR:$b)))),
(EXTRACT_SUBREG (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), (EXTRACT_SUBREG (COPY_TO_REGCLASS
(Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
SPR:$acc, arm_ssubreg_0), SPR:$acc, arm_ssubreg_0),
(INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
SPR:$a, arm_ssubreg_0), SPR:$a, arm_ssubreg_0),
(INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
SPR:$b, arm_ssubreg_0)), SPR:$b, arm_ssubreg_0)),
DPR_VFP2),
arm_ssubreg_0)>; arm_ssubreg_0)>;
// Neon 3-argument intrinsics, both double- and quad-register. // Neon 3-argument intrinsics, both double- and quad-register.

View File

@ -305,6 +305,14 @@ def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
}]; }];
} }
// DPR_VFP2 - The subset of the DPR registers (D0-D15) that is accessible with
// VFP2. These are the D registers that also have 32-bit SPR subregs, so a
// value that must later be read through an S subreg (e.g. the result of a
// NEON single-precision FP operation) has to live in this class.
def DPR_VFP2 : RegisterClass<"ARM", [f64, v2f32], 64,
[D0, D1, D2, D3, D4, D5, D6, D7,
D8, D9, D10, D11, D12, D13, D14, D15]> {
// SPR is listed once for each of the two sub-register entries.
let SubRegClassList = [SPR, SPR];
}
// Generic 128-bit vector register class. // Generic 128-bit vector register class.
def QPR : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128, def QPR : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128,
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7,